In r131488 I misunderstood how VREV works. It splits the vector in half and swaps the elements within each half. Therefore, the real problem was that we were using a VREV64 for a 4xi16, when we should have been using a VREV32.

Updated test case and reverted change to the PerfectShuffle Table. llvm-svn: 131529
2025-01-31 12:41:49 +01:00 · 2011-05-18 06:42:21 +00:00 · 2011-05-18 06:42:21 +00:00 · 06cb9cbf98
commit 06cb9cbf98
parent 2e7d0eef98
4 changed files with 1353 additions and 1347 deletions
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@ -4182,7 +4182,15 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
  switch (OpNum) {
  default: llvm_unreachable("Unknown shuffle opcode!");
  case OP_VREV:
-    return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
+    // VREV divides the vector in half and swaps within the half.
+    if (VT.getVectorElementType() == MVT::i32)
+      return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
+    // vrev <4 x i16> -> VREV32
+    if (VT.getVectorElementType() == MVT::i16)
+      return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
+    // vrev <4 x i8> -> VREV16
+    assert(VT.getVectorElementType() == MVT::i8);
+    return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
  case OP_VDUP0:
  case OP_VDUP1:
  case OP_VDUP2:
--- a/lib/Target/ARM/ARMPerfectShuffle.h
+++ b/lib/Target/ARM/ARMPerfectShuffle.h
--- a/test/CodeGen/ARM/vrev.ll
+++ b/test/CodeGen/ARM/vrev.ll
@ -148,12 +148,11 @@ define void @test_with_vcombine(<4 x float>* %v) nounwind {
  ret void
 }

-;  Test the shuffle of a 4xi16 which exposed a problem with the perfect shuffle table
-;  entry for vrev. 
+; vrev <4 x i16> should use VREV32 and not VREV64
 define void @test_vrev64(<4 x i16>* nocapture %source, <2 x i16>* nocapture %dst) nounwind ssp {
 ; CHECK: test_vrev64:
-; CHECK: vrev64.16
 ; CHECK: vext.16
+; CHECK: vrev32.16
 entry:
  %0 = bitcast <4 x i16>* %source to <8 x i16>*
  %tmp2 = load <8 x i16>* %0, align 4
--- a/utils/PerfectShuffle/PerfectShuffle.cpp
+++ b/utils/PerfectShuffle/PerfectShuffle.cpp
@ -520,7 +520,7 @@ enum {
 };

 struct vrev : public Operator {
-  vrev() : Operator(0x3210, "vrev", OP_VREV) {}
+  vrev() : Operator(0x1032, "vrev", OP_VREV) {}
 } the_vrev;

 template<unsigned Elt>