Implement Neon VTRN instructions.

For now, anyway, these are selected directly from the intrinsics produced by the frontend. If it is more convenient to have a custom DAG node for using these to implement shuffles, we can add that later. llvm-svn: 78459
2024-11-25 04:02:41 +01:00 · 2009-08-08 05:53:00 +00:00 · 2009-08-08 05:53:00 +00:00 · 935ee0c122
commit 935ee0c122
parent 591187332c
2 changed files with 48 additions and 0 deletions
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -1448,6 +1448,33 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
                            N->getOperand(4), N->getOperand(5) };
    return CurDAG->getTargetNode(Opc, dl, MVT::Other, Ops, 7);
  }
+
+  case ISD::INTRINSIC_WO_CHAIN: {
+    unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+    MVT VT = N->getValueType(0);
+    unsigned Opc = 0;
+
+    // Match intrinsics that return multiple values.
+    switch (IntNo) {
+    default: break;
+
+    case Intrinsic::arm_neon_vtrni:
+      switch (VT.getSimpleVT()) {
+      default: return NULL;
+      case MVT::v8i8:  Opc = ARM::VTRNd8; break;
+      case MVT::v4i16: Opc = ARM::VTRNd16; break;
+      case MVT::v2f32:
+      case MVT::v2i32: Opc = ARM::VTRNd32; break;
+      case MVT::v16i8: Opc = ARM::VTRNq8; break;
+      case MVT::v8i16: Opc = ARM::VTRNq16; break;
+      case MVT::v4f32:
+      case MVT::v4i32: Opc = ARM::VTRNq32; break;
+      }
+      return CurDAG->getTargetNode(Opc, dl, VT, VT, N->getOperand(1),
+                                   N->getOperand(2));
+    }
+    break;
+  }
  }

  return SelectCode(Op);
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -1917,6 +1917,27 @@ class VREV16Q<bits<2> op19_18, string OpcodeStr, ValueType Ty>
 def VREV16d8  : VREV16D<0b00, "vrev16.8", v8i8>;
 def VREV16q8  : VREV16Q<0b00, "vrev16.8", v16i8>;

+//   VTRN     : Vector Transpose
+//   D and Q register forms; "$srcN = $dstN" pairs each input with an output.
+class VTRND<bits<2> op19_18, string OpcodeStr>
+  : N2V<0b11, 0b11, op19_18, 0b10, 0b00001, 0, 0, (outs DPR:$dst1, DPR:$dst2),
+        (ins DPR:$src1, DPR:$src2), NoItinerary, 
+        !strconcat(OpcodeStr, "\t$dst1, $dst2"),
+        "$src1 = $dst1, $src2 = $dst2", []>;
+class VTRNQ<bits<2> op19_18, string OpcodeStr>
+  : N2V<0b11, 0b11, op19_18, 0b10, 0b00001, 1, 0, (outs QPR:$dst1, QPR:$dst2),
+        (ins QPR:$src1, QPR:$src2), NoItinerary, 
+        !strconcat(OpcodeStr, "\t$dst1, $dst2"),
+        "$src1 = $dst1, $src2 = $dst2", []>;
+// op19_18 pairs with the mnemonic suffix: 0b00 -> .8, 0b01 -> .16, 0b10 -> .32
+def  VTRNd8   : VTRND<0b00, "vtrn.8">;
+def  VTRNd16  : VTRND<0b01, "vtrn.16">;
+def  VTRNd32  : VTRND<0b10, "vtrn.32">;
+
+def  VTRNq8   : VTRNQ<0b00, "vtrn.8">;
+def  VTRNq16  : VTRNQ<0b01, "vtrn.16">;
+def  VTRNq32  : VTRNQ<0b10, "vtrn.32">;
+
 //===----------------------------------------------------------------------===//
 // NEON instructions for single-precision FP math
 //===----------------------------------------------------------------------===//