[codegen,aarch64] Add a target hook to the code generator to control

vector type legalization strategies in a more fine grained manner, and change the legalization of several v1iN types and v1f32 to be widening rather than scalarization on AArch64. This fixes an assertion failure caused by scalarizing nodes like "v1i32 trunc v1i64". As v1i64 is legal it will fail to scalarize v1i32. This also provides a foundation for other targets to have more granular control over how vector types are legalized. Patch by Hao Liu, reviewed by Tim Northover. I'm committing it to allow some work to start taking place on top of this patch as it adds some really important hooks to the backend that I'd like to immediately start using. =] http://reviews.llvm.org/D4322 llvm-svn: 212242
2024-10-19 02:52:53 +02:00 · 2014-07-03 00:23:43 +00:00 · 2014-07-03 00:23:43 +00:00 · fc0fe5064b
commit fc0fe5064b
parent f60da84887
10 changed files with 98 additions and 59 deletions
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@ -185,10 +185,15 @@ public:
  /// Return true if the target has BitExtract instructions.
  bool hasExtractBitsInsn() const { return HasExtractBitsInsn; }

-  /// Return true if a vector of the given type should be split
-  /// (TypeSplitVector) instead of promoted (TypePromoteInteger) during type
-  /// legalization.
-  virtual bool shouldSplitVectorType(EVT /*VT*/) const { return false; }
+  /// Return the preferred vector type legalization action.
+  virtual TargetLoweringBase::LegalizeTypeAction
+  getPreferredVectorAction(EVT VT) const {
+    // The default action for one element vectors is to scalarize
+    if (VT.getVectorNumElements() == 1)
+      return TypeScalarizeVector;
+    // The default action for other vectors is to promote
+    return TypePromoteInteger;
+  }

  // There are two general methods for expanding a BUILD_VECTOR node:
  //  1. Use SCALAR_TO_VECTOR on the defined scalar values and then shuffle
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@ -1085,23 +1085,24 @@ void TargetLoweringBase::computeRegisterProperties() {
  for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT VT = (MVT::SimpleValueType) i;
-    if (isTypeLegal(VT)) continue;
+    if (isTypeLegal(VT))
+      continue;

-    // Determine if there is a legal wider type.  If so, we should promote to
-    // that wider vector type.
    MVT EltVT = VT.getVectorElementType();
    unsigned NElts = VT.getVectorNumElements();
-    if (NElts != 1 && !shouldSplitVectorType(VT)) {
    bool IsLegalWiderType = false;
-      // First try to promote the elements of integer vectors. If no legal
-      // promotion was found, fallback to the widen-vector method.
+    LegalizeTypeAction PreferredAction = getPreferredVectorAction(VT);
+    switch (PreferredAction) {
+    case TypePromoteInteger: {
+      // Try to promote the elements of integer vectors. If no legal
+      // promotion was found, fall through to the widen-vector method.
      for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
        MVT SVT = (MVT::SimpleValueType) nVT;
        // Promote vectors of integers to vectors with the same number
        // of elements, with a wider element type.
        if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits()
-            && SVT.getVectorNumElements() == NElts &&
-            isTypeLegal(SVT) && SVT.getScalarType().isInteger()) {
+            && SVT.getVectorNumElements() == NElts && isTypeLegal(SVT)
+            && SVT.getScalarType().isInteger()) {
          TransformToType[i] = SVT;
          RegisterTypeForVT[i] = SVT;
          NumRegistersForVT[i] = 1;
@ -1110,15 +1111,15 @@ void TargetLoweringBase::computeRegisterProperties() {
          break;
        }
      }
-
-      if (IsLegalWiderType) continue;
-
+      if (IsLegalWiderType)
+        break;
+    }
+    case TypeWidenVector: {
      // Try to widen the vector.
      for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
        MVT SVT = (MVT::SimpleValueType) nVT;
-        if (SVT.getVectorElementType() == EltVT &&
-            SVT.getVectorNumElements() > NElts &&
-            isTypeLegal(SVT)) {
+        if (SVT.getVectorElementType() == EltVT
+            && SVT.getVectorNumElements() > NElts && isTypeLegal(SVT)) {
          TransformToType[i] = SVT;
          RegisterTypeForVT[i] = SVT;
          NumRegistersForVT[i] = 1;
@ -1127,28 +1128,35 @@ void TargetLoweringBase::computeRegisterProperties() {
          break;
        }
      }
-      if (IsLegalWiderType) continue;
+      if (IsLegalWiderType)
+        break;
    }
-
+    case TypeSplitVector:
+    case TypeScalarizeVector: {
      MVT IntermediateVT;
      MVT RegisterVT;
      unsigned NumIntermediates;
-    NumRegistersForVT[i] =
-      getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates,
-                                RegisterVT, this);
+      NumRegistersForVT[i] = getVectorTypeBreakdownMVT(VT, IntermediateVT,
+          NumIntermediates, RegisterVT, this);
      RegisterTypeForVT[i] = RegisterVT;

      MVT NVT = VT.getPow2VectorType();
      if (NVT == VT) {
        // Type is already a power of 2.  The default action is to split.
        TransformToType[i] = MVT::Other;
-      unsigned NumElts = VT.getVectorNumElements();
-      ValueTypeActions.setTypeAction(VT,
-            NumElts > 1 ? TypeSplitVector : TypeScalarizeVector);
+        if (PreferredAction == TypeScalarizeVector)
+          ValueTypeActions.setTypeAction(VT, TypeScalarizeVector);
+        else
+          ValueTypeActions.setTypeAction(VT, TypeSplitVector);
      } else {
        TransformToType[i] = NVT;
        ValueTypeActions.setTypeAction(VT, TypeWidenVector);
      }
+      break;
+    }
+    default:
+      llvm_unreachable("Unknown vector legalization action!");
+    }
  }

  // Determine the 'representative' register class for each value type.
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@ -7886,6 +7886,18 @@ bool AArch64TargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const {
  return Inst->getType()->getPrimitiveSizeInBits() <= 128;
 }

+TargetLoweringBase::LegalizeTypeAction
+AArch64TargetLowering::getPreferredVectorAction(EVT VT) const {
+  MVT SVT = VT.getSimpleVT();
+  // During type legalization, we prefer to widen v1i8, v1i16, v1i32  to v8i8,
+  // v4i16, v2i32 instead of to promote.
+  if (SVT == MVT::v1i8 || SVT == MVT::v1i16 || SVT == MVT::v1i32
+      || SVT == MVT::v1f32)
+    return TypeWidenVector;
+
+  return TargetLoweringBase::getPreferredVectorAction(VT);
+}
+
 Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
                                             AtomicOrdering Ord) const {
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@ -324,6 +324,9 @@ public:

  bool shouldExpandAtomicInIR(Instruction *Inst) const override;

+  TargetLoweringBase::LegalizeTypeAction
+  getPreferredVectorAction(EVT VT) const override;
+
 private:
  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@ -473,8 +473,12 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
  }
 }

-bool NVPTXTargetLowering::shouldSplitVectorType(EVT VT) const {
-  return VT.getScalarType() == MVT::i1;
+TargetLoweringBase::LegalizeTypeAction
+NVPTXTargetLowering::getPreferredVectorAction(EVT VT) const {
+  if (VT.getVectorNumElements() != 1 && VT.getScalarType() == MVT::i1)
+    return TypeSplitVector;
+
+  return TargetLoweringBase::getPreferredVectorAction(VT);
 }

 SDValue
--- a/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
@ -242,7 +242,8 @@ public:
  // PTX always uses 32-bit shift amounts
  MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; }

-  bool shouldSplitVectorType(EVT VT) const override;
+  TargetLoweringBase::LegalizeTypeAction
+  getPreferredVectorAction(EVT VT) const override;

 private:
  const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@ -270,8 +270,12 @@ bool SITargetLowering::allowsUnalignedMemoryAccesses(EVT  VT,
  return VT.bitsGT(MVT::i32);
 }

-bool SITargetLowering::shouldSplitVectorType(EVT VT) const {
-  return VT.getScalarType().bitsLE(MVT::i16);
+TargetLoweringBase::LegalizeTypeAction
+SITargetLowering::getPreferredVectorAction(EVT VT) const {
+  if (VT.getVectorNumElements() != 1 && VT.getScalarType().bitsLE(MVT::i16))
+    return TypeSplitVector;
+
+  return TargetLoweringBase::getPreferredVectorAction(VT);
 }

 bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
--- a/lib/Target/R600/SIISelLowering.h
+++ b/lib/Target/R600/SIISelLowering.h
@ -50,7 +50,9 @@ public:
  SITargetLowering(TargetMachine &tm);
  bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AS,
                                     bool *IsFast) const override;
-  bool shouldSplitVectorType(EVT VT) const override;
+
+  TargetLoweringBase::LegalizeTypeAction
+  getPreferredVectorAction(EVT VT) const override;

  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                        Type *Ty) const override;
--- a/test/CodeGen/AArch64/arm64-neon-copy.ll
+++ b/test/CodeGen/AArch64/arm64-neon-copy.ll
@ -842,7 +842,7 @@ define <2 x i64> @scalar_to_vector.v2i64(i64 %a) {

 define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
 ; CHECK-LABEL: testDUP.v1i8:
-; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
+; CHECK: dup v0.8b, v0.b[0]
  %b = extractelement <1 x i8> %a, i32 0
  %c = insertelement <8 x i8> undef, i8 %b, i32 0
  %d = insertelement <8 x i8> %c, i8 %b, i32 1
@ -857,7 +857,7 @@ define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {

 define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
 ; CHECK-LABEL: testDUP.v1i16:
-; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
+; CHECK: dup v0.8h, v0.h[0]
  %b = extractelement <1 x i16> %a, i32 0
  %c = insertelement <8 x i16> undef, i16 %b, i32 0
  %d = insertelement <8 x i16> %c, i16 %b, i32 1
@ -872,7 +872,7 @@ define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {

 define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
 ; CHECK-LABEL: testDUP.v1i32:
-; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
+; CHECK: dup v0.4s, v0.s[0]
  %b = extractelement <1 x i32> %a, i32 0
  %c = insertelement <4 x i32> undef, i32 %b, i32 0
  %d = insertelement <4 x i32> %c, i32 %b, i32 1
@ -1411,35 +1411,35 @@ define <16 x i8> @concat_vector_v16i8_const() {

 define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) {
 ; CHECK-LABEL: concat_vector_v4i16:
-; CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
+; CHECK: dup v0.4h, v0.h[0]
 %r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer
 ret <4 x i16> %r
 }

 define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) {
 ; CHECK-LABEL: concat_vector_v4i32:
-; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
+; CHECK: dup v0.4s, v0.s[0]
 %r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer
 ret <4 x i32> %r
 }

 define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) {
 ; CHECK-LABEL: concat_vector_v8i8:
-; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
+; CHECK: dup v0.8b, v0.b[0]
 %r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer
 ret <8 x i8> %r
 }

 define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) {
 ; CHECK-LABEL: concat_vector_v8i16:
-; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
+; CHECK: dup v0.8h, v0.h[0]
 %r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer
 ret <8 x i16> %r
 }

 define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) {
 ; CHECK-LABEL: concat_vector_v16i8:
-; CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
+; CHECK: dup v0.16b, v0.b[0]
 %r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer
 ret <16 x i8> %r
 }
--- a/test/CodeGen/AArch64/arm64-neon-select_cc.ll
+++ b/test/CodeGen/AArch64/arm64-neon-select_cc.ll
@ -136,8 +136,8 @@ define <2x i64> @test_select_cc_v2i64(i64 %a, i64 %b, <2x i64> %c, <2x i64> %d )

 define <1 x float> @test_select_cc_v1f32(float %a, float %b, <1 x float> %c, <1 x float> %d ) {
 ; CHECK-LABEL: test_select_cc_v1f32:
-; CHECK: fcmp s0, s1
-; CHECK-NEXT: fcsel s0, s2, s3, eq
+; CHECK: fcmeq [[MASK:v[0-9]+]].2s, v0.2s, v1.2s
+; CHECK-NEXT: bsl [[MASK]].8b, v2.8b, v3.8b
  %cmp31 = fcmp oeq float %a, %b
  %e = select i1 %cmp31, <1 x float> %c, <1 x float> %d
  ret <1 x float> %e