[SVE][CodeGen] Fix legalisation for scalable types

Summary: This patch handles illegal scalable types when lowering IR operations, addressing several places where the value of isScalableVector() is ignored. For types such as <vscale x 8 x i32>, this means splitting the operations. In this example, we would split it into two operations of type <vscale x 4 x i32> for the low and high halves. In cases such as <vscale x 2 x i32>, the elements in the vector will be promoted. In this case they will be promoted to i64 (with a vector of type <vscale x 2 x i64>) Reviewers: sdesmalen, efriedma, huntergr Reviewed By: efriedma Subscribers: david-arm, tschuett, hiraditya, rkruppe, psnobl, cfe-commits, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D78812
2024-11-26 04:32:44 +01:00 · 2020-05-07 10:01:31 +01:00 · 2020-05-07 10:01:31 +01:00 · ea6936aebd
commit ea6936aebd
parent 18a0cecf64
3 changed files with 74 additions and 12 deletions
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@ -731,10 +731,10 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
    IntermediateVT.getVectorNumElements() : 1;

  // Convert the vector to the appropriate type if necessary.
-  unsigned DestVectorNoElts = NumIntermediates * IntermediateNumElts;
-
+  auto DestEltCnt = ElementCount(NumIntermediates * IntermediateNumElts,
+                                 ValueVT.isScalableVector());
  EVT BuiltVectorTy = EVT::getVectorVT(
-      *DAG.getContext(), IntermediateVT.getScalarType(), DestVectorNoElts);
+      *DAG.getContext(), IntermediateVT.getScalarType(), DestEltCnt);
  if (ValueVT != BuiltVectorTy) {
    if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, BuiltVectorTy))
      Val = Widened;
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@ -1392,7 +1392,7 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT
                                                EVT &IntermediateVT,
                                                unsigned &NumIntermediates,
                                                MVT &RegisterVT) const {
-  unsigned NumElts = VT.getVectorNumElements();
+  ElementCount EltCnt = VT.getVectorElementCount();

  // If there is a wider vector type with the same element type as this one,
  // or a promoted vector type that has the same number of elements which
@ -1400,7 +1400,7 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT
  // This handles things like <2 x float> -> <4 x float> and
  // <4 x i1> -> <4 x i32>.
  LegalizeTypeAction TA = getTypeAction(Context, VT);
-  if (NumElts != 1 && (TA == TypeWidenVector || TA == TypePromoteInteger)) {
+  if (EltCnt.Min != 1 && (TA == TypeWidenVector || TA == TypePromoteInteger)) {
    EVT RegisterEVT = getTypeToTransformTo(Context, VT);
    if (isTypeLegal(RegisterEVT)) {
      IntermediateVT = RegisterEVT;
@ -1417,22 +1417,22 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT

  // FIXME: We don't support non-power-of-2-sized vectors for now.  Ideally we
  // could break down into LHS/RHS like LegalizeDAG does.
-  if (!isPowerOf2_32(NumElts)) {
-    NumVectorRegs = NumElts;
-    NumElts = 1;
+  if (!isPowerOf2_32(EltCnt.Min)) {
+    NumVectorRegs = EltCnt.Min;
+    EltCnt.Min = 1;
  }

  // Divide the input until we get to a supported size.  This will always
  // end with a scalar if the target doesn't support vectors.
-  while (NumElts > 1 && !isTypeLegal(
-                                   EVT::getVectorVT(Context, EltTy, NumElts))) {
-    NumElts >>= 1;
+  while (EltCnt.Min > 1 &&
+         !isTypeLegal(EVT::getVectorVT(Context, EltTy, EltCnt))) {
+    EltCnt.Min >>= 1;
    NumVectorRegs <<= 1;
  }

  NumIntermediates = NumVectorRegs;

-  EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts);
+  EVT NewVT = EVT::getVectorVT(Context, EltTy, EltCnt);
  if (!isTypeLegal(NewVT))
    NewVT = EltTy;
  IntermediateVT = NewVT;
--- a/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll
+++ b/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll
@ -22,6 +22,37 @@ define <vscale x 2 x i64> @sdiv_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
  ret <vscale x 2 x i64> %div
 }

+define <vscale x 8 x i32> @sdiv_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b) {
+; CHECK-LABEL: @sdiv_split_i32
+; CHECK-DAG: ptrue p0.s
+; CHECK-DAG: sdiv z0.s, p0/m, z0.s, z2.s
+; CHECK-DAG: sdiv z1.s, p0/m, z1.s, z3.s
+; CHECK-NEXT: ret
+  %div = sdiv <vscale x 8 x i32> %a, %b
+  ret <vscale x 8 x i32> %div
+}
+
+define <vscale x 2 x i32> @sdiv_widen_i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b) {
+; CHECK-LABEL: @sdiv_widen_i32
+; CHECK-DAG: ptrue p0.d
+; CHECK-DAG: sxtw z1.d, p0/m, z1.d
+; CHECK-DAG: sxtw z0.d, p0/m, z0.d
+; CHECK-DAG: sdiv z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %div = sdiv <vscale x 2 x i32> %a, %b
+  ret <vscale x 2 x i32> %div
+}
+
+define <vscale x 4 x i64> @sdiv_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b) {
+; CHECK-LABEL: @sdiv_split_i64
+; CHECK-DAG: ptrue p0.d
+; CHECK-DAG: sdiv z0.d, p0/m, z0.d, z2.d
+; CHECK-DAG: sdiv z1.d, p0/m, z1.d, z3.d
+; CHECK-NEXT: ret
+  %div = sdiv <vscale x 4 x i64> %a, %b
+  ret <vscale x 4 x i64> %div
+}
+
 ;
 ; UDIV
 ;
@ -44,6 +75,37 @@ define <vscale x 2 x i64> @udiv_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
  ret <vscale x 2 x i64> %div
 }

+define <vscale x 8 x i32> @udiv_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b) {
+; CHECK-LABEL: @udiv_split_i32
+; CHECK-DAG: ptrue p0.s
+; CHECK-DAG: udiv z0.s, p0/m, z0.s, z2.s
+; CHECK-DAG: udiv z1.s, p0/m, z1.s, z3.s
+; CHECK-NEXT: ret
+  %div = udiv <vscale x 8 x i32> %a, %b
+  ret <vscale x 8 x i32> %div
+}
+
+define <vscale x 2 x i32> @udiv_widen_i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b) {
+; CHECK-LABEL: @udiv_widen_i32
+; CHECK-DAG: ptrue p0.d
+; CHECK-DAG: and z1.d, z1.d, #0xffffffff
+; CHECK-DAG: and z0.d, z0.d, #0xffffffff
+; CHECK-DAG: udiv z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %div = udiv <vscale x 2 x i32> %a, %b
+  ret <vscale x 2 x i32> %div
+}
+
+define <vscale x 4 x i64> @udiv_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b) {
+; CHECK-LABEL: @udiv_split_i64
+; CHECK-DAG: ptrue p0.d
+; CHECK-DAG: udiv z0.d, p0/m, z0.d, z2.d
+; CHECK-DAG: udiv z1.d, p0/m, z1.d, z3.d
+; CHECK-NEXT: ret
+  %div = udiv <vscale x 4 x i64> %a, %b
+  ret <vscale x 4 x i64> %div
+}
+
 ;
 ; SMIN
 ;