[AArch64] Remove custom zext/sext legalization code.

Currently performExtendCombine assumes that an integer type with twice the source element bit width is a valid MVT. This is not the case for i1, and it causes a crash on the v64i1 test cases added in this patch. It turns out that this code appears not to be needed: the same patterns are handled by other code, and we end up with the same results even without the custom lowering. I also added additional test cases in a50037aaa6d5df. Let's just remove the unneeded code. Reviewed By: dmgreen Differential Revision: https://reviews.llvm.org/D99437
2024-11-25 20:23:11 +01:00 · 2021-03-29 20:19:45 +01:00 · 2021-03-29 20:19:45 +01:00 · 173cbf6ad2
commit 173cbf6ad2
parent 0b68e9cb0c
2 changed files with 65 additions and 72 deletions
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@ -13939,78 +13939,7 @@ static SDValue performExtendCombine(SDNode *N,

    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), NewABD);
  }
-
-  // This is effectively a custom type legalization for AArch64.
-  //
-  // Type legalization will split an extend of a small, legal, type to a larger
-  // illegal type by first splitting the destination type, often creating
-  // illegal source types, which then get legalized in isel-confusing ways,
-  // leading to really terrible codegen. E.g.,
-  //   %result = v8i32 sext v8i8 %value
-  // becomes
-  //   %losrc = extract_subreg %value, ...
-  //   %hisrc = extract_subreg %value, ...
-  //   %lo = v4i32 sext v4i8 %losrc
-  //   %hi = v4i32 sext v4i8 %hisrc
-  // Things go rapidly downhill from there.
-  //
-  // For AArch64, the [sz]ext vector instructions can only go up one element
-  // size, so we can, e.g., extend from i8 to i16, but to go from i8 to i32
-  // take two instructions.
-  //
-  // This implies that the most efficient way to do the extend from v8i8
-  // to two v4i32 values is to first extend the v8i8 to v8i16, then do
-  // the normal splitting to happen for the v8i16->v8i32.
-
-  // This is pre-legalization to catch some cases where the default
-  // type legalization will create ill-tempered code.
-  if (!DCI.isBeforeLegalizeOps())
-    return SDValue();
-
-  // We're only interested in cleaning things up for non-legal vector types
-  // here. If both the source and destination are legal, things will just
-  // work naturally without any fiddling.
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  EVT ResVT = N->getValueType(0);
-  if (!ResVT.isVector() || TLI.isTypeLegal(ResVT))
-    return SDValue();
-  // If the vector type isn't a simple VT, it's beyond the scope of what
-  // we're  worried about here. Let legalization do its thing and hope for
-  // the best.
-  SDValue Src = N->getOperand(0);
-  EVT SrcVT = Src->getValueType(0);
-  if (!ResVT.isSimple() || !SrcVT.isSimple())
-    return SDValue();
-
-  // If the source VT is a 64-bit fixed or scalable vector, we can play games
-  // and get the better results we want.
-  if (SrcVT.getSizeInBits().getKnownMinSize() != 64)
-    return SDValue();
-
-  unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
-  ElementCount SrcEC = SrcVT.getVectorElementCount();
-  SrcVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize * 2), SrcEC);
-  SDLoc DL(N);
-  Src = DAG.getNode(N->getOpcode(), DL, SrcVT, Src);
-
-  // Now split the rest of the operation into two halves, each with a 64
-  // bit source.
-  EVT LoVT, HiVT;
-  SDValue Lo, Hi;
-  LoVT = HiVT = ResVT.getHalfNumVectorElementsVT(*DAG.getContext());
-
-  EVT InNVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getVectorElementType(),
-                               LoVT.getVectorElementCount());
-  Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
-                   DAG.getConstant(0, DL, MVT::i64));
-  Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
-                   DAG.getConstant(InNVT.getVectorMinNumElements(), DL, MVT::i64));
-  Lo = DAG.getNode(N->getOpcode(), DL, LoVT, Lo);
-  Hi = DAG.getNode(N->getOpcode(), DL, HiVT, Hi);
-
-  // Now combine the parts back together so we still have a single result
-  // like the combiner expects.
-  return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
+  return SDValue();
 }

 static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St,
--- a/test/CodeGen/AArch64/arm64-subvector-extend.ll
+++ b/test/CodeGen/AArch64/arm64-subvector-extend.ll
@ -202,3 +202,67 @@ define <8 x i64> @sext_v8i8_to_v8i64(<8 x i8> %v0) nounwind {
  %r = sext <8 x i8> %v0 to <8 x i64>
  ret <8 x i64> %r
 }
+
+; Extends of vectors of i1.
+
+define <32 x i8> @zext_v32i1(<32 x i1> %arg) {
+; CHECK-LABEL: zext_v32i1:
+; CHECK:         and.16b v0, v0, v2
+; CHECK-NEXT:    and.16b v1, v1, v2
+; CHECK-NEXT:    ret
+  %res = zext <32 x i1> %arg to <32 x i8>
+  ret <32 x i8> %res
+}
+
+define <32 x i8> @sext_v32i1(<32 x i1> %arg) {
+; CHECK-LABEL: sext_v32i1:
+; CHECK:         shl.16b v0, v0, #7
+; CHECK-NEXT:    shl.16b v1, v1, #7
+; CHECK-NEXT:    sshr.16b v0, v0, #7
+; CHECK-NEXT:    sshr.16b v1, v1, #7
+; CHECK-NEXT:    ret
+;
+  %res = sext <32 x i1> %arg to <32 x i8>
+  ret <32 x i8> %res
+}
+
+define <64 x i8> @zext_v64i1(<64 x i1> %arg) {
+; CHECK-LABEL: zext_v64i1:
+; CHECK:         and.16b v0, v0, [[V4:v.+]]
+; CHECK-NEXT:    and.16b v1, v1, [[V4]]
+; CHECK-NEXT:    and.16b v2, v2, [[V4]]
+; CHECK-NEXT:    and.16b v3, v3, [[V4]]
+; CHECK-NEXT:    ret
+;
+  %res = zext <64 x i1> %arg to <64 x i8>
+  ret <64 x i8> %res
+}
+
+define <64 x i8> @sext_v64i1(<64 x i1> %arg) {
+; CHECK-LABEL: sext_v64i1:
+; CHECK:         shl.16b v0, v0, #7
+; CHECK-NEXT:    shl.16b v3, v3, #7
+; CHECK-NEXT:    shl.16b v2, v2, #7
+; CHECK-NEXT:    shl.16b [[V4:v.+]], v1, #7
+; CHECK-NEXT:    sshr.16b v0, v0, #7
+; CHECK-NEXT:    sshr.16b v1, v3, #7
+; CHECK-NEXT:    sshr.16b v2, v2, #7
+; CHECK-NEXT:    sshr.16b v3, [[V4]], #7
+; CHECK-NEXT:    ret
+;
+  %res = sext <64 x i1> %arg to <64 x i8>
+  ret <64 x i8> %res
+}
+
+define <1 x i128> @sext_v1x64(<1 x i64> %arg) {
+; CHECK-LABEL: sext_v1x64:
+; CHECK-NEXT:   .cfi_startproc
+; CHECK-NEXT:    fmov    x8, d0
+; CHECK-NEXT:    asr x1, x8, #63
+; CHECK-NEXT:    mov.d   v0[1], x1
+; CHECK-NEXT:    fmov    x0, d0
+; CHECK-NEXT:    ret
+;
+  %res = sext <1 x i64> %arg to <1 x i128>
+  ret <1 x i128> %res
+}