1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-18 18:42:46 +02:00

[AArch64] Remove custom zext/sext legalization code.

Currently performExtendCombine assumes that the src-element bitwidth * 2
is a valid MVT. But this is not the case for i1 and it causes a crash on
the v64i1 test cases added in this patch.

It turns out that this code appears to not be needed; the same patterns are
handled by other code and we end up with the same results, even without the
custom lowering. I also added additional test cases in a50037aaa6d5df.

Let's just remove the unneeded code.

Reviewed By: dmgreen

Differential Revision: https://reviews.llvm.org/D99437
This commit is contained in:
Florian Hahn 2021-03-29 20:19:45 +01:00
parent 0b68e9cb0c
commit 173cbf6ad2
2 changed files with 65 additions and 72 deletions

View File

@ -13939,78 +13939,7 @@ static SDValue performExtendCombine(SDNode *N,
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), NewABD);
}
// This is effectively a custom type legalization for AArch64.
//
// Type legalization will split an extend of a small, legal, type to a larger
// illegal type by first splitting the destination type, often creating
// illegal source types, which then get legalized in isel-confusing ways,
// leading to really terrible codegen. E.g.,
// %result = v8i32 sext v8i8 %value
// becomes
// %losrc = extract_subreg %value, ...
// %hisrc = extract_subreg %value, ...
// %lo = v4i32 sext v4i8 %losrc
// %hi = v4i32 sext v4i8 %hisrc
// Things go rapidly downhill from there.
//
// For AArch64, the [sz]ext vector instructions can only go up one element
// size, so we can, e.g., extend from i8 to i16, but to go from i8 to i32
// take two instructions.
//
// This implies that the most efficient way to do the extend from v8i8
// to two v4i32 values is to first extend the v8i8 to v8i16, then do
// the normal splitting to happen for the v8i16->v8i32.
// This is pre-legalization to catch some cases where the default
// type legalization will create ill-tempered code.
if (!DCI.isBeforeLegalizeOps())
return SDValue();
// We're only interested in cleaning things up for non-legal vector types
// here. If both the source and destination are legal, things will just
// work naturally without any fiddling.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT ResVT = N->getValueType(0);
if (!ResVT.isVector() || TLI.isTypeLegal(ResVT))
return SDValue();
// If the vector type isn't a simple VT, it's beyond the scope of what
// we're worried about here. Let legalization do its thing and hope for
// the best.
SDValue Src = N->getOperand(0);
EVT SrcVT = Src->getValueType(0);
if (!ResVT.isSimple() || !SrcVT.isSimple())
return SDValue();
// If the source VT is a 64-bit fixed or scalable vector, we can play games
// and get the better results we want.
if (SrcVT.getSizeInBits().getKnownMinSize() != 64)
return SDValue();
unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
ElementCount SrcEC = SrcVT.getVectorElementCount();
SrcVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize * 2), SrcEC);
SDLoc DL(N);
Src = DAG.getNode(N->getOpcode(), DL, SrcVT, Src);
// Now split the rest of the operation into two halves, each with a 64
// bit source.
EVT LoVT, HiVT;
SDValue Lo, Hi;
LoVT = HiVT = ResVT.getHalfNumVectorElementsVT(*DAG.getContext());
EVT InNVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getVectorElementType(),
LoVT.getVectorElementCount());
Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
DAG.getConstant(0, DL, MVT::i64));
Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
DAG.getConstant(InNVT.getVectorMinNumElements(), DL, MVT::i64));
Lo = DAG.getNode(N->getOpcode(), DL, LoVT, Lo);
Hi = DAG.getNode(N->getOpcode(), DL, HiVT, Hi);
// Now combine the parts back together so we still have a single result
// like the combiner expects.
return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
return SDValue();
}
static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St,

View File

@ -202,3 +202,67 @@ define <8 x i64> @sext_v8i8_to_v8i64(<8 x i8> %v0) nounwind {
%r = sext <8 x i8> %v0 to <8 x i64>
ret <8 x i64> %r
}
; Extends of vectors of i1.
define <32 x i8> @zext_v32i1(<32 x i1> %arg) {
; CHECK-LABEL: zext_v32i1:
; CHECK: and.16b v0, v0, v2
; CHECK-NEXT: and.16b v1, v1, v2
; CHECK-NEXT: ret
%res = zext <32 x i1> %arg to <32 x i8>
ret <32 x i8> %res
}
define <32 x i8> @sext_v32i1(<32 x i1> %arg) {
; CHECK-LABEL: sext_v32i1:
; CHECK: shl.16b v0, v0, #7
; CHECK-NEXT: shl.16b v1, v1, #7
; CHECK-NEXT: sshr.16b v0, v0, #7
; CHECK-NEXT: sshr.16b v1, v1, #7
; CHECK-NEXT: ret
;
%res = sext <32 x i1> %arg to <32 x i8>
ret <32 x i8> %res
}
define <64 x i8> @zext_v64i1(<64 x i1> %arg) {
; CHECK-LABEL: zext_v64i1:
; CHECK: and.16b v0, v0, [[V4:v.+]]
; CHECK-NEXT: and.16b v1, v1, [[V4]]
; CHECK-NEXT: and.16b v2, v2, [[V4]]
; CHECK-NEXT: and.16b v3, v3, [[V4]]
; CHECK-NEXT: ret
;
%res = zext <64 x i1> %arg to <64 x i8>
ret <64 x i8> %res
}
define <64 x i8> @sext_v64i1(<64 x i1> %arg) {
; CHECK-LABEL: sext_v64i1:
; CHECK: shl.16b v0, v0, #7
; CHECK-NEXT: shl.16b v3, v3, #7
; CHECK-NEXT: shl.16b v2, v2, #7
; CHECK-NEXT: shl.16b [[V4:v.+]], v1, #7
; CHECK-NEXT: sshr.16b v0, v0, #7
; CHECK-NEXT: sshr.16b v1, v3, #7
; CHECK-NEXT: sshr.16b v2, v2, #7
; CHECK-NEXT: sshr.16b v3, [[V4]], #7
; CHECK-NEXT: ret
;
%res = sext <64 x i1> %arg to <64 x i8>
ret <64 x i8> %res
}
define <1 x i128> @sext_v1x64(<1 x i64> %arg) {
; CHECK-LABEL: sext_v1x64:
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: fmov x8, d0
; CHECK-NEXT: asr x1, x8, #63
; CHECK-NEXT: mov.d v0[1], x1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
;
%res = sext <1 x i64> %arg to <1 x i128>
ret <1 x i128> %res
}