mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 20:23:11 +01:00
[AArch64] Remove custom zext/sext legalization code.
Currently performExtendCombine assumes that the src-element bitwidth * 2 is a valid MVT. But this is not the case for i1, and it causes a crash on the v64i1 test cases added in this patch. It turns out that this code appears not to be needed; the same patterns are handled by other code and we end up with the same results, even without the custom lowering. I also added additional test cases in a50037aaa6d5df. Let's just remove the unneeded code. Reviewed By: dmgreen. Differential Revision: https://reviews.llvm.org/D99437
This commit is contained in:
parent
0b68e9cb0c
commit
173cbf6ad2
@ -13939,78 +13939,7 @@ static SDValue performExtendCombine(SDNode *N,
|
||||
|
||||
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), NewABD);
|
||||
}
|
||||
|
||||
// This is effectively a custom type legalization for AArch64.
|
||||
//
|
||||
// Type legalization will split an extend of a small, legal, type to a larger
|
||||
// illegal type by first splitting the destination type, often creating
|
||||
// illegal source types, which then get legalized in isel-confusing ways,
|
||||
// leading to really terrible codegen. E.g.,
|
||||
// %result = v8i32 sext v8i8 %value
|
||||
// becomes
|
||||
// %losrc = extract_subreg %value, ...
|
||||
// %hisrc = extract_subreg %value, ...
|
||||
// %lo = v4i32 sext v4i8 %losrc
|
||||
// %hi = v4i32 sext v4i8 %hisrc
|
||||
// Things go rapidly downhill from there.
|
||||
//
|
||||
// For AArch64, the [sz]ext vector instructions can only go up one element
|
||||
// size, so we can, e.g., extend from i8 to i16, but to go from i8 to i32
|
||||
// takes two instructions.
|
||||
//
|
||||
// This implies that the most efficient way to do the extend from v8i8
|
||||
// to two v4i32 values is to first extend the v8i8 to v8i16, then do
|
||||
// the normal splitting for the v8i16->v8i32.
|
||||
|
||||
// This is pre-legalization to catch some cases where the default
|
||||
// type legalization will create ill-tempered code.
|
||||
if (!DCI.isBeforeLegalizeOps())
|
||||
return SDValue();
|
||||
|
||||
// We're only interested in cleaning things up for non-legal vector types
|
||||
// here. If both the source and destination are legal, things will just
|
||||
// work naturally without any fiddling.
|
||||
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||
EVT ResVT = N->getValueType(0);
|
||||
if (!ResVT.isVector() || TLI.isTypeLegal(ResVT))
|
||||
return SDValue();
|
||||
// If the vector type isn't a simple VT, it's beyond the scope of what
|
||||
// we're worried about here. Let legalization do its thing and hope for
|
||||
// the best.
|
||||
SDValue Src = N->getOperand(0);
|
||||
EVT SrcVT = Src->getValueType(0);
|
||||
if (!ResVT.isSimple() || !SrcVT.isSimple())
|
||||
return SDValue();
|
||||
|
||||
// If the source VT is a 64-bit fixed or scalable vector, we can play games
|
||||
// and get the better results we want.
|
||||
if (SrcVT.getSizeInBits().getKnownMinSize() != 64)
|
||||
return SDValue();
|
||||
|
||||
unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
|
||||
ElementCount SrcEC = SrcVT.getVectorElementCount();
|
||||
SrcVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize * 2), SrcEC);
|
||||
SDLoc DL(N);
|
||||
Src = DAG.getNode(N->getOpcode(), DL, SrcVT, Src);
|
||||
|
||||
// Now split the rest of the operation into two halves, each with a 64
|
||||
// bit source.
|
||||
EVT LoVT, HiVT;
|
||||
SDValue Lo, Hi;
|
||||
LoVT = HiVT = ResVT.getHalfNumVectorElementsVT(*DAG.getContext());
|
||||
|
||||
EVT InNVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getVectorElementType(),
|
||||
LoVT.getVectorElementCount());
|
||||
Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
|
||||
DAG.getConstant(0, DL, MVT::i64));
|
||||
Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
|
||||
DAG.getConstant(InNVT.getVectorMinNumElements(), DL, MVT::i64));
|
||||
Lo = DAG.getNode(N->getOpcode(), DL, LoVT, Lo);
|
||||
Hi = DAG.getNode(N->getOpcode(), DL, HiVT, Hi);
|
||||
|
||||
// Now combine the parts back together so we still have a single result
|
||||
// like the combiner expects.
|
||||
return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St,
|
||||
|
@ -202,3 +202,67 @@ define <8 x i64> @sext_v8i8_to_v8i64(<8 x i8> %v0) nounwind {
|
||||
%r = sext <8 x i8> %v0 to <8 x i64>
|
||||
ret <8 x i64> %r
|
||||
}
|
||||
|
||||
; Extends of vectors of i1.
|
||||
|
||||
; Zero-extend <32 x i1> to <32 x i8>.
; The CHECK lines expect exactly one and.16b per 16-byte register (v0 and v1),
; both using v2 as the second operand, followed directly by ret.
; NOTE(review): v2 is presumably a splat-of-1 mask set up before the first
; matched line; its materialisation is not checked here -- confirm.
define <32 x i8> @zext_v32i1(<32 x i1> %arg) {
|
||||
; CHECK-LABEL: zext_v32i1:
|
||||
; CHECK: and.16b v0, v0, v2
|
||||
; CHECK-NEXT: and.16b v1, v1, v2
|
||||
; CHECK-NEXT: ret
|
||||
%res = zext <32 x i1> %arg to <32 x i8>
|
||||
ret <32 x i8> %res
|
||||
}
|
||||
|
||||
; Sign-extend <32 x i1> to <32 x i8>.
; Expected lowering for each 16-byte register (v0, v1): shl #7 moves bit 0 of
; every byte into the sign position, then sshr #7 replicates that bit across
; the byte. No other instructions are allowed between the shifts and ret.
define <32 x i8> @sext_v32i1(<32 x i1> %arg) {
|
||||
; CHECK-LABEL: sext_v32i1:
|
||||
; CHECK: shl.16b v0, v0, #7
|
||||
; CHECK-NEXT: shl.16b v1, v1, #7
|
||||
; CHECK-NEXT: sshr.16b v0, v0, #7
|
||||
; CHECK-NEXT: sshr.16b v1, v1, #7
|
||||
; CHECK-NEXT: ret
|
||||
;
|
||||
%res = sext <32 x i1> %arg to <32 x i8>
|
||||
ret <32 x i8> %res
|
||||
}
|
||||
|
||||
; Zero-extend <64 x i1> to <64 x i8>.
; Per the commit description, the v64i1 cases are the ones that crashed in the
; removed custom extend combine, so this doubles as a regression test.
; The mask register is captured as the FileCheck variable V4 on the first and
; and must be the same register in the remaining three (v1, v2, v3).
define <64 x i8> @zext_v64i1(<64 x i1> %arg) {
|
||||
; CHECK-LABEL: zext_v64i1:
|
||||
; CHECK: and.16b v0, v0, [[V4:v.+]]
|
||||
; CHECK-NEXT: and.16b v1, v1, [[V4]]
|
||||
; CHECK-NEXT: and.16b v2, v2, [[V4]]
|
||||
; CHECK-NEXT: and.16b v3, v3, [[V4]]
|
||||
; CHECK-NEXT: ret
|
||||
;
|
||||
%res = zext <64 x i1> %arg to <64 x i8>
|
||||
ret <64 x i8> %res
|
||||
}
|
||||
|
||||
; Sign-extend <64 x i1> to <64 x i8>.
; Per the commit description, the v64i1 cases are the ones that crashed in the
; removed custom extend combine, so this doubles as a regression test.
; Expected lowering: four shl #7 followed by four sshr #7, one pair per
; 16-byte register. The shifted copy of v1 goes to a scratch register that is
; captured as the FileCheck variable V4.
; NOTE(review): as written, the expected output puts the shifted v3 into v1
; and the shifted v1 (via V4) into v3, i.e. the second and fourth result
; registers are exchanged relative to their inputs -- confirm this is the
; intended register assignment and not a recorded miscompile.
define <64 x i8> @sext_v64i1(<64 x i1> %arg) {
|
||||
; CHECK-LABEL: sext_v64i1:
|
||||
; CHECK: shl.16b v0, v0, #7
|
||||
; CHECK-NEXT: shl.16b v3, v3, #7
|
||||
; CHECK-NEXT: shl.16b v2, v2, #7
|
||||
; CHECK-NEXT: shl.16b [[V4:v.+]], v1, #7
|
||||
; CHECK-NEXT: sshr.16b v0, v0, #7
|
||||
; CHECK-NEXT: sshr.16b v1, v3, #7
|
||||
; CHECK-NEXT: sshr.16b v2, v2, #7
|
||||
; CHECK-NEXT: sshr.16b v3, [[V4]], #7
|
||||
; CHECK-NEXT: ret
|
||||
;
|
||||
%res = sext <64 x i1> %arg to <64 x i8>
|
||||
ret <64 x i8> %res
|
||||
}
|
||||
|
||||
; Sign-extend <1 x i64> to <1 x i128>.
; Expected sequence: the 64-bit input is moved from d0 to x8, its sign is
; computed into x1 with asr #63, x1 is also inserted into lane 1 of v0, and
; the original value is moved from d0 to x0 before ret.
; NOTE(review): presumably x0/x1 carry the low/high halves of the i128
; result -- confirm against the calling convention.
define <1 x i128> @sext_v1x64(<1 x i64> %arg) {
|
||||
; CHECK-LABEL: sext_v1x64:
|
||||
; CHECK-NEXT: .cfi_startproc
|
||||
; CHECK-NEXT: fmov x8, d0
|
||||
; CHECK-NEXT: asr x1, x8, #63
|
||||
; CHECK-NEXT: mov.d v0[1], x1
|
||||
; CHECK-NEXT: fmov x0, d0
|
||||
; CHECK-NEXT: ret
|
||||
;
|
||||
%res = sext <1 x i64> %arg to <1 x i128>
|
||||
ret <1 x i128> %res
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user