mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[DAG] Fold vector (aext (load x)) -> (zext (truncate (zextload x)))
We currently don't do anything to fold any_extend vector loads, as no target has such an instruction. Instead, I've added support for folding to a zextload; SimplifyDemandedBits does a good job of adjusting the zext(truncate()) stages as required later on. We still need the custom scalar extload handling instead of using the tryToFoldExtOfLoad helper, as it has different legality tests - we can probably tweak that to reduce most of the code duplication. Fixes the regression I mentioned in rG99a971cadff7. Differential Revision: https://reviews.llvm.org/D85129
This commit is contained in:
parent
af54a4ae5b
commit
850778022a
@ -10606,22 +10606,26 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
|
||||
|
||||
// fold (aext (load x)) -> (aext (truncate (extload x)))
|
||||
// None of the supported targets knows how to perform load and any_ext
|
||||
// on vectors in one instruction. We only perform this transformation on
|
||||
// scalars.
|
||||
if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
|
||||
// on vectors in one instruction, so attempt to fold to zext instead.
|
||||
if (VT.isVector()) {
|
||||
// Try to simplify (zext (load x)).
|
||||
if (SDValue foldedExt =
|
||||
tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
|
||||
ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
|
||||
return foldedExt;
|
||||
} else if (ISD::isNON_EXTLoad(N0.getNode()) &&
|
||||
ISD::isUNINDEXEDLoad(N0.getNode()) &&
|
||||
TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
|
||||
bool DoXform = true;
|
||||
SmallVector<SDNode*, 4> SetCCs;
|
||||
SmallVector<SDNode *, 4> SetCCs;
|
||||
if (!N0.hasOneUse())
|
||||
DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs,
|
||||
TLI);
|
||||
DoXform =
|
||||
ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
|
||||
if (DoXform) {
|
||||
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
|
||||
SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
|
||||
LN0->getChain(),
|
||||
LN0->getBasePtr(), N0.getValueType(),
|
||||
LN0->getMemOperand());
|
||||
LN0->getChain(), LN0->getBasePtr(),
|
||||
N0.getValueType(), LN0->getMemOperand());
|
||||
ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
|
||||
// If the load value is used only by N, replace it via CombineTo N.
|
||||
bool NoReplaceTrunc = N0.hasOneUse();
|
||||
@ -10630,8 +10634,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
|
||||
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
|
||||
recursivelyDeleteUnusedNodes(LN0);
|
||||
} else {
|
||||
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
|
||||
N0.getValueType(), ExtLoad);
|
||||
SDValue Trunc =
|
||||
DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
|
||||
CombineTo(LN0, Trunc, ExtLoad.getValue(1));
|
||||
}
|
||||
return SDValue(N, 0); // Return N so it doesn't get rechecked!
|
||||
|
@ -4,25 +4,13 @@
|
||||
|
||||
|
||||
define void @any_extend_load_v8i64(<8 x i8> * %ptr) {
|
||||
; KNL-LABEL: any_extend_load_v8i64:
|
||||
; KNL: # %bb.0:
|
||||
; KNL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; KNL-NEXT: vpmovzxbq {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
|
||||
; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
|
||||
; KNL-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
|
||||
; KNL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
|
||||
; KNL-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
|
||||
; KNL-NEXT: vpmovqb %zmm0, (%rdi)
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: any_extend_load_v8i64:
|
||||
; SKX: # %bb.0:
|
||||
; SKX-NEXT: vpmovzxbq {{.*#+}} zmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
|
||||
; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
|
||||
; SKX-NEXT: vpmovqb %zmm0, (%rdi)
|
||||
; SKX-NEXT: vzeroupper
|
||||
; SKX-NEXT: retq
|
||||
; ALL-LABEL: any_extend_load_v8i64:
|
||||
; ALL: # %bb.0:
|
||||
; ALL-NEXT: vpmovzxbq {{.*#+}} zmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
|
||||
; ALL-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
|
||||
; ALL-NEXT: vpmovqb %zmm0, (%rdi)
|
||||
; ALL-NEXT: vzeroupper
|
||||
; ALL-NEXT: retq
|
||||
%wide.load = load <8 x i8>, <8 x i8>* %ptr, align 1
|
||||
%1 = zext <8 x i8> %wide.load to <8 x i64>
|
||||
%2 = add nuw nsw <8 x i64> %1, <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4>
|
||||
|
@ -4368,9 +4368,8 @@ define <2 x double> @test_masked_z_8xdouble_to_2xdouble_perm_mem_mask1(<8 x doub
|
||||
define void @test_zext_v8i8_to_v8i16(<8 x i8>* %arg, <8 x i16>* %arg1) {
|
||||
; CHECK-LABEL: test_zext_v8i8_to_v8i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vmovq (%rdi), %xmm0 # xmm0 = mem[0],zero
|
||||
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
||||
; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
|
||||
; CHECK-NEXT: vpmovzxbw (%rdi), %xmm0 # xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
|
||||
; CHECK-NEXT: vpsllw $8, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vmovdqa %xmm0, (%rsi)
|
||||
; CHECK-NEXT: retq
|
||||
%tmp = getelementptr <8 x i8>, <8 x i8>* %arg, i32 0
|
||||
|
@ -1904,8 +1904,7 @@ define <2 x i64> @mul_v2i64_zext_cross_bb(<2 x i32>* %in, <2 x i32>* %y) {
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X86-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
|
||||
; X86-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
|
||||
; X86-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0,0,1,1]
|
||||
; X86-NEXT: pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
|
||||
; X86-NEXT: pmuludq %xmm1, %xmm0
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
|
Loading…
x
Reference in New Issue
Block a user