
[DAG] Fold vector (aext (load x)) -> (zext (truncate (zextload x)))

We currently don't do anything to fold any_extend vector loads as no target has such an instruction.

Instead I've added support for folding to a zextload; SimplifyDemandedBits does a good job of adjusting the zext(truncate()) stages as required later on.
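
For illustration, here is a minimal C++ sketch of the node chain the fold produces. This is not the actual tryToFoldExtOfLoad implementation (its legality checks and one-use handling are more involved); the helper name here is hypothetical, and only the SelectionDAG calls are real API.

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// Hypothetical sketch: rewrite a vector (aext (load x)) as
// (zext (truncate (zextload x))); SimplifyDemandedBits later collapses
// the zext/trunc pair wherever the extended bits are not demanded.
static SDValue sketchFoldAExtOfVectorLoad(SelectionDAG &DAG, LoadSDNode *LN0,
                                          EVT VT, const SDLoc &DL) {
  // Re-issue the load as a zero-extending load of the original memory type.
  SDValue ZExtLoad =
      DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, LN0->getChain(),
                     LN0->getBasePtr(), LN0->getMemoryVT(),
                     LN0->getMemOperand());
  // Truncate back to the loaded value type so remaining users of the
  // original load can be rewired to this value.
  SDValue Trunc =
      DAG.getNode(ISD::TRUNCATE, DL, LN0->getValueType(0), ZExtLoad);
  // Zero-extend again for the any_extend's own users.
  return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Trunc);
}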

We still need the custom scalar extload handling instead of using the tryToFoldExtOfLoad helper, as it has different legality tests - we can probably tweak that to reduce most of the code duplication.
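
As a rough illustration of that legality difference (the actual conditions inside tryToFoldExtOfLoad are more involved, and canFoldExtLoad below is a hypothetical helper, not code from this patch): the vector path asks whether a ZEXTLOAD of the type pair is legal, while the custom scalar path asks about a plain EXTLOAD.

#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;

// Hypothetical comparison of the two gates used by the two code paths.
static bool canFoldExtLoad(const TargetLowering &TLI, EVT VT, EVT MemVT,
                           bool IsVector) {
  // Vector path (via tryToFoldExtOfLoad): requires a legal ZEXTLOAD.
  // Scalar path (custom handling): requires a legal EXTLOAD.
  ISD::LoadExtType ExtTy = IsVector ? ISD::ZEXTLOAD : ISD::EXTLOAD;
  return TLI.isLoadExtLegal(ExtTy, VT, MemVT);
}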

Fixes the regression I mentioned in rG99a971cadff7

Differential Revision: https://reviews.llvm.org/D85129
Simon Pilgrim 2020-08-05 11:22:07 +01:00
parent af54a4ae5b
commit 850778022a
4 changed files with 27 additions and 37 deletions


@@ -10606,22 +10606,26 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
   // fold (aext (load x)) -> (aext (truncate (extload x)))
   // None of the supported targets knows how to perform load and any_ext
-  // on vectors in one instruction. We only perform this transformation on
-  // scalars.
-  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
-      ISD::isUNINDEXEDLoad(N0.getNode()) &&
-      TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
+  // on vectors in one instruction, so attempt to fold to zext instead.
+  if (VT.isVector()) {
+    // Try to simplify (zext (load x)).
+    if (SDValue foldedExt =
+            tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
+                               ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
+      return foldedExt;
+  } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
+             ISD::isUNINDEXEDLoad(N0.getNode()) &&
+             TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
     bool DoXform = true;
-    SmallVector<SDNode*, 4> SetCCs;
+    SmallVector<SDNode *, 4> SetCCs;
     if (!N0.hasOneUse())
-      DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs,
-                                        TLI);
+      DoXform =
+          ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
     if (DoXform) {
       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
       SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
-                                       LN0->getChain(),
-                                       LN0->getBasePtr(), N0.getValueType(),
-                                       LN0->getMemOperand());
+                                       LN0->getChain(), LN0->getBasePtr(),
+                                       N0.getValueType(), LN0->getMemOperand());
       ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
       // If the load value is used only by N, replace it via CombineTo N.
       bool NoReplaceTrunc = N0.hasOneUse();
@@ -10630,8 +10634,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
       DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
       recursivelyDeleteUnusedNodes(LN0);
     } else {
-      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
-                                  N0.getValueType(), ExtLoad);
+      SDValue Trunc =
+          DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
       CombineTo(LN0, Trunc, ExtLoad.getValue(1));
     }
     return SDValue(N, 0); // Return N so it doesn't get rechecked!


@@ -4,25 +4,13 @@
 define void @any_extend_load_v8i64(<8 x i8> * %ptr) {
-; KNL-LABEL: any_extend_load_v8i64:
-; KNL:       # %bb.0:
-; KNL-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
-; KNL-NEXT:    vpmovzxbq {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
-; KNL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; KNL-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
-; KNL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; KNL-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; KNL-NEXT:    vpmovqb %zmm0, (%rdi)
-; KNL-NEXT:    vzeroupper
-; KNL-NEXT:    retq
-;
-; SKX-LABEL: any_extend_load_v8i64:
-; SKX:       # %bb.0:
-; SKX-NEXT:    vpmovzxbq {{.*#+}} zmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
-; SKX-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; SKX-NEXT:    vpmovqb %zmm0, (%rdi)
-; SKX-NEXT:    vzeroupper
-; SKX-NEXT:    retq
+; ALL-LABEL: any_extend_load_v8i64:
+; ALL:       # %bb.0:
+; ALL-NEXT:    vpmovzxbq {{.*#+}} zmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
+; ALL-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; ALL-NEXT:    vpmovqb %zmm0, (%rdi)
+; ALL-NEXT:    vzeroupper
+; ALL-NEXT:    retq
   %wide.load = load <8 x i8>, <8 x i8>* %ptr, align 1
   %1 = zext <8 x i8> %wide.load to <8 x i64>
   %2 = add nuw nsw <8 x i64> %1, <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4>


@@ -4368,9 +4368,8 @@ define <2 x double> @test_masked_z_8xdouble_to_2xdouble_perm_mem_mask1(<8 x doub
 define void @test_zext_v8i8_to_v8i16(<8 x i8>* %arg, <8 x i16>* %arg1) {
 ; CHECK-LABEL: test_zext_v8i8_to_v8i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmovq (%rdi), %xmm0 # xmm0 = mem[0],zero
-; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; CHECK-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; CHECK-NEXT:    vpmovzxbw (%rdi), %xmm0 # xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
 ; CHECK-NEXT:    vpsllw $8, %xmm0, %xmm0
 ; CHECK-NEXT:    vmovdqa %xmm0, (%rsi)
 ; CHECK-NEXT:    retq
   %tmp = getelementptr <8 x i8>, <8 x i8>* %arg, i32 0


@@ -1904,8 +1904,7 @@ define <2 x i64> @mul_v2i64_zext_cross_bb(<2 x i32>* %in, <2 x i32>* %y) {
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
-; X86-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
-; X86-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0,0,1,1]
+; X86-NEXT:    pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero
 ; X86-NEXT:    pmuludq %xmm1, %xmm0
 ; X86-NEXT:    retl
 ;