1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00

[DAG] Don't permit EXTLOAD when combining FSHL/FSHR consecutive loads (PR45265)

Technically we can permit EXTLOAD of the LHS operand, but only if all the extended bits are shifted out. Until we have test coverage for that case, I'm just disabling this to fix PR45265.
This commit is contained in:
Simon Pilgrim 2020-03-21 10:33:53 +00:00
parent ae37969223
commit d76e062375
2 changed files with 67 additions and 1 deletions

View File

@ -8325,13 +8325,15 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
// fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
// TODO - bigendian support once we have test coverage.
// TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
// TODO - permit LHS EXTLOAD if extensions are shifted out.
if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
!DAG.getDataLayout().isBigEndian()) {
auto *LHS = dyn_cast<LoadSDNode>(N0);
auto *RHS = dyn_cast<LoadSDNode>(N1);
if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
LHS->getAddressSpace() == RHS->getAddressSpace() &&
(LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS)) {
(LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
ISD::isNON_EXTLoad(LHS)) {
if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
SDLoc DL(RHS);
uint64_t PtrOff =

View File

@ -918,3 +918,67 @@ define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) nounw
ret <4 x i32> %f
}
%struct.S = type { [11 x i8], i8 }
; Regression test for PR45265: an align-1 i88 load is arithmetically shifted
; right by 40, truncated to i64 and compared against the sign-extended index.
; The checked output below keeps the sign-extending byte load (movsbl/movsbq)
; and the funnel shifts (shrd/shld) on both targets.
define void @PR45265(i32 %0, %struct.S* nocapture readonly %1) nounwind {
; X32-SSE2-LABEL: PR45265:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: pushl %edi
; X32-SSE2-NEXT: pushl %esi
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-SSE2-NEXT: leal (%eax,%eax,2), %edx
; X32-SSE2-NEXT: movzwl 8(%ecx,%edx,4), %esi
; X32-SSE2-NEXT: movsbl 10(%ecx,%edx,4), %edi
; X32-SSE2-NEXT: shll $16, %edi
; X32-SSE2-NEXT: orl %edi, %esi
; X32-SSE2-NEXT: movl 4(%ecx,%edx,4), %ecx
; X32-SSE2-NEXT: shrdl $8, %esi, %ecx
; X32-SSE2-NEXT: xorl %eax, %ecx
; X32-SSE2-NEXT: sarl $31, %eax
; X32-SSE2-NEXT: sarl $31, %edi
; X32-SSE2-NEXT: shldl $24, %esi, %edi
; X32-SSE2-NEXT: xorl %eax, %edi
; X32-SSE2-NEXT: orl %edi, %ecx
; X32-SSE2-NEXT: jne .LBB44_1
; X32-SSE2-NEXT: # %bb.2:
; X32-SSE2-NEXT: popl %esi
; X32-SSE2-NEXT: popl %edi
; X32-SSE2-NEXT: jmp _Z3foov # TAILCALL
; X32-SSE2-NEXT: .LBB44_1:
; X32-SSE2-NEXT: popl %esi
; X32-SSE2-NEXT: popl %edi
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: PR45265:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movslq %edi, %rax
; X64-AVX2-NEXT: leaq (%rax,%rax,2), %rcx
; X64-AVX2-NEXT: movsbq 10(%rsi,%rcx,4), %rdx
; X64-AVX2-NEXT: shlq $16, %rdx
; X64-AVX2-NEXT: movzwl 8(%rsi,%rcx,4), %edi
; X64-AVX2-NEXT: orq %rdx, %rdi
; X64-AVX2-NEXT: movq (%rsi,%rcx,4), %rcx
; X64-AVX2-NEXT: shrdq $40, %rdi, %rcx
; X64-AVX2-NEXT: cmpq %rax, %rcx
; X64-AVX2-NEXT: jne .LBB44_1
; X64-AVX2-NEXT: # %bb.2:
; X64-AVX2-NEXT: jmp _Z3foov # TAILCALL
; X64-AVX2-NEXT: .LBB44_1:
; X64-AVX2-NEXT: retq
; Select the %struct.S element at the sign-extended index.
%3 = sext i32 %0 to i64
%4 = getelementptr inbounds %struct.S, %struct.S* %1, i64 %3
; Load 88 bits (11 bytes) from the start of that element.
%5 = bitcast %struct.S* %4 to i88*
%6 = load i88, i88* %5, align 1
; Keep the bits above bit 40 (sign-filled) as an i64 and compare with the index.
%7 = ashr i88 %6, 40
%8 = trunc i88 %7 to i64
%9 = icmp eq i64 %8, %3
br i1 %9, label %10, label %11
10:
; Taken only when the compare succeeds.
tail call void @_Z3foov()
br label %11
11:
ret void
}
declare dso_local void @_Z3foov()