mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 19:52:54 +01:00
[DAGCombine] Improve ReduceLoadWidth for SRL
Recommitting rL321259. Previously this caused an issue with PPCBE but I didn't receive a reproducer and didn't have the time to follow up. If the issue appears again, please provide a reproducer so I can fix it. Original commit message: If the SRL node is only used by an AND, we may be able to set the ExtVT to the width of the mask, making the AND redundant. To support this, another check has been added in isLegalNarrowLoad which queries whether the load is valid. Differential Revision: https://reviews.llvm.org/D41350 llvm-svn: 329160
This commit is contained in:
parent
304b2da27b
commit
09b5caa8ce
@ -3793,6 +3793,16 @@ bool DAGCombiner::isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType,
|
||||
if (LoadN->getNumValues() > 2)
|
||||
return false;
|
||||
|
||||
// Only allow byte offsets.
|
||||
if (ShAmt % 8)
|
||||
return false;
|
||||
|
||||
// Ensure that this isn't going to produce an unsupported unaligned access.
|
||||
if (ShAmt && !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
|
||||
ExtVT, LoadN->getAddressSpace(),
|
||||
ShAmt / 8))
|
||||
return false;
|
||||
|
||||
// If the load that we're shrinking is an extload and we're not just
|
||||
// discarding the extension we can't simply shrink the load. Bail.
|
||||
// TODO: It would be possible to merge the extensions in some cases.
|
||||
@ -8344,6 +8354,22 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
|
||||
// then the result of the shift+trunc is zero/undef (handled elsewhere).
|
||||
if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
|
||||
return SDValue();
|
||||
|
||||
// If the SRL is only used by a masking AND, we may be able to adjust
|
||||
// the ExtVT to make the AND redundant.
|
||||
SDNode *Mask = *(N->use_begin());
|
||||
if (Mask->getOpcode() == ISD::AND &&
|
||||
isa<ConstantSDNode>(Mask->getOperand(1))) {
|
||||
const APInt &ShiftMask =
|
||||
cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue();
|
||||
if (ShiftMask.isMask()) {
|
||||
EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
|
||||
ShiftMask.countTrailingOnes());
|
||||
// Recompute the type.
|
||||
if (TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
|
||||
ExtVT = MaskedVT;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -217,10 +217,23 @@ entry:
|
||||
ret i32 %conv
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_shift8_mask8
|
||||
; CHECK-LABEL: test_shift7_mask8
|
||||
; CHECK-BE: ldr r1, [r0]
|
||||
; CHECK-COMMON: ldr r1, [r0]
|
||||
; CHECK-COMMON: ubfx r1, r1, #8, #8
|
||||
; CHECK-COMMON: ubfx r1, r1, #7, #8
|
||||
; CHECK-COMMON: str r1, [r0]
|
||||
define arm_aapcscc void @test_shift7_mask8(i32* nocapture %p) {
|
||||
entry:
|
||||
%0 = load i32, i32* %p, align 4
|
||||
%shl = lshr i32 %0, 7
|
||||
%and = and i32 %shl, 255
|
||||
store i32 %and, i32* %p, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_shift8_mask8
|
||||
; CHECK-BE: ldrb r1, [r0, #2]
|
||||
; CHECK-COMMON: ldrb r1, [r0, #1]
|
||||
; CHECK-COMMON: str r1, [r0]
|
||||
define arm_aapcscc void @test_shift8_mask8(i32* nocapture %p) {
|
||||
entry:
|
||||
@ -231,10 +244,40 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_shift8_mask16
|
||||
; CHECK-LABEL: test_shift8_mask7
|
||||
; CHECK-BE: ldr r1, [r0]
|
||||
; CHECK-COMMON: ldr r1, [r0]
|
||||
; CHECK-COMMON: ubfx r1, r1, #8, #16
|
||||
; CHECK-COMMON: ubfx r1, r1, #8, #7
|
||||
; CHECK-COMMON: str r1, [r0]
|
||||
define arm_aapcscc void @test_shift8_mask7(i32* nocapture %p) {
|
||||
entry:
|
||||
%0 = load i32, i32* %p, align 4
|
||||
%shl = lshr i32 %0, 8
|
||||
%and = and i32 %shl, 127
|
||||
store i32 %and, i32* %p, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_shift9_mask8
|
||||
; CHECK-BE: ldr r1, [r0]
|
||||
; CHECK-COMMON: ldr r1, [r0]
|
||||
; CHECK-COMMON: ubfx r1, r1, #9, #8
|
||||
; CHECK-COMMON: str r1, [r0]
|
||||
define arm_aapcscc void @test_shift9_mask8(i32* nocapture %p) {
|
||||
entry:
|
||||
%0 = load i32, i32* %p, align 4
|
||||
%shl = lshr i32 %0, 9
|
||||
%and = and i32 %shl, 255
|
||||
store i32 %and, i32* %p, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_shift8_mask16
|
||||
; CHECK-ALIGN: ldr r1, [r0]
|
||||
; CHECK-ALIGN: ubfx r1, r1, #8, #16
|
||||
; CHECK-BE: ldrh r1, [r0, #1]
|
||||
; CHECK-ARM: ldrh r1, [r0, #1]
|
||||
; CHECK-THUMB: ldrh.w r1, [r0, #1]
|
||||
; CHECK-COMMON: str r1, [r0]
|
||||
define arm_aapcscc void @test_shift8_mask16(i32* nocapture %p) {
|
||||
entry:
|
||||
@ -245,6 +288,61 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_shift15_mask16
|
||||
; CHECK-COMMON: ldr r1, [r0]
|
||||
; CHECK-COMMON: ubfx r1, r1, #15, #16
|
||||
; CHECK-COMMON: str r1, [r0]
|
||||
define arm_aapcscc void @test_shift15_mask16(i32* nocapture %p) {
|
||||
entry:
|
||||
%0 = load i32, i32* %p, align 4
|
||||
%shl = lshr i32 %0, 15
|
||||
%and = and i32 %shl, 65535
|
||||
store i32 %and, i32* %p, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_shift16_mask15
|
||||
; CHECK-BE: ldrh r1, [r0]
|
||||
; CHECK-COMMON: ldrh r1, [r0, #2]
|
||||
; CHECK-COMMON: bfc r1, #15, #17
|
||||
; CHECK-COMMON: str r1, [r0]
|
||||
define arm_aapcscc void @test_shift16_mask15(i32* nocapture %p) {
|
||||
entry:
|
||||
%0 = load i32, i32* %p, align 4
|
||||
%shl = lshr i32 %0, 16
|
||||
%and = and i32 %shl, 32767
|
||||
store i32 %and, i32* %p, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_shift8_mask24
|
||||
; CHECK-BE: ldr r1, [r0]
|
||||
; CHECK-COMMON: ldr r1, [r0]
|
||||
; CHECK-ARM: lsr r1, r1, #8
|
||||
; CHECK-THUMB: lsrs r1, r1, #8
|
||||
; CHECK-COMMON: str r1, [r0]
|
||||
define arm_aapcscc void @test_shift8_mask24(i32* nocapture %p) {
|
||||
entry:
|
||||
%0 = load i32, i32* %p, align 4
|
||||
%shl = lshr i32 %0, 8
|
||||
%and = and i32 %shl, 16777215
|
||||
store i32 %and, i32* %p, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_shift24_mask16
|
||||
; CHECK-BE: ldrb r1, [r0]
|
||||
; CHECK-COMMON: ldrb r1, [r0, #3]
|
||||
; CHECK-COMMON: str r1, [r0]
|
||||
define arm_aapcscc void @test_shift24_mask16(i32* nocapture %p) {
|
||||
entry:
|
||||
%0 = load i32, i32* %p, align 4
|
||||
%shl = lshr i32 %0, 24
|
||||
%and = and i32 %shl, 65535
|
||||
store i32 %and, i32* %p, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_sext_shift8_mask8
|
||||
; CHECK-BE: ldrb r0, [r0]
|
||||
; CHECK-COMMON: ldrb r0, [r0, #1]
|
||||
|
@ -22,20 +22,17 @@ define i64 @foo(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g, i64 %h)
|
||||
; CHECK-NEXT: movzbl %ah, %eax
|
||||
; CHECK-NEXT: movq %rax, %r10
|
||||
; CHECK-NEXT: movzbl %dh, %edx
|
||||
; CHECK-NEXT: movzbl %ch, %eax
|
||||
; CHECK-NEXT: movq %rax, %r11
|
||||
; CHECK-NEXT: movzbl %ch, %ebp
|
||||
; CHECK-NEXT: movq %r8, %rax
|
||||
; CHECK-NEXT: movzbl %ah, %ecx
|
||||
; CHECK-NEXT: movq %r9, %rax
|
||||
; CHECK-NEXT: movzbl %ah, %ebp
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
|
||||
; CHECK-NEXT: movzbl %ah, %eax
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %ebx
|
||||
; CHECK-NEXT: movzbl %bh, %edi
|
||||
; CHECK-NEXT: movzbl %ah, %ebx
|
||||
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
|
||||
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
|
||||
; CHECK-NEXT: addq %r10, %rsi
|
||||
; CHECK-NEXT: addq %r11, %rdx
|
||||
; CHECK-NEXT: addq %rbp, %rdx
|
||||
; CHECK-NEXT: addq %rsi, %rdx
|
||||
; CHECK-NEXT: addq %rbp, %rcx
|
||||
; CHECK-NEXT: addq %rbx, %rcx
|
||||
; CHECK-NEXT: addq %rdi, %rax
|
||||
; CHECK-NEXT: addq %rcx, %rax
|
||||
; CHECK-NEXT: addq %rdx, %rax
|
||||
@ -57,20 +54,17 @@ define i64 @foo(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g, i64 %h)
|
||||
; GNUX32-NEXT: movzbl %ah, %eax
|
||||
; GNUX32-NEXT: movq %rax, %r10
|
||||
; GNUX32-NEXT: movzbl %dh, %edx
|
||||
; GNUX32-NEXT: movzbl %ch, %eax
|
||||
; GNUX32-NEXT: movq %rax, %r11
|
||||
; GNUX32-NEXT: movzbl %ch, %ebp
|
||||
; GNUX32-NEXT: movq %r8, %rax
|
||||
; GNUX32-NEXT: movzbl %ah, %ecx
|
||||
; GNUX32-NEXT: movq %r9, %rax
|
||||
; GNUX32-NEXT: movzbl %ah, %ebp
|
||||
; GNUX32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; GNUX32-NEXT: movzbl %ah, %eax
|
||||
; GNUX32-NEXT: movl {{[0-9]+}}(%esp), %ebx
|
||||
; GNUX32-NEXT: movzbl %bh, %edi
|
||||
; GNUX32-NEXT: movzbl %ah, %ebx
|
||||
; GNUX32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
|
||||
; GNUX32-NEXT: movzbl {{[0-9]+}}(%esp), %edi
|
||||
; GNUX32-NEXT: addq %r10, %rsi
|
||||
; GNUX32-NEXT: addq %r11, %rdx
|
||||
; GNUX32-NEXT: addq %rbp, %rdx
|
||||
; GNUX32-NEXT: addq %rsi, %rdx
|
||||
; GNUX32-NEXT: addq %rbp, %rcx
|
||||
; GNUX32-NEXT: addq %rbx, %rcx
|
||||
; GNUX32-NEXT: addq %rdi, %rax
|
||||
; GNUX32-NEXT: addq %rcx, %rax
|
||||
; GNUX32-NEXT: addq %rdx, %rax
|
||||
|
Loading…
Reference in New Issue
Block a user