mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[DAGCombiner] try to convert opposing shifts to casts
[DAGCombiner] try to convert opposing shifts to casts.

This reverses a questionable IR canonicalization when a truncate is free:

  sra (add (shl X, N1C), AddC), N1C --> sext (add (trunc X to (width - N1C)), AddC')

Proof: https://rise4fun.com/Alive/slRC

More details in PR42644: https://bugs.llvm.org/show_bug.cgi?id=42644

I limited this to pre-legalization for code simplicity, because that should be enough to reverse the IR patterns. I don't have any evidence (no regression test diffs) that we need to try this later.

Differential Revision: https://reviews.llvm.org/D65607

llvm-svn: 367710
This commit is contained in:
parent
3ca8e94f65
commit
6449e66c97
@ -7616,6 +7616,32 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
|
||||
}
|
||||
}
|
||||
|
||||
// We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
|
||||
// sra (add (shl X, N1C), AddC), N1C -->
|
||||
// sext (add (trunc X to (width - N1C)), AddC')
|
||||
if (!LegalOperations && N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
|
||||
N0.getOperand(0).getOpcode() == ISD::SHL &&
|
||||
N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
|
||||
if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
|
||||
SDValue Shl = N0.getOperand(0);
|
||||
// Determine what the truncate's type would be and ask the target if that
|
||||
// is a free operation.
|
||||
LLVMContext &Ctx = *DAG.getContext();
|
||||
unsigned ShiftAmt = N1C->getZExtValue();
|
||||
EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
|
||||
if (VT.isVector())
|
||||
TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
|
||||
if (TLI.isTruncateFree(VT, TruncVT)) {
|
||||
SDLoc DL(N);
|
||||
SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
|
||||
SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
|
||||
trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
|
||||
SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
|
||||
return DAG.getSExtOrTrunc(Add, DL, VT);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
|
||||
if (N1.getOpcode() == ISD::TRUNCATE &&
|
||||
N1.getOperand(0).getOpcode() == ISD::AND) {
|
||||
|
@ -78,9 +78,8 @@ entry:
|
||||
define i64 @ashr_add_shl_i32(i64 %r) {
|
||||
; CHECK-LABEL: ashr_add_shl_i32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov x8, #4294967296
|
||||
; CHECK-NEXT: add x8, x8, x0, lsl #32
|
||||
; CHECK-NEXT: asr x0, x8, #32
|
||||
; CHECK-NEXT: add w8, w0, #1 // =1
|
||||
; CHECK-NEXT: sxtw x0, w8
|
||||
; CHECK-NEXT: ret
|
||||
%conv = shl i64 %r, 32
|
||||
%sext = add i64 %conv, 4294967296
|
||||
@ -91,9 +90,8 @@ define i64 @ashr_add_shl_i32(i64 %r) {
|
||||
define i64 @ashr_add_shl_i8(i64 %r) {
|
||||
; CHECK-LABEL: ashr_add_shl_i8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov x8, #72057594037927936
|
||||
; CHECK-NEXT: add x8, x8, x0, lsl #56
|
||||
; CHECK-NEXT: asr x0, x8, #56
|
||||
; CHECK-NEXT: add w8, w0, #1 // =1
|
||||
; CHECK-NEXT: sxtb x0, w8
|
||||
; CHECK-NEXT: ret
|
||||
%conv = shl i64 %r, 56
|
||||
%sext = add i64 %conv, 72057594037927936
|
||||
|
@ -168,10 +168,8 @@ define i64 @ashr_add_shl_i32(i64 %r) nounwind {
|
||||
;
|
||||
; X64-LABEL: ashr_add_shl_i32:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: shlq $32, %rdi
|
||||
; X64-NEXT: movabsq $4294967296, %rax # imm = 0x100000000
|
||||
; X64-NEXT: addq %rdi, %rax
|
||||
; X64-NEXT: sarq $32, %rax
|
||||
; X64-NEXT: incl %edi
|
||||
; X64-NEXT: movslq %edi, %rax
|
||||
; X64-NEXT: retq
|
||||
%conv = shl i64 %r, 32
|
||||
%sext = add i64 %conv, 4294967296
|
||||
@ -182,20 +180,17 @@ define i64 @ashr_add_shl_i32(i64 %r) nounwind {
|
||||
define i64 @ashr_add_shl_i8(i64 %r) nounwind {
|
||||
; X32-LABEL: ashr_add_shl_i8:
|
||||
; X32: # %bb.0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X32-NEXT: shll $24, %edx
|
||||
; X32-NEXT: addl $33554432, %edx # imm = 0x2000000
|
||||
; X32-NEXT: movl %edx, %eax
|
||||
; X32-NEXT: sarl $24, %eax
|
||||
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
|
||||
; X32-NEXT: addb $2, %al
|
||||
; X32-NEXT: movsbl %al, %eax
|
||||
; X32-NEXT: movl %eax, %edx
|
||||
; X32-NEXT: sarl $31, %edx
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: ashr_add_shl_i8:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: shlq $56, %rdi
|
||||
; X64-NEXT: movabsq $144115188075855872, %rax # imm = 0x200000000000000
|
||||
; X64-NEXT: addq %rdi, %rax
|
||||
; X64-NEXT: sarq $56, %rax
|
||||
; X64-NEXT: addb $2, %dil
|
||||
; X64-NEXT: movsbq %dil, %rax
|
||||
; X64-NEXT: retq
|
||||
%conv = shl i64 %r, 56
|
||||
%sext = add i64 %conv, 144115188075855872
|
||||
@ -209,34 +204,31 @@ define <4 x i32> @ashr_add_shl_v4i8(<4 x i32> %r) nounwind {
|
||||
; X32-NEXT: pushl %edi
|
||||
; X32-NEXT: pushl %esi
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
|
||||
; X32-NEXT: shll $24, %edi
|
||||
; X32-NEXT: shll $24, %esi
|
||||
; X32-NEXT: shll $24, %edx
|
||||
; X32-NEXT: shll $24, %ecx
|
||||
; X32-NEXT: addl $16777216, %ecx # imm = 0x1000000
|
||||
; X32-NEXT: addl $16777216, %edx # imm = 0x1000000
|
||||
; X32-NEXT: addl $16777216, %esi # imm = 0x1000000
|
||||
; X32-NEXT: addl $16777216, %edi # imm = 0x1000000
|
||||
; X32-NEXT: sarl $24, %edi
|
||||
; X32-NEXT: sarl $24, %esi
|
||||
; X32-NEXT: sarl $24, %edx
|
||||
; X32-NEXT: sarl $24, %ecx
|
||||
; X32-NEXT: movb {{[0-9]+}}(%esp), %cl
|
||||
; X32-NEXT: movb {{[0-9]+}}(%esp), %dl
|
||||
; X32-NEXT: movb {{[0-9]+}}(%esp), %ch
|
||||
; X32-NEXT: movb {{[0-9]+}}(%esp), %dh
|
||||
; X32-NEXT: incb %dh
|
||||
; X32-NEXT: movsbl %dh, %esi
|
||||
; X32-NEXT: incb %ch
|
||||
; X32-NEXT: movsbl %ch, %edi
|
||||
; X32-NEXT: incb %dl
|
||||
; X32-NEXT: movsbl %dl, %edx
|
||||
; X32-NEXT: incb %cl
|
||||
; X32-NEXT: movsbl %cl, %ecx
|
||||
; X32-NEXT: movl %ecx, 12(%eax)
|
||||
; X32-NEXT: movl %edx, 8(%eax)
|
||||
; X32-NEXT: movl %esi, 4(%eax)
|
||||
; X32-NEXT: movl %edi, (%eax)
|
||||
; X32-NEXT: movl %edi, 4(%eax)
|
||||
; X32-NEXT: movl %esi, (%eax)
|
||||
; X32-NEXT: popl %esi
|
||||
; X32-NEXT: popl %edi
|
||||
; X32-NEXT: retl $4
|
||||
;
|
||||
; X64-LABEL: ashr_add_shl_v4i8:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: pcmpeqd %xmm1, %xmm1
|
||||
; X64-NEXT: psubd %xmm1, %xmm0
|
||||
; X64-NEXT: pslld $24, %xmm0
|
||||
; X64-NEXT: paddd {{.*}}(%rip), %xmm0
|
||||
; X64-NEXT: psrad $24, %xmm0
|
||||
; X64-NEXT: retq
|
||||
%conv = shl <4 x i32> %r, <i32 24, i32 24, i32 24, i32 24>
|
||||
|
Loading…
x
Reference in New Issue
Block a user