1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 19:12:56 +02:00

Prevent a DAGCombine from firing where there are two uses of

a combined-away node and the result of the combine isn't substantially
smaller than the input, it's just canonicalized. This is the first part
of a significant (7%) performance gain for Snappy's hot decompression
loop.

llvm-svn: 147604
This commit is contained in:
Chandler Carruth 2012-01-05 11:05:55 +00:00
parent 75ff869d6a
commit 90f124f420
2 changed files with 23 additions and 1 deletions

View File

@ -3331,7 +3331,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
// (and (srl x, (sub c1, c2), MASK)
if (N1C && N0.getOpcode() == ISD::SRL &&
// Only fold this if the inner shift has no other uses -- if it does, folding
// this will increase the total number of instructions.
if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
N0.getOperand(1).getOpcode() == ISD::Constant) {
uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
if (c1 < VT.getSizeInBits()) {

View File

@ -48,3 +48,23 @@ entry:
%tmp512 = lshr i32 %tmp4, 24
ret i32 %tmp512
}
define i64 @test5(i16 %i, i32* %arr) {
; Ensure that we don't fold away shifts which have multiple uses, as they are
; just re-introduced for the second use.
; CHECK: test5:
; CHECK-NOT: shrl
; CHECK: shrl $11
; CHECK-NOT: shrl
; CHECK: ret
entry:
%i.zext = zext i16 %i to i32
%index = lshr i32 %i.zext, 11
%index.zext = zext i32 %index to i64
%val.ptr = getelementptr inbounds i32* %arr, i64 %index.zext
%val = load i32* %val.ptr
%val.zext = zext i32 %val to i64
%sum = add i64 %val.zext, %index.zext
ret i64 %sum
}