diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index bd127b5a646..47c332a5be4 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2861,6 +2861,14 @@ SDValue DAGCombiner::visitAND(SDNode *N) { } } + // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const) + if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) { + SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), + N0.getOperand(1), false); + if (BSwap.getNode()) + return BSwap; + } + return SDValue(); } @@ -2945,13 +2953,23 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, if (N00 != N10) return SDValue(); - // Make sure everything beyond the low halfword is zero since the SRL 16 - // will clear the top bits. + // Make sure everything beyond the low halfword gets set to zero since the SRL + // 16 will clear the top bits. unsigned OpSizeInBits = VT.getSizeInBits(); - if (DemandHighBits && OpSizeInBits > 16 && - (!LookPassAnd0 || !LookPassAnd1) && - !DAG.MaskedValueIsZero(N10, APInt::getHighBitsSet(OpSizeInBits, 16))) - return SDValue(); + if (DemandHighBits && OpSizeInBits > 16) { + // If the left-shift isn't masked out then the only way this is a bswap is + // if all bits beyond the low 8 are 0. In that case the entire pattern + // reduces to a left shift anyway: leave it for other parts of the combiner. + if (!LookPassAnd0) + return SDValue(); + + // However, if the right shift isn't masked out then it might be because + // it's not needed. See if we can spot that too. + if (!LookPassAnd1 && + !DAG.MaskedValueIsZero( + N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16))) + return SDValue(); + } SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00); if (OpSizeInBits > 16) diff --git a/test/CodeGen/X86/bswap.ll b/test/CodeGen/X86/bswap.ll index 9e46592a4ac..e6a456c39dd 100644 --- a/test/CodeGen/X86/bswap.ll +++ b/test/CodeGen/X86/bswap.ll @@ -1,6 +1,7 @@ ; bswap should be constant folded when it is passed a constant argument ; RUN: llc < %s -march=x86 -mcpu=i686 | FileCheck %s +; RUN: llc < %s -march=x86-64 | FileCheck %s --check-prefix=CHECK64 declare i16 @llvm.bswap.i16(i16) @@ -11,6 +12,9 @@ declare i64 @llvm.bswap.i64(i64) define i16 @W(i16 %A) { ; CHECK-LABEL: W: ; CHECK: rolw $8, %ax + +; CHECK64-LABEL: W: +; CHECK64: rolw $8, % %Z = call i16 @llvm.bswap.i16( i16 %A ) ; [#uses=1] ret i16 %Z } @@ -18,6 +22,9 @@ define i16 @W(i16 %A) { define i32 @X(i32 %A) { ; CHECK-LABEL: X: ; CHECK: bswapl %eax + +; CHECK64-LABEL: X: +; CHECK64: bswapl % %Z = call i32 @llvm.bswap.i32( i32 %A ) ; [#uses=1] ret i32 %Z } @@ -26,6 +33,9 @@ define i64 @Y(i64 %A) { ; CHECK-LABEL: Y: ; CHECK: bswapl %eax ; CHECK: bswapl %edx + +; CHECK64-LABEL: Y: +; CHECK64: bswapq % %Z = call i64 @llvm.bswap.i64( i64 %A ) ; [#uses=1] ret i64 %Z } @@ -33,9 +43,13 @@ define i64 @Y(i64 %A) { ; rdar://9164521 define i32 @test1(i32 %a) nounwind readnone { entry: -; CHECK: test1 -; CHECK: bswapl %eax -; CHECK: shrl $16, %eax +; CHECK-LABEL: test1: +; CHECK: bswapl [[REG:%.*]] +; CHECK: shrl $16, [[REG]] + +; CHECK64-LABEL: test1: +; CHECK64: bswapl [[REG:%.*]] +; CHECK64: shrl $16, [[REG]] %and = lshr i32 %a, 8 %shr3 = and i32 %and, 255 %and2 = shl i32 %a, 8 @@ -46,9 +60,13 @@ entry: define i32 @test2(i32 %a) nounwind readnone { entry: -; CHECK: test2 -; CHECK: bswapl %eax -; CHECK: sarl $16, %eax +; CHECK-LABEL: test2: +; CHECK: bswapl [[REG:%.*]] +; CHECK: sarl $16, [[REG]] + +; CHECK64-LABEL: test2: +; CHECK64: bswapl [[REG:%.*]] +; CHECK64: sarl $16, [[REG]] %and = lshr i32 %a, 8 %shr4 = and i32 %and, 255 %and2 = shl i32 %a, 8 @@ -57,3 +75,80 @@ entry: %conv3 = ashr exact i32 %sext, 16 ret i32 %conv3 } + +@var8 = global i8 0 +@var16 = global i16 0 + +; The "shl" below can move bits into the high parts of the value, so the +; operation is not a "bswap, shr" pair. + +; rdar://problem/14814049 +define i64 @not_bswap() { +; CHECK-LABEL: not_bswap: +; CHECK-NOT: bswapl +; CHECK: ret + +; CHECK64-LABEL: not_bswap: +; CHECK64-NOT: bswapq +; CHECK64: ret + %init = load i16* @var16 + %big = zext i16 %init to i64 + + %hishifted = lshr i64 %big, 8 + %loshifted = shl i64 %big, 8 + + %notswapped = or i64 %hishifted, %loshifted + + ret i64 %notswapped +} + +; This time, the lshr (and subsequent or) is completely useless. While it's +; technically correct to convert this into a "bswap, shr", it's suboptimal. A +; simple shl works better. + +define i64 @not_useful_bswap() { +; CHECK-LABEL: not_useful_bswap: +; CHECK-NOT: bswapl +; CHECK: ret + +; CHECK64-LABEL: not_useful_bswap: +; CHECK64-NOT: bswapq +; CHECK64: ret + + %init = load i8* @var8 + %big = zext i8 %init to i64 + + %hishifted = lshr i64 %big, 8 + %loshifted = shl i64 %big, 8 + + %notswapped = or i64 %hishifted, %loshifted + + ret i64 %notswapped +} + +; Finally, it *is* OK to just mask off the shl if we know that the value is zero +; beyond 16 bits anyway. This is a legitimate bswap. + +define i64 @finally_useful_bswap() { +; CHECK-LABEL: finally_useful_bswap: +; CHECK: bswapl [[REG:%.*]] +; CHECK: shrl $16, [[REG]] +; CHECK: ret + +; CHECK64-LABEL: finally_useful_bswap: +; CHECK64: bswapq [[REG:%.*]] +; CHECK64: shrq $48, [[REG]] +; CHECK64: ret + + %init = load i16* @var16 + %big = zext i16 %init to i64 + + %hishifted = lshr i64 %big, 8 + %lomasked = and i64 %big, 255 + %loshifted = shl i64 %lomasked, 8 + + %swapped = or i64 %hishifted, %loshifted + + ret i64 %swapped +} +