1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 11:02:59 +02:00

AVX-512: Fixed BT instruction selection.

The condition expression (a >> n) & 1 is converted to a "bt a, n" instruction. This works on all Intel targets.
However, on AVX-512 it was broken because the expression is rewritten to (truncate (a >> n) to i1).

I added the new sequence (truncate (a >> n) to i1) to the BT pattern.

Differential Revision: https://reviews.llvm.org/D22354

llvm-svn: 275950
This commit is contained in:
Elena Demikhovsky 2016-07-19 07:14:21 +00:00
parent b649360c0a
commit 37e609e198
3 changed files with 139 additions and 477 deletions

View File

@ -1468,6 +1468,10 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
// Will get folded away.
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
if (MinBits == 1 && C1 == 1)
// Invert the condition.
return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
return DAG.getSetCC(dl, VT, Trunc, C, Cond);
}

View File

@ -15010,9 +15010,32 @@ unsigned X86TargetLowering::combineRepeatedFPDivisors() const {
return 2;
}
/// Build an X86ISD::BT node that tests bit \p BitNo of \p Src, and wrap it in
/// an X86ISD::SETCC whose condition is chosen from \p CC: ISD::SETEQ selects
/// X86::COND_AE, any other condition code selects X86::COND_B.
static SDValue getBitTestCondition(SDValue Src, SDValue BitNo, ISD::CondCode CC,
                                   const SDLoc &dl, SelectionDAG &DAG) {
  // BT has no i8 form, so an i8 source is any-extended to i32. This is safe
  // because the shift amount is in-range-or-undefined. i16 is widened as well:
  // the i16 encoding is larger than the i32 one, so i32 is preferred for
  // performance / code size.
  const bool IsNarrowSrc =
      Src.getValueType() == MVT::i8 || Src.getValueType() == MVT::i16;
  if (IsNarrowSrc)
    Src = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Src);

  // Both BT operands must have the same type. BT ignores the high bits of the
  // bit index (like shifts do), so an any-extend of the index is sufficient.
  if (Src.getValueType() != BitNo.getValueType())
    BitNo = DAG.getNode(ISD::ANY_EXTEND, dl, Src.getValueType(), BitNo);

  SDValue BitTest = DAG.getNode(X86ISD::BT, dl, MVT::i32, Src, BitNo);

  // Pick the X86 condition code that corresponds to the requested compare.
  X86::CondCode X86CC = X86::COND_B;
  if (CC == ISD::SETEQ)
    X86CC = X86::COND_AE;

  SDValue CCOperand = DAG.getConstant(X86CC, dl, MVT::i8);
  return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CCOperand, BitTest);
}
/// Result of 'and' is compared against zero. Change to a BT node if possible.
SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
const SDLoc &dl, SelectionDAG &DAG) const {
static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
const SDLoc &dl, SelectionDAG &DAG) {
SDValue Op0 = And.getOperand(0);
SDValue Op1 = And.getOperand(1);
if (Op0.getOpcode() == ISD::TRUNCATE)
@ -15055,27 +15078,35 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
}
}
if (LHS.getNode()) {
// If LHS is i8, promote it to i32 with any_extend. There is no i8 BT
// instruction. Since the shift amount is in-range-or-undefined, we know
// that doing a bittest on the i32 value is ok. We extend to i32 because
// the encoding for the i16 version is larger than the i32 version.
// Also promote i16 to i32 for performance / code size reason.
if (LHS.getValueType() == MVT::i8 ||
LHS.getValueType() == MVT::i16)
LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
if (LHS.getNode())
return getBitTestCondition(LHS, RHS, CC, dl, DAG);
// If the operand types disagree, extend the shift amount to match. Since
// BT ignores high bits (like shifts) we can use anyextend.
if (LHS.getValueType() != RHS.getValueType())
RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LHS.getValueType(), RHS);
return SDValue();
}
SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS);
X86::CondCode Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
DAG.getConstant(Cond, dl, MVT::i8), BT);
}
// Match (truncate (srl X, N) to i1) and turn it into (bt X, N).
// Returns an empty SDValue when the truncate's operand is not an SRL node.
static SDValue LowerTruncateToBT(SDValue Op, ISD::CondCode CC,
                                 const SDLoc &dl, SelectionDAG &DAG) {
  assert(Op.getOpcode() == ISD::TRUNCATE && Op.getValueType() == MVT::i1 &&
         "Expected TRUNCATE to i1 node");

  SDValue Shift = Op.getOperand(0);
  if (Shift.getOpcode() == ISD::SRL) {
    // Operand 0 of the shift is the value being tested, operand 1 the bit
    // index.
    return getBitTestCondition(Shift.getOperand(0), Shift.getOperand(1), CC,
                               dl, DAG);
  }

  return SDValue();
}
/// \p Op is the result of an 'and' or a 'trunc to i1' that is compared against
/// zero. Try to rewrite it as a BT node; an empty SDValue is returned when
/// \p Op is neither of those forms or the form-specific helper rejects it.
SDValue X86TargetLowering::LowerToBT(SDValue Op, ISD::CondCode CC,
                                     const SDLoc &dl, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::AND:
    return LowerAndToBT(Op, CC, dl, DAG);
  case ISD::TRUNCATE:
    // Only a truncate down to a single bit is a bit test.
    if (Op.getValueType() == MVT::i1)
      return LowerTruncateToBT(Op, CC, dl, DAG);
    break;
  default:
    break;
  }
  return SDValue();
}
@ -15606,8 +15637,8 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
// Lower (X & (1 << N)) == 0 to BT(X, N).
// Lower ((X >>u N) & 1) != 0 to BT(X, N).
// Lower ((X >>s N) & 1) != 0 to BT(X, N).
if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() &&
isNullConstant(Op1) &&
// Lower (trunc (X >> N) to i1) to BT(X, N).
if (Op0.hasOneUse() && isNullConstant(Op1) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
if (SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG)) {
if (VT == MVT::i1) {
@ -16798,9 +16829,8 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
// Look past the truncate if the high bits are known zero.
Cond = getCondAfterTruncWithZeroHighBitsInput(Cond, DAG);
// We know the result of AND is compared against zero. Try to match
// it to BT.
if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
// We know the result is compared against zero. Try to match it to BT.
if (Cond.hasOneUse()) {
if (SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG)) {
CC = NewSetCC.getOperand(0);
Cond = NewSetCC.getOperand(1);

View File

@ -1,7 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck --check-prefix=CHECK --check-prefix=PENTIUM4 %s
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck --check-prefix=CHECK --check-prefix=AVX-512 %s
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s
; PR3253
; The register+memory form of the BT instruction should be usable on
@ -21,29 +20,11 @@
; - The and can be commuted.
define void @test2(i32 %x, i32 %n) nounwind {
; PENTIUM4-LABEL: test2:
; PENTIUM4: # BB#0: # %entry
; PENTIUM4-NEXT: btl %esi, %edi
; PENTIUM4-NEXT: jb .LBB0_2
; PENTIUM4-NEXT: # BB#1: # %bb
; PENTIUM4-NEXT: pushq %rax
; PENTIUM4-NEXT: callq foo
; PENTIUM4-NEXT: popq %rax
; PENTIUM4-NEXT: .LBB0_2: # %UnifiedReturnBlock
; PENTIUM4-NEXT: retq
; CHECK-LABEL: test2:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jb .LBB0_2
;
; AVX-512-LABEL: test2:
; AVX-512: # BB#0: # %entry
; AVX-512-NEXT: movl %esi, %ecx
; AVX-512-NEXT: shrl %cl, %edi
; AVX-512-NEXT: testb $1, %dil
; AVX-512-NEXT: jne .LBB0_2
; AVX-512-NEXT: # BB#1: # %bb
; AVX-512-NEXT: pushq %rax
; AVX-512-NEXT: callq foo
; AVX-512-NEXT: popq %rax
; AVX-512-NEXT: .LBB0_2: # %UnifiedReturnBlock
; AVX-512-NEXT: retq
entry:
%tmp29 = lshr i32 %x, %n
%tmp3 = and i32 %tmp29, 1
@ -59,29 +40,11 @@ UnifiedReturnBlock:
}
define void @test2b(i32 %x, i32 %n) nounwind {
; PENTIUM4-LABEL: test2b:
; PENTIUM4: # BB#0: # %entry
; PENTIUM4-NEXT: btl %esi, %edi
; PENTIUM4-NEXT: jb .LBB1_2
; PENTIUM4-NEXT: # BB#1: # %bb
; PENTIUM4-NEXT: pushq %rax
; PENTIUM4-NEXT: callq foo
; PENTIUM4-NEXT: popq %rax
; PENTIUM4-NEXT: .LBB1_2: # %UnifiedReturnBlock
; PENTIUM4-NEXT: retq
; CHECK-LABEL: test2b:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jb .LBB1_2
;
; AVX-512-LABEL: test2b:
; AVX-512: # BB#0: # %entry
; AVX-512-NEXT: movl %esi, %ecx
; AVX-512-NEXT: shrl %cl, %edi
; AVX-512-NEXT: testb $1, %dil
; AVX-512-NEXT: jne .LBB1_2
; AVX-512-NEXT: # BB#1: # %bb
; AVX-512-NEXT: pushq %rax
; AVX-512-NEXT: callq foo
; AVX-512-NEXT: popq %rax
; AVX-512-NEXT: .LBB1_2: # %UnifiedReturnBlock
; AVX-512-NEXT: retq
entry:
%tmp29 = lshr i32 %x, %n
%tmp3 = and i32 1, %tmp29
@ -97,29 +60,11 @@ UnifiedReturnBlock:
}
define void @atest2(i32 %x, i32 %n) nounwind {
; PENTIUM4-LABEL: atest2:
; PENTIUM4: # BB#0: # %entry
; PENTIUM4-NEXT: btl %esi, %edi
; PENTIUM4-NEXT: jb .LBB2_2
; PENTIUM4-NEXT: # BB#1: # %bb
; PENTIUM4-NEXT: pushq %rax
; PENTIUM4-NEXT: callq foo
; PENTIUM4-NEXT: popq %rax
; PENTIUM4-NEXT: .LBB2_2: # %UnifiedReturnBlock
; PENTIUM4-NEXT: retq
; CHECK-LABEL: atest2:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jb .LBB2_2
;
; AVX-512-LABEL: atest2:
; AVX-512: # BB#0: # %entry
; AVX-512-NEXT: movl %esi, %ecx
; AVX-512-NEXT: shrl %cl, %edi
; AVX-512-NEXT: testb $1, %dil
; AVX-512-NEXT: jne .LBB2_2
; AVX-512-NEXT: # BB#1: # %bb
; AVX-512-NEXT: pushq %rax
; AVX-512-NEXT: callq foo
; AVX-512-NEXT: popq %rax
; AVX-512-NEXT: .LBB2_2: # %UnifiedReturnBlock
; AVX-512-NEXT: retq
entry:
%tmp29 = ashr i32 %x, %n
%tmp3 = and i32 %tmp29, 1
@ -135,29 +80,11 @@ UnifiedReturnBlock:
}
define void @atest2b(i32 %x, i32 %n) nounwind {
; PENTIUM4-LABEL: atest2b:
; PENTIUM4: # BB#0: # %entry
; PENTIUM4-NEXT: btl %esi, %edi
; PENTIUM4-NEXT: jb .LBB3_2
; PENTIUM4-NEXT: # BB#1: # %bb
; PENTIUM4-NEXT: pushq %rax
; PENTIUM4-NEXT: callq foo
; PENTIUM4-NEXT: popq %rax
; PENTIUM4-NEXT: .LBB3_2: # %UnifiedReturnBlock
; PENTIUM4-NEXT: retq
; CHECK-LABEL: atest2b:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jb .LBB3_2
;
; AVX-512-LABEL: atest2b:
; AVX-512: # BB#0: # %entry
; AVX-512-NEXT: movl %esi, %ecx
; AVX-512-NEXT: shrl %cl, %edi
; AVX-512-NEXT: testb $1, %dil
; AVX-512-NEXT: jne .LBB3_2
; AVX-512-NEXT: # BB#1: # %bb
; AVX-512-NEXT: pushq %rax
; AVX-512-NEXT: callq foo
; AVX-512-NEXT: popq %rax
; AVX-512-NEXT: .LBB3_2: # %UnifiedReturnBlock
; AVX-512-NEXT: retq
entry:
%tmp29 = ashr i32 %x, %n
%tmp3 = and i32 1, %tmp29
@ -177,12 +104,7 @@ define void @test3(i32 %x, i32 %n) nounwind {
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jb .LBB4_2
; CHECK-NEXT: # BB#1: # %bb
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq foo
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .LBB4_2: # %UnifiedReturnBlock
; CHECK-NEXT: retq
;
entry:
%tmp29 = shl i32 1, %n
%tmp3 = and i32 %tmp29, %x
@ -202,12 +124,7 @@ define void @test3b(i32 %x, i32 %n) nounwind {
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jb .LBB5_2
; CHECK-NEXT: # BB#1: # %bb
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq foo
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .LBB5_2: # %UnifiedReturnBlock
; CHECK-NEXT: retq
;
entry:
%tmp29 = shl i32 1, %n
%tmp3 = and i32 %x, %tmp29
@ -223,29 +140,11 @@ UnifiedReturnBlock:
}
define void @testne2(i32 %x, i32 %n) nounwind {
; PENTIUM4-LABEL: testne2:
; PENTIUM4: # BB#0: # %entry
; PENTIUM4-NEXT: btl %esi, %edi
; PENTIUM4-NEXT: jae .LBB6_2
; PENTIUM4-NEXT: # BB#1: # %bb
; PENTIUM4-NEXT: pushq %rax
; PENTIUM4-NEXT: callq foo
; PENTIUM4-NEXT: popq %rax
; PENTIUM4-NEXT: .LBB6_2: # %UnifiedReturnBlock
; PENTIUM4-NEXT: retq
; CHECK-LABEL: testne2:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jae .LBB6_2
;
; AVX-512-LABEL: testne2:
; AVX-512: # BB#0: # %entry
; AVX-512-NEXT: movl %esi, %ecx
; AVX-512-NEXT: shrl %cl, %edi
; AVX-512-NEXT: testb $1, %dil
; AVX-512-NEXT: je .LBB6_2
; AVX-512-NEXT: # BB#1: # %bb
; AVX-512-NEXT: pushq %rax
; AVX-512-NEXT: callq foo
; AVX-512-NEXT: popq %rax
; AVX-512-NEXT: .LBB6_2: # %UnifiedReturnBlock
; AVX-512-NEXT: retq
entry:
%tmp29 = lshr i32 %x, %n
%tmp3 = and i32 %tmp29, 1
@ -261,29 +160,11 @@ UnifiedReturnBlock:
}
define void @testne2b(i32 %x, i32 %n) nounwind {
; PENTIUM4-LABEL: testne2b:
; PENTIUM4: # BB#0: # %entry
; PENTIUM4-NEXT: btl %esi, %edi
; PENTIUM4-NEXT: jae .LBB7_2
; PENTIUM4-NEXT: # BB#1: # %bb
; PENTIUM4-NEXT: pushq %rax
; PENTIUM4-NEXT: callq foo
; PENTIUM4-NEXT: popq %rax
; PENTIUM4-NEXT: .LBB7_2: # %UnifiedReturnBlock
; PENTIUM4-NEXT: retq
; CHECK-LABEL: testne2b:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jae .LBB7_2
;
; AVX-512-LABEL: testne2b:
; AVX-512: # BB#0: # %entry
; AVX-512-NEXT: movl %esi, %ecx
; AVX-512-NEXT: shrl %cl, %edi
; AVX-512-NEXT: testb $1, %dil
; AVX-512-NEXT: je .LBB7_2
; AVX-512-NEXT: # BB#1: # %bb
; AVX-512-NEXT: pushq %rax
; AVX-512-NEXT: callq foo
; AVX-512-NEXT: popq %rax
; AVX-512-NEXT: .LBB7_2: # %UnifiedReturnBlock
; AVX-512-NEXT: retq
entry:
%tmp29 = lshr i32 %x, %n
%tmp3 = and i32 1, %tmp29
@ -299,29 +180,11 @@ UnifiedReturnBlock:
}
define void @atestne2(i32 %x, i32 %n) nounwind {
; PENTIUM4-LABEL: atestne2:
; PENTIUM4: # BB#0: # %entry
; PENTIUM4-NEXT: btl %esi, %edi
; PENTIUM4-NEXT: jae .LBB8_2
; PENTIUM4-NEXT: # BB#1: # %bb
; PENTIUM4-NEXT: pushq %rax
; PENTIUM4-NEXT: callq foo
; PENTIUM4-NEXT: popq %rax
; PENTIUM4-NEXT: .LBB8_2: # %UnifiedReturnBlock
; PENTIUM4-NEXT: retq
; CHECK-LABEL: atestne2:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jae .LBB8_2
;
; AVX-512-LABEL: atestne2:
; AVX-512: # BB#0: # %entry
; AVX-512-NEXT: movl %esi, %ecx
; AVX-512-NEXT: shrl %cl, %edi
; AVX-512-NEXT: testb $1, %dil
; AVX-512-NEXT: je .LBB8_2
; AVX-512-NEXT: # BB#1: # %bb
; AVX-512-NEXT: pushq %rax
; AVX-512-NEXT: callq foo
; AVX-512-NEXT: popq %rax
; AVX-512-NEXT: .LBB8_2: # %UnifiedReturnBlock
; AVX-512-NEXT: retq
entry:
%tmp29 = ashr i32 %x, %n
%tmp3 = and i32 %tmp29, 1
@ -337,29 +200,11 @@ UnifiedReturnBlock:
}
define void @atestne2b(i32 %x, i32 %n) nounwind {
; PENTIUM4-LABEL: atestne2b:
; PENTIUM4: # BB#0: # %entry
; PENTIUM4-NEXT: btl %esi, %edi
; PENTIUM4-NEXT: jae .LBB9_2
; PENTIUM4-NEXT: # BB#1: # %bb
; PENTIUM4-NEXT: pushq %rax
; PENTIUM4-NEXT: callq foo
; PENTIUM4-NEXT: popq %rax
; PENTIUM4-NEXT: .LBB9_2: # %UnifiedReturnBlock
; PENTIUM4-NEXT: retq
; CHECK-LABEL: atestne2b:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jae .LBB9_2
;
; AVX-512-LABEL: atestne2b:
; AVX-512: # BB#0: # %entry
; AVX-512-NEXT: movl %esi, %ecx
; AVX-512-NEXT: shrl %cl, %edi
; AVX-512-NEXT: testb $1, %dil
; AVX-512-NEXT: je .LBB9_2
; AVX-512-NEXT: # BB#1: # %bb
; AVX-512-NEXT: pushq %rax
; AVX-512-NEXT: callq foo
; AVX-512-NEXT: popq %rax
; AVX-512-NEXT: .LBB9_2: # %UnifiedReturnBlock
; AVX-512-NEXT: retq
entry:
%tmp29 = ashr i32 %x, %n
%tmp3 = and i32 1, %tmp29
@ -379,12 +224,7 @@ define void @testne3(i32 %x, i32 %n) nounwind {
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jae .LBB10_2
; CHECK-NEXT: # BB#1: # %bb
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq foo
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .LBB10_2: # %UnifiedReturnBlock
; CHECK-NEXT: retq
;
entry:
%tmp29 = shl i32 1, %n
%tmp3 = and i32 %tmp29, %x
@ -404,12 +244,7 @@ define void @testne3b(i32 %x, i32 %n) nounwind {
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jae .LBB11_2
; CHECK-NEXT: # BB#1: # %bb
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq foo
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .LBB11_2: # %UnifiedReturnBlock
; CHECK-NEXT: retq
;
entry:
%tmp29 = shl i32 1, %n
%tmp3 = and i32 %x, %tmp29
@ -425,29 +260,11 @@ UnifiedReturnBlock:
}
define void @query2(i32 %x, i32 %n) nounwind {
; PENTIUM4-LABEL: query2:
; PENTIUM4: # BB#0: # %entry
; PENTIUM4-NEXT: btl %esi, %edi
; PENTIUM4-NEXT: jae .LBB12_2
; PENTIUM4-NEXT: # BB#1: # %bb
; PENTIUM4-NEXT: pushq %rax
; PENTIUM4-NEXT: callq foo
; PENTIUM4-NEXT: popq %rax
; PENTIUM4-NEXT: .LBB12_2: # %UnifiedReturnBlock
; PENTIUM4-NEXT: retq
; CHECK-LABEL: query2:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jae .LBB12_2
;
; AVX-512-LABEL: query2:
; AVX-512: # BB#0: # %entry
; AVX-512-NEXT: movl %esi, %ecx
; AVX-512-NEXT: shrl %cl, %edi
; AVX-512-NEXT: testb $1, %dil
; AVX-512-NEXT: je .LBB12_2
; AVX-512-NEXT: # BB#1: # %bb
; AVX-512-NEXT: pushq %rax
; AVX-512-NEXT: callq foo
; AVX-512-NEXT: popq %rax
; AVX-512-NEXT: .LBB12_2: # %UnifiedReturnBlock
; AVX-512-NEXT: retq
entry:
%tmp29 = lshr i32 %x, %n
%tmp3 = and i32 %tmp29, 1
@ -463,29 +280,11 @@ UnifiedReturnBlock:
}
define void @query2b(i32 %x, i32 %n) nounwind {
; PENTIUM4-LABEL: query2b:
; PENTIUM4: # BB#0: # %entry
; PENTIUM4-NEXT: btl %esi, %edi
; PENTIUM4-NEXT: jae .LBB13_2
; PENTIUM4-NEXT: # BB#1: # %bb
; PENTIUM4-NEXT: pushq %rax
; PENTIUM4-NEXT: callq foo
; PENTIUM4-NEXT: popq %rax
; PENTIUM4-NEXT: .LBB13_2: # %UnifiedReturnBlock
; PENTIUM4-NEXT: retq
; CHECK-LABEL: query2b:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jae .LBB13_2
;
; AVX-512-LABEL: query2b:
; AVX-512: # BB#0: # %entry
; AVX-512-NEXT: movl %esi, %ecx
; AVX-512-NEXT: shrl %cl, %edi
; AVX-512-NEXT: testb $1, %dil
; AVX-512-NEXT: je .LBB13_2
; AVX-512-NEXT: # BB#1: # %bb
; AVX-512-NEXT: pushq %rax
; AVX-512-NEXT: callq foo
; AVX-512-NEXT: popq %rax
; AVX-512-NEXT: .LBB13_2: # %UnifiedReturnBlock
; AVX-512-NEXT: retq
entry:
%tmp29 = lshr i32 %x, %n
%tmp3 = and i32 1, %tmp29
@ -501,29 +300,11 @@ UnifiedReturnBlock:
}
define void @aquery2(i32 %x, i32 %n) nounwind {
; PENTIUM4-LABEL: aquery2:
; PENTIUM4: # BB#0: # %entry
; PENTIUM4-NEXT: btl %esi, %edi
; PENTIUM4-NEXT: jae .LBB14_2
; PENTIUM4-NEXT: # BB#1: # %bb
; PENTIUM4-NEXT: pushq %rax
; PENTIUM4-NEXT: callq foo
; PENTIUM4-NEXT: popq %rax
; PENTIUM4-NEXT: .LBB14_2: # %UnifiedReturnBlock
; PENTIUM4-NEXT: retq
; CHECK-LABEL: aquery2:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jae .LBB14_2
;
; AVX-512-LABEL: aquery2:
; AVX-512: # BB#0: # %entry
; AVX-512-NEXT: movl %esi, %ecx
; AVX-512-NEXT: shrl %cl, %edi
; AVX-512-NEXT: testb $1, %dil
; AVX-512-NEXT: je .LBB14_2
; AVX-512-NEXT: # BB#1: # %bb
; AVX-512-NEXT: pushq %rax
; AVX-512-NEXT: callq foo
; AVX-512-NEXT: popq %rax
; AVX-512-NEXT: .LBB14_2: # %UnifiedReturnBlock
; AVX-512-NEXT: retq
entry:
%tmp29 = ashr i32 %x, %n
%tmp3 = and i32 %tmp29, 1
@ -539,29 +320,11 @@ UnifiedReturnBlock:
}
define void @aquery2b(i32 %x, i32 %n) nounwind {
; PENTIUM4-LABEL: aquery2b:
; PENTIUM4: # BB#0: # %entry
; PENTIUM4-NEXT: btl %esi, %edi
; PENTIUM4-NEXT: jae .LBB15_2
; PENTIUM4-NEXT: # BB#1: # %bb
; PENTIUM4-NEXT: pushq %rax
; PENTIUM4-NEXT: callq foo
; PENTIUM4-NEXT: popq %rax
; PENTIUM4-NEXT: .LBB15_2: # %UnifiedReturnBlock
; PENTIUM4-NEXT: retq
; CHECK-LABEL: aquery2b:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jae .LBB15_2
;
; AVX-512-LABEL: aquery2b:
; AVX-512: # BB#0: # %entry
; AVX-512-NEXT: movl %esi, %ecx
; AVX-512-NEXT: shrl %cl, %edi
; AVX-512-NEXT: testb $1, %dil
; AVX-512-NEXT: je .LBB15_2
; AVX-512-NEXT: # BB#1: # %bb
; AVX-512-NEXT: pushq %rax
; AVX-512-NEXT: callq foo
; AVX-512-NEXT: popq %rax
; AVX-512-NEXT: .LBB15_2: # %UnifiedReturnBlock
; AVX-512-NEXT: retq
entry:
%tmp29 = ashr i32 %x, %n
%tmp3 = and i32 1, %tmp29
@ -581,12 +344,7 @@ define void @query3(i32 %x, i32 %n) nounwind {
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jae .LBB16_2
; CHECK-NEXT: # BB#1: # %bb
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq foo
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .LBB16_2: # %UnifiedReturnBlock
; CHECK-NEXT: retq
;
entry:
%tmp29 = shl i32 1, %n
%tmp3 = and i32 %tmp29, %x
@ -606,12 +364,7 @@ define void @query3b(i32 %x, i32 %n) nounwind {
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jae .LBB17_2
; CHECK-NEXT: # BB#1: # %bb
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq foo
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .LBB17_2: # %UnifiedReturnBlock
; CHECK-NEXT: retq
;
entry:
%tmp29 = shl i32 1, %n
%tmp3 = and i32 %x, %tmp29
@ -631,12 +384,7 @@ define void @query3x(i32 %x, i32 %n) nounwind {
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jae .LBB18_2
; CHECK-NEXT: # BB#1: # %bb
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq foo
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .LBB18_2: # %UnifiedReturnBlock
; CHECK-NEXT: retq
;
entry:
%tmp29 = shl i32 1, %n
%tmp3 = and i32 %tmp29, %x
@ -656,12 +404,7 @@ define void @query3bx(i32 %x, i32 %n) nounwind {
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jae .LBB19_2
; CHECK-NEXT: # BB#1: # %bb
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq foo
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .LBB19_2: # %UnifiedReturnBlock
; CHECK-NEXT: retq
;
entry:
%tmp29 = shl i32 1, %n
%tmp3 = and i32 %x, %tmp29
@ -677,35 +420,11 @@ UnifiedReturnBlock:
}
define void @queryne2(i32 %x, i32 %n) nounwind {
; PENTIUM4-LABEL: queryne2:
; PENTIUM4: # BB#0: # %entry
; PENTIUM4-NEXT: btl %esi, %edi
; PENTIUM4-NEXT: jb .LBB20_2
; PENTIUM4-NEXT: # BB#1: # %bb
; PENTIUM4-NEXT: pushq %rax
; PENTIUM4-NEXT: callq foo
; PENTIUM4-NEXT: popq %rax
; PENTIUM4-NEXT: .LBB20_2: # %UnifiedReturnBlock
; PENTIUM4-NEXT: retq
; CHECK-LABEL: queryne2:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jb .LBB20_2
;
; AVX-512-LABEL: queryne2:
; AVX-512: # BB#0: # %entry
; AVX-512-NEXT: movl %esi, %ecx
; AVX-512-NEXT: shrl %cl, %edi
; AVX-512-NEXT: andl $1, %edi
; AVX-512-NEXT: kmovw %edi, %k0
; AVX-512-NEXT: kxnorw %k0, %k0, %k1
; AVX-512-NEXT: kshiftrw $15, %k1, %k1
; AVX-512-NEXT: kxorw %k1, %k0, %k0
; AVX-512-NEXT: kmovw %k0, %eax
; AVX-512-NEXT: testb %al, %al
; AVX-512-NEXT: je .LBB20_2
; AVX-512-NEXT: # BB#1: # %bb
; AVX-512-NEXT: pushq %rax
; AVX-512-NEXT: callq foo
; AVX-512-NEXT: popq %rax
; AVX-512-NEXT: .LBB20_2: # %UnifiedReturnBlock
; AVX-512-NEXT: retq
entry:
%tmp29 = lshr i32 %x, %n
%tmp3 = and i32 %tmp29, 1
@ -721,35 +440,11 @@ UnifiedReturnBlock:
}
define void @queryne2b(i32 %x, i32 %n) nounwind {
; PENTIUM4-LABEL: queryne2b:
; PENTIUM4: # BB#0: # %entry
; PENTIUM4-NEXT: btl %esi, %edi
; PENTIUM4-NEXT: jb .LBB21_2
; PENTIUM4-NEXT: # BB#1: # %bb
; PENTIUM4-NEXT: pushq %rax
; PENTIUM4-NEXT: callq foo
; PENTIUM4-NEXT: popq %rax
; PENTIUM4-NEXT: .LBB21_2: # %UnifiedReturnBlock
; PENTIUM4-NEXT: retq
; CHECK-LABEL: queryne2b:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jb .LBB21_2
;
; AVX-512-LABEL: queryne2b:
; AVX-512: # BB#0: # %entry
; AVX-512-NEXT: movl %esi, %ecx
; AVX-512-NEXT: shrl %cl, %edi
; AVX-512-NEXT: andl $1, %edi
; AVX-512-NEXT: kmovw %edi, %k0
; AVX-512-NEXT: kxnorw %k0, %k0, %k1
; AVX-512-NEXT: kshiftrw $15, %k1, %k1
; AVX-512-NEXT: kxorw %k1, %k0, %k0
; AVX-512-NEXT: kmovw %k0, %eax
; AVX-512-NEXT: testb %al, %al
; AVX-512-NEXT: je .LBB21_2
; AVX-512-NEXT: # BB#1: # %bb
; AVX-512-NEXT: pushq %rax
; AVX-512-NEXT: callq foo
; AVX-512-NEXT: popq %rax
; AVX-512-NEXT: .LBB21_2: # %UnifiedReturnBlock
; AVX-512-NEXT: retq
entry:
%tmp29 = lshr i32 %x, %n
%tmp3 = and i32 1, %tmp29
@ -765,35 +460,11 @@ UnifiedReturnBlock:
}
define void @aqueryne2(i32 %x, i32 %n) nounwind {
; PENTIUM4-LABEL: aqueryne2:
; PENTIUM4: # BB#0: # %entry
; PENTIUM4-NEXT: btl %esi, %edi
; PENTIUM4-NEXT: jb .LBB22_2
; PENTIUM4-NEXT: # BB#1: # %bb
; PENTIUM4-NEXT: pushq %rax
; PENTIUM4-NEXT: callq foo
; PENTIUM4-NEXT: popq %rax
; PENTIUM4-NEXT: .LBB22_2: # %UnifiedReturnBlock
; PENTIUM4-NEXT: retq
; CHECK-LABEL: aqueryne2:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jb .LBB22_2
;
; AVX-512-LABEL: aqueryne2:
; AVX-512: # BB#0: # %entry
; AVX-512-NEXT: movl %esi, %ecx
; AVX-512-NEXT: shrl %cl, %edi
; AVX-512-NEXT: andl $1, %edi
; AVX-512-NEXT: kmovw %edi, %k0
; AVX-512-NEXT: kxnorw %k0, %k0, %k1
; AVX-512-NEXT: kshiftrw $15, %k1, %k1
; AVX-512-NEXT: kxorw %k1, %k0, %k0
; AVX-512-NEXT: kmovw %k0, %eax
; AVX-512-NEXT: testb %al, %al
; AVX-512-NEXT: je .LBB22_2
; AVX-512-NEXT: # BB#1: # %bb
; AVX-512-NEXT: pushq %rax
; AVX-512-NEXT: callq foo
; AVX-512-NEXT: popq %rax
; AVX-512-NEXT: .LBB22_2: # %UnifiedReturnBlock
; AVX-512-NEXT: retq
entry:
%tmp29 = ashr i32 %x, %n
%tmp3 = and i32 %tmp29, 1
@ -809,35 +480,11 @@ UnifiedReturnBlock:
}
define void @aqueryne2b(i32 %x, i32 %n) nounwind {
; PENTIUM4-LABEL: aqueryne2b:
; PENTIUM4: # BB#0: # %entry
; PENTIUM4-NEXT: btl %esi, %edi
; PENTIUM4-NEXT: jb .LBB23_2
; PENTIUM4-NEXT: # BB#1: # %bb
; PENTIUM4-NEXT: pushq %rax
; PENTIUM4-NEXT: callq foo
; PENTIUM4-NEXT: popq %rax
; PENTIUM4-NEXT: .LBB23_2: # %UnifiedReturnBlock
; PENTIUM4-NEXT: retq
; CHECK-LABEL: aqueryne2b:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jb .LBB23_2
;
; AVX-512-LABEL: aqueryne2b:
; AVX-512: # BB#0: # %entry
; AVX-512-NEXT: movl %esi, %ecx
; AVX-512-NEXT: shrl %cl, %edi
; AVX-512-NEXT: andl $1, %edi
; AVX-512-NEXT: kmovw %edi, %k0
; AVX-512-NEXT: kxnorw %k0, %k0, %k1
; AVX-512-NEXT: kshiftrw $15, %k1, %k1
; AVX-512-NEXT: kxorw %k1, %k0, %k0
; AVX-512-NEXT: kmovw %k0, %eax
; AVX-512-NEXT: testb %al, %al
; AVX-512-NEXT: je .LBB23_2
; AVX-512-NEXT: # BB#1: # %bb
; AVX-512-NEXT: pushq %rax
; AVX-512-NEXT: callq foo
; AVX-512-NEXT: popq %rax
; AVX-512-NEXT: .LBB23_2: # %UnifiedReturnBlock
; AVX-512-NEXT: retq
entry:
%tmp29 = ashr i32 %x, %n
%tmp3 = and i32 1, %tmp29
@ -857,12 +504,7 @@ define void @queryne3(i32 %x, i32 %n) nounwind {
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jb .LBB24_2
; CHECK-NEXT: # BB#1: # %bb
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq foo
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .LBB24_2: # %UnifiedReturnBlock
; CHECK-NEXT: retq
;
entry:
%tmp29 = shl i32 1, %n
%tmp3 = and i32 %tmp29, %x
@ -882,12 +524,7 @@ define void @queryne3b(i32 %x, i32 %n) nounwind {
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jb .LBB25_2
; CHECK-NEXT: # BB#1: # %bb
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq foo
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .LBB25_2: # %UnifiedReturnBlock
; CHECK-NEXT: retq
;
entry:
%tmp29 = shl i32 1, %n
%tmp3 = and i32 %x, %tmp29
@ -907,12 +544,7 @@ define void @queryne3x(i32 %x, i32 %n) nounwind {
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jb .LBB26_2
; CHECK-NEXT: # BB#1: # %bb
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq foo
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .LBB26_2: # %UnifiedReturnBlock
; CHECK-NEXT: retq
;
entry:
%tmp29 = shl i32 1, %n
%tmp3 = and i32 %tmp29, %x
@ -932,12 +564,7 @@ define void @queryne3bx(i32 %x, i32 %n) nounwind {
; CHECK: # BB#0: # %entry
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: jb .LBB27_2
; CHECK-NEXT: # BB#1: # %bb
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq foo
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .LBB27_2: # %UnifiedReturnBlock
; CHECK-NEXT: retq
;
entry:
%tmp29 = shl i32 1, %n
%tmp3 = and i32 %x, %tmp29
@ -961,6 +588,7 @@ define zeroext i1 @invert(i32 %flags, i32 %flag) nounwind {
; CHECK-NEXT: btl %esi, %edi
; CHECK-NEXT: setb %al
; CHECK-NEXT: retq
;
%neg = xor i32 %flags, -1
%shl = shl i32 1, %flag
%and = and i32 %shl, %neg