1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 19:23:23 +01:00

[InstCombine] allow icmp (div X, Y), C folds for splat constant vectors

Converting all of the overflow ops to APInt looked risky, so I've left that as a TODO.

llvm-svn: 280299
This commit is contained in:
Sanjay Patel 2016-08-31 21:57:21 +00:00
parent a193e467ba
commit 002c7088e9
5 changed files with 53 additions and 81 deletions

View File

@ -35,11 +35,6 @@ using namespace PatternMatch;
// How many times is a select replaced by one of its operands? // How many times is a select replaced by one of its operands?
STATISTIC(NumSel, "Number of select opts"); STATISTIC(NumSel, "Number of select opts");
// Initialization Routines
static ConstantInt *getOne(Constant *C) {
return ConstantInt::get(cast<IntegerType>(C->getType()), 1);
}
static ConstantInt *ExtractElement(Constant *V, Constant *Idx) { static ConstantInt *ExtractElement(Constant *V, Constant *Idx) {
return cast<ConstantInt>(ConstantExpr::getExtractElement(V, Idx)); return cast<ConstantInt>(ConstantExpr::getExtractElement(V, Idx));
@ -2001,33 +1996,28 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp,
assert(!(DivIsSigned && C2->isAllOnesValue()) && assert(!(DivIsSigned && C2->isAllOnesValue()) &&
"The overflow computation will fail."); "The overflow computation will fail.");
// FIXME: These checks restrict all folds under here to scalar types. // TODO: We could do all of the computations below using APInt.
ConstantInt *RHS = dyn_cast<ConstantInt>(Cmp.getOperand(1)); Constant *CmpRHS = cast<Constant>(Cmp.getOperand(1));
if (!RHS) Constant *DivRHS = cast<Constant>(Div->getOperand(1));
return nullptr;
ConstantInt *DivRHS = dyn_cast<ConstantInt>(Div->getOperand(1)); // Compute Prod = CmpRHS * DivRHS. We are essentially solving an equation of
if (!DivRHS) // form X / C2 = C. We solve for X by multiplying C2 (DivRHS) and C (CmpRHS).
return nullptr; // By solving for X, we can turn this into a range check instead of computing
// a divide.
Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS);
// Compute Prod = CI * DivRHS. We are essentially solving an equation // Determine if the product overflows by seeing if the product is not equal to
// of form X/C2=C. We solve for X by multiplying C2 (DivRHS) and // the divide. Make sure we do the same kind of divide as in the LHS
// C (CI). By solving for X we can turn this into a range check // instruction that we're folding.
// instead of computing a divide. bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS)
Constant *Prod = ConstantExpr::getMul(RHS, DivRHS); : ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS;
// Determine if the product overflows by seeing if the product is
// not equal to the divide. Make sure we do the same kind of divide
// as in the LHS instruction that we're folding.
bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) :
ConstantExpr::getUDiv(Prod, DivRHS)) != RHS;
// Get the ICmp opcode
ICmpInst::Predicate Pred = Cmp.getPredicate(); ICmpInst::Predicate Pred = Cmp.getPredicate();
// If the division is known to be exact, then there is no remainder from the // If the division is known to be exact, then there is no remainder from the
// divide, so the covered range size is unit, otherwise it is the divisor. // divide, so the covered range size is unit, otherwise it is the divisor.
ConstantInt *RangeSize = Div->isExact() ? getOne(Prod) : DivRHS; Constant *RangeSize =
Div->isExact() ? ConstantInt::get(Div->getType(), 1) : DivRHS;
// Figure out the interval that is being checked. For example, a comparison // Figure out the interval that is being checked. For example, a comparison
// like "X /u 5 == 0" is really checking that X is in the interval [0, 5). // like "X /u 5 == 0" is really checking that X is in the interval [0, 5).
@ -2048,7 +2038,7 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp,
// to the same result value. // to the same result value.
HiOverflow = AddWithOverflow(HiBound, LoBound, RangeSize, false); HiOverflow = AddWithOverflow(HiBound, LoBound, RangeSize, false);
} }
} else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0. } else if (C2->isStrictlyPositive()) { // Divisor is > 0.
if (*C == 0) { // (X / pos) op 0 if (*C == 0) { // (X / pos) op 0
// Can't overflow. e.g. X/2 op 0 --> [-1, 2) // Can't overflow. e.g. X/2 op 0 --> [-1, 2)
LoBound = ConstantExpr::getNeg(SubOne(RangeSize)); LoBound = ConstantExpr::getNeg(SubOne(RangeSize));
@ -2063,17 +2053,17 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp,
HiBound = AddOne(Prod); HiBound = AddOne(Prod);
LoOverflow = HiOverflow = ProdOV ? -1 : 0; LoOverflow = HiOverflow = ProdOV ? -1 : 0;
if (!LoOverflow) { if (!LoOverflow) {
ConstantInt *DivNeg =cast<ConstantInt>(ConstantExpr::getNeg(RangeSize)); Constant *DivNeg = ConstantExpr::getNeg(RangeSize);
LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0; LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0;
} }
} }
} else if (DivRHS->isNegative()) { // Divisor is < 0. } else if (C2->isNegative()) { // Divisor is < 0.
if (Div->isExact()) if (Div->isExact())
RangeSize = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize)); RangeSize = ConstantExpr::getNeg(RangeSize);
if (*C == 0) { // (X / neg) op 0 if (*C == 0) { // (X / neg) op 0
// e.g. X/-5 op 0 --> [-4, 5) // e.g. X/-5 op 0 --> [-4, 5)
LoBound = AddOne(RangeSize); LoBound = AddOne(RangeSize);
HiBound = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize)); HiBound = ConstantExpr::getNeg(RangeSize);
if (HiBound == DivRHS) { // -INTMIN = INTMIN if (HiBound == DivRHS) { // -INTMIN = INTMIN
HiOverflow = 1; // [INTMIN+1, overflow) HiOverflow = 1; // [INTMIN+1, overflow)
HiBound = nullptr; // e.g. X/INTMIN = 0 --> X > INTMIN HiBound = nullptr; // e.g. X/INTMIN = 0 --> X > INTMIN
@ -2108,9 +2098,8 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp,
return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
ICmpInst::ICMP_ULT, X, HiBound); ICmpInst::ICMP_ULT, X, HiBound);
return replaceInstUsesWith( return replaceInstUsesWith(
Cmp, insertRangeTest(X, cast<ConstantInt>(LoBound)->getValue(), Cmp, insertRangeTest(X, LoBound->getUniqueInteger(),
cast<ConstantInt>(HiBound)->getValue(), HiBound->getUniqueInteger(), DivIsSigned, true));
DivIsSigned, true));
case ICmpInst::ICMP_NE: case ICmpInst::ICMP_NE:
if (LoOverflow && HiOverflow) if (LoOverflow && HiOverflow)
return replaceInstUsesWith(Cmp, Builder->getTrue()); return replaceInstUsesWith(Cmp, Builder->getTrue());
@ -2120,10 +2109,10 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp,
if (LoOverflow) if (LoOverflow)
return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
ICmpInst::ICMP_UGE, X, HiBound); ICmpInst::ICMP_UGE, X, HiBound);
return replaceInstUsesWith( return replaceInstUsesWith(Cmp,
Cmp, insertRangeTest(X, cast<ConstantInt>(LoBound)->getValue(), insertRangeTest(X, LoBound->getUniqueInteger(),
cast<ConstantInt>(HiBound)->getValue(), HiBound->getUniqueInteger(),
DivIsSigned, false)); DivIsSigned, false));
case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLT:
if (LoOverflow == +1) // Low bound is greater than input range. if (LoOverflow == +1) // Low bound is greater than input range.

View File

@ -14,11 +14,10 @@ define i1 @test(i32 %tmp6) {
ret i1 %1 ret i1 %1
} }
; FIXME: Vectors should fold the same way.
define <2 x i1> @test_vec(<2 x i32> %tmp6) { define <2 x i1> @test_vec(<2 x i32> %tmp6) {
; CHECK-LABEL: @test_vec( ; CHECK-LABEL: @test_vec(
; CHECK-NEXT: [[TMP7:%.*]] = sdiv <2 x i32> %tmp6, <i32 12, i32 12> ; CHECK-NEXT: [[TMP6_OFF:%.*]] = add <2 x i32> %tmp6, <i32 83, i32 83>
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i32> [[TMP7]], <i32 -6, i32 -6> ; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i32> [[TMP6_OFF]], <i32 11, i32 11>
; CHECK-NEXT: ret <2 x i1> [[TMP1]] ; CHECK-NEXT: ret <2 x i1> [[TMP1]]
; ;
%tmp7 = sdiv <2 x i32> %tmp6, <i32 12, i32 12> %tmp7 = sdiv <2 x i32> %tmp6, <i32 12, i32 12>

View File

@ -72,12 +72,11 @@ define i1 @test7(i32 %A) {
ret i1 %C ret i1 %C
} }
; FIXME: Vectors should fold the same way.
define <2 x i1> @test7vec(<2 x i32> %A) { define <2 x i1> @test7vec(<2 x i32> %A) {
; CHECK-LABEL: @test7vec( ; CHECK-LABEL: @test7vec(
; CHECK-NEXT: [[B:%.*]] = udiv <2 x i32> %A, <i32 10, i32 10> ; CHECK-NEXT: [[A_OFF:%.*]] = add <2 x i32> %A, <i32 -20, i32 -20>
; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i32> [[B]], <i32 2, i32 2> ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i32> [[A_OFF]], <i32 10, i32 10>
; CHECK-NEXT: ret <2 x i1> [[C]] ; CHECK-NEXT: ret <2 x i1> [[TMP1]]
; ;
%B = udiv <2 x i32> %A, <i32 10, i32 10> %B = udiv <2 x i32> %A, <i32 10, i32 10>
%C = icmp eq <2 x i32> %B, <i32 2, i32 2> %C = icmp eq <2 x i32> %B, <i32 2, i32 2>
@ -95,11 +94,9 @@ define i1 @test8(i8 %A) {
ret i1 %C ret i1 %C
} }
; FIXME: Vectors should fold the same way.
define <2 x i1> @test8vec(<2 x i8> %A) { define <2 x i1> @test8vec(<2 x i8> %A) {
; CHECK-LABEL: @test8vec( ; CHECK-LABEL: @test8vec(
; CHECK-NEXT: [[B:%.*]] = udiv <2 x i8> %A, <i8 123, i8 123> ; CHECK-NEXT: [[C:%.*]] = icmp ugt <2 x i8> %A, <i8 -11, i8 -11>
; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i8> [[B]], <i8 2, i8 2>
; CHECK-NEXT: ret <2 x i1> [[C]] ; CHECK-NEXT: ret <2 x i1> [[C]]
; ;
%B = udiv <2 x i8> %A, <i8 123, i8 123> %B = udiv <2 x i8> %A, <i8 123, i8 123>
@ -118,11 +115,9 @@ define i1 @test9(i8 %A) {
ret i1 %C ret i1 %C
} }
; FIXME: Vectors should fold the same way.
define <2 x i1> @test9vec(<2 x i8> %A) { define <2 x i1> @test9vec(<2 x i8> %A) {
; CHECK-LABEL: @test9vec( ; CHECK-LABEL: @test9vec(
; CHECK-NEXT: [[B:%.*]] = udiv <2 x i8> %A, <i8 123, i8 123> ; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i8> %A, <i8 -10, i8 -10>
; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i8> [[B]], <i8 2, i8 2>
; CHECK-NEXT: ret <2 x i1> [[C]] ; CHECK-NEXT: ret <2 x i1> [[C]]
; ;
%B = udiv <2 x i8> %A, <i8 123, i8 123> %B = udiv <2 x i8> %A, <i8 123, i8 123>

View File

@ -159,8 +159,8 @@ define i1 @udiv_icmp1(i64 %X) {
define <2 x i1> @udiv_icmp1_vec(<2 x i64> %X) { define <2 x i1> @udiv_icmp1_vec(<2 x i64> %X) {
; CHECK-LABEL: @udiv_icmp1_vec( ; CHECK-LABEL: @udiv_icmp1_vec(
; CHECK-NEXT: [[B:%.*]] = icmp ugt <2 x i64> %X, <i64 4, i64 4> ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i64> %X, zeroinitializer
; CHECK-NEXT: ret <2 x i1> [[B]] ; CHECK-NEXT: ret <2 x i1> [[TMP1]]
; ;
%A = udiv exact <2 x i64> %X, <i64 5, i64 5> %A = udiv exact <2 x i64> %X, <i64 5, i64 5>
%B = icmp ne <2 x i64> %A, zeroinitializer %B = icmp ne <2 x i64> %A, zeroinitializer
@ -177,10 +177,11 @@ define i1 @udiv_icmp2(i64 %X) {
ret i1 %B ret i1 %B
} }
; FIXME: missing vector fold for ult 1 -> eq 0
define <2 x i1> @udiv_icmp2_vec(<2 x i64> %X) { define <2 x i1> @udiv_icmp2_vec(<2 x i64> %X) {
; CHECK-LABEL: @udiv_icmp2_vec( ; CHECK-LABEL: @udiv_icmp2_vec(
; CHECK-NEXT: [[B:%.*]] = icmp ult <2 x i64> %X, <i64 5, i64 5> ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i64> %X, <i64 1, i64 1>
; CHECK-NEXT: ret <2 x i1> [[B]] ; CHECK-NEXT: ret <2 x i1> [[TMP1]]
; ;
%A = udiv exact <2 x i64> %X, <i64 5, i64 5> %A = udiv exact <2 x i64> %X, <i64 5, i64 5>
%B = icmp eq <2 x i64> %A, zeroinitializer %B = icmp eq <2 x i64> %A, zeroinitializer
@ -197,12 +198,11 @@ define i1 @sdiv_icmp1(i64 %X) {
ret i1 %B ret i1 %B
} }
; FIXME: Vectors should fold too. ; FIXME: missing vector fold for ult 1 -> eq 0
define <2 x i1> @sdiv_icmp1_vec(<2 x i64> %X) { define <2 x i1> @sdiv_icmp1_vec(<2 x i64> %X) {
; CHECK-LABEL: @sdiv_icmp1_vec( ; CHECK-LABEL: @sdiv_icmp1_vec(
; CHECK-NEXT: [[A:%.*]] = sdiv exact <2 x i64> %X, <i64 5, i64 5> ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i64> %X, <i64 1, i64 1>
; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i64> [[A]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[TMP1]]
; CHECK-NEXT: ret <2 x i1> [[B]]
; ;
%A = sdiv exact <2 x i64> %X, <i64 5, i64 5> %A = sdiv exact <2 x i64> %X, <i64 5, i64 5>
%B = icmp eq <2 x i64> %A, zeroinitializer %B = icmp eq <2 x i64> %A, zeroinitializer
@ -219,12 +219,10 @@ define i1 @sdiv_icmp2(i64 %X) {
ret i1 %B ret i1 %B
} }
; FIXME: Vectors should fold too.
define <2 x i1> @sdiv_icmp2_vec(<2 x i64> %X) { define <2 x i1> @sdiv_icmp2_vec(<2 x i64> %X) {
; CHECK-LABEL: @sdiv_icmp2_vec( ; CHECK-LABEL: @sdiv_icmp2_vec(
; CHECK-NEXT: [[A:%.*]] = sdiv exact <2 x i64> %X, <i64 5, i64 5> ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i64> %X, <i64 5, i64 5>
; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i64> [[A]], <i64 1, i64 1> ; CHECK-NEXT: ret <2 x i1> [[TMP1]]
; CHECK-NEXT: ret <2 x i1> [[B]]
; ;
%A = sdiv exact <2 x i64> %X, <i64 5, i64 5> %A = sdiv exact <2 x i64> %X, <i64 5, i64 5>
%B = icmp eq <2 x i64> %A, <i64 1, i64 1> %B = icmp eq <2 x i64> %A, <i64 1, i64 1>
@ -241,12 +239,10 @@ define i1 @sdiv_icmp3(i64 %X) {
ret i1 %B ret i1 %B
} }
; FIXME: Vectors should fold too.
define <2 x i1> @sdiv_icmp3_vec(<2 x i64> %X) { define <2 x i1> @sdiv_icmp3_vec(<2 x i64> %X) {
; CHECK-LABEL: @sdiv_icmp3_vec( ; CHECK-LABEL: @sdiv_icmp3_vec(
; CHECK-NEXT: [[A:%.*]] = sdiv exact <2 x i64> %X, <i64 5, i64 5> ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i64> %X, <i64 -5, i64 -5>
; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i64> [[A]], <i64 -1, i64 -1> ; CHECK-NEXT: ret <2 x i1> [[TMP1]]
; CHECK-NEXT: ret <2 x i1> [[B]]
; ;
%A = sdiv exact <2 x i64> %X, <i64 5, i64 5> %A = sdiv exact <2 x i64> %X, <i64 5, i64 5>
%B = icmp eq <2 x i64> %A, <i64 -1, i64 -1> %B = icmp eq <2 x i64> %A, <i64 -1, i64 -1>
@ -263,12 +259,11 @@ define i1 @sdiv_icmp4(i64 %X) {
ret i1 %B ret i1 %B
} }
; FIXME: Vectors should fold too. ; FIXME: missing vector fold for ult 1 -> eq 0
define <2 x i1> @sdiv_icmp4_vec(<2 x i64> %X) { define <2 x i1> @sdiv_icmp4_vec(<2 x i64> %X) {
; CHECK-LABEL: @sdiv_icmp4_vec( ; CHECK-LABEL: @sdiv_icmp4_vec(
; CHECK-NEXT: [[A:%.*]] = sdiv exact <2 x i64> %X, <i64 -5, i64 -5> ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i64> %X, <i64 1, i64 1>
; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i64> [[A]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[TMP1]]
; CHECK-NEXT: ret <2 x i1> [[B]]
; ;
%A = sdiv exact <2 x i64> %X, <i64 -5, i64 -5> %A = sdiv exact <2 x i64> %X, <i64 -5, i64 -5>
%B = icmp eq <2 x i64> %A, zeroinitializer %B = icmp eq <2 x i64> %A, zeroinitializer
@ -285,12 +280,10 @@ define i1 @sdiv_icmp5(i64 %X) {
ret i1 %B ret i1 %B
} }
; FIXME: Vectors should fold too.
define <2 x i1> @sdiv_icmp5_vec(<2 x i64> %X) { define <2 x i1> @sdiv_icmp5_vec(<2 x i64> %X) {
; CHECK-LABEL: @sdiv_icmp5_vec( ; CHECK-LABEL: @sdiv_icmp5_vec(
; CHECK-NEXT: [[A:%.*]] = sdiv exact <2 x i64> %X, <i64 -5, i64 -5> ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i64> %X, <i64 -5, i64 -5>
; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i64> [[A]], <i64 1, i64 1> ; CHECK-NEXT: ret <2 x i1> [[TMP1]]
; CHECK-NEXT: ret <2 x i1> [[B]]
; ;
%A = sdiv exact <2 x i64> %X, <i64 -5, i64 -5> %A = sdiv exact <2 x i64> %X, <i64 -5, i64 -5>
%B = icmp eq <2 x i64> %A, <i64 1, i64 1> %B = icmp eq <2 x i64> %A, <i64 1, i64 1>
@ -307,12 +300,10 @@ define i1 @sdiv_icmp6(i64 %X) {
ret i1 %B ret i1 %B
} }
; FIXME: Vectors should fold too.
define <2 x i1> @sdiv_icmp6_vec(<2 x i64> %X) { define <2 x i1> @sdiv_icmp6_vec(<2 x i64> %X) {
; CHECK-LABEL: @sdiv_icmp6_vec( ; CHECK-LABEL: @sdiv_icmp6_vec(
; CHECK-NEXT: [[A:%.*]] = sdiv exact <2 x i64> %X, <i64 -5, i64 -5> ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i64> %X, <i64 5, i64 5>
; CHECK-NEXT: [[B:%.*]] = icmp eq <2 x i64> [[A]], <i64 -1, i64 -1> ; CHECK-NEXT: ret <2 x i1> [[TMP1]]
; CHECK-NEXT: ret <2 x i1> [[B]]
; ;
%A = sdiv exact <2 x i64> %X, <i64 -5, i64 -5> %A = sdiv exact <2 x i64> %X, <i64 -5, i64 -5>
%B = icmp eq <2 x i64> %A, <i64 -1, i64 -1> %B = icmp eq <2 x i64> %A, <i64 -1, i64 -1>

View File

@ -282,11 +282,9 @@ define i1 @test23(i32 %x) {
ret i1 %i4 ret i1 %i4
} }
; FIXME: Vectors should fold too.
define <2 x i1> @test23vec(<2 x i32> %x) { define <2 x i1> @test23vec(<2 x i32> %x) {
; CHECK-LABEL: @test23vec( ; CHECK-LABEL: @test23vec(
; CHECK-NEXT: [[I3:%.*]] = sdiv <2 x i32> %x, <i32 -1328634635, i32 -1328634635> ; CHECK-NEXT: [[I4:%.*]] = icmp sgt <2 x i32> %x, <i32 1328634634, i32 1328634634>
; CHECK-NEXT: [[I4:%.*]] = icmp eq <2 x i32> [[I3]], <i32 -1, i32 -1>
; CHECK-NEXT: ret <2 x i1> [[I4]] ; CHECK-NEXT: ret <2 x i1> [[I4]]
; ;
%i3 = sdiv <2 x i32> %x, <i32 -1328634635, i32 -1328634635> %i3 = sdiv <2 x i32> %x, <i32 -1328634635, i32 -1328634635>