1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00

[GuardWidening] Introduce range check merging

Sequences of range checks expressed using guards, like

  guard((I - 2) u< L)
  guard((I - 1) u< L)
  guard((I + 0) u< L)
  guard((I + 1) u< L)
  guard((I + 2) u< L)

can sometimes be combined into a smaller sequence:

  guard((I - 2) u< L AND (I + 2) u< L)

if we can prove that (I - 2) u< L AND (I + 2) u< L implies all of checks
expressed in the previous sequence.

This change teaches GuardWidening to do this kind of merging when
feasible.

llvm-svn: 270151
This commit is contained in:
Sanjoy Das 2016-05-19 22:55:46 +00:00
parent 970400db38
commit 14b186a828
2 changed files with 441 additions and 0 deletions

View File

@ -130,6 +130,55 @@ class GuardWideningImpl {
bool widenCondCommon(Value *Cond0, Value *Cond1, Instruction *InsertPt,
Value *&Result);
/// Represents a range check of the form \c Base + \c Offset u< \c Length,
/// with the constraint that \c Length is not negative. \c CheckInst is the
/// pre-existing instruction in the IR that computes the result of this range
/// check.
struct RangeCheck {
Value *Base;
ConstantInt *Offset;
Value *Length;
ICmpInst *CheckInst;
RangeCheck() {}
explicit RangeCheck(Value *Base, ConstantInt *Offset, Value *Length,
ICmpInst *CheckInst)
: Base(Base), Offset(Offset), Length(Length), CheckInst(CheckInst) {}
void print(raw_ostream &OS, bool PrintTypes = false) {
OS << "Base: ";
Base->printAsOperand(OS, PrintTypes);
OS << " Offset: ";
Offset->printAsOperand(OS, PrintTypes);
OS << " Length: ";
Length->printAsOperand(OS, PrintTypes);
}
LLVM_DUMP_METHOD void dump() {
print(dbgs());
dbgs() << "\n";
}
};
/// Parse \p CheckCond into a conjunction (logical-and) of range checks; and
/// append them to \p Checks. Returns true on success, may clobber \c Checks
/// on failure.
bool parseRangeChecks(Value *CheckCond, SmallVectorImpl<RangeCheck> &Checks) {
SmallPtrSet<Value *, 8> Visited;
return parseRangeChecks(CheckCond, Checks, Visited);
}
bool parseRangeChecks(Value *CheckCond, SmallVectorImpl<RangeCheck> &Checks,
SmallPtrSetImpl<Value *> &Visited);
/// Combine the checks in \p Checks into a smaller set of checks and append
/// them into \p CombinedChecks. Return true on success (i.e. all of checks
/// in \p Checks were combined into \p CombinedChecks). Clobbers \p Checks
/// and \p CombinedChecks on success and on failure.
bool combineRangeChecks(SmallVectorImpl<RangeCheck> &Checks,
SmallVectorImpl<RangeCheck> &CombinedChecks);
/// Can we compute the logical AND of \p Cond0 and \p Cond1 for the price of
/// computing only one of the two expressions?
bool isWideningCondProfitable(Value *Cond0, Value *Cond1) {
@ -386,6 +435,27 @@ bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1,
}
}
{
SmallVector<GuardWideningImpl::RangeCheck, 4> Checks, CombinedChecks;
if (parseRangeChecks(Cond0, Checks) && parseRangeChecks(Cond1, Checks) &&
combineRangeChecks(Checks, CombinedChecks)) {
if (InsertPt) {
Result = nullptr;
for (auto &RC : CombinedChecks) {
makeAvailableAt(RC.CheckInst, InsertPt);
if (Result)
Result =
BinaryOperator::CreateAnd(RC.CheckInst, Result, "", InsertPt);
else
Result = RC.CheckInst;
}
Result->setName("wide.chk");
}
return true;
}
}
// Base case -- just logical-and the two conditions together.
if (InsertPt) {
@ -399,6 +469,180 @@ bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1,
return false;
}
bool GuardWideningImpl::parseRangeChecks(
Value *CheckCond, SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks,
SmallPtrSetImpl<Value *> &Visited) {
if (!Visited.insert(CheckCond).second)
return true;
using namespace llvm::PatternMatch;
{
Value *AndLHS, *AndRHS;
if (match(CheckCond, m_And(m_Value(AndLHS), m_Value(AndRHS))))
return parseRangeChecks(AndLHS, Checks) &&
parseRangeChecks(AndRHS, Checks);
}
auto *IC = dyn_cast<ICmpInst>(CheckCond);
if (!IC || !IC->getOperand(0)->getType()->isIntegerTy() ||
(IC->getPredicate() != ICmpInst::ICMP_ULT &&
IC->getPredicate() != ICmpInst::ICMP_UGT))
return false;
Value *CmpLHS = IC->getOperand(0), *CmpRHS = IC->getOperand(1);
if (IC->getPredicate() == ICmpInst::ICMP_UGT)
std::swap(CmpLHS, CmpRHS);
auto &DL = IC->getModule()->getDataLayout();
GuardWideningImpl::RangeCheck Check;
Check.Base = CmpLHS;
Check.Offset =
cast<ConstantInt>(ConstantInt::getNullValue(CmpRHS->getType()));
Check.Length = CmpRHS;
Check.CheckInst = IC;
if (!isKnownNonNegative(Check.Length, DL))
return false;
// What we have in \c Check now is a correct interpretation of \p CheckCond.
// Try to see if we can move some constant offsets into the \c Offset field.
bool Changed;
do {
Value *OpLHS;
ConstantInt *OpRHS;
Changed = false;
#ifndef NDEBUG
auto *BaseInst = dyn_cast<Instruction>(Check.Base);
assert((!BaseInst || DT.isReachableFromEntry(BaseInst->getParent())) &&
"Unreachable instruction?");
#endif
if (match(Check.Base, m_Add(m_Value(OpLHS), m_ConstantInt(OpRHS)))) {
Check.Base = OpLHS;
Check.Offset =
ConstantInt::get(Check.Offset->getContext(),
Check.Offset->getValue() + OpRHS->getValue());
Changed = true;
} else if (match(Check.Base, m_Or(m_Value(OpLHS), m_ConstantInt(OpRHS)))) {
unsigned BitWidth = OpLHS->getType()->getScalarSizeInBits();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
computeKnownBits(OpLHS, KnownZero, KnownOne, DL);
if ((OpRHS->getValue() & KnownZero) == OpRHS->getValue()) {
Check.Base = OpLHS;
Check.Offset =
ConstantInt::get(Check.Offset->getContext(),
Check.Offset->getValue() + OpRHS->getValue());
Changed = true;
}
}
} while (Changed);
Checks.push_back(Check);
return true;
}
bool GuardWideningImpl::combineRangeChecks(
SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks,
SmallVectorImpl<GuardWideningImpl::RangeCheck> &RangeChecksOut) {
unsigned OldCount = Checks.size();
while (!Checks.empty()) {
Value *Base = Checks[0].Base;
Value *Length = Checks[0].Length;
auto ChecksStart =
remove_if(Checks, [&](GuardWideningImpl::RangeCheck &RC) {
return RC.Base == Base && RC.Length == Length;
});
unsigned CheckCount = std::distance(ChecksStart, Checks.end());
assert(CheckCount != 0 && "We know we have at least one!");
if (CheckCount < 3) {
RangeChecksOut.insert(RangeChecksOut.end(), ChecksStart, Checks.end());
Checks.erase(ChecksStart, Checks.end());
continue;
}
// CheckCount will typically be 3 here, but so far there has been no need to
// hard-code that fact.
std::sort(ChecksStart, Checks.end(),
[&](GuardWideningImpl::RangeCheck &LHS,
GuardWideningImpl::RangeCheck &RHS) {
return LHS.Offset->getValue().slt(RHS.Offset->getValue());
});
// Note: std::sort should not invalidate the ChecksStart iterator.
ConstantInt *MinOffset = ChecksStart->Offset,
*MaxOffset = Checks.back().Offset;
unsigned BitWidth = MaxOffset->getValue().getBitWidth();
if ((MaxOffset->getValue() - MinOffset->getValue())
.ugt(APInt::getSignedMinValue(BitWidth)))
return false;
APInt MaxDiff = MaxOffset->getValue() - MinOffset->getValue();
APInt HighOffset = MaxOffset->getValue();
auto OffsetOK = [&](GuardWideningImpl::RangeCheck &RC) {
return (HighOffset - RC.Offset->getValue()).ult(MaxDiff);
};
if (MaxDiff.isMinValue() ||
!std::all_of(std::next(ChecksStart), Checks.end(), OffsetOK))
return false;
// We have a series of f+1 checks as:
//
// I+k_0 u< L ... Chk_0
// I_k_1 u< L ... Chk_1
// ...
// I_k_f u< L ... Chk_(f+1)
//
// with forall i in [0,f): k_f-k_i u< k_f-k_0 ... Precond_0
// k_f-k_0 u< INT_MIN+k_f ... Precond_1
// k_f != k_0 ... Precond_2
//
// Claim:
// Chk_0 AND Chk_(f+1) implies all the other checks
//
// Informal proof sketch:
//
// We will show that the integer range [I+k_0,I+k_f] does not unsigned-wrap
// (i.e. going from I+k_0 to I+k_f does not cross the -1,0 boundary) and
// thus I+k_f is the greatest unsigned value in that range.
//
// This combined with Ckh_(f+1) shows that everything in that range is u< L.
// Via Precond_0 we know that all of the indices in Chk_0 through Chk_(f+1)
// lie in [I+k_0,I+k_f], this proving our claim.
//
// To see that [I+k_0,I+k_f] is not a wrapping range, note that there are
// two possibilities: I+k_0 u< I+k_f or I+k_0 >u I+k_f (they can't be equal
// since k_0 != k_f). In the former case, [I+k_0,I+k_f] is not a wrapping
// range by definition, and the latter case is impossible:
//
// 0-----I+k_f---I+k_0----L---INT_MAX,INT_MIN------------------(-1)
// xxxxxx xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
//
// For Chk_0 to succeed, we'd have to have k_f-k_0 (the range highlighted
// with 'x' above) to be at least >u INT_MIN.
RangeChecksOut.emplace_back(Base, MinOffset, Length,
ChecksStart->CheckInst);
RangeChecksOut.emplace_back(Base, MaxOffset, Length,
Checks.back().CheckInst);
Checks.erase(ChecksStart, Checks.end());
}
assert(RangeChecksOut.size() <= OldCount && "We pessimized!");
return RangeChecksOut.size() != OldCount;
}
PreservedAnalyses GuardWideningPass::run(Function &F,
AnalysisManager<Function> &AM) {
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);

View File

@ -0,0 +1,197 @@
; RUN: opt -S -guard-widening < %s | FileCheck %s
declare void @llvm.experimental.guard(i1,...)
define void @f_0(i32 %x, i32* %length_buf) {
; CHECK-LABEL: @f_0(
; CHECK-NOT: @llvm.experimental.guard
; CHECK: %wide.chk2 = and i1 %chk3, %chk0
; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk2) [ "deopt"() ]
; CHECK: ret void
entry:
%length = load i32, i32* %length_buf, !range !0
%chk0 = icmp ult i32 %x, %length
call void(i1, ...) @llvm.experimental.guard(i1 %chk0) [ "deopt"() ]
%x.inc1 = add i32 %x, 1
%chk1 = icmp ult i32 %x.inc1, %length
call void(i1, ...) @llvm.experimental.guard(i1 %chk1) [ "deopt"() ]
%x.inc2 = add i32 %x, 2
%chk2 = icmp ult i32 %x.inc2, %length
call void(i1, ...) @llvm.experimental.guard(i1 %chk2) [ "deopt"() ]
%x.inc3 = add i32 %x, 3
%chk3 = icmp ult i32 %x.inc3, %length
call void(i1, ...) @llvm.experimental.guard(i1 %chk3) [ "deopt"() ]
ret void
}
define void @f_1(i32 %x, i32* %length_buf) {
; CHECK-LABEL: @f_1(
; CHECK-NOT: llvm.experimental.guard
; CHECK: %wide.chk2 = and i1 %chk3, %chk0
; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk2) [ "deopt"() ]
; CHECK: ret void
entry:
%length = load i32, i32* %length_buf, !range !0
%chk0 = icmp ult i32 %x, %length
call void(i1, ...) @llvm.experimental.guard(i1 %chk0) [ "deopt"() ]
%x.inc1 = add i32 %x, 1
%chk1 = icmp ult i32 %x.inc1, %length
call void(i1, ...) @llvm.experimental.guard(i1 %chk1) [ "deopt"() ]
%x.inc2 = add i32 %x.inc1, 2
%chk2 = icmp ult i32 %x.inc2, %length
call void(i1, ...) @llvm.experimental.guard(i1 %chk2) [ "deopt"() ]
%x.inc3 = add i32 %x.inc2, 3
%chk3 = icmp ult i32 %x.inc3, %length
call void(i1, ...) @llvm.experimental.guard(i1 %chk3) [ "deopt"() ]
ret void
}
define void @f_2(i32 %a, i32* %length_buf) {
; CHECK-LABEL: @f_2(
; CHECK-NOT: llvm.experimental.guard
; CHECK: %wide.chk2 = and i1 %chk3, %chk0
; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk2) [ "deopt"() ]
; CHECK: ret void
entry:
%x = and i32 %a, 4294967040 ;; 4294967040 == 0xffffff00
%length = load i32, i32* %length_buf, !range !0
%chk0 = icmp ult i32 %x, %length
call void(i1, ...) @llvm.experimental.guard(i1 %chk0) [ "deopt"() ]
%x.inc1 = or i32 %x, 1
%chk1 = icmp ult i32 %x.inc1, %length
call void(i1, ...) @llvm.experimental.guard(i1 %chk1) [ "deopt"() ]
%x.inc2 = or i32 %x, 2
%chk2 = icmp ult i32 %x.inc2, %length
call void(i1, ...) @llvm.experimental.guard(i1 %chk2) [ "deopt"() ]
%x.inc3 = or i32 %x, 3
%chk3 = icmp ult i32 %x.inc3, %length
call void(i1, ...) @llvm.experimental.guard(i1 %chk3) [ "deopt"() ]
ret void
}
define void @f_3(i32 %a, i32* %length_buf) {
; CHECK-LABEL: @f_3(
; CHECK-NOT: llvm.experimental.guard
; CHECK: %wide.chk2 = and i1 %chk3, %chk0
; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk2) [ "deopt"() ]
; CHECK: ret void
entry:
%x = and i32 %a, 4294967040 ;; 4294967040 == 0xffffff00
%length = load i32, i32* %length_buf, !range !0
%chk0 = icmp ult i32 %x, %length
call void(i1, ...) @llvm.experimental.guard(i1 %chk0) [ "deopt"() ]
%x.inc1 = add i32 %x, 1
%chk1 = icmp ult i32 %x.inc1, %length
call void(i1, ...) @llvm.experimental.guard(i1 %chk1) [ "deopt"() ]
%x.inc2 = or i32 %x.inc1, 2
%chk2 = icmp ult i32 %x.inc2, %length
call void(i1, ...) @llvm.experimental.guard(i1 %chk2) [ "deopt"() ]
%x.inc3 = add i32 %x.inc2, 3
%chk3 = icmp ult i32 %x.inc3, %length
call void(i1, ...) @llvm.experimental.guard(i1 %chk3) [ "deopt"() ]
ret void
}
define void @f_4(i32 %x, i32* %length_buf) {
; CHECK-LABEL: @f_4(
; CHECK-NOT: llvm.experimental.guard
; Note: we NOT guarding on "and i1 %chk3, %chk0", that would be incorrect.
; CHECK: %wide.chk2 = and i1 %chk3, %chk1
; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk2) [ "deopt"() ]
; CHECK: ret void
entry:
%length = load i32, i32* %length_buf, !range !0
%chk0 = icmp ult i32 %x, %length
call void(i1, ...) @llvm.experimental.guard(i1 %chk0) [ "deopt"() ]
%x.inc1 = add i32 %x, -1024
%chk1 = icmp ult i32 %x.inc1, %length
call void(i1, ...) @llvm.experimental.guard(i1 %chk1) [ "deopt"() ]
%x.inc2 = add i32 %x, 2
%chk2 = icmp ult i32 %x.inc2, %length
call void(i1, ...) @llvm.experimental.guard(i1 %chk2) [ "deopt"() ]
%x.inc3 = add i32 %x, 3
%chk3 = icmp ult i32 %x.inc3, %length
call void(i1, ...) @llvm.experimental.guard(i1 %chk3) [ "deopt"() ]
ret void
}
define void @f_5(i32 %x, i32* %length_buf) {
; CHECK-LABEL: @f_5(
; CHECK-NOT: llvm.experimental.guard
; CHECK: %wide.chk2 = and i1 %chk1, %chk2
; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk2) [ "deopt"() ]
; CHECK: ret void
entry:
%length = load i32, i32* %length_buf, !range !0
%chk0 = icmp ult i32 %x, %length
call void(i1, ...) @llvm.experimental.guard(i1 %chk0) [ "deopt"() ]
%x.inc1 = add i32 %x, 1
%chk1 = icmp ult i32 %x.inc1, %length
call void(i1, ...) @llvm.experimental.guard(i1 %chk1) [ "deopt"() ]
%x.inc2 = add i32 %x.inc1, -200
%chk2 = icmp ult i32 %x.inc2, %length
call void(i1, ...) @llvm.experimental.guard(i1 %chk2) [ "deopt"() ]
%x.inc3 = add i32 %x.inc2, 3
%chk3 = icmp ult i32 %x.inc3, %length
call void(i1, ...) @llvm.experimental.guard(i1 %chk3) [ "deopt"() ]
ret void
}
; Negative test: we can't merge these checks into
;
; (%x + -2147483647) u< L && (%x + 3) u< L
;
; because if %length == INT_MAX and %x == -3 then
;
; (%x + -2147483647) == i32 2147483646 u< L (L is 2147483647)
; (%x + 3) == 0 u< L
;
; But (%x + 2) == -1 is not u< L
;
define void @f_6(i32 %x, i32* %length_buf) {
; CHECK-LABEL: @f_6(
; CHECK-NOT: llvm.experimental.guard
; CHECK: %wide.chk = and i1 %chk0, %chk1
; CHECK: %wide.chk1 = and i1 %wide.chk, %chk2
; CHECK: %wide.chk2 = and i1 %wide.chk1, %chk3
; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk2) [ "deopt"() ]
entry:
%length = load i32, i32* %length_buf, !range !0
%chk0 = icmp ult i32 %x, %length
call void(i1, ...) @llvm.experimental.guard(i1 %chk0) [ "deopt"() ]
%x.inc1 = add i32 %x, -2147483647 ;; -2147483647 == (i32 INT_MIN)+1 == -(i32 INT_MAX)
%chk1 = icmp ult i32 %x.inc1, %length
call void(i1, ...) @llvm.experimental.guard(i1 %chk1) [ "deopt"() ]
%x.inc2 = add i32 %x, 2
%chk2 = icmp ult i32 %x.inc2, %length
call void(i1, ...) @llvm.experimental.guard(i1 %chk2) [ "deopt"() ]
%x.inc3 = add i32 %x, 3
%chk3 = icmp ult i32 %x.inc3, %length
call void(i1, ...) @llvm.experimental.guard(i1 %chk3) [ "deopt"() ]
ret void
}
!0 = !{i32 0, i32 2147483648}