1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-20 19:42:54 +02:00

[LoopPredication] Enable predication when latchCheckIV is wider than rangeCheck

Summary:
This patch allows us to predicate range checks that have a type narrower than
the latch check type. We leverage SCEV analysis to identify a truncate for the
latchLimit and latchStart.
There is also safety checks in place which requires the start and limit to be
known at compile time. We require this to make sure that the SCEV truncate expr
for the IV corresponding to the latch does not cause us to lose information
about the IV range.
Added tests show the loop predication over range checks that are of various
types and are narrower than the latch type.
This enhancement has been in our downstream tree for a while.

Reviewers: apilipenko, sanjoy, mkazantsev

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D39500

llvm-svn: 317269
This commit is contained in:
Anna Thomas 2017-11-02 21:21:02 +00:00
parent 2e3db2532b
commit 93dc82e6f3
2 changed files with 234 additions and 10 deletions

View File

@ -174,6 +174,9 @@
using namespace llvm;
static cl::opt<bool> EnableIVTruncation("loop-predication-enable-iv-truncation",
cl::Hidden, cl::init(true));
namespace {
class LoopPredication {
/// Represents an induction variable check:
@ -212,6 +215,22 @@ class LoopPredication {
IRBuilder<> &Builder);
bool widenGuardConditions(IntrinsicInst *II, SCEVExpander &Expander);
// When the IV type is wider than the range operand type, we can still do loop
// predication, by generating SCEVs for the range and latch that are of the
// same type. We achieve this by generating a SCEV truncate expression for the
// latch IV. This is done iff truncation of the IV is a safe operation,
// without loss of information.
// Another way to achieve this is by generating a wider type SCEV for the
// range check operand, however, this needs a more involved check that
// operands do not overflow. This can lead to loss of information when the
// range operand is of the form: add i32 %offset, %iv. We need to prove that
// sext(x + y) is same as sext(x) + sext(y).
// This function returns true if we can safely represent the IV type in
// the RangeCheckType without loss of information.
bool isSafeToTruncateWideIVType(Type *RangeCheckType);
// Return the loopLatchCheck corresponding to the RangeCheckType if safe to do
// so.
Optional<LoopICmp> generateLoopLatchCheck(Type *RangeCheckType);
public:
LoopPredication(ScalarEvolution *SE) : SE(SE){};
bool runOnLoop(Loop *L);
@ -301,6 +320,34 @@ Value *LoopPredication::expandCheck(SCEVExpander &Expander,
return Builder.CreateICmp(Pred, LHSV, RHSV);
}
Optional<LoopPredication::LoopICmp>
LoopPredication::generateLoopLatchCheck(Type *RangeCheckType) {
auto *LatchType = LatchCheck.IV->getType();
if (RangeCheckType == LatchType)
return LatchCheck;
// For now, bail out if latch type is narrower than range type.
if (DL->getTypeSizeInBits(LatchType) < DL->getTypeSizeInBits(RangeCheckType))
return None;
if (!isSafeToTruncateWideIVType(RangeCheckType))
return None;
// We can now safely identify the truncated version of the IV and limit for
// RangeCheckType.
LoopICmp NewLatchCheck;
NewLatchCheck.Pred = LatchCheck.Pred;
NewLatchCheck.IV = dyn_cast<SCEVAddRecExpr>(
SE->getTruncateExpr(LatchCheck.IV, RangeCheckType));
if (!NewLatchCheck.IV)
return None;
NewLatchCheck.Limit = SE->getTruncateExpr(LatchCheck.Limit, RangeCheckType);
DEBUG(dbgs() << "IV of type: " << *LatchType
<< "can be represented as range check type:" << *RangeCheckType
<< "\n");
DEBUG(dbgs() << "LatchCheck.IV: " << *NewLatchCheck.IV << "\n");
DEBUG(dbgs() << "LatchCheck.Limit: " << *NewLatchCheck.Limit << "\n");
return NewLatchCheck;
}
/// If ICI can be widened to a loop invariant condition emits the loop
/// invariant condition in the loop preheader and return it, otherwise
/// returns None.
@ -325,22 +372,31 @@ Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI,
return None;
}
auto *RangeCheckIV = RangeCheck->IV;
auto *Ty = RangeCheckIV->getType();
if (Ty != LatchCheck.IV->getType()) {
DEBUG(dbgs() << "Type mismatch between range check and latch IVs!\n");
return None;
}
if (!RangeCheckIV->isAffine()) {
DEBUG(dbgs() << "Range check IV is not affine!\n");
return None;
}
auto *Step = RangeCheckIV->getStepRecurrence(*SE);
if (Step != LatchCheck.IV->getStepRecurrence(*SE)) {
// We cannot just compare with latch IV step because the latch and range IVs
// may have different types.
if (!Step->isOne()) {
DEBUG(dbgs() << "Range check and latch have IVs different steps!\n");
return None;
}
assert(Step->isOne() && "must be one");
auto *Ty = RangeCheckIV->getType();
auto CurrLatchCheckOpt = generateLoopLatchCheck(Ty);
if (!CurrLatchCheckOpt) {
DEBUG(dbgs() << "Failed to generate a loop latch check "
"corresponding to range type: "
<< *Ty << "\n");
return None;
}
LoopICmp CurrLatchCheck = *CurrLatchCheckOpt;
// At this point the range check step and latch step should have the same
// value and type.
assert(Step == CurrLatchCheck.IV->getStepRecurrence(*SE) &&
"Range and latch should have same step recurrence!");
// Generate the widened condition:
// guardStart u< guardLimit &&
// latchLimit <pred> guardLimit - 1 - guardStart + latchStart
@ -348,8 +404,8 @@ Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI,
// header comment for the reasoning.
const SCEV *GuardStart = RangeCheckIV->getStart();
const SCEV *GuardLimit = RangeCheck->Limit;
const SCEV *LatchStart = LatchCheck.IV->getStart();
const SCEV *LatchLimit = LatchCheck.Limit;
const SCEV *LatchStart = CurrLatchCheck.IV->getStart();
const SCEV *LatchLimit = CurrLatchCheck.Limit;
// guardLimit - guardStart + latchStart - 1
const SCEV *RHS =
@ -357,7 +413,7 @@ Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI,
SE->getMinusSCEV(LatchStart, SE->getOne(Ty)));
ICmpInst::Predicate LimitCheckPred;
switch (LatchCheck.Pred) {
switch (CurrLatchCheck.Pred) {
case ICmpInst::ICMP_ULT:
LimitCheckPred = ICmpInst::ICMP_ULE;
break;
@ -510,6 +566,36 @@ Optional<LoopPredication::LoopICmp> LoopPredication::parseLoopLatchICmp() {
return Result;
}
// Returns true if its safe to truncate the IV to RangeCheckType.
bool LoopPredication::isSafeToTruncateWideIVType(Type *RangeCheckType) {
if (!EnableIVTruncation)
return false;
assert(DL->getTypeSizeInBits(LatchCheck.IV->getType()) >
DL->getTypeSizeInBits(RangeCheckType) &&
"Expected latch check IV type to be larger than range check operand "
"type!");
// The start and end values of the IV should be known. This is to guarantee
// that truncating the wide type will not lose information.
auto *Limit = dyn_cast<SCEVConstant>(LatchCheck.Limit);
auto *Start = dyn_cast<SCEVConstant>(LatchCheck.IV->getStart());
if (!Limit || !Start)
return false;
// This check makes sure that the IV does not change sign during loop
// iterations. Consider latchType = i64, LatchStart = 5, Pred = ICMP_SGE,
// LatchEnd = 2, rangeCheckType = i32. If it's not a monotonic predicate, the
// IV wraps around, and the truncation of the IV would lose the range of
// iterations between 2^32 and 2^64.
bool Increasing;
if (!SE->isMonotonicPredicate(LatchCheck.IV, LatchCheck.Pred, Increasing))
return false;
// The active bits should be less than the bits in the RangeCheckType. This
// guarantees that truncating the latch check to RangeCheckType is a safe
// operation.
auto RangeCheckTypeBitSize = DL->getTypeSizeInBits(RangeCheckType);
return Start->getAPInt().getActiveBits() < RangeCheckTypeBitSize &&
Limit->getAPInt().getActiveBits() < RangeCheckTypeBitSize;
}
bool LoopPredication::runOnLoop(Loop *Loop) {
L = Loop;

View File

@ -0,0 +1,138 @@
; RUN: opt -S -loop-predication -loop-predication-enable-iv-truncation=true < %s 2>&1 | FileCheck %s
declare void @llvm.experimental.guard(i1, ...)
declare i32 @length(i8*)
declare i16 @short_length(i8*)
; Consider range check of type i16 and i32, while IV is of type i64
; We can loop predicate this because the IV range is within i16 and within i32.
define i64 @iv_wider_type_rc_two_narrow_types(i32 %offA, i16 %offB, i8* %arrA, i8* %arrB) {
; CHECK-LABEL: iv_wider_type_rc_two_narrow_types
entry:
; CHECK-LABEL: entry:
; CHECK: [[idxB:[^ ]+]] = sub i16 %lengthB, %offB
; CHECK-NEXT: [[limit_checkB:[^ ]+]] = icmp ule i16 16, [[idxB]]
; CHECK-NEXT: [[first_iteration_checkB:[^ ]+]] = icmp ult i16 %offB, %lengthB
; CHECK-NEXT: [[WideChkB:[^ ]+]] = and i1 [[first_iteration_checkB]], [[limit_checkB]]
; CHECK-NEXT: [[idxA:[^ ]+]] = sub i32 %lengthA, %offA
; CHECK-NEXT: [[limit_checkA:[^ ]+]] = icmp ule i32 16, [[idxA]]
; CHECK-NEXT: [[first_iteration_checkA:[^ ]+]] = icmp ult i32 %offA, %lengthA
; CHECK-NEXT: [[WideChkA:[^ ]+]] = and i1 [[first_iteration_checkA]], [[limit_checkA]]
%lengthA = call i32 @length(i8* %arrA)
%lengthB = call i16 @short_length(i8* %arrB)
br label %loop
loop:
; CHECK-LABEL: loop:
; CHECK: [[invariant_check:[^ ]+]] = and i1 [[WideChkB]], [[WideChkA]]
; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[invariant_check]], i32 9)
%iv = phi i64 [0, %entry ], [ %iv.next, %loop ]
%iv.trunc.32 = trunc i64 %iv to i32
%iv.trunc.16 = trunc i64 %iv to i16
%indexA = add i32 %iv.trunc.32, %offA
%indexB = add i16 %iv.trunc.16, %offB
%rcA = icmp ult i32 %indexA, %lengthA
%rcB = icmp ult i16 %indexB, %lengthB
%wide.chk = and i1 %rcA, %rcB
call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk, i32 9) [ "deopt"() ]
%indexA.ext = zext i32 %indexA to i64
%addrA = getelementptr inbounds i8, i8* %arrA, i64 %indexA.ext
%eltA = load i8, i8* %addrA
%indexB.ext = zext i16 %indexB to i64
%addrB = getelementptr inbounds i8, i8* %arrB, i64 %indexB.ext
store i8 %eltA, i8* %addrB
%iv.next = add nuw nsw i64 %iv, 1
%latch.check = icmp ult i64 %iv.next, 16
br i1 %latch.check, label %loop, label %exit
exit:
ret i64 %iv
}
; Consider an IV of type long and an array access into int array.
; IV is of type i64 while the range check operands are of type i32 and i64.
define i64 @iv_rc_different_types(i32 %offA, i32 %offB, i8* %arrA, i8* %arrB, i64 %max)
{
; CHECK-LABEL: iv_rc_different_types
entry:
; CHECK-LABEL: entry:
; CHECK: [[lenB:[^ ]+]] = add i32 %lengthB, -1
; CHECK-NEXT: [[idxB:[^ ]+]] = sub i32 [[lenB]], %offB
; CHECK-NEXT: [[limit_checkB:[^ ]+]] = icmp ule i32 15, [[idxB]]
; CHECK-NEXT: [[first_iteration_checkB:[^ ]+]] = icmp ult i32 %offB, %lengthB
; CHECK-NEXT: [[WideChkB:[^ ]+]] = and i1 [[first_iteration_checkB]], [[limit_checkB]]
; CHECK-NEXT: [[maxMinusOne:[^ ]+]] = add i64 %max, -1
; CHECK-NEXT: [[limit_checkMax:[^ ]+]] = icmp ule i64 15, [[maxMinusOne]]
; CHECK-NEXT: [[first_iteration_checkMax:[^ ]+]] = icmp ult i64 0, %max
; CHECK-NEXT: [[WideChkMax:[^ ]+]] = and i1 [[first_iteration_checkMax]], [[limit_checkMax]]
; CHECK-NEXT: [[lenA:[^ ]+]] = add i32 %lengthA, -1
; CHECK-NEXT: [[idxA:[^ ]+]] = sub i32 [[lenA]], %offA
; CHECK-NEXT: [[limit_checkA:[^ ]+]] = icmp ule i32 15, [[idxA]]
; CHECK-NEXT: [[first_iteration_checkA:[^ ]+]] = icmp ult i32 %offA, %lengthA
; CHECK-NEXT: [[WideChkA:[^ ]+]] = and i1 [[first_iteration_checkA]], [[limit_checkA]]
%lengthA = call i32 @length(i8* %arrA)
%lengthB = call i32 @length(i8* %arrB)
br label %loop
loop:
; CHECK-LABEL: loop:
; CHECK: [[BandMax:[^ ]+]] = and i1 [[WideChkB]], [[WideChkMax]]
; CHECK: [[ABandMax:[^ ]+]] = and i1 [[BandMax]], [[WideChkA]]
; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[ABandMax]], i32 9)
%iv = phi i64 [0, %entry ], [ %iv.next, %loop ]
%iv.trunc = trunc i64 %iv to i32
%indexA = add i32 %iv.trunc, %offA
%indexB = add i32 %iv.trunc, %offB
%rcA = icmp ult i32 %indexA, %lengthA
%rcIV = icmp ult i64 %iv, %max
%wide.chk = and i1 %rcA, %rcIV
%rcB = icmp ult i32 %indexB, %lengthB
%wide.chk.final = and i1 %wide.chk, %rcB
call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk.final, i32 9) [ "deopt"() ]
%indexA.ext = zext i32 %indexA to i64
%addrA = getelementptr inbounds i8, i8* %arrA, i64 %indexA.ext
%eltA = load i8, i8* %addrA
%indexB.ext = zext i32 %indexB to i64
%addrB = getelementptr inbounds i8, i8* %arrB, i64 %indexB.ext
%eltB = load i8, i8* %addrB
%result = xor i8 %eltA, %eltB
store i8 %result, i8* %addrA
%iv.next = add nuw nsw i64 %iv, 1
%latch.check = icmp ult i64 %iv, 15
br i1 %latch.check, label %loop, label %exit
exit:
ret i64 %iv
}
; cannot narrow the IV to the range type, because we lose information.
; for (i64 i= 5; i>= 2; i++)
; this loop wraps around after reaching 2^64.
define i64 @iv_rc_different_type(i32 %offA, i8* %arrA) {
; CHECK-LABEL: iv_rc_different_type
entry:
%lengthA = call i32 @length(i8* %arrA)
br label %loop
loop:
; CHECK-LABEL: loop:
; CHECK: %rcA = icmp ult i32 %indexA, %lengthA
; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %rcA, i32 9)
%iv = phi i64 [ 5, %entry ], [ %iv.next, %loop ]
%iv.trunc.32 = trunc i64 %iv to i32
%indexA = add i32 %iv.trunc.32, %offA
%rcA = icmp ult i32 %indexA, %lengthA
call void (i1, ...) @llvm.experimental.guard(i1 %rcA, i32 9) [ "deopt"() ]
%indexA.ext = zext i32 %indexA to i64
%addrA = getelementptr inbounds i8, i8* %arrA, i64 %indexA.ext
%eltA = load i8, i8* %addrA
%res = add i8 %eltA, 2
store i8 %eltA, i8* %addrA
%iv.next = add i64 %iv, 1
%latch.check = icmp sge i64 %iv.next, 2
br i1 %latch.check, label %loop, label %exit
exit:
ret i64 %iv
}