
[Attributor] Use AAValueConstantRange to infer dereferenceability.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D76208
Kuter Dinel 2020-05-13 12:49:15 -05:00 committed by Johannes Doerfert
parent 00fc32f855
commit 2e59c57eca
7 changed files with 221 additions and 66 deletions


@@ -547,13 +547,24 @@ public:
/// Accumulate the constant address offset of this GEP if possible.
///
/// This routine accepts an APInt into which it will accumulate the constant
/// offset of this GEP if the GEP is in fact constant. If the GEP is not
/// all-constant, it returns false and the value of the offset APInt is
/// undefined (it is *not* preserved!). The APInt passed into this routine
/// must be exactly as wide as the IntPtr type for the address space of the
/// base GEP pointer.
bool accumulateConstantOffset(const DataLayout &DL, APInt &Offset) const;
/// This routine accepts an APInt into which it will try to accumulate the
/// constant offset of this GEP.
///
/// If \p ExternalAnalysis is provided, it will be used to calculate an
/// offset when an operand of the GEP is not constant. For example, \p
/// ExternalAnalysis might try to calculate a lower bound for a value. If
/// \p ExternalAnalysis succeeds, it should return true.
///
/// If \p ExternalAnalysis returns false, or if the value it returns causes
/// an overflow/underflow, this routine returns false and the value of the
/// offset APInt is undefined (it is *not* preserved!).
///
/// The APInt passed into this routine must be exactly as wide as the
/// IntPtr type for the address space of the base GEP pointer.
bool accumulateConstantOffset(
const DataLayout &DL, APInt &Offset,
function_ref<bool(Value &, APInt &)> ExternalAnalysis = nullptr) const;
};
class PtrToIntOperator
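
As a usage sketch (not part of this patch): a caller can drive the new overload with a callback that supplies a signed lower bound for non-constant indices. The placeholder below always answers "no bound known", so the behavior degrades to the constant-only case; a real client would query a range analysis instead.

#include "llvm/ADT/APInt.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Operator.h"

using namespace llvm;

// Hypothetical placeholder: a real client would ask a range analysis for a
// signed lower bound of the value. Returning false means "no useful bound".
static bool getSignedLowerBound(Value &, APInt &) { return false; }

// Accumulate a minimal byte offset for GEP; fails if some index is neither
// constant nor bounded from below by the callback.
static bool getMinimalGEPOffset(const GEPOperator &GEP, const DataLayout &DL,
                                APInt &ByteOffset) {
  ByteOffset = APInt(DL.getIndexSizeInBits(GEP.getPointerAddressSpace()), 0);
  return GEP.accumulateConstantOffset(DL, ByteOffset, getSignedLowerBound);
}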


@@ -593,18 +593,23 @@ public:
}
/// Accumulate the constant offset this value has compared to a base pointer.
/// Only 'getelementptr' instructions (GEPs) with constant indices are
/// accumulated but other instructions, e.g., casts, are stripped away as
/// well. The accumulated constant offset is added to \p Offset and the base
/// Only 'getelementptr' instructions (GEPs) are accumulated but other
/// instructions, e.g., casts, are stripped away as well.
/// The accumulated constant offset is added to \p Offset and the base
/// pointer is returned.
///
/// The APInt \p Offset has to have a bit-width equal to the IntPtr type for
/// the address space of 'this' pointer value, e.g., use
/// DataLayout::getIndexTypeSizeInBits(Ty).
///
/// If \p AllowNonInbounds is true, constant offsets in GEPs are stripped and
/// If \p AllowNonInbounds is true, offsets in GEPs are stripped and
/// accumulated even if the GEP is not "inbounds".
///
/// If \p ExternalAnalysis is provided, it will be used to calculate an
/// offset when an operand of the GEP is not constant. For example, \p
/// ExternalAnalysis might try to calculate a lower bound for a value. If
/// \p ExternalAnalysis succeeds, it should return true.
///
/// If this is called on a non-pointer value, it returns 'this' and the
/// \p Offset is not modified.
///
@@ -613,9 +618,10 @@ public:
/// between the underlying value and the returned one. Thus, if no constant
/// offset was found, the returned value is the underlying one and \p Offset
/// is unchanged.
const Value *stripAndAccumulateConstantOffsets(const DataLayout &DL,
APInt &Offset,
bool AllowNonInbounds) const;
const Value *stripAndAccumulateConstantOffsets(
const DataLayout &DL, APInt &Offset, bool AllowNonInbounds,
function_ref<bool(Value &Value, APInt &Offset)> ExternalAnalysis =
nullptr) const;
Value *stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset,
bool AllowNonInbounds) {
return const_cast<Value *>(
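
A minimal sketch of a client of the extended stripAndAccumulateConstantOffsets, mirroring the getMinimalBaseOfAccessPointerOperand helper this patch adds to the Attributor further below; the lambda is a hypothetical placeholder for a real range analysis:

#include "llvm/ADT/APInt.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Value.h"

using namespace llvm;

// Walk casts and GEPs from Ptr to its base, accumulating a minimal byte
// offset along the way.
static const Value *getBaseAndMinOffset(const Value *Ptr, const DataLayout &DL,
                                        int64_t &Bytes) {
  APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
  const Value *Base = Ptr->stripAndAccumulateConstantOffsets(
      DL, Offset, /* AllowNonInbounds */ false,
      [](Value &, APInt &) { return false; }); // placeholder: no bound known
  Bytes = Offset.getSExtValue();
  return Base;
}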


@@ -31,36 +31,79 @@ Type *GEPOperator::getResultElementType() const {
return cast<GetElementPtrConstantExpr>(this)->getResultElementType();
}
bool GEPOperator::accumulateConstantOffset(const DataLayout &DL,
APInt &Offset) const {
assert(Offset.getBitWidth() ==
DL.getIndexSizeInBits(getPointerAddressSpace()) &&
"The offset bit width does not match DL specification.");
bool GEPOperator::accumulateConstantOffset(
const DataLayout &DL, APInt &Offset,
function_ref<bool(Value &, APInt &)> ExternalAnalysis) const {
assert(Offset.getBitWidth() ==
DL.getIndexSizeInBits(getPointerAddressSpace()) &&
"The offset bit width does not match DL specification.");
bool UsedExternalAnalysis = false;
auto AccumulateOffset = [&](APInt Index, uint64_t Size) -> bool {
Index = Index.sextOrTrunc(Offset.getBitWidth());
APInt IndexedSize = APInt(Offset.getBitWidth(), Size);
// For array or vector indices, scale the index by the size of the type.
if (!UsedExternalAnalysis) {
Offset += Index * IndexedSize;
} else {
// The external analysis may return a result that is higher/lower than
// the actual value, so we need to detect overflow/underflow.
bool Overflow = false;
APInt OffsetPlus = Index.smul_ov(IndexedSize, Overflow);
if (Overflow)
return false;
Offset = Offset.sadd_ov(OffsetPlus, Overflow);
if (Overflow)
return false;
}
return true;
};
for (gep_type_iterator GTI = gep_type_begin(this), GTE = gep_type_end(this);
GTI != GTE; ++GTI) {
ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
if (!OpC)
return false;
if (OpC->isZero())
continue;
// Scalable vectors have are multiplied by a runtime constant.
// Scalable vectors are multiplied by a runtime constant.
bool ScalableType = false;
if (isa<ScalableVectorType>(GTI.getIndexedType()))
return false;
ScalableType = true;
// Handle a struct index, which adds its field offset to the pointer.
if (StructType *STy = GTI.getStructTypeOrNull()) {
unsigned ElementIdx = OpC->getZExtValue();
const StructLayout *SL = DL.getStructLayout(STy);
Offset += APInt(Offset.getBitWidth(), SL->getElementOffset(ElementIdx));
Value *V = GTI.getOperand();
StructType *STy = GTI.getStructTypeOrNull();
// Handle ConstantInt if possible.
if (auto ConstOffset = dyn_cast<ConstantInt>(V)) {
if (ConstOffset->isZero())
continue;
// If the type is scalable and the constant is not zero, bail out; a zero
// index is fine since vscale * n * 0 == 0.
if (ScalableType)
return false;
// Handle a struct index, which adds its field offset to the pointer.
if (STy) {
unsigned ElementIdx = ConstOffset->getZExtValue();
const StructLayout *SL = DL.getStructLayout(STy);
// Element offset is in bytes.
if (!AccumulateOffset(
APInt(Offset.getBitWidth(), SL->getElementOffset(ElementIdx)),
1))
return false;
continue;
}
if (!AccumulateOffset(ConstOffset->getValue(),
DL.getTypeAllocSize(GTI.getIndexedType())))
return false;
continue;
}
// For array or vector indices, scale the index by the size of the type.
APInt Index = OpC->getValue().sextOrTrunc(Offset.getBitWidth());
Offset += Index * APInt(Offset.getBitWidth(),
DL.getTypeAllocSize(GTI.getIndexedType()));
// The operand is not constant; check if an external analysis was provided.
// External analysis is not applicable to a struct type.
if (!ExternalAnalysis || STy || ScalableType)
return false;
APInt AnalysisIndex;
if (!ExternalAnalysis(*V, AnalysisIndex))
return false;
UsedExternalAnalysis = true;
if (!AccumulateOffset(AnalysisIndex,
DL.getTypeAllocSize(GTI.getIndexedType())))
return false;
}
return true;
}
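
The overflow handling above relies on APInt's checked signed arithmetic. A standalone illustration of the pattern (a sketch, not code from the patch):

#include "llvm/ADT/APInt.h"

using namespace llvm;

// Multiply-accumulate with signed overflow detection, mirroring the
// AccumulateOffset lambda: Acc += Index * Size, failing on wrap-around.
static bool mulAddChecked(APInt &Acc, const APInt &Index, const APInt &Size) {
  bool Overflow = false;
  APInt Scaled = Index.smul_ov(Size, Overflow); // signed multiply, sets flag
  if (Overflow)
    return false;
  Acc = Acc.sadd_ov(Scaled, Overflow); // signed add, sets flag
  return !Overflow;
}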


@@ -599,9 +599,9 @@ const Value *Value::stripPointerCastsAndInvariantGroups() const {
return stripPointerCastsAndOffsets<PSK_ZeroIndicesAndInvariantGroups>(this);
}
const Value *
Value::stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset,
bool AllowNonInbounds) const {
const Value *Value::stripAndAccumulateConstantOffsets(
const DataLayout &DL, APInt &Offset, bool AllowNonInbounds,
function_ref<bool(Value &, APInt &)> ExternalAnalysis) const {
if (!getType()->isPtrOrPtrVectorTy())
return this;
@@ -627,7 +627,7 @@ Value::stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset,
// of GEP's pointer type rather than the size of the original
// pointer type.
APInt GEPOffset(DL.getIndexTypeSizeInBits(V->getType()), 0);
if (!GEP->accumulateConstantOffset(DL, GEPOffset))
if (!GEP->accumulateConstantOffset(DL, GEPOffset, ExternalAnalysis))
return V;
// Stop traversal if the pointer offset wouldn't fit in the bit-width
@@ -636,7 +636,20 @@ Value::stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset,
if (GEPOffset.getMinSignedBits() > BitWidth)
return V;
Offset += GEPOffset.sextOrTrunc(BitWidth);
// The external analysis may return a result that is higher/lower than
// the actual value, so we need to detect overflow/underflow.
APInt GEPOffsetST = GEPOffset.sextOrTrunc(BitWidth);
if (!ExternalAnalysis) {
Offset += GEPOffsetST;
} else {
bool Overflow = false;
APInt OldOffset = Offset;
Offset = Offset.sadd_ov(GEPOffsetST, Overflow);
if (Overflow) {
Offset = OldOffset;
return V;
}
}
V = GEP->getPointerOperand();
} else if (Operator::getOpcode(V) == Instruction::BitCast ||
Operator::getOpcode(V) == Instruction::AddrSpaceCast) {


@@ -349,6 +349,43 @@ static bool genericValueTraversal(
return true;
}
const Value *stripAndAccumulateMinimalOffsets(
Attributor &A, const AbstractAttribute &QueryingAA, const Value *Val,
const DataLayout &DL, APInt &Offset, bool AllowNonInbounds,
bool UseAssumed = false) {
auto AttributorAnalysis = [&](Value &V, APInt &ROffset) -> bool {
const IRPosition &Pos = IRPosition::value(V);
// Only track dependence if we are going to use the assumed info.
const AAValueConstantRange &ValueConstantRangeAA =
A.getAAFor<AAValueConstantRange>(QueryingAA, Pos,
/* TrackDependence */ UseAssumed);
ConstantRange Range = UseAssumed ? ValueConstantRangeAA.getAssumed()
: ValueConstantRangeAA.getKnown();
// We can only use the lower end of the range because the upper end may
// be higher than what the value can actually be.
ROffset = Range.getSignedMin();
return true;
};
return Val->stripAndAccumulateConstantOffsets(DL, Offset, AllowNonInbounds,
AttributorAnalysis);
}
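
To make the callback's contract concrete, a hedged illustration with a hard-coded range (the patch obtains the range from AAValueConstantRange instead): if an i32 index is known to lie in the signed range [2, 8), the callback reports 2, which the caller then scales by the element size, e.g. 2 * 4 = 8 bytes for an i32 element.

#include "llvm/ADT/APInt.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Value.h"

using namespace llvm;

// Sketch of the ExternalAnalysis contract: report the signed minimum of a
// known range as the usable offset. The range here is assumed, not computed.
static bool exampleAnalysis(Value &, APInt &ROffset) {
  ConstantRange Range(APInt(32, 2), APInt(32, 8)); // assumed range of V
  ROffset = Range.getSignedMin();                  // yields 2
  return true;
}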
static const Value *getMinimalBaseOfAccessPointerOperand(
Attributor &A, const AbstractAttribute &QueryingAA, const Instruction *I,
int64_t &BytesOffset, const DataLayout &DL, bool AllowNonInbounds = false) {
const Value *Ptr = getPointerOperand(I, /* AllowVolatile */ false);
if (!Ptr)
return nullptr;
APInt OffsetAPInt(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
const Value *Base = stripAndAccumulateMinimalOffsets(
A, QueryingAA, Ptr, DL, OffsetAPInt, AllowNonInbounds);
BytesOffset = OffsetAPInt.getSExtValue();
return Base;
}
static const Value *
getBasePointerOfAccessPointerOperand(const Instruction *I, int64_t &BytesOffset,
const DataLayout &DL,
@@ -1586,14 +1623,16 @@ static int64_t getKnownNonNullAndDerefBytesForUse(
TrackUse = true;
return 0;
}
if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
if (GEP->hasAllConstantIndices()) {
TrackUse = true;
return 0;
}
if (isa<GetElementPtrInst>(I)) {
TrackUse = true;
return 0;
}
int64_t Offset;
if (const Value *Base = getBasePointerOfAccessPointerOperand(I, Offset, DL)) {
const Value *Base =
getMinimalBaseOfAccessPointerOperand(A, QueryingAA, I, Offset, DL);
if (Base) {
if (Base == &AssociatedValue &&
getPointerOperand(I, /* AllowVolatile */ false) == UseV) {
int64_t DerefBytes =
@@ -1605,8 +1644,9 @@ static int64_t getKnownNonNullAndDerefBytesForUse(
}
/// Corner case when an offset is 0.
if (const Value *Base = getBasePointerOfAccessPointerOperand(
I, Offset, DL, /*AllowNonInbounds*/ true)) {
Base = getBasePointerOfAccessPointerOperand(I, Offset, DL,
/*AllowNonInbounds*/ true);
if (Base) {
if (Offset == 0 && Base == &AssociatedValue &&
getPointerOperand(I, /* AllowVolatile */ false) == UseV) {
int64_t DerefBytes =
@@ -3311,6 +3351,8 @@ struct AADereferenceableImpl : AADereferenceable {
bool TrackUse = false;
int64_t DerefBytes = getKnownNonNullAndDerefBytesForUse(
A, *this, getAssociatedValue(), U, I, IsNonNull, TrackUse);
LLVM_DEBUG(dbgs() << "[AADereferenceable] Deref bytes: " << DerefBytes
<< " for instruction " << *I << "\n");
addAccessedBytesForUse(A, U, I, State);
State.takeKnownDerefBytesMaximum(DerefBytes);
@@ -3359,13 +3401,13 @@ struct AADereferenceableFloating : AADereferenceableImpl {
ChangeStatus updateImpl(Attributor &A) override {
const DataLayout &DL = A.getDataLayout();
auto VisitValueCB = [&](Value &V, const Instruction *, DerefState &T,
auto VisitValueCB = [&](const Value &V, const Instruction *, DerefState &T,
bool Stripped) -> bool {
unsigned IdxWidth =
DL.getIndexSizeInBits(V.getType()->getPointerAddressSpace());
APInt Offset(IdxWidth, 0);
const Value *Base =
V.stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
stripAndAccumulateMinimalOffsets(A, *this, &V, DL, Offset, false);
const auto &AA =
A.getAAFor<AADereferenceable>(*this, IRPosition::value(*Base));
@@ -3382,7 +3424,6 @@ struct AADereferenceableFloating : AADereferenceableImpl {
T.GlobalState &= DS.GlobalState;
}
// TODO: Use `AAConstantRange` to infer dereferenceable bytes.
// For now we do not try to "increase" dereferenceability due to negative
// indices as we first have to come up with code to deal with loops and


@@ -298,6 +298,32 @@ define void @deref_or_null_and_nonnull(i32* dereferenceable_or_null(100) %0) {
; fill_range(p, *range);
; }
; FIXME: %ptr should be dereferenceable(31)
define void @test8(i8* %ptr) #0 {
br label %1
1: ; preds = %5, %0
%i.0 = phi i32 [ 20, %0 ], [ %4, %5 ]
%2 = sext i32 %i.0 to i64
%3 = getelementptr inbounds i8, i8* %ptr, i64 %2
store i8 32, i8* %3, align 1
%4 = add nsw i32 %i.0, 1
br label %5
5: ; preds = %1
%6 = icmp slt i32 %4, 30
br i1 %6, label %1, label %7
7: ; preds = %5
ret void
}
; 8.2 (negative case)
define void @test8_neg(i32 %i, i8* %ptr) #0 {
%1 = sext i32 %i to i64
%2 = getelementptr inbounds i8, i8* %ptr, i64 %1
store i8 65, i8* %2, align 1
ret void
}
; void fill_range(int* p, long long int start){
; for(long long int i = start;i<start+10;i++){
; // If p[i] is inbounds, p is dereferenceable(40) at least.


@@ -426,20 +426,35 @@ declare i32 @__gxx_personality_v0(...)
; IS__TUNIT____: Function Attrs: argmemonly nofree noinline nosync nounwind readonly uwtable
; IS__CGSCC____: Function Attrs: argmemonly nofree noinline norecurse nosync nounwind readonly uwtable
define i32 @loop_constant_trip_count(i32* nocapture readonly %0) #0 {
; CHECK-LABEL: define {{[^@]+}}@loop_constant_trip_count
; CHECK-SAME: (i32* nocapture nofree readonly [[TMP0:%.*]])
; CHECK-NEXT: br label [[TMP3:%.*]]
; CHECK: 2:
; CHECK-NEXT: ret i32 [[TMP8:%.*]]
; CHECK: 3:
; CHECK-NEXT: [[TMP4:%.*]] = phi i64 [ 0, [[TMP1:%.*]] ], [ [[TMP9:%.*]], [[TMP3]] ]
; CHECK-NEXT: [[TMP5:%.*]] = phi i32 [ 0, [[TMP1]] ], [ [[TMP8]], [[TMP3]] ]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4
; CHECK-NEXT: [[TMP8]] = add nsw i32 [[TMP7]], [[TMP5]]
; CHECK-NEXT: [[TMP9]] = add nuw nsw i64 [[TMP4]], 1
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 10
; CHECK-NEXT: br i1 [[TMP10]], label [[TMP2:%.*]], label [[TMP3]]
; IS________OPM-LABEL: define {{[^@]+}}@loop_constant_trip_count
; IS________OPM-SAME: (i32* nocapture nofree readonly [[TMP0:%.*]])
; IS________OPM-NEXT: br label [[TMP3:%.*]]
; IS________OPM: 2:
; IS________OPM-NEXT: ret i32 [[TMP8:%.*]]
; IS________OPM: 3:
; IS________OPM-NEXT: [[TMP4:%.*]] = phi i64 [ 0, [[TMP1:%.*]] ], [ [[TMP9:%.*]], [[TMP3]] ]
; IS________OPM-NEXT: [[TMP5:%.*]] = phi i32 [ 0, [[TMP1]] ], [ [[TMP8]], [[TMP3]] ]
; IS________OPM-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 [[TMP4]]
; IS________OPM-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4
; IS________OPM-NEXT: [[TMP8]] = add nsw i32 [[TMP7]], [[TMP5]]
; IS________OPM-NEXT: [[TMP9]] = add nuw nsw i64 [[TMP4]], 1
; IS________OPM-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 10
; IS________OPM-NEXT: br i1 [[TMP10]], label [[TMP2:%.*]], label [[TMP3]]
;
; IS________NPM-LABEL: define {{[^@]+}}@loop_constant_trip_count
; IS________NPM-SAME: (i32* nocapture nofree nonnull readonly dereferenceable(4) [[TMP0:%.*]])
; IS________NPM-NEXT: br label [[TMP3:%.*]]
; IS________NPM: 2:
; IS________NPM-NEXT: ret i32 [[TMP8:%.*]]
; IS________NPM: 3:
; IS________NPM-NEXT: [[TMP4:%.*]] = phi i64 [ 0, [[TMP1:%.*]] ], [ [[TMP9:%.*]], [[TMP3]] ]
; IS________NPM-NEXT: [[TMP5:%.*]] = phi i32 [ 0, [[TMP1]] ], [ [[TMP8]], [[TMP3]] ]
; IS________NPM-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 [[TMP4]]
; IS________NPM-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4
; IS________NPM-NEXT: [[TMP8]] = add nsw i32 [[TMP7]], [[TMP5]]
; IS________NPM-NEXT: [[TMP9]] = add nuw nsw i64 [[TMP4]], 1
; IS________NPM-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 10
; IS________NPM-NEXT: br i1 [[TMP10]], label [[TMP2:%.*]], label [[TMP3]]
;
br label %3