diff --git a/include/llvm/IR/Operator.h b/include/llvm/IR/Operator.h index ec594666f0a..0fc7bf9034f 100644 --- a/include/llvm/IR/Operator.h +++ b/include/llvm/IR/Operator.h @@ -547,13 +547,24 @@ public: /// Accumulate the constant address offset of this GEP if possible. /// - /// This routine accepts an APInt into which it will accumulate the constant - /// offset of this GEP if the GEP is in fact constant. If the GEP is not - /// all-constant, it returns false and the value of the offset APInt is - /// undefined (it is *not* preserved!). The APInt passed into this routine - /// must be at exactly as wide as the IntPtr type for the address space of the - /// base GEP pointer. - bool accumulateConstantOffset(const DataLayout &DL, APInt &Offset) const; + /// This routine accepts an APInt into which it will try to accumulate the + /// constant offset of this GEP. + /// + /// If \p ExternalAnalysis is provided it will be used to calculate an offset + /// when an operand of GEP is not constant. + /// For example, for a value \p ExternalAnalysis might try to calculate a + /// lower bound. If \p ExternalAnalysis is successful, it should return true. + /// + /// If the \p ExternalAnalysis returns false or the value returned by \p + /// ExternalAnalysis results in an overflow/underflow, this routine returns + /// false and the value of the offset APInt is undefined (it is *not* + /// preserved!). + /// + /// The APInt passed into this routine must be exactly as wide as the + /// IntPtr type for the address space of the base GEP pointer. + bool accumulateConstantOffset( + const DataLayout &DL, APInt &Offset, + function_ref ExternalAnalysis = nullptr) const; }; class PtrToIntOperator diff --git a/include/llvm/IR/Value.h b/include/llvm/IR/Value.h index fdaa2aed225..f7dc5343015 100644 --- a/include/llvm/IR/Value.h +++ b/include/llvm/IR/Value.h @@ -593,18 +593,23 @@ public: } /// Accumulate the constant offset this value has compared to a base pointer. 
- /// Only 'getelementptr' instructions (GEPs) with constant indices are - /// accumulated but other instructions, e.g., casts, are stripped away as - /// well. The accumulated constant offset is added to \p Offset and the base + /// Only 'getelementptr' instructions (GEPs) are accumulated but other + /// instructions, e.g., casts, are stripped away as well. + /// The accumulated constant offset is added to \p Offset and the base /// pointer is returned. /// /// The APInt \p Offset has to have a bit-width equal to the IntPtr type for /// the address space of 'this' pointer value, e.g., use /// DataLayout::getIndexTypeSizeInBits(Ty). /// - /// If \p AllowNonInbounds is true, constant offsets in GEPs are stripped and + /// If \p AllowNonInbounds is true, offsets in GEPs are stripped and /// accumulated even if the GEP is not "inbounds". /// + /// If \p ExternalAnalysis is provided it will be used to calculate an offset + /// when an operand of GEP is not constant. + /// For example, for a value \p ExternalAnalysis might try to calculate a + /// lower bound. If \p ExternalAnalysis is successful, it should return true. + /// /// If this is called on a non-pointer value, it returns 'this' and the /// \p Offset is not modified. /// @@ -613,9 +618,10 @@ public: /// between the underlying value and the returned one. Thus, if no constant /// offset was found, the returned value is the underlying one and \p Offset /// is unchanged. 
- const Value *stripAndAccumulateConstantOffsets(const DataLayout &DL, - APInt &Offset, - bool AllowNonInbounds) const; + const Value *stripAndAccumulateConstantOffsets( + const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, + function_ref ExternalAnalysis = + nullptr) const; Value *stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset, bool AllowNonInbounds) { return const_cast( diff --git a/lib/IR/Operator.cpp b/lib/IR/Operator.cpp index fc4c28c053d..baf3a054496 100644 --- a/lib/IR/Operator.cpp +++ b/lib/IR/Operator.cpp @@ -31,36 +31,79 @@ Type *GEPOperator::getResultElementType() const { return cast(this)->getResultElementType(); } -bool GEPOperator::accumulateConstantOffset(const DataLayout &DL, - APInt &Offset) const { - assert(Offset.getBitWidth() == - DL.getIndexSizeInBits(getPointerAddressSpace()) && - "The offset bit width does not match DL specification."); +bool GEPOperator::accumulateConstantOffset( + const DataLayout &DL, APInt &Offset, + function_ref ExternalAnalysis) const { + assert(Offset.getBitWidth() == + DL.getIndexSizeInBits(getPointerAddressSpace()) && + "The offset bit width does not match DL specification."); + + bool UsedExternalAnalysis = false; + auto AccumulateOffset = [&](APInt Index, uint64_t Size) -> bool { + Index = Index.sextOrTrunc(Offset.getBitWidth()); + APInt IndexedSize = APInt(Offset.getBitWidth(), Size); + // For array or vector indices, scale the index by the size of the type. + if (!UsedExternalAnalysis) { + Offset += Index * IndexedSize; + } else { + // External Analysis can return a result higher/lower than the value + // represents. We need to detect overflow/underflow. 
+ bool Overflow = false; + APInt OffsetPlus = Index.smul_ov(IndexedSize, Overflow); + if (Overflow) + return false; + Offset = Offset.sadd_ov(OffsetPlus, Overflow); + if (Overflow) + return false; + } + return true; + }; for (gep_type_iterator GTI = gep_type_begin(this), GTE = gep_type_end(this); GTI != GTE; ++GTI) { - ConstantInt *OpC = dyn_cast(GTI.getOperand()); - if (!OpC) - return false; - if (OpC->isZero()) - continue; - - // Scalable vectors have are multiplied by a runtime constant. + // Scalable vectors are multiplied by a runtime constant. + bool ScalableType = false; if (isa(GTI.getIndexedType())) - return false; + ScalableType = true; - // Handle a struct index, which adds its field offset to the pointer. - if (StructType *STy = GTI.getStructTypeOrNull()) { - unsigned ElementIdx = OpC->getZExtValue(); - const StructLayout *SL = DL.getStructLayout(STy); - Offset += APInt(Offset.getBitWidth(), SL->getElementOffset(ElementIdx)); + Value *V = GTI.getOperand(); + StructType *STy = GTI.getStructTypeOrNull(); + // Handle ConstantInt if possible. + if (auto ConstOffset = dyn_cast(V)) { + if (ConstOffset->isZero()) + continue; + // if the type is scalable and the constant is not zero (vscale * n * 0 = + // 0) bailout. + if (ScalableType) + return false; + // Handle a struct index, which adds its field offset to the pointer. + if (STy) { + unsigned ElementIdx = ConstOffset->getZExtValue(); + const StructLayout *SL = DL.getStructLayout(STy); + // Element offset is in bytes. + if (!AccumulateOffset( + APInt(Offset.getBitWidth(), SL->getElementOffset(ElementIdx)), + 1)) + return false; + continue; + } + if (!AccumulateOffset(ConstOffset->getValue(), + DL.getTypeAllocSize(GTI.getIndexedType()))) + return false; continue; } - // For array or vector indices, scale the index by the size of the type. 
- APInt Index = OpC->getValue().sextOrTrunc(Offset.getBitWidth()); - Offset += Index * APInt(Offset.getBitWidth(), - DL.getTypeAllocSize(GTI.getIndexedType())); + // The operand is not constant, check if an external analysis was provided. + // External analysis is not applicable to a struct type. + if (!ExternalAnalysis || STy || ScalableType) + return false; + APInt AnalysisIndex; + if (!ExternalAnalysis(*V, AnalysisIndex)) + return false; + UsedExternalAnalysis = true; + if (!AccumulateOffset(AnalysisIndex, + DL.getTypeAllocSize(GTI.getIndexedType()))) + return false; } return true; } diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp index 015bf209dca..cf547401871 100644 --- a/lib/IR/Value.cpp +++ b/lib/IR/Value.cpp @@ -599,9 +599,9 @@ const Value *Value::stripPointerCastsAndInvariantGroups() const { return stripPointerCastsAndOffsets(this); } -const Value * -Value::stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset, - bool AllowNonInbounds) const { +const Value *Value::stripAndAccumulateConstantOffsets( + const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, + function_ref ExternalAnalysis) const { if (!getType()->isPtrOrPtrVectorTy()) return this; @@ -627,7 +627,7 @@ Value::stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset, // of GEP's pointer type rather than the size of the original // pointer type. APInt GEPOffset(DL.getIndexTypeSizeInBits(V->getType()), 0); - if (!GEP->accumulateConstantOffset(DL, GEPOffset)) + if (!GEP->accumulateConstantOffset(DL, GEPOffset, ExternalAnalysis)) return V; // Stop traversal if the pointer offset wouldn't fit in the bit-width @@ -636,7 +636,20 @@ Value::stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset, if (GEPOffset.getMinSignedBits() > BitWidth) return V; - Offset += GEPOffset.sextOrTrunc(BitWidth); + // External Analysis can return a result higher/lower than the value + // represents. We need to detect overflow/underflow. 
+ APInt GEPOffsetST = GEPOffset.sextOrTrunc(BitWidth); + if (!ExternalAnalysis) { + Offset += GEPOffsetST; + } else { + bool Overflow = false; + APInt OldOffset = Offset; + Offset = Offset.sadd_ov(GEPOffsetST, Overflow); + if (Overflow) { + Offset = OldOffset; + return V; + } + } V = GEP->getPointerOperand(); } else if (Operator::getOpcode(V) == Instruction::BitCast || Operator::getOpcode(V) == Instruction::AddrSpaceCast) { diff --git a/lib/Transforms/IPO/AttributorAttributes.cpp b/lib/Transforms/IPO/AttributorAttributes.cpp index aa459c4b050..31880c653dc 100644 --- a/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/lib/Transforms/IPO/AttributorAttributes.cpp @@ -349,6 +349,43 @@ static bool genericValueTraversal( return true; } +const Value *stripAndAccumulateMinimalOffsets( + Attributor &A, const AbstractAttribute &QueryingAA, const Value *Val, + const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, + bool UseAssumed = false) { + + auto AttributorAnalysis = [&](Value &V, APInt &ROffset) -> bool { + const IRPosition &Pos = IRPosition::value(V); + // Only track dependence if we are going to use the assumed info. + const AAValueConstantRange &ValueConstantRangeAA = + A.getAAFor(QueryingAA, Pos, + /* TrackDependence */ UseAssumed); + ConstantRange Range = UseAssumed ? ValueConstantRangeAA.getAssumed() + : ValueConstantRangeAA.getKnown(); + // We can only use the lower part of the range because the upper part can + // be higher than what the value can really be. 
+ ROffset = Range.getSignedMin(); + return true; + }; + + return Val->stripAndAccumulateConstantOffsets(DL, Offset, AllowNonInbounds, + AttributorAnalysis); +} + +static const Value *getMinimalBaseOfAccsesPointerOperand( + Attributor &A, const AbstractAttribute &QueryingAA, const Instruction *I, + int64_t &BytesOffset, const DataLayout &DL, bool AllowNonInbounds = false) { + const Value *Ptr = getPointerOperand(I, /* AllowVolatile */ false); + if (!Ptr) + return nullptr; + APInt OffsetAPInt(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); + const Value *Base = stripAndAccumulateMinimalOffsets( + A, QueryingAA, Ptr, DL, OffsetAPInt, AllowNonInbounds); + + BytesOffset = OffsetAPInt.getSExtValue(); + return Base; +} + static const Value * getBasePointerOfAccessPointerOperand(const Instruction *I, int64_t &BytesOffset, const DataLayout &DL, @@ -1586,14 +1623,16 @@ static int64_t getKnownNonNullAndDerefBytesForUse( TrackUse = true; return 0; } - if (auto *GEP = dyn_cast(I)) - if (GEP->hasAllConstantIndices()) { - TrackUse = true; - return 0; - } + + if (isa(I)) { + TrackUse = true; + return 0; + } int64_t Offset; - if (const Value *Base = getBasePointerOfAccessPointerOperand(I, Offset, DL)) { + const Value *Base = + getMinimalBaseOfAccsesPointerOperand(A, QueryingAA, I, Offset, DL); + if (Base) { if (Base == &AssociatedValue && getPointerOperand(I, /* AllowVolatile */ false) == UseV) { int64_t DerefBytes = @@ -1605,8 +1644,9 @@ static int64_t getKnownNonNullAndDerefBytesForUse( } /// Corner case when an offset is 0. 
- if (const Value *Base = getBasePointerOfAccessPointerOperand( - I, Offset, DL, /*AllowNonInbounds*/ true)) { + Base = getBasePointerOfAccessPointerOperand(I, Offset, DL, + /*AllowNonInbounds*/ true); + if (Base) { if (Offset == 0 && Base == &AssociatedValue && getPointerOperand(I, /* AllowVolatile */ false) == UseV) { int64_t DerefBytes = @@ -3311,6 +3351,8 @@ struct AADereferenceableImpl : AADereferenceable { bool TrackUse = false; int64_t DerefBytes = getKnownNonNullAndDerefBytesForUse( A, *this, getAssociatedValue(), U, I, IsNonNull, TrackUse); + LLVM_DEBUG(dbgs() << "[AADereferenceable] Deref bytes: " << DerefBytes + << " for instruction " << *I << "\n"); addAccessedBytesForUse(A, U, I, State); State.takeKnownDerefBytesMaximum(DerefBytes); @@ -3359,13 +3401,13 @@ struct AADereferenceableFloating : AADereferenceableImpl { ChangeStatus updateImpl(Attributor &A) override { const DataLayout &DL = A.getDataLayout(); - auto VisitValueCB = [&](Value &V, const Instruction *, DerefState &T, + auto VisitValueCB = [&](const Value &V, const Instruction *, DerefState &T, bool Stripped) -> bool { unsigned IdxWidth = DL.getIndexSizeInBits(V.getType()->getPointerAddressSpace()); APInt Offset(IdxWidth, 0); const Value *Base = - V.stripAndAccumulateInBoundsConstantOffsets(DL, Offset); + stripAndAccumulateMinimalOffsets(A, *this, &V, DL, Offset, false); const auto &AA = A.getAAFor(*this, IRPosition::value(*Base)); @@ -3382,7 +3424,6 @@ struct AADereferenceableFloating : AADereferenceableImpl { T.GlobalState &= DS.GlobalState; } - // TODO: Use `AAConstantRange` to infer dereferenceable bytes. 
// For now we do not try to "increase" dereferenceability due to negative // indices as we first have to come up with code to deal with loops and diff --git a/test/Transforms/Attributor/dereferenceable-1.ll b/test/Transforms/Attributor/dereferenceable-1.ll index 43acdaa4d79..0b62dc86097 100644 --- a/test/Transforms/Attributor/dereferenceable-1.ll +++ b/test/Transforms/Attributor/dereferenceable-1.ll @@ -298,6 +298,32 @@ define void @deref_or_null_and_nonnull(i32* dereferenceable_or_null(100) %0) { ; fill_range(p, *range); ; } +; FIXME: %ptr should be dereferenceable(31) +define void @test8(i8* %ptr) #0 { + br label %1 +1: ; preds = %5, %0 + %i.0 = phi i32 [ 20, %0 ], [ %4, %5 ] + %2 = sext i32 %i.0 to i64 + %3 = getelementptr inbounds i8, i8* %ptr, i64 %2 + store i8 32, i8* %3, align 1 + %4 = add nsw i32 %i.0, 1 + br label %5 +5: ; preds = %1 + %6 = icmp slt i32 %4, 30 + br i1 %6, label %1, label %7 + +7: ; preds = %5 + ret void +} + +; 8.2 (negative case) +define void @test8_neg(i32 %i, i8* %ptr) #0 { + %1 = sext i32 %i to i64 + %2 = getelementptr inbounds i8, i8* %ptr, i64 %1 + store i8 65, i8* %2, align 1 + ret void +} + ; void fill_range(int* p, long long int start){ ; for(long long int i = start;i