From 1f5e832e3982a739c0886ea3242f613da3d7441e Mon Sep 17 00:00:00 2001
From: dfukalov
Date: Tue, 16 Mar 2021 16:36:17 +0300
Subject: [PATCH] [AA][NFC] Convert AliasResult to class containing offset for
 PartialAlias case.

Add the ability to store an `Offset` between two partially aliased
locations, and use this storage within the returned `AliasResult`
instead of caching it in `AAQueryInfo`.

Reviewed By: asbirlea

Differential Revision: https://reviews.llvm.org/D98718
---
 include/llvm/Analysis/AliasAnalysis.h    | 115 +++++++++---------
 include/llvm/Analysis/AliasSetTracker.h  |   7 +-
 include/llvm/Analysis/MemorySSA.h        |   5 +-
 lib/Analysis/AliasAnalysis.cpp           |  11 --
 lib/Analysis/BasicAliasAnalysis.cpp      |  32 +++--
 lib/Analysis/MemorySSA.cpp               |  20 +--
 .../Scalar/DeadStoreElimination.cpp      |  10 +-
 unittests/Analysis/AliasAnalysisTest.cpp |  17 +--
 unittests/Analysis/MemorySSATest.cpp     |  18 ++-
 9 files changed, 118 insertions(+), 117 deletions(-)

diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h
index f1506945273..1341e78d8b4 100644
--- a/include/llvm/Analysis/AliasAnalysis.h
+++ b/include/llvm/Analysis/AliasAnalysis.h
@@ -78,21 +78,64 @@ class Value;
 ///
 /// See docs/AliasAnalysis.html for more information on the specific meanings
 /// of these values.
-enum AliasResult : uint8_t {
-  /// The two locations do not alias at all.
-  ///
-  /// This value is arranged to convert to false, while all other values
-  /// convert to true. This allows a boolean context to convert the result to
-  /// a binary flag indicating whether there is the possibility of aliasing.
-  NoAlias = 0,
-  /// The two locations may or may not alias. This is the least precise result.
-  MayAlias,
-  /// The two locations alias, but only due to a partial overlap.
-  PartialAlias,
-  /// The two locations precisely alias each other.
-  MustAlias,
+class AliasResult {
+private:
+  static const int OffsetBits = 23;
+  static const int AliasBits = 8;
+  static_assert(AliasBits + 1 + OffsetBits <= 32,
+                "AliasResult size is intended to be 4 bytes!");
+
+  unsigned int Alias : AliasBits;
+  unsigned int HasOffset : 1;
+  signed int Offset : OffsetBits;
+
+public:
+  enum Result : uint8_t {
+    /// The two locations do not alias at all.
+    ///
+    /// This value is arranged to convert to false, while all other values
+    /// convert to true. This allows a boolean context to convert the result to
+    /// a binary flag indicating whether there is the possibility of aliasing.
+    NoAlias = 0,
+    /// The two locations may or may not alias. This is the least precise
+    /// result.
+    MayAlias,
+    /// The two locations alias, but only due to a partial overlap.
+    PartialAlias,
+    /// The two locations precisely alias each other.
+    MustAlias,
+  };
+  static_assert(MustAlias < (1 << AliasBits),
+                "Not enough bit field size for the enum!");
+
+  explicit AliasResult() = delete;
+  constexpr AliasResult(const Result &Alias)
+      : Alias(Alias), HasOffset(false), Offset(0) {}
+
+  operator Result() const { return static_cast<Result>(Alias); }
+
+  constexpr bool hasOffset() const { return HasOffset; }
+  constexpr int32_t getOffset() const {
+    assert(HasOffset && "No offset!");
+    return Offset;
+  }
+  void setOffset(int32_t NewOffset) {
+    if (isInt<OffsetBits>(NewOffset)) {
+      HasOffset = true;
+      Offset = NewOffset;
+    }
+  }
+
+  /// Helper for processing AliasResult for swapped memory location pairs.
+  void swap(bool DoSwap) {
+    if (DoSwap && hasOffset())
+      setOffset(-getOffset());
+  }
 };
 
+static_assert(sizeof(AliasResult) == 4,
+              "AliasResult size is intended to be 4 bytes!");
+
 /// << operator for AliasResult.
 raw_ostream &operator<<(raw_ostream &OS, AliasResult AR);
@@ -369,16 +412,6 @@ template <> struct DenseMapInfo<AACacheLoc> {
 /// The information stored in an `AAQueryInfo` is currently limitted to the
 /// caches used by BasicAA, but can further be extended to fit other AA needs.
 class AAQueryInfo {
-  /// Storage for estimated relative offsets between two partially aliased
-  /// values. Used to optimize out redundant parts of loads/stores (in GVN/DSE).
-  /// These users cannot process quite complicated addresses (e.g. GEPs with
-  /// non-constant offsets). Used by BatchAAResults only.
-  bool CacheOffsets = false;
-  SmallDenseMap<std::pair<std::pair<const Value *, const Value *>,
-                          std::pair<uint64_t, uint64_t>>,
-                int64_t, 4>
-      ClobberOffsets;
-
 public:
   using LocPair = std::pair<AACacheLoc, AACacheLoc>;
   struct CacheEntry {
@@ -406,9 +439,7 @@ public:
   /// assumption is disproven.
   SmallVector<AAQueryInfo::LocPair, 4> AssumptionBasedResults;
 
-  AAQueryInfo(bool CacheOffsets = false)
-      : CacheOffsets(CacheOffsets), ClobberOffsets(), AliasCache(),
-        IsCapturedCache() {}
+  AAQueryInfo() : AliasCache(), IsCapturedCache() {}
 
   /// Create a new AAQueryInfo based on this one, but with the cache cleared.
   /// This is used for recursive queries across phis, where cache results may
@@ -418,33 +449,6 @@ public:
     NewAAQI.Depth = Depth;
     return NewAAQI;
   }
-
-  Optional<int64_t> getClobberOffset(const Value *Ptr1, const Value *Ptr2,
-                                     uint64_t Size1, uint64_t Size2) const {
-    assert(CacheOffsets && "Clobber offset cached in batch mode only!");
-    const bool Swapped = Ptr1 > Ptr2;
-    if (Swapped) {
-      std::swap(Ptr1, Ptr2);
-      std::swap(Size1, Size2);
-    }
-    const auto IOff = ClobberOffsets.find({{Ptr1, Ptr2}, {Size1, Size2}});
-    if (IOff != ClobberOffsets.end())
-      return Swapped ? -IOff->second : IOff->second;
-    return None;
-  }
-
-  void setClobberOffset(const Value *Ptr1, const Value *Ptr2, uint64_t Size1,
-                        uint64_t Size2, int64_t Offset) {
-    // Cache offset for batch mode only.
-    if (!CacheOffsets)
-      return;
-    if (Ptr1 > Ptr2) {
-      std::swap(Ptr1, Ptr2);
-      std::swap(Size1, Size2);
-      Offset = -Offset;
-    }
-    ClobberOffsets[{{Ptr1, Ptr2}, {Size1, Size2}}] = Offset;
-  }
 };
 
 class BatchAAResults;
@@ -887,8 +891,7 @@ class BatchAAResults {
   AAQueryInfo AAQI;
 
 public:
-  BatchAAResults(AAResults &AAR, bool CacheOffsets = false)
-      : AA(AAR), AAQI(CacheOffsets) {}
+  BatchAAResults(AAResults &AAR) : AA(AAR), AAQI() {}
   AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB) {
     return AA.alias(LocA, LocB, AAQI);
   }
@@ -922,8 +925,6 @@ public:
                  MemoryLocation(V2, LocationSize::precise(1))) ==
            AliasResult::MustAlias;
   }
-  Optional<int64_t> getClobberOffset(const MemoryLocation &LocA,
-                                     const MemoryLocation &LocB) const;
 };
 
 /// Temporary typedef for legacy code that uses a generic \c AliasAnalysis
diff --git a/include/llvm/Analysis/AliasSetTracker.h b/include/llvm/Analysis/AliasSetTracker.h
index ee6c371435e..b9d8593937d 100644
--- a/include/llvm/Analysis/AliasSetTracker.h
+++ b/include/llvm/Analysis/AliasSetTracker.h
@@ -35,18 +35,17 @@ namespace llvm {
 
 class AAResults;
+class AliasResult;
 class AliasSetTracker;
-class BasicBlock;
-class LoadInst;
 class AnyMemSetInst;
 class AnyMemTransferInst;
+class BasicBlock;
+class LoadInst;
 class raw_ostream;
 class StoreInst;
 class VAArgInst;
 class Value;
 
-enum AliasResult : uint8_t;
-
 class AliasSet : public ilist_node<AliasSet> {
   friend class AliasSetTracker;
 
diff --git a/include/llvm/Analysis/MemorySSA.h b/include/llvm/Analysis/MemorySSA.h
index 22d3eae1edd..5b32f5cec88 100644
--- a/include/llvm/Analysis/MemorySSA.h
+++ b/include/llvm/Analysis/MemorySSA.h
@@ -299,8 +299,9 @@ protected:
     OptimizedAccessAlias = AR;
   }
 
-  void setDefiningAccess(MemoryAccess *DMA, bool Optimized = false,
-                         Optional<AliasResult> AR = AliasResult::MayAlias) {
+  void setDefiningAccess(
+      MemoryAccess *DMA, bool Optimized = false,
+      Optional<AliasResult> AR = AliasResult(AliasResult::MayAlias)) {
     if (!Optimized) {
       setOperand(0, DMA);
       return;
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index f4e9e143ff8..c73989731ab 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -958,17 +958,6 @@ AAResults llvm::createLegacyPMAAResults(Pass &P, Function &F,
   return AAR;
 }
 
-Optional<int64_t>
-BatchAAResults::getClobberOffset(const MemoryLocation &LocA,
-                                 const MemoryLocation &LocB) const {
-  if (!LocA.Size.hasValue() || !LocB.Size.hasValue())
-    return None;
-  const Value *V1 = LocA.Ptr->stripPointerCastsForAliasAnalysis();
-  const Value *V2 = LocB.Ptr->stripPointerCastsForAliasAnalysis();
-  return AAQI.getClobberOffset(V1, V2, LocA.Size.getValue(),
-                               LocB.Size.getValue());
-}
-
 bool llvm::isNoAliasCall(const Value *V) {
   if (const auto *Call = dyn_cast<CallBase>(V))
     return Call->hasRetAttr(Attribute::NoAlias);
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 9992725ebe7..4fe7d668b1d 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -1015,14 +1015,15 @@ AliasResult BasicAAResult::aliasGEP(
     // TODO: This limitation exists for compile-time reasons. Relax it if we
     // can avoid exponential pathological cases.
     if (!isa<GEPOperator>(V2))
-      return MayAlias;
+      return AliasResult::MayAlias;
 
     // If both accesses have unknown size, we can only check whether the base
     // objects don't alias.
     AliasResult BaseAlias = getBestAAResults().alias(
         MemoryLocation::getBeforeOrAfter(UnderlyingV1),
         MemoryLocation::getBeforeOrAfter(UnderlyingV2), AAQI);
-    return BaseAlias == NoAlias ? NoAlias : MayAlias;
+    return BaseAlias == AliasResult::NoAlias ? AliasResult::NoAlias
+                                             : AliasResult::MayAlias;
   }
 
   DecomposedGEP DecompGEP1 = DecomposeGEPExpression(GEP1, DL, &AC, DT);
@@ -1090,8 +1091,9 @@ AliasResult BasicAAResult::aliasGEP(
       const Value *RightPtr = GEP1;
       LocationSize VLeftSize = V2Size;
       LocationSize VRightSize = V1Size;
+      const bool Swapped = Off.isNegative();
 
-      if (Off.isNegative()) {
+      if (Swapped) {
         // Swap if we have the situation where:
         // +                +
         // | BaseOffset     |
@@ -1108,16 +1110,16 @@ AliasResult BasicAAResult::aliasGEP(
       if (Off.ult(LSize)) {
         // Conservatively drop processing if a phi was visited and/or offset is
         // too big.
+        AliasResult AR = AliasResult::PartialAlias;
         if (VisitedPhiBBs.empty() && VRightSize.hasValue() &&
-            Off.ule(INT64_MAX)) {
+            Off.ule(INT32_MAX) && (Off + VRightSize.getValue()).ule(LSize)) {
           // Memory referenced by right pointer is nested. Save the offset in
-          // cache.
-          const uint64_t RSize = VRightSize.getValue();
-          if ((Off + RSize).ule(LSize))
-            AAQI.setClobberOffset(LeftPtr, RightPtr, LSize, RSize,
-                                  Off.getSExtValue());
+          // cache. Note that originally offset estimated as GEP1-V2, but
+          // AliasResult contains the shift that represents GEP1+Offset=V2.
+          AR.setOffset(-Off.getSExtValue());
+          AR.swap(Swapped);
         }
-        return AliasResult::PartialAlias;
+        return AR;
       }
       return AliasResult::NoAlias;
     }
@@ -1531,7 +1533,8 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
   // Check the cache before climbing up use-def chains. This also terminates
   // otherwise infinitely recursive queries.
   AAQueryInfo::LocPair Locs({V1, V1Size}, {V2, V2Size});
-  if (V1 > V2)
+  const bool Swapped = V1 > V2;
+  if (Swapped)
     std::swap(Locs.first, Locs.second);
   const auto &Pair = AAQI.AliasCache.try_emplace(
       Locs, AAQueryInfo::CacheEntry{AliasResult::NoAlias, 0});
@@ -1542,7 +1545,10 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
       ++Entry.NumAssumptionUses;
       ++AAQI.NumAssumptionUses;
     }
-    return Entry.Result;
+    // Cache contains sorted {V1,V2} pairs but we should return original order.
+    auto Result = Entry.Result;
+    Result.swap(Swapped);
+    return Result;
   }
 
   int OrigNumAssumptionUses = AAQI.NumAssumptionUses;
@@ -1563,6 +1569,8 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
   // This is a definitive result now, when considered as a root query.
   AAQI.NumAssumptionUses -= Entry.NumAssumptionUses;
   Entry.Result = Result;
+  // Cache contains sorted {V1,V2} pairs.
+  Entry.Result.swap(Swapped);
   Entry.NumAssumptionUses = -1;
 
   // If the assumption has been disproven, remove any results that may have
diff --git a/lib/Analysis/MemorySSA.cpp b/lib/Analysis/MemorySSA.cpp
index 3576354984d..f6f7d9cb3ce 100644
--- a/lib/Analysis/MemorySSA.cpp
+++ b/lib/Analysis/MemorySSA.cpp
@@ -286,7 +286,7 @@ instructionClobbersQuery(const MemoryDef *MD, const MemoryLocation &UseLoc,
     case Intrinsic::invariant_end:
     case Intrinsic::assume:
    case Intrinsic::experimental_noalias_scope_decl:
-      return {false, AliasResult::NoAlias};
+      return {false, AliasResult(AliasResult::NoAlias)};
    case Intrinsic::dbg_addr:
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_label:
@@ -305,7 +305,8 @@ instructionClobbersQuery(const MemoryDef *MD, const MemoryLocation &UseLoc,
 
   if (auto *DefLoad = dyn_cast<LoadInst>(DefInst))
     if (auto *UseLoad = dyn_cast_or_null<LoadInst>(UseInst))
-      return {!areLoadsReorderable(UseLoad, DefLoad), AliasResult::MayAlias};
+      return {!areLoadsReorderable(UseLoad, DefLoad),
+              AliasResult(AliasResult::MayAlias)};
 
   ModRefInfo I = AA.getModRefInfo(DefInst, UseLoc);
   AR = isMustSet(I) ? AliasResult::MustAlias : AliasResult::MayAlias;
@@ -344,7 +345,7 @@ struct UpwardsMemoryQuery {
   const Instruction *Inst = nullptr;
   // The MemoryAccess we actually got called with, used to test local domination
   const MemoryAccess *OriginalAccess = nullptr;
-  Optional<AliasResult> AR = AliasResult::MayAlias;
+  Optional<AliasResult> AR = AliasResult(AliasResult::MayAlias);
   bool SkipSelfAccess = false;
 
   UpwardsMemoryQuery() = default;
@@ -570,14 +571,14 @@ template <class AliasAnalysisType> class ClobberWalker {
     for (MemoryAccess *Current : def_chain(Desc.Last)) {
       Desc.Last = Current;
       if (Current == StopAt || Current == SkipStopAt)
-        return {Current, false, AliasResult::MayAlias};
+        return {Current, false, AliasResult(AliasResult::MayAlias)};
 
       if (auto *MD = dyn_cast<MemoryDef>(Current)) {
        if (MSSA.isLiveOnEntryDef(MD))
-          return {MD, true, AliasResult::MustAlias};
+          return {MD, true, AliasResult(AliasResult::MustAlias)};
 
         if (!--*UpwardWalkLimit)
-          return {Current, true, AliasResult::MayAlias};
+          return {Current, true, AliasResult(AliasResult::MayAlias)};
 
         ClobberAlias CA =
             instructionClobbersQuery(MD, Desc.Loc, Query->Inst, AA);
@@ -591,7 +592,7 @@ template <class AliasAnalysisType> class ClobberWalker {
 
     assert(isa<MemoryPhi>(Desc.Last) &&
            "Ended at a non-clobber that's not a phi?");
-    return {Desc.Last, false, AliasResult::MayAlias};
+    return {Desc.Last, false, AliasResult(AliasResult::MayAlias)};
   }
 
   void addSearches(MemoryPhi *Phi, SmallVectorImpl<ListIndex> &PausedSearches,
@@ -2490,8 +2491,9 @@ MemorySSA::ClobberWalkerBase<AliasAnalysisType>::getClobberingMemoryAccessBase(
       StartingAccess->setOptimized(OptimizedAccess);
       if (MSSA->isLiveOnEntryDef(OptimizedAccess))
         StartingAccess->setOptimizedAccessType(None);
-      else if (Q.AR == AliasResult::MustAlias)
-        StartingAccess->setOptimizedAccessType(AliasResult::MustAlias);
+      else if (Q.AR && *Q.AR == AliasResult::MustAlias)
+        StartingAccess->setOptimizedAccessType(
+            AliasResult(AliasResult::MustAlias));
     } else
       OptimizedAccess = StartingAccess->getOptimized();
 
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 4b5b705442d..d3052e0a4cc 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -402,9 +402,9 @@ isOverwrite(const Instruction *LaterI, const Instruction *EarlierI,
   }
 
   // If we hit a partial alias we may have a full overwrite
-  if (AAR == AliasResult::PartialAlias) {
-    int64_t Off = AA.getClobberOffset(Later, Earlier).getValueOr(0);
-    if (Off > 0 && (uint64_t)Off + EarlierSize <= LaterSize)
+  if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
+    int32_t Off = AAR.getOffset();
+    if (Off >= 0 && (uint64_t)Off + EarlierSize <= LaterSize)
       return OW_Complete;
   }
 
@@ -996,8 +996,8 @@ struct DSEState {
 
   DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
            PostDominatorTree &PDT, const TargetLibraryInfo &TLI)
-      : F(F), AA(AA), BatchAA(AA, /*CacheOffsets =*/true), MSSA(MSSA), DT(DT),
-        PDT(PDT), TLI(TLI), DL(F.getParent()->getDataLayout()) {}
+      : F(F), AA(AA), BatchAA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI),
+        DL(F.getParent()->getDataLayout()) {}
 
   static DSEState get(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
                       DominatorTree &DT, PostDominatorTree &PDT,
diff --git a/unittests/Analysis/AliasAnalysisTest.cpp b/unittests/Analysis/AliasAnalysisTest.cpp
index 66f29a08320..e7a70e736a1 100644
--- a/unittests/Analysis/AliasAnalysisTest.cpp
+++ b/unittests/Analysis/AliasAnalysisTest.cpp
@@ -309,11 +309,11 @@ TEST_F(AliasAnalysisTest, PartialAliasOffset) {
     %i2 = zext i32 %i to i64
     %i3 = getelementptr inbounds float, float* %arg, i64 %i2
     %i4 = bitcast float* %i3 to <2 x float>*
-    %L2 = load <2 x float>, <2 x float>* %i4, align 16
+    %L1 = load <2 x float>, <2 x float>* %i4, align 16
     %i7 = add nuw nsw i32 %i, 1
     %i8 = zext i32 %i7 to i64
     %i9 = getelementptr inbounds float, float* %arg, i64 %i8
-    %L1 = load float, float* %i9, align 4
+    %L2 = load float, float* %i9, align 4
     ret void
   }
 )",
@@ -324,18 +324,13 @@ TEST_F(AliasAnalysisTest, PartialAliasOffset) {
 
   Function *F = M->getFunction("foo");
   const auto Loc1 = MemoryLocation::get(getInstructionByName(*F, "L1"));
-  auto Loc2 = MemoryLocation::get(getInstructionByName(*F, "L2"));
+  const auto Loc2 = MemoryLocation::get(getInstructionByName(*F, "L2"));
 
   auto &AA = getAAResults(*F);
 
-  BatchAAResults BatchAA(AA, /*CacheOffsets =*/true);
-  EXPECT_EQ(AliasResult::PartialAlias, BatchAA.alias(Loc1, Loc2));
-  EXPECT_EQ(-4, BatchAA.getClobberOffset(Loc1, Loc2).getValueOr(0));
-  EXPECT_EQ(4, BatchAA.getClobberOffset(Loc2, Loc1).getValueOr(0));
-
-  // Check that no offset returned for different size.
-  Loc2.Size = LocationSize::precise(42);
-  EXPECT_EQ(0, BatchAA.getClobberOffset(Loc1, Loc2).getValueOr(0));
+  const auto AR = AA.alias(Loc1, Loc2);
+  EXPECT_EQ(AR, AliasResult::PartialAlias);
+  EXPECT_EQ(4, AR.getOffset());
 }
 
 class AAPassInfraTest : public testing::Test {
diff --git a/unittests/Analysis/MemorySSATest.cpp b/unittests/Analysis/MemorySSATest.cpp
index 6785a649aae..3c8aa0159e6 100644
--- a/unittests/Analysis/MemorySSATest.cpp
+++ b/unittests/Analysis/MemorySSATest.cpp
@@ -1038,7 +1038,8 @@ TEST_F(MemorySSATest, TestLoadMustAlias) {
   }
   for (LoadInst *V : {LA3, LA4}) {
     MemoryUse *MemUse = dyn_cast_or_null<MemoryUse>(MSSA.getMemoryAccess(V));
-    EXPECT_EQ(MemUse->getOptimizedAccessType(), AliasResult::MustAlias)
+    EXPECT_EQ(MemUse->getOptimizedAccessType().getValue(),
+              AliasResult::MustAlias)
        << "Load " << I << " doesn't have the correct alias information";
     // EXPECT_EQ expands such that if we increment I above, it won't get
     // incremented except when we try to print the error message.
@@ -1087,7 +1088,8 @@ TEST_F(MemorySSATest, TestStoreMustAlias) {
       EXPECT_EQ(MemDef->getOptimizedAccessType(), None)
          << "Store " << I << " doesn't have the correct alias information";
     else
-      EXPECT_EQ(MemDef->getOptimizedAccessType(), AliasResult::MustAlias)
+      EXPECT_EQ(MemDef->getOptimizedAccessType().getValue(),
+                AliasResult::MustAlias)
          << "Store " << I << " doesn't have the correct alias information";
     // EXPECT_EQ expands such that if we increment I above, it won't get
     // incremented except when we try to print the error message.
@@ -1121,7 +1123,8 @@ TEST_F(MemorySSATest, TestLoadMayAlias) {
   unsigned I = 0;
   for (LoadInst *V : {LA1, LB1}) {
     MemoryUse *MemUse = dyn_cast_or_null<MemoryUse>(MSSA.getMemoryAccess(V));
-    EXPECT_EQ(MemUse->getOptimizedAccessType(), AliasResult::MayAlias)
+    EXPECT_EQ(MemUse->getOptimizedAccessType().getValue(),
+              AliasResult::MayAlias)
        << "Load " << I << " doesn't have the correct alias information";
     // EXPECT_EQ expands such that if we increment I above, it won't get
     // incremented except when we try to print the error message.
@@ -1129,7 +1132,8 @@ TEST_F(MemorySSATest, TestLoadMayAlias) {
   }
   for (LoadInst *V : {LA2, LB2}) {
     MemoryUse *MemUse = dyn_cast_or_null<MemoryUse>(MSSA.getMemoryAccess(V));
-    EXPECT_EQ(MemUse->getOptimizedAccessType(), AliasResult::MustAlias)
+    EXPECT_EQ(MemUse->getOptimizedAccessType().getValue(),
+              AliasResult::MustAlias)
        << "Load " << I << " doesn't have the correct alias information";
     // EXPECT_EQ expands such that if we increment I above, it won't get
     // incremented except when we try to print the error message.
@@ -1189,13 +1193,15 @@ TEST_F(MemorySSATest, TestStoreMayAlias) {
     EXPECT_EQ(MemDef->isOptimized(), true)
        << "Store " << I << " was not optimized";
     if (I == 1 || I == 3 || I == 4)
-      EXPECT_EQ(MemDef->getOptimizedAccessType(), AliasResult::MayAlias)
+      EXPECT_EQ(MemDef->getOptimizedAccessType().getValue(),
+                AliasResult::MayAlias)
          << "Store " << I << " doesn't have the correct alias information";
     else if (I == 0 || I == 2)
       EXPECT_EQ(MemDef->getOptimizedAccessType(), None)
          << "Store " << I << " doesn't have the correct alias information";
     else
-      EXPECT_EQ(MemDef->getOptimizedAccessType(), AliasResult::MustAlias)
+      EXPECT_EQ(MemDef->getOptimizedAccessType().getValue(),
+                AliasResult::MustAlias)
          << "Store " << I << " doesn't have the correct alias information";
     // EXPECT_EQ expands such that if we increment I above, it won't get
     // incremented except when we try to print the error message.
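
A note on the new encoding, with a minimal standalone sketch. The patch packs the alias kind (8 bits), an offset-validity flag (1 bit), and a signed 23-bit offset into one 32-bit word, so a PartialAlias result can carry the displacement that DSE previously fetched from the removed getClobberOffset() cache. As the updated PartialAliasOffset unit test shows, the stored offset is the displacement of the second queried location relative to the first (getOffset() == 4 when %L2 reads four bytes past %L1), and swap(bool) negates it when a result cached for one operand order answers the opposite query. The sketch below mirrors that layout outside the LLVM tree; PackedAliasResult and the main() driver are illustrative assumptions, not LLVM API, and the range check stands in for llvm::isInt<N>() used by the real class.

    // Standalone illustration only -- PackedAliasResult and main() are not
    // part of the patch; they mirror the AliasResult class added above.
    #include <cassert>
    #include <cstdint>
    #include <iostream>

    class PackedAliasResult {
      static const int OffsetBits = 23;
      static const int AliasBits = 8;

      unsigned int Alias : AliasBits;  // one of the Result values below
      unsigned int HasOffset : 1;      // set once a valid offset is stored
      signed int Offset : OffsetBits;  // displacement, PartialAlias only

    public:
      enum Result : uint8_t { NoAlias = 0, MayAlias, PartialAlias, MustAlias };

      PackedAliasResult(Result R) : Alias(R), HasOffset(false), Offset(0) {}

      operator Result() const { return static_cast<Result>(Alias); }

      bool hasOffset() const { return HasOffset; }
      int32_t getOffset() const {
        assert(HasOffset && "No offset!");
        return Offset;
      }
      void setOffset(int32_t NewOffset) {
        // Stands in for llvm::isInt<OffsetBits>(): offsets that do not fit
        // in 23 signed bits are silently dropped, leaving HasOffset false.
        if (NewOffset >= -(1 << (OffsetBits - 1)) &&
            NewOffset < (1 << (OffsetBits - 1))) {
          HasOffset = true;
          Offset = NewOffset;
        }
      }
      // Negate the offset when a result cached for (A, B) answers (B, A).
      void swap(bool DoSwap) {
        if (DoSwap && hasOffset())
          setOffset(-getOffset());
      }
    };

    // The whole result still fits in one 32-bit word (8 + 1 + 23 bits).
    static_assert(sizeof(PackedAliasResult) == 4, "expected a 4-byte result");

    int main() {
      PackedAliasResult AR(PackedAliasResult::PartialAlias);
      AR.setOffset(-4);                     // BasicAA: GEP1 + Offset == V2
      std::cout << AR.getOffset() << '\n';  // -4
      AR.swap(true);                        // same result, operands swapped
      std::cout << AR.getOffset() << '\n';  // 4
      return 0;
    }

With this encoding BatchAAResults no longer needs a separate clobber-offset cache: the PartialAlias value that DeadStoreElimination already receives from alias() carries the nested-store displacement, which is exactly how the updated isOverwrite() consumes it.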