//===- CFLAndersAliasAnalysis.cpp - Inclusion-based Alias Analysis --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a CFL-based, summary-based alias analysis algorithm. It
// differs from CFLSteensAliasAnalysis in its inclusion-based nature while
// CFLSteensAliasAnalysis is unification-based. This pass has worse performance
// than CFLSteensAliasAnalysis (the worst case complexity of
// CFLAndersAliasAnalysis is cubic, while the worst case complexity of
// CFLSteensAliasAnalysis is almost linear), but it is able to yield more
// precise analysis results. The precision of this analysis is roughly the same
// as that of a one-level context-sensitive Andersen's algorithm.
//
// The algorithm used here is based on the recursive state machine matching
// scheme proposed in "Demand-driven alias analysis for C" by Xin Zheng and Radu
// Rugina. The general idea is to extend the traditional transitive closure
// algorithm to perform CFL matching along the way: instead of recording
// "whether X is reachable from Y", we keep track of "whether X is reachable
// from Y at state Z", where the "state" field indicates where we are in the CFL
// matching process. To understand the matching better, it is advisable to have
// the state machine shown in Figure 3 of the paper available when reading the
// code: all we do here is to selectively expand the transitive closure by
// discarding edges that are not recognized by the state machine.
//
// There are two differences between our current implementation and the one
// described in the paper:
// - Our algorithm eagerly computes all alias pairs after the CFLGraph is built,
// while in the paper the authors did the computation in a demand-driven
// fashion. We did not implement the demand-driven algorithm due to the
// additional coding complexity and higher memory profile, but if we found it
// necessary we may switch to it eventually.
// - In the paper the authors use a state machine that does not distinguish
// value reads from value writes. For example, if Y is reachable from X at state
// S3, it may be the case that X is written into Y, or it may be the case that
// there's a third value Z that writes into both X and Y. To make that
// distinction (which is crucial in building function summary as well as
// retrieving mod-ref info), we choose to duplicate some of the states in the
// paper's proposed state machine. The duplication does not change the set the
// machine accepts. Given a pair of reachable values, it only provides more
// detailed information on which value is being written into and which is being
// read from.
//
//===----------------------------------------------------------------------===//

// N.B. AliasAnalysis as a whole is phrased as a FunctionPass at the moment, and
// CFLAndersAA is interprocedural. This is *technically* A Bad Thing, because
// FunctionPasses are only allowed to inspect the Function that they're being
// run on. Realistically, this likely isn't a problem until we allow
// FunctionPasses to run concurrently.

#include "llvm/Analysis/CFLAndersAliasAnalysis.h"
#include "AliasAnalysisSummary.h"
#include "CFLGraph.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <bitset>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <utility>
#include <vector>

using namespace llvm;
using namespace llvm::cflaa;

#define DEBUG_TYPE "cfl-anders-aa"

// NOTE(review): the std::function signature must match the declaration in
// CFLAndersAliasAnalysis.h, which is not visible from this file -- confirm.
CFLAndersAAResult::CFLAndersAAResult(
    std::function<const TargetLibraryInfo &(Function &F)> GetTLI)
    : GetTLI(std::move(GetTLI)) {}
CFLAndersAAResult::CFLAndersAAResult(CFLAndersAAResult &&RHS)
    : AAResultBase(std::move(RHS)), GetTLI(std::move(RHS.GetTLI)) {}
CFLAndersAAResult::~CFLAndersAAResult() = default;

namespace {

// States of the CFL matching state machine. Each enumerator is also used as a
// bit index into a StateSet.
enum class MatchState : uint8_t {
  // The following state represents S1 in the paper.
  FlowFromReadOnly = 0,
  // The following two states together represent S2 in the paper.
  // The 'NoReadWrite' suffix indicates that there exists an alias path that
  // does not contain assignment and reverse assignment edges.
  // The 'ReadOnly' suffix indicates that there exists an alias path that
  // contains reverse assignment edges only.
  FlowFromMemAliasNoReadWrite,
  FlowFromMemAliasReadOnly,
  // The following two states together represent S3 in the paper.
  // The 'WriteOnly' suffix indicates that there exists an alias path that
  // contains assignment edges only.
  // The 'ReadWrite' suffix indicates that there exists an alias path that
  // contains both assignment and reverse assignment edges. Note that if X and Y
  // are reachable at 'ReadWrite' state, it does NOT mean X is both read from
  // and written to Y. Instead, it means that a third value Z is written to both
  // X and Y.
  FlowToWriteOnly,
  FlowToReadWrite,
  // The following two states together represent S4 in the paper.
  FlowToMemAliasWriteOnly,
  FlowToMemAliasReadWrite,
};

// One bit per MatchState enumerator (seven states in total).
using StateSet = std::bitset<7>;

// Bit mask selecting the states whose alias path consists of reverse
// assignment edges only.
const unsigned ReadOnlyStateMask =
    (1U << static_cast<uint8_t>(MatchState::FlowFromReadOnly)) |
    (1U << static_cast<uint8_t>(MatchState::FlowFromMemAliasReadOnly));
// Bit mask selecting the states whose alias path consists of assignment edges
// only.
const unsigned WriteOnlyStateMask =
    (1U << static_cast<uint8_t>(MatchState::FlowToWriteOnly)) |
    (1U << static_cast<uint8_t>(MatchState::FlowToMemAliasWriteOnly));

// A pair that consists of a value and an offset
struct OffsetValue {
  const Value *Val;
  int64_t Offset;
};

bool operator==(OffsetValue LHS, OffsetValue RHS) {
  return LHS.Val == RHS.Val && LHS.Offset == RHS.Offset;
}
// Lexicographic order: by value pointer first, then by offset. std::less is
// used so that the pointer comparison is a well-defined total order.
bool operator<(OffsetValue LHS, OffsetValue RHS) {
  return std::less<const Value *>()(LHS.Val, RHS.Val) ||
         (LHS.Val == RHS.Val && LHS.Offset < RHS.Offset);
}

// A pair that consists of an InstantiatedValue and an offset
struct OffsetInstantiatedValue {
  InstantiatedValue IVal;
  int64_t Offset;
};

bool operator==(OffsetInstantiatedValue LHS, OffsetInstantiatedValue RHS) {
  return LHS.IVal == RHS.IVal && LHS.Offset == RHS.Offset;
}

// We use ReachabilitySet to keep track of value aliases (The nonterminal "V" in
// the paper) during the analysis.
class ReachabilitySet { using ValueStateMap = DenseMap; using ValueReachMap = DenseMap; ValueReachMap ReachMap; public: using const_valuestate_iterator = ValueStateMap::const_iterator; using const_value_iterator = ValueReachMap::const_iterator; // Insert edge 'From->To' at state 'State' bool insert(InstantiatedValue From, InstantiatedValue To, MatchState State) { assert(From != To); auto &States = ReachMap[To][From]; auto Idx = static_cast(State); if (!States.test(Idx)) { States.set(Idx); return true; } return false; } // Return the set of all ('From', 'State') pair for a given node 'To' iterator_range reachableValueAliases(InstantiatedValue V) const { auto Itr = ReachMap.find(V); if (Itr == ReachMap.end()) return make_range(const_valuestate_iterator(), const_valuestate_iterator()); return make_range(Itr->second.begin(), Itr->second.end()); } iterator_range value_mappings() const { return make_range(ReachMap.begin(), ReachMap.end()); } }; // We use AliasMemSet to keep track of all memory aliases (the nonterminal "M" // in the paper) during the analysis. class AliasMemSet { using MemSet = DenseSet; using MemMapType = DenseMap; MemMapType MemMap; public: using const_mem_iterator = MemSet::const_iterator; bool insert(InstantiatedValue LHS, InstantiatedValue RHS) { // Top-level values can never be memory aliases because one cannot take the // addresses of them assert(LHS.DerefLevel > 0 && RHS.DerefLevel > 0); return MemMap[LHS].insert(RHS).second; } const MemSet *getMemoryAliases(InstantiatedValue V) const { auto Itr = MemMap.find(V); if (Itr == MemMap.end()) return nullptr; return &Itr->second; } }; // We use AliasAttrMap to keep track of the AliasAttr of each node. 
class AliasAttrMap { using MapType = DenseMap; MapType AttrMap; public: using const_iterator = MapType::const_iterator; bool add(InstantiatedValue V, AliasAttrs Attr) { auto &OldAttr = AttrMap[V]; auto NewAttr = OldAttr | Attr; if (OldAttr == NewAttr) return false; OldAttr = NewAttr; return true; } AliasAttrs getAttrs(InstantiatedValue V) const { AliasAttrs Attr; auto Itr = AttrMap.find(V); if (Itr != AttrMap.end()) Attr = Itr->second; return Attr; } iterator_range mappings() const { return make_range(AttrMap.begin(), AttrMap.end()); } }; struct WorkListItem { InstantiatedValue From; InstantiatedValue To; MatchState State; }; struct ValueSummary { struct Record { InterfaceValue IValue; unsigned DerefLevel; }; SmallVector FromRecords, ToRecords; }; } // end anonymous namespace namespace llvm { // Specialize DenseMapInfo for OffsetValue. template <> struct DenseMapInfo { static OffsetValue getEmptyKey() { return OffsetValue{DenseMapInfo::getEmptyKey(), DenseMapInfo::getEmptyKey()}; } static OffsetValue getTombstoneKey() { return OffsetValue{DenseMapInfo::getTombstoneKey(), DenseMapInfo::getEmptyKey()}; } static unsigned getHashValue(const OffsetValue &OVal) { return DenseMapInfo>::getHashValue( std::make_pair(OVal.Val, OVal.Offset)); } static bool isEqual(const OffsetValue &LHS, const OffsetValue &RHS) { return LHS == RHS; } }; // Specialize DenseMapInfo for OffsetInstantiatedValue. 
template <> struct DenseMapInfo { static OffsetInstantiatedValue getEmptyKey() { return OffsetInstantiatedValue{ DenseMapInfo::getEmptyKey(), DenseMapInfo::getEmptyKey()}; } static OffsetInstantiatedValue getTombstoneKey() { return OffsetInstantiatedValue{ DenseMapInfo::getTombstoneKey(), DenseMapInfo::getEmptyKey()}; } static unsigned getHashValue(const OffsetInstantiatedValue &OVal) { return DenseMapInfo>::getHashValue( std::make_pair(OVal.IVal, OVal.Offset)); } static bool isEqual(const OffsetInstantiatedValue &LHS, const OffsetInstantiatedValue &RHS) { return LHS == RHS; } }; } // end namespace llvm class CFLAndersAAResult::FunctionInfo { /// Map a value to other values that may alias it /// Since the alias relation is symmetric, to save some space we assume values /// are properly ordered: if a and b alias each other, and a < b, then b is in /// AliasMap[a] but not vice versa. DenseMap> AliasMap; /// Map a value to its corresponding AliasAttrs DenseMap AttrMap; /// Summary of externally visible effects. 
AliasSummary Summary; Optional getAttrs(const Value *) const; public: FunctionInfo(const Function &, const SmallVectorImpl &, const ReachabilitySet &, const AliasAttrMap &); bool mayAlias(const Value *, LocationSize, const Value *, LocationSize) const; const AliasSummary &getAliasSummary() const { return Summary; } }; static bool hasReadOnlyState(StateSet Set) { return (Set & StateSet(ReadOnlyStateMask)).any(); } static bool hasWriteOnlyState(StateSet Set) { return (Set & StateSet(WriteOnlyStateMask)).any(); } static Optional getInterfaceValue(InstantiatedValue IValue, const SmallVectorImpl &RetVals) { auto Val = IValue.Val; Optional Index; if (auto Arg = dyn_cast(Val)) Index = Arg->getArgNo() + 1; else if (is_contained(RetVals, Val)) Index = 0; if (Index) return InterfaceValue{*Index, IValue.DerefLevel}; return None; } static void populateAttrMap(DenseMap &AttrMap, const AliasAttrMap &AMap) { for (const auto &Mapping : AMap.mappings()) { auto IVal = Mapping.first; // Insert IVal into the map auto &Attr = AttrMap[IVal.Val]; // AttrMap only cares about top-level values if (IVal.DerefLevel == 0) Attr |= Mapping.second; } } static void populateAliasMap(DenseMap> &AliasMap, const ReachabilitySet &ReachSet) { for (const auto &OuterMapping : ReachSet.value_mappings()) { // AliasMap only cares about top-level values if (OuterMapping.first.DerefLevel > 0) continue; auto Val = OuterMapping.first.Val; auto &AliasList = AliasMap[Val]; for (const auto &InnerMapping : OuterMapping.second) { // Again, AliasMap only cares about top-level values if (InnerMapping.first.DerefLevel == 0) AliasList.push_back(OffsetValue{InnerMapping.first.Val, UnknownOffset}); } // Sort AliasList for faster lookup llvm::sort(AliasList); } } static void populateExternalRelations( SmallVectorImpl &ExtRelations, const Function &Fn, const SmallVectorImpl &RetVals, const ReachabilitySet &ReachSet) { // If a function only returns one of its argument X, then X will be both an // argument and a return value 
at the same time. This is an edge case that // needs special handling here. for (const auto &Arg : Fn.args()) { if (is_contained(RetVals, &Arg)) { auto ArgVal = InterfaceValue{Arg.getArgNo() + 1, 0}; auto RetVal = InterfaceValue{0, 0}; ExtRelations.push_back(ExternalRelation{ArgVal, RetVal, 0}); } } // Below is the core summary construction logic. // A naive solution of adding only the value aliases that are parameters or // return values in ReachSet to the summary won't work: It is possible that a // parameter P is written into an intermediate value I, and the function // subsequently returns *I. In that case, *I is does not value alias anything // in ReachSet, and the naive solution will miss a summary edge from (P, 1) to // (I, 1). // To account for the aforementioned case, we need to check each non-parameter // and non-return value for the possibility of acting as an intermediate. // 'ValueMap' here records, for each value, which InterfaceValues read from or // write into it. If both the read list and the write list of a given value // are non-empty, we know that a particular value is an intermidate and we // need to add summary edges from the writes to the reads. DenseMap ValueMap; for (const auto &OuterMapping : ReachSet.value_mappings()) { if (auto Dst = getInterfaceValue(OuterMapping.first, RetVals)) { for (const auto &InnerMapping : OuterMapping.second) { // If Src is a param/return value, we get a same-level assignment. 
if (auto Src = getInterfaceValue(InnerMapping.first, RetVals)) { // This may happen if both Dst and Src are return values if (*Dst == *Src) continue; if (hasReadOnlyState(InnerMapping.second)) ExtRelations.push_back(ExternalRelation{*Dst, *Src, UnknownOffset}); // No need to check for WriteOnly state, since ReachSet is symmetric } else { // If Src is not a param/return, add it to ValueMap auto SrcIVal = InnerMapping.first; if (hasReadOnlyState(InnerMapping.second)) ValueMap[SrcIVal.Val].FromRecords.push_back( ValueSummary::Record{*Dst, SrcIVal.DerefLevel}); if (hasWriteOnlyState(InnerMapping.second)) ValueMap[SrcIVal.Val].ToRecords.push_back( ValueSummary::Record{*Dst, SrcIVal.DerefLevel}); } } } } for (const auto &Mapping : ValueMap) { for (const auto &FromRecord : Mapping.second.FromRecords) { for (const auto &ToRecord : Mapping.second.ToRecords) { auto ToLevel = ToRecord.DerefLevel; auto FromLevel = FromRecord.DerefLevel; // Same-level assignments should have already been processed by now if (ToLevel == FromLevel) continue; auto SrcIndex = FromRecord.IValue.Index; auto SrcLevel = FromRecord.IValue.DerefLevel; auto DstIndex = ToRecord.IValue.Index; auto DstLevel = ToRecord.IValue.DerefLevel; if (ToLevel > FromLevel) SrcLevel += ToLevel - FromLevel; else DstLevel += FromLevel - ToLevel; ExtRelations.push_back(ExternalRelation{ InterfaceValue{SrcIndex, SrcLevel}, InterfaceValue{DstIndex, DstLevel}, UnknownOffset}); } } } // Remove duplicates in ExtRelations llvm::sort(ExtRelations); ExtRelations.erase(std::unique(ExtRelations.begin(), ExtRelations.end()), ExtRelations.end()); } static void populateExternalAttributes( SmallVectorImpl &ExtAttributes, const Function &Fn, const SmallVectorImpl &RetVals, const AliasAttrMap &AMap) { for (const auto &Mapping : AMap.mappings()) { if (auto IVal = getInterfaceValue(Mapping.first, RetVals)) { auto Attr = getExternallyVisibleAttrs(Mapping.second); if (Attr.any()) ExtAttributes.push_back(ExternalAttribute{*IVal, Attr}); } } } 
CFLAndersAAResult::FunctionInfo::FunctionInfo( const Function &Fn, const SmallVectorImpl &RetVals, const ReachabilitySet &ReachSet, const AliasAttrMap &AMap) { populateAttrMap(AttrMap, AMap); populateExternalAttributes(Summary.RetParamAttributes, Fn, RetVals, AMap); populateAliasMap(AliasMap, ReachSet); populateExternalRelations(Summary.RetParamRelations, Fn, RetVals, ReachSet); } Optional CFLAndersAAResult::FunctionInfo::getAttrs(const Value *V) const { assert(V != nullptr); auto Itr = AttrMap.find(V); if (Itr != AttrMap.end()) return Itr->second; return None; } bool CFLAndersAAResult::FunctionInfo::mayAlias( const Value *LHS, LocationSize MaybeLHSSize, const Value *RHS, LocationSize MaybeRHSSize) const { assert(LHS && RHS); // Check if we've seen LHS and RHS before. Sometimes LHS or RHS can be created // after the analysis gets executed, and we want to be conservative in those // cases. auto MaybeAttrsA = getAttrs(LHS); auto MaybeAttrsB = getAttrs(RHS); if (!MaybeAttrsA || !MaybeAttrsB) return true; // Check AliasAttrs before AliasMap lookup since it's cheaper auto AttrsA = *MaybeAttrsA; auto AttrsB = *MaybeAttrsB; if (hasUnknownOrCallerAttr(AttrsA)) return AttrsB.any(); if (hasUnknownOrCallerAttr(AttrsB)) return AttrsA.any(); if (isGlobalOrArgAttr(AttrsA)) return isGlobalOrArgAttr(AttrsB); if (isGlobalOrArgAttr(AttrsB)) return isGlobalOrArgAttr(AttrsA); // At this point both LHS and RHS should point to locally allocated objects auto Itr = AliasMap.find(LHS); if (Itr != AliasMap.end()) { // Find out all (X, Offset) where X == RHS auto Comparator = [](OffsetValue LHS, OffsetValue RHS) { return std::less()(LHS.Val, RHS.Val); }; #ifdef EXPENSIVE_CHECKS assert(llvm::is_sorted(Itr->second, Comparator)); #endif auto RangePair = std::equal_range(Itr->second.begin(), Itr->second.end(), OffsetValue{RHS, 0}, Comparator); if (RangePair.first != RangePair.second) { // Be conservative about unknown sizes if (!MaybeLHSSize.hasValue() || !MaybeRHSSize.hasValue()) return true; 
const uint64_t LHSSize = MaybeLHSSize.getValue(); const uint64_t RHSSize = MaybeRHSSize.getValue(); for (const auto &OVal : make_range(RangePair)) { // Be conservative about UnknownOffset if (OVal.Offset == UnknownOffset) return true; // We know that LHS aliases (RHS + OVal.Offset) if the control flow // reaches here. The may-alias query essentially becomes integer // range-overlap queries over two ranges [OVal.Offset, OVal.Offset + // LHSSize) and [0, RHSSize). // Try to be conservative on super large offsets if (LLVM_UNLIKELY(LHSSize > INT64_MAX || RHSSize > INT64_MAX)) return true; auto LHSStart = OVal.Offset; // FIXME: Do we need to guard against integer overflow? auto LHSEnd = OVal.Offset + static_cast(LHSSize); auto RHSStart = 0; auto RHSEnd = static_cast(RHSSize); if (LHSEnd > RHSStart && LHSStart < RHSEnd) return true; } } } return false; } static void propagate(InstantiatedValue From, InstantiatedValue To, MatchState State, ReachabilitySet &ReachSet, std::vector &WorkList) { if (From == To) return; if (ReachSet.insert(From, To, State)) WorkList.push_back(WorkListItem{From, To, State}); } static void initializeWorkList(std::vector &WorkList, ReachabilitySet &ReachSet, const CFLGraph &Graph) { for (const auto &Mapping : Graph.value_mappings()) { auto Val = Mapping.first; auto &ValueInfo = Mapping.second; assert(ValueInfo.getNumLevels() > 0); // Insert all immediate assignment neighbors to the worklist for (unsigned I = 0, E = ValueInfo.getNumLevels(); I < E; ++I) { auto Src = InstantiatedValue{Val, I}; // If there's an assignment edge from X to Y, it means Y is reachable from // X at S3 and X is reachable from Y at S1 for (auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges) { propagate(Edge.Other, Src, MatchState::FlowFromReadOnly, ReachSet, WorkList); propagate(Src, Edge.Other, MatchState::FlowToWriteOnly, ReachSet, WorkList); } } } } static Optional getNodeBelow(const CFLGraph &Graph, InstantiatedValue V) { auto NodeBelow = InstantiatedValue{V.Val, 
V.DerefLevel + 1}; if (Graph.getNode(NodeBelow)) return NodeBelow; return None; } static void processWorkListItem(const WorkListItem &Item, const CFLGraph &Graph, ReachabilitySet &ReachSet, AliasMemSet &MemSet, std::vector &WorkList) { auto FromNode = Item.From; auto ToNode = Item.To; auto NodeInfo = Graph.getNode(ToNode); assert(NodeInfo != nullptr); // TODO: propagate field offsets // FIXME: Here is a neat trick we can do: since both ReachSet and MemSet holds // relations that are symmetric, we could actually cut the storage by half by // sorting FromNode and ToNode before insertion happens. // The newly added value alias pair may potentially generate more memory // alias pairs. Check for them here. auto FromNodeBelow = getNodeBelow(Graph, FromNode); auto ToNodeBelow = getNodeBelow(Graph, ToNode); if (FromNodeBelow && ToNodeBelow && MemSet.insert(*FromNodeBelow, *ToNodeBelow)) { propagate(*FromNodeBelow, *ToNodeBelow, MatchState::FlowFromMemAliasNoReadWrite, ReachSet, WorkList); for (const auto &Mapping : ReachSet.reachableValueAliases(*FromNodeBelow)) { auto Src = Mapping.first; auto MemAliasPropagate = [&](MatchState FromState, MatchState ToState) { if (Mapping.second.test(static_cast(FromState))) propagate(Src, *ToNodeBelow, ToState, ReachSet, WorkList); }; MemAliasPropagate(MatchState::FlowFromReadOnly, MatchState::FlowFromMemAliasReadOnly); MemAliasPropagate(MatchState::FlowToWriteOnly, MatchState::FlowToMemAliasWriteOnly); MemAliasPropagate(MatchState::FlowToReadWrite, MatchState::FlowToMemAliasReadWrite); } } // This is the core of the state machine walking algorithm. 
We expand ReachSet // based on which state we are at (which in turn dictates what edges we // should examine) // From a high-level point of view, the state machine here guarantees two // properties: // - If *X and *Y are memory aliases, then X and Y are value aliases // - If Y is an alias of X, then reverse assignment edges (if there is any) // should precede any assignment edges on the path from X to Y. auto NextAssignState = [&](MatchState State) { for (const auto &AssignEdge : NodeInfo->Edges) propagate(FromNode, AssignEdge.Other, State, ReachSet, WorkList); }; auto NextRevAssignState = [&](MatchState State) { for (const auto &RevAssignEdge : NodeInfo->ReverseEdges) propagate(FromNode, RevAssignEdge.Other, State, ReachSet, WorkList); }; auto NextMemState = [&](MatchState State) { if (auto AliasSet = MemSet.getMemoryAliases(ToNode)) { for (const auto &MemAlias : *AliasSet) propagate(FromNode, MemAlias, State, ReachSet, WorkList); } }; switch (Item.State) { case MatchState::FlowFromReadOnly: NextRevAssignState(MatchState::FlowFromReadOnly); NextAssignState(MatchState::FlowToReadWrite); NextMemState(MatchState::FlowFromMemAliasReadOnly); break; case MatchState::FlowFromMemAliasNoReadWrite: NextRevAssignState(MatchState::FlowFromReadOnly); NextAssignState(MatchState::FlowToWriteOnly); break; case MatchState::FlowFromMemAliasReadOnly: NextRevAssignState(MatchState::FlowFromReadOnly); NextAssignState(MatchState::FlowToReadWrite); break; case MatchState::FlowToWriteOnly: NextAssignState(MatchState::FlowToWriteOnly); NextMemState(MatchState::FlowToMemAliasWriteOnly); break; case MatchState::FlowToReadWrite: NextAssignState(MatchState::FlowToReadWrite); NextMemState(MatchState::FlowToMemAliasReadWrite); break; case MatchState::FlowToMemAliasWriteOnly: NextAssignState(MatchState::FlowToWriteOnly); break; case MatchState::FlowToMemAliasReadWrite: NextAssignState(MatchState::FlowToReadWrite); break; } } static AliasAttrMap buildAttrMap(const CFLGraph &Graph, const 
ReachabilitySet &ReachSet) { AliasAttrMap AttrMap; std::vector WorkList, NextList; // Initialize each node with its original AliasAttrs in CFLGraph for (const auto &Mapping : Graph.value_mappings()) { auto Val = Mapping.first; auto &ValueInfo = Mapping.second; for (unsigned I = 0, E = ValueInfo.getNumLevels(); I < E; ++I) { auto Node = InstantiatedValue{Val, I}; AttrMap.add(Node, ValueInfo.getNodeInfoAtLevel(I).Attr); WorkList.push_back(Node); } } while (!WorkList.empty()) { for (const auto &Dst : WorkList) { auto DstAttr = AttrMap.getAttrs(Dst); if (DstAttr.none()) continue; // Propagate attr on the same level for (const auto &Mapping : ReachSet.reachableValueAliases(Dst)) { auto Src = Mapping.first; if (AttrMap.add(Src, DstAttr)) NextList.push_back(Src); } // Propagate attr to the levels below auto DstBelow = getNodeBelow(Graph, Dst); while (DstBelow) { if (AttrMap.add(*DstBelow, DstAttr)) { NextList.push_back(*DstBelow); break; } DstBelow = getNodeBelow(Graph, *DstBelow); } } WorkList.swap(NextList); NextList.clear(); } return AttrMap; } CFLAndersAAResult::FunctionInfo CFLAndersAAResult::buildInfoFrom(const Function &Fn) { CFLGraphBuilder GraphBuilder( *this, GetTLI(const_cast(Fn)), // Cast away the constness here due to GraphBuilder's API requirement const_cast(Fn)); auto &Graph = GraphBuilder.getCFLGraph(); ReachabilitySet ReachSet; AliasMemSet MemSet; std::vector WorkList, NextList; initializeWorkList(WorkList, ReachSet, Graph); // TODO: make sure we don't stop before the fix point is reached while (!WorkList.empty()) { for (const auto &Item : WorkList) processWorkListItem(Item, Graph, ReachSet, MemSet, NextList); NextList.swap(WorkList); NextList.clear(); } // Now that we have all the reachability info, propagate AliasAttrs according // to it auto IValueAttrMap = buildAttrMap(Graph, ReachSet); return FunctionInfo(Fn, GraphBuilder.getReturnValues(), ReachSet, std::move(IValueAttrMap)); } void CFLAndersAAResult::scan(const Function &Fn) { auto InsertPair = 
Cache.insert(std::make_pair(&Fn, Optional())); (void)InsertPair; assert(InsertPair.second && "Trying to scan a function that has already been cached"); // Note that we can't do Cache[Fn] = buildSetsFrom(Fn) here: the function call // may get evaluated after operator[], potentially triggering a DenseMap // resize and invalidating the reference returned by operator[] auto FunInfo = buildInfoFrom(Fn); Cache[&Fn] = std::move(FunInfo); Handles.emplace_front(const_cast(&Fn), this); } void CFLAndersAAResult::evict(const Function *Fn) { Cache.erase(Fn); } const Optional & CFLAndersAAResult::ensureCached(const Function &Fn) { auto Iter = Cache.find(&Fn); if (Iter == Cache.end()) { scan(Fn); Iter = Cache.find(&Fn); assert(Iter != Cache.end()); assert(Iter->second.hasValue()); } return Iter->second; } const AliasSummary *CFLAndersAAResult::getAliasSummary(const Function &Fn) { auto &FunInfo = ensureCached(Fn); if (FunInfo.hasValue()) return &FunInfo->getAliasSummary(); else return nullptr; } AliasResult CFLAndersAAResult::query(const MemoryLocation &LocA, const MemoryLocation &LocB) { auto *ValA = LocA.Ptr; auto *ValB = LocB.Ptr; if (!ValA->getType()->isPointerTy() || !ValB->getType()->isPointerTy()) return AliasResult::NoAlias; auto *Fn = parentFunctionOfValue(ValA); if (!Fn) { Fn = parentFunctionOfValue(ValB); if (!Fn) { // The only times this is known to happen are when globals + InlineAsm are // involved LLVM_DEBUG( dbgs() << "CFLAndersAA: could not extract parent function information.\n"); return AliasResult::MayAlias; } } else { assert(!parentFunctionOfValue(ValB) || parentFunctionOfValue(ValB) == Fn); } assert(Fn != nullptr); auto &FunInfo = ensureCached(*Fn); // AliasMap lookup if (FunInfo->mayAlias(ValA, LocA.Size, ValB, LocB.Size)) return AliasResult::MayAlias; return AliasResult::NoAlias; } AliasResult CFLAndersAAResult::alias(const MemoryLocation &LocA, const MemoryLocation &LocB, AAQueryInfo &AAQI) { if (LocA.Ptr == LocB.Ptr) return AliasResult::MustAlias; // 
Comparisons between global variables and other constants should be // handled by BasicAA. // CFLAndersAA may report NoAlias when comparing a GlobalValue and // ConstantExpr, but every query needs to have at least one Value tied to a // Function, and neither GlobalValues nor ConstantExprs are. if (isa(LocA.Ptr) && isa(LocB.Ptr)) return AAResultBase::alias(LocA, LocB, AAQI); AliasResult QueryResult = query(LocA, LocB); if (QueryResult == AliasResult::MayAlias) return AAResultBase::alias(LocA, LocB, AAQI); return QueryResult; } AnalysisKey CFLAndersAA::Key; CFLAndersAAResult CFLAndersAA::run(Function &F, FunctionAnalysisManager &AM) { auto GetTLI = [&AM](Function &F) -> TargetLibraryInfo & { return AM.getResult(F); }; return CFLAndersAAResult(GetTLI); } char CFLAndersAAWrapperPass::ID = 0; INITIALIZE_PASS(CFLAndersAAWrapperPass, "cfl-anders-aa", "Inclusion-Based CFL Alias Analysis", false, true) ImmutablePass *llvm::createCFLAndersAAWrapperPass() { return new CFLAndersAAWrapperPass(); } CFLAndersAAWrapperPass::CFLAndersAAWrapperPass() : ImmutablePass(ID) { initializeCFLAndersAAWrapperPassPass(*PassRegistry::getPassRegistry()); } void CFLAndersAAWrapperPass::initializePass() { auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { return this->getAnalysis().getTLI(F); }; Result.reset(new CFLAndersAAResult(GetTLI)); } void CFLAndersAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired(); }