|  | //===- CFLAndersAliasAnalysis.cpp - Inclusion-based Alias Analysis --------===// | 
|  | // | 
|  | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|  | // See https://llvm.org/LICENSE.txt for license information. | 
|  | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  | // | 
|  | // This file implements a CFL-based, summary-based alias analysis algorithm. It | 
|  | // differs from CFLSteensAliasAnalysis in its inclusion-based nature while | 
|  | // CFLSteensAliasAnalysis is unification-based. This pass has worse performance | 
|  | // than CFLSteensAliasAnalysis (the worst case complexity of | 
|  | // CFLAndersAliasAnalysis is cubic, while the worst case complexity of | 
|  | // CFLSteensAliasAnalysis is almost linear), but it is able to yield more | 
|  | // precise analysis results. The precision of this analysis is roughly the | 
|  | // same as that of a one-level context-sensitive Andersen's algorithm. | 
|  | // | 
|  | // The algorithm used here is based on recursive state machine matching scheme | 
|  | // proposed in "Demand-driven alias analysis for C" by Xin Zheng and Radu | 
|  | // Rugina. The general idea is to extend the traditional transitive closure | 
|  | // algorithm to perform CFL matching along the way: instead of recording | 
|  | // "whether X is reachable from Y", we keep track of "whether X is reachable | 
|  | // from Y at state Z", where the "state" field indicates where we are in the CFL | 
|  | // matching process. To understand the matching better, it is advisable to have | 
|  | // the state machine shown in Figure 3 of the paper available when reading the | 
|  | // code: all we do here is to selectively expand the transitive closure by | 
|  | // discarding edges that are not recognized by the state machine. | 
|  | // | 
|  | // There are two differences between our current implementation and the one | 
|  | // described in the paper: | 
|  | // - Our algorithm eagerly computes all alias pairs after the CFLGraph is built, | 
|  | // while in the paper the authors did the computation in a demand-driven | 
|  | // fashion. We did not implement the demand-driven algorithm due to the | 
|  | // additional coding complexity and higher memory profile, but if we found it | 
|  | // necessary we may switch to it eventually. | 
|  | // - In the paper the authors use a state machine that does not distinguish | 
|  | // value reads from value writes. For example, if Y is reachable from X at state | 
|  | // S3, it may be the case that X is written into Y, or it may be the case that | 
|  | // there's a third value Z that writes into both X and Y. To make that | 
|  | // distinction (which is crucial in building function summary as well as | 
|  | // retrieving mod-ref info), we choose to duplicate some of the states in the | 
|  | // paper's proposed state machine. The duplication does not change the set the | 
|  | // machine accepts. Given a pair of reachable values, it only provides more | 
|  | // detailed information on which value is being written into and which is being | 
|  | // read from. | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | // N.B. AliasAnalysis as a whole is phrased as a FunctionPass at the moment, and | 
|  | // CFLAndersAA is interprocedural. This is *technically* A Bad Thing, because | 
|  | // FunctionPasses are only allowed to inspect the Function that they're being | 
|  | // run on. Realistically, this likely isn't a problem until we allow | 
|  | // FunctionPasses to run concurrently. | 
|  |  | 
|  | #include "llvm/Analysis/CFLAndersAliasAnalysis.h" | 
|  | #include "AliasAnalysisSummary.h" | 
|  | #include "CFLGraph.h" | 
|  | #include "llvm/ADT/DenseMap.h" | 
|  | #include "llvm/ADT/DenseMapInfo.h" | 
|  | #include "llvm/ADT/DenseSet.h" | 
|  | #include "llvm/ADT/None.h" | 
|  | #include "llvm/ADT/Optional.h" | 
|  | #include "llvm/ADT/STLExtras.h" | 
|  | #include "llvm/ADT/SmallVector.h" | 
|  | #include "llvm/ADT/iterator_range.h" | 
|  | #include "llvm/Analysis/AliasAnalysis.h" | 
|  | #include "llvm/Analysis/MemoryLocation.h" | 
|  | #include "llvm/IR/Argument.h" | 
|  | #include "llvm/IR/Function.h" | 
|  | #include "llvm/IR/PassManager.h" | 
|  | #include "llvm/IR/Type.h" | 
|  | #include "llvm/InitializePasses.h" | 
|  | #include "llvm/Pass.h" | 
|  | #include "llvm/Support/Casting.h" | 
|  | #include "llvm/Support/Compiler.h" | 
|  | #include "llvm/Support/Debug.h" | 
|  | #include "llvm/Support/raw_ostream.h" | 
|  | #include <algorithm> | 
|  | #include <bitset> | 
|  | #include <cassert> | 
|  | #include <cstddef> | 
|  | #include <cstdint> | 
|  | #include <functional> | 
|  | #include <utility> | 
|  | #include <vector> | 
|  |  | 
|  | using namespace llvm; | 
|  | using namespace llvm::cflaa; | 
|  |  | 
|  | #define DEBUG_TYPE "cfl-anders-aa" | 
|  |  | 
|  | CFLAndersAAResult::CFLAndersAAResult( | 
|  | std::function<const TargetLibraryInfo &(Function &F)> GetTLI) | 
|  | : GetTLI(std::move(GetTLI)) {} | 
|  | CFLAndersAAResult::CFLAndersAAResult(CFLAndersAAResult &&RHS) | 
|  | : AAResultBase(std::move(RHS)), GetTLI(std::move(RHS.GetTLI)) {} | 
|  | CFLAndersAAResult::~CFLAndersAAResult() = default; | 
|  |  | 
|  | namespace { | 
|  |  | 
/// States of the matching state machine. The enumerator values double as bit
/// positions inside a StateSet, so they must stay dense and start at zero.
enum class MatchState : uint8_t {
  // --- S1 in the paper ---
  FlowFromReadOnly = 0,

  // --- S2 in the paper (split in two) ---
  // 'NoReadWrite': some alias path exists that contains neither assignment
  // nor reverse assignment edges.
  FlowFromMemAliasNoReadWrite = 1,
  // 'ReadOnly': some alias path exists that contains reverse assignment edges
  // only.
  FlowFromMemAliasReadOnly = 2,

  // --- S3 in the paper (split in two) ---
  // 'WriteOnly': some alias path exists that contains assignment edges only.
  FlowToWriteOnly = 3,
  // 'ReadWrite': some alias path exists that contains both assignment and
  // reverse assignment edges. Beware: X and Y being reachable at a
  // 'ReadWrite' state does NOT mean that X is both read from and written to
  // Y; it means that a third value Z is written to both X and Y.
  FlowToReadWrite = 4,

  // --- S4 in the paper (split in two) ---
  FlowToMemAliasWriteOnly = 5,
  FlowToMemAliasReadWrite = 6,
};
|  |  | 
|  | using StateSet = std::bitset<7>; | 
|  |  | 
|  | const unsigned ReadOnlyStateMask = | 
|  | (1U << static_cast<uint8_t>(MatchState::FlowFromReadOnly)) | | 
|  | (1U << static_cast<uint8_t>(MatchState::FlowFromMemAliasReadOnly)); | 
|  | const unsigned WriteOnlyStateMask = | 
|  | (1U << static_cast<uint8_t>(MatchState::FlowToWriteOnly)) | | 
|  | (1U << static_cast<uint8_t>(MatchState::FlowToMemAliasWriteOnly)); | 
|  |  | 
|  | // A pair that consists of a value and an offset | 
|  | struct OffsetValue { | 
|  | const Value *Val; | 
|  | int64_t Offset; | 
|  | }; | 
|  |  | 
|  | bool operator==(OffsetValue LHS, OffsetValue RHS) { | 
|  | return LHS.Val == RHS.Val && LHS.Offset == RHS.Offset; | 
|  | } | 
|  | bool operator<(OffsetValue LHS, OffsetValue RHS) { | 
|  | return std::less<const Value *>()(LHS.Val, RHS.Val) || | 
|  | (LHS.Val == RHS.Val && LHS.Offset < RHS.Offset); | 
|  | } | 
|  |  | 
|  | // A pair that consists of an InstantiatedValue and an offset | 
|  | struct OffsetInstantiatedValue { | 
|  | InstantiatedValue IVal; | 
|  | int64_t Offset; | 
|  | }; | 
|  |  | 
|  | bool operator==(OffsetInstantiatedValue LHS, OffsetInstantiatedValue RHS) { | 
|  | return LHS.IVal == RHS.IVal && LHS.Offset == RHS.Offset; | 
|  | } | 
|  |  | 
|  | // We use ReachabilitySet to keep track of value aliases (The nonterminal "V" in | 
|  | // the paper) during the analysis. | 
|  | class ReachabilitySet { | 
|  | using ValueStateMap = DenseMap<InstantiatedValue, StateSet>; | 
|  | using ValueReachMap = DenseMap<InstantiatedValue, ValueStateMap>; | 
|  |  | 
|  | ValueReachMap ReachMap; | 
|  |  | 
|  | public: | 
|  | using const_valuestate_iterator = ValueStateMap::const_iterator; | 
|  | using const_value_iterator = ValueReachMap::const_iterator; | 
|  |  | 
|  | // Insert edge 'From->To' at state 'State' | 
|  | bool insert(InstantiatedValue From, InstantiatedValue To, MatchState State) { | 
|  | assert(From != To); | 
|  | auto &States = ReachMap[To][From]; | 
|  | auto Idx = static_cast<size_t>(State); | 
|  | if (!States.test(Idx)) { | 
|  | States.set(Idx); | 
|  | return true; | 
|  | } | 
|  | return false; | 
|  | } | 
|  |  | 
|  | // Return the set of all ('From', 'State') pair for a given node 'To' | 
|  | iterator_range<const_valuestate_iterator> | 
|  | reachableValueAliases(InstantiatedValue V) const { | 
|  | auto Itr = ReachMap.find(V); | 
|  | if (Itr == ReachMap.end()) | 
|  | return make_range<const_valuestate_iterator>(const_valuestate_iterator(), | 
|  | const_valuestate_iterator()); | 
|  | return make_range<const_valuestate_iterator>(Itr->second.begin(), | 
|  | Itr->second.end()); | 
|  | } | 
|  |  | 
|  | iterator_range<const_value_iterator> value_mappings() const { | 
|  | return make_range<const_value_iterator>(ReachMap.begin(), ReachMap.end()); | 
|  | } | 
|  | }; | 
|  |  | 
|  | // We use AliasMemSet to keep track of all memory aliases (the nonterminal "M" | 
|  | // in the paper) during the analysis. | 
|  | class AliasMemSet { | 
|  | using MemSet = DenseSet<InstantiatedValue>; | 
|  | using MemMapType = DenseMap<InstantiatedValue, MemSet>; | 
|  |  | 
|  | MemMapType MemMap; | 
|  |  | 
|  | public: | 
|  | using const_mem_iterator = MemSet::const_iterator; | 
|  |  | 
|  | bool insert(InstantiatedValue LHS, InstantiatedValue RHS) { | 
|  | // Top-level values can never be memory aliases because one cannot take the | 
|  | // addresses of them | 
|  | assert(LHS.DerefLevel > 0 && RHS.DerefLevel > 0); | 
|  | return MemMap[LHS].insert(RHS).second; | 
|  | } | 
|  |  | 
|  | const MemSet *getMemoryAliases(InstantiatedValue V) const { | 
|  | auto Itr = MemMap.find(V); | 
|  | if (Itr == MemMap.end()) | 
|  | return nullptr; | 
|  | return &Itr->second; | 
|  | } | 
|  | }; | 
|  |  | 
|  | // We use AliasAttrMap to keep track of the AliasAttr of each node. | 
|  | class AliasAttrMap { | 
|  | using MapType = DenseMap<InstantiatedValue, AliasAttrs>; | 
|  |  | 
|  | MapType AttrMap; | 
|  |  | 
|  | public: | 
|  | using const_iterator = MapType::const_iterator; | 
|  |  | 
|  | bool add(InstantiatedValue V, AliasAttrs Attr) { | 
|  | auto &OldAttr = AttrMap[V]; | 
|  | auto NewAttr = OldAttr | Attr; | 
|  | if (OldAttr == NewAttr) | 
|  | return false; | 
|  | OldAttr = NewAttr; | 
|  | return true; | 
|  | } | 
|  |  | 
|  | AliasAttrs getAttrs(InstantiatedValue V) const { | 
|  | AliasAttrs Attr; | 
|  | auto Itr = AttrMap.find(V); | 
|  | if (Itr != AttrMap.end()) | 
|  | Attr = Itr->second; | 
|  | return Attr; | 
|  | } | 
|  |  | 
|  | iterator_range<const_iterator> mappings() const { | 
|  | return make_range<const_iterator>(AttrMap.begin(), AttrMap.end()); | 
|  | } | 
|  | }; | 
|  |  | 
|  | struct WorkListItem { | 
|  | InstantiatedValue From; | 
|  | InstantiatedValue To; | 
|  | MatchState State; | 
|  | }; | 
|  |  | 
|  | struct ValueSummary { | 
|  | struct Record { | 
|  | InterfaceValue IValue; | 
|  | unsigned DerefLevel; | 
|  | }; | 
|  | SmallVector<Record, 4> FromRecords, ToRecords; | 
|  | }; | 
|  |  | 
|  | } // end anonymous namespace | 
|  |  | 
|  | namespace llvm { | 
|  |  | 
|  | // Specialize DenseMapInfo for OffsetValue. | 
|  | template <> struct DenseMapInfo<OffsetValue> { | 
|  | static OffsetValue getEmptyKey() { | 
|  | return OffsetValue{DenseMapInfo<const Value *>::getEmptyKey(), | 
|  | DenseMapInfo<int64_t>::getEmptyKey()}; | 
|  | } | 
|  |  | 
|  | static OffsetValue getTombstoneKey() { | 
|  | return OffsetValue{DenseMapInfo<const Value *>::getTombstoneKey(), | 
|  | DenseMapInfo<int64_t>::getEmptyKey()}; | 
|  | } | 
|  |  | 
|  | static unsigned getHashValue(const OffsetValue &OVal) { | 
|  | return DenseMapInfo<std::pair<const Value *, int64_t>>::getHashValue( | 
|  | std::make_pair(OVal.Val, OVal.Offset)); | 
|  | } | 
|  |  | 
|  | static bool isEqual(const OffsetValue &LHS, const OffsetValue &RHS) { | 
|  | return LHS == RHS; | 
|  | } | 
|  | }; | 
|  |  | 
|  | // Specialize DenseMapInfo for OffsetInstantiatedValue. | 
|  | template <> struct DenseMapInfo<OffsetInstantiatedValue> { | 
|  | static OffsetInstantiatedValue getEmptyKey() { | 
|  | return OffsetInstantiatedValue{ | 
|  | DenseMapInfo<InstantiatedValue>::getEmptyKey(), | 
|  | DenseMapInfo<int64_t>::getEmptyKey()}; | 
|  | } | 
|  |  | 
|  | static OffsetInstantiatedValue getTombstoneKey() { | 
|  | return OffsetInstantiatedValue{ | 
|  | DenseMapInfo<InstantiatedValue>::getTombstoneKey(), | 
|  | DenseMapInfo<int64_t>::getEmptyKey()}; | 
|  | } | 
|  |  | 
|  | static unsigned getHashValue(const OffsetInstantiatedValue &OVal) { | 
|  | return DenseMapInfo<std::pair<InstantiatedValue, int64_t>>::getHashValue( | 
|  | std::make_pair(OVal.IVal, OVal.Offset)); | 
|  | } | 
|  |  | 
|  | static bool isEqual(const OffsetInstantiatedValue &LHS, | 
|  | const OffsetInstantiatedValue &RHS) { | 
|  | return LHS == RHS; | 
|  | } | 
|  | }; | 
|  |  | 
|  | } // end namespace llvm | 
|  |  | 
|  | class CFLAndersAAResult::FunctionInfo { | 
|  | /// Map a value to other values that may alias it | 
|  | /// Since the alias relation is symmetric, to save some space we assume values | 
|  | /// are properly ordered: if a and b alias each other, and a < b, then b is in | 
|  | /// AliasMap[a] but not vice versa. | 
|  | DenseMap<const Value *, std::vector<OffsetValue>> AliasMap; | 
|  |  | 
|  | /// Map a value to its corresponding AliasAttrs | 
|  | DenseMap<const Value *, AliasAttrs> AttrMap; | 
|  |  | 
|  | /// Summary of externally visible effects. | 
|  | AliasSummary Summary; | 
|  |  | 
|  | Optional<AliasAttrs> getAttrs(const Value *) const; | 
|  |  | 
|  | public: | 
|  | FunctionInfo(const Function &, const SmallVectorImpl<Value *> &, | 
|  | const ReachabilitySet &, const AliasAttrMap &); | 
|  |  | 
|  | bool mayAlias(const Value *, LocationSize, const Value *, LocationSize) const; | 
|  | const AliasSummary &getAliasSummary() const { return Summary; } | 
|  | }; | 
|  |  | 
|  | static bool hasReadOnlyState(StateSet Set) { | 
|  | return (Set & StateSet(ReadOnlyStateMask)).any(); | 
|  | } | 
|  |  | 
|  | static bool hasWriteOnlyState(StateSet Set) { | 
|  | return (Set & StateSet(WriteOnlyStateMask)).any(); | 
|  | } | 
|  |  | 
|  | static Optional<InterfaceValue> | 
|  | getInterfaceValue(InstantiatedValue IValue, | 
|  | const SmallVectorImpl<Value *> &RetVals) { | 
|  | auto Val = IValue.Val; | 
|  |  | 
|  | Optional<unsigned> Index; | 
|  | if (auto Arg = dyn_cast<Argument>(Val)) | 
|  | Index = Arg->getArgNo() + 1; | 
|  | else if (is_contained(RetVals, Val)) | 
|  | Index = 0; | 
|  |  | 
|  | if (Index) | 
|  | return InterfaceValue{*Index, IValue.DerefLevel}; | 
|  | return None; | 
|  | } | 
|  |  | 
|  | static void populateAttrMap(DenseMap<const Value *, AliasAttrs> &AttrMap, | 
|  | const AliasAttrMap &AMap) { | 
|  | for (const auto &Mapping : AMap.mappings()) { | 
|  | auto IVal = Mapping.first; | 
|  |  | 
|  | // Insert IVal into the map | 
|  | auto &Attr = AttrMap[IVal.Val]; | 
|  | // AttrMap only cares about top-level values | 
|  | if (IVal.DerefLevel == 0) | 
|  | Attr |= Mapping.second; | 
|  | } | 
|  | } | 
|  |  | 
|  | static void | 
|  | populateAliasMap(DenseMap<const Value *, std::vector<OffsetValue>> &AliasMap, | 
|  | const ReachabilitySet &ReachSet) { | 
|  | for (const auto &OuterMapping : ReachSet.value_mappings()) { | 
|  | // AliasMap only cares about top-level values | 
|  | if (OuterMapping.first.DerefLevel > 0) | 
|  | continue; | 
|  |  | 
|  | auto Val = OuterMapping.first.Val; | 
|  | auto &AliasList = AliasMap[Val]; | 
|  | for (const auto &InnerMapping : OuterMapping.second) { | 
|  | // Again, AliasMap only cares about top-level values | 
|  | if (InnerMapping.first.DerefLevel == 0) | 
|  | AliasList.push_back(OffsetValue{InnerMapping.first.Val, UnknownOffset}); | 
|  | } | 
|  |  | 
|  | // Sort AliasList for faster lookup | 
|  | llvm::sort(AliasList); | 
|  | } | 
|  | } | 
|  |  | 
|  | static void populateExternalRelations( | 
|  | SmallVectorImpl<ExternalRelation> &ExtRelations, const Function &Fn, | 
|  | const SmallVectorImpl<Value *> &RetVals, const ReachabilitySet &ReachSet) { | 
|  | // If a function only returns one of its argument X, then X will be both an | 
|  | // argument and a return value at the same time. This is an edge case that | 
|  | // needs special handling here. | 
|  | for (const auto &Arg : Fn.args()) { | 
|  | if (is_contained(RetVals, &Arg)) { | 
|  | auto ArgVal = InterfaceValue{Arg.getArgNo() + 1, 0}; | 
|  | auto RetVal = InterfaceValue{0, 0}; | 
|  | ExtRelations.push_back(ExternalRelation{ArgVal, RetVal, 0}); | 
|  | } | 
|  | } | 
|  |  | 
|  | // Below is the core summary construction logic. | 
|  | // A naive solution of adding only the value aliases that are parameters or | 
|  | // return values in ReachSet to the summary won't work: It is possible that a | 
|  | // parameter P is written into an intermediate value I, and the function | 
|  | // subsequently returns *I. In that case, *I is does not value alias anything | 
|  | // in ReachSet, and the naive solution will miss a summary edge from (P, 1) to | 
|  | // (I, 1). | 
|  | // To account for the aforementioned case, we need to check each non-parameter | 
|  | // and non-return value for the possibility of acting as an intermediate. | 
|  | // 'ValueMap' here records, for each value, which InterfaceValues read from or | 
|  | // write into it. If both the read list and the write list of a given value | 
|  | // are non-empty, we know that a particular value is an intermidate and we | 
|  | // need to add summary edges from the writes to the reads. | 
|  | DenseMap<Value *, ValueSummary> ValueMap; | 
|  | for (const auto &OuterMapping : ReachSet.value_mappings()) { | 
|  | if (auto Dst = getInterfaceValue(OuterMapping.first, RetVals)) { | 
|  | for (const auto &InnerMapping : OuterMapping.second) { | 
|  | // If Src is a param/return value, we get a same-level assignment. | 
|  | if (auto Src = getInterfaceValue(InnerMapping.first, RetVals)) { | 
|  | // This may happen if both Dst and Src are return values | 
|  | if (*Dst == *Src) | 
|  | continue; | 
|  |  | 
|  | if (hasReadOnlyState(InnerMapping.second)) | 
|  | ExtRelations.push_back(ExternalRelation{*Dst, *Src, UnknownOffset}); | 
|  | // No need to check for WriteOnly state, since ReachSet is symmetric | 
|  | } else { | 
|  | // If Src is not a param/return, add it to ValueMap | 
|  | auto SrcIVal = InnerMapping.first; | 
|  | if (hasReadOnlyState(InnerMapping.second)) | 
|  | ValueMap[SrcIVal.Val].FromRecords.push_back( | 
|  | ValueSummary::Record{*Dst, SrcIVal.DerefLevel}); | 
|  | if (hasWriteOnlyState(InnerMapping.second)) | 
|  | ValueMap[SrcIVal.Val].ToRecords.push_back( | 
|  | ValueSummary::Record{*Dst, SrcIVal.DerefLevel}); | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | for (const auto &Mapping : ValueMap) { | 
|  | for (const auto &FromRecord : Mapping.second.FromRecords) { | 
|  | for (const auto &ToRecord : Mapping.second.ToRecords) { | 
|  | auto ToLevel = ToRecord.DerefLevel; | 
|  | auto FromLevel = FromRecord.DerefLevel; | 
|  | // Same-level assignments should have already been processed by now | 
|  | if (ToLevel == FromLevel) | 
|  | continue; | 
|  |  | 
|  | auto SrcIndex = FromRecord.IValue.Index; | 
|  | auto SrcLevel = FromRecord.IValue.DerefLevel; | 
|  | auto DstIndex = ToRecord.IValue.Index; | 
|  | auto DstLevel = ToRecord.IValue.DerefLevel; | 
|  | if (ToLevel > FromLevel) | 
|  | SrcLevel += ToLevel - FromLevel; | 
|  | else | 
|  | DstLevel += FromLevel - ToLevel; | 
|  |  | 
|  | ExtRelations.push_back(ExternalRelation{ | 
|  | InterfaceValue{SrcIndex, SrcLevel}, | 
|  | InterfaceValue{DstIndex, DstLevel}, UnknownOffset}); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | // Remove duplicates in ExtRelations | 
|  | llvm::sort(ExtRelations); | 
|  | ExtRelations.erase(std::unique(ExtRelations.begin(), ExtRelations.end()), | 
|  | ExtRelations.end()); | 
|  | } | 
|  |  | 
|  | static void populateExternalAttributes( | 
|  | SmallVectorImpl<ExternalAttribute> &ExtAttributes, const Function &Fn, | 
|  | const SmallVectorImpl<Value *> &RetVals, const AliasAttrMap &AMap) { | 
|  | for (const auto &Mapping : AMap.mappings()) { | 
|  | if (auto IVal = getInterfaceValue(Mapping.first, RetVals)) { | 
|  | auto Attr = getExternallyVisibleAttrs(Mapping.second); | 
|  | if (Attr.any()) | 
|  | ExtAttributes.push_back(ExternalAttribute{*IVal, Attr}); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | CFLAndersAAResult::FunctionInfo::FunctionInfo( | 
|  | const Function &Fn, const SmallVectorImpl<Value *> &RetVals, | 
|  | const ReachabilitySet &ReachSet, const AliasAttrMap &AMap) { | 
|  | populateAttrMap(AttrMap, AMap); | 
|  | populateExternalAttributes(Summary.RetParamAttributes, Fn, RetVals, AMap); | 
|  | populateAliasMap(AliasMap, ReachSet); | 
|  | populateExternalRelations(Summary.RetParamRelations, Fn, RetVals, ReachSet); | 
|  | } | 
|  |  | 
|  | Optional<AliasAttrs> | 
|  | CFLAndersAAResult::FunctionInfo::getAttrs(const Value *V) const { | 
|  | assert(V != nullptr); | 
|  |  | 
|  | auto Itr = AttrMap.find(V); | 
|  | if (Itr != AttrMap.end()) | 
|  | return Itr->second; | 
|  | return None; | 
|  | } | 
|  |  | 
|  | bool CFLAndersAAResult::FunctionInfo::mayAlias( | 
|  | const Value *LHS, LocationSize MaybeLHSSize, const Value *RHS, | 
|  | LocationSize MaybeRHSSize) const { | 
|  | assert(LHS && RHS); | 
|  |  | 
|  | // Check if we've seen LHS and RHS before. Sometimes LHS or RHS can be created | 
|  | // after the analysis gets executed, and we want to be conservative in those | 
|  | // cases. | 
|  | auto MaybeAttrsA = getAttrs(LHS); | 
|  | auto MaybeAttrsB = getAttrs(RHS); | 
|  | if (!MaybeAttrsA || !MaybeAttrsB) | 
|  | return true; | 
|  |  | 
|  | // Check AliasAttrs before AliasMap lookup since it's cheaper | 
|  | auto AttrsA = *MaybeAttrsA; | 
|  | auto AttrsB = *MaybeAttrsB; | 
|  | if (hasUnknownOrCallerAttr(AttrsA)) | 
|  | return AttrsB.any(); | 
|  | if (hasUnknownOrCallerAttr(AttrsB)) | 
|  | return AttrsA.any(); | 
|  | if (isGlobalOrArgAttr(AttrsA)) | 
|  | return isGlobalOrArgAttr(AttrsB); | 
|  | if (isGlobalOrArgAttr(AttrsB)) | 
|  | return isGlobalOrArgAttr(AttrsA); | 
|  |  | 
|  | // At this point both LHS and RHS should point to locally allocated objects | 
|  |  | 
|  | auto Itr = AliasMap.find(LHS); | 
|  | if (Itr != AliasMap.end()) { | 
|  |  | 
|  | // Find out all (X, Offset) where X == RHS | 
|  | auto Comparator = [](OffsetValue LHS, OffsetValue RHS) { | 
|  | return std::less<const Value *>()(LHS.Val, RHS.Val); | 
|  | }; | 
|  | #ifdef EXPENSIVE_CHECKS | 
|  | assert(llvm::is_sorted(Itr->second, Comparator)); | 
|  | #endif | 
|  | auto RangePair = std::equal_range(Itr->second.begin(), Itr->second.end(), | 
|  | OffsetValue{RHS, 0}, Comparator); | 
|  |  | 
|  | if (RangePair.first != RangePair.second) { | 
|  | // Be conservative about unknown sizes | 
|  | if (!MaybeLHSSize.hasValue() || !MaybeRHSSize.hasValue()) | 
|  | return true; | 
|  |  | 
|  | const uint64_t LHSSize = MaybeLHSSize.getValue(); | 
|  | const uint64_t RHSSize = MaybeRHSSize.getValue(); | 
|  |  | 
|  | for (const auto &OVal : make_range(RangePair)) { | 
|  | // Be conservative about UnknownOffset | 
|  | if (OVal.Offset == UnknownOffset) | 
|  | return true; | 
|  |  | 
|  | // We know that LHS aliases (RHS + OVal.Offset) if the control flow | 
|  | // reaches here. The may-alias query essentially becomes integer | 
|  | // range-overlap queries over two ranges [OVal.Offset, OVal.Offset + | 
|  | // LHSSize) and [0, RHSSize). | 
|  |  | 
|  | // Try to be conservative on super large offsets | 
|  | if (LLVM_UNLIKELY(LHSSize > INT64_MAX || RHSSize > INT64_MAX)) | 
|  | return true; | 
|  |  | 
|  | auto LHSStart = OVal.Offset; | 
|  | // FIXME: Do we need to guard against integer overflow? | 
|  | auto LHSEnd = OVal.Offset + static_cast<int64_t>(LHSSize); | 
|  | auto RHSStart = 0; | 
|  | auto RHSEnd = static_cast<int64_t>(RHSSize); | 
|  | if (LHSEnd > RHSStart && LHSStart < RHSEnd) | 
|  | return true; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | return false; | 
|  | } | 
|  |  | 
|  | static void propagate(InstantiatedValue From, InstantiatedValue To, | 
|  | MatchState State, ReachabilitySet &ReachSet, | 
|  | std::vector<WorkListItem> &WorkList) { | 
|  | if (From == To) | 
|  | return; | 
|  | if (ReachSet.insert(From, To, State)) | 
|  | WorkList.push_back(WorkListItem{From, To, State}); | 
|  | } | 
|  |  | 
|  | static void initializeWorkList(std::vector<WorkListItem> &WorkList, | 
|  | ReachabilitySet &ReachSet, | 
|  | const CFLGraph &Graph) { | 
|  | for (const auto &Mapping : Graph.value_mappings()) { | 
|  | auto Val = Mapping.first; | 
|  | auto &ValueInfo = Mapping.second; | 
|  | assert(ValueInfo.getNumLevels() > 0); | 
|  |  | 
|  | // Insert all immediate assignment neighbors to the worklist | 
|  | for (unsigned I = 0, E = ValueInfo.getNumLevels(); I < E; ++I) { | 
|  | auto Src = InstantiatedValue{Val, I}; | 
|  | // If there's an assignment edge from X to Y, it means Y is reachable from | 
|  | // X at S3 and X is reachable from Y at S1 | 
|  | for (auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges) { | 
|  | propagate(Edge.Other, Src, MatchState::FlowFromReadOnly, ReachSet, | 
|  | WorkList); | 
|  | propagate(Src, Edge.Other, MatchState::FlowToWriteOnly, ReachSet, | 
|  | WorkList); | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | static Optional<InstantiatedValue> getNodeBelow(const CFLGraph &Graph, | 
|  | InstantiatedValue V) { | 
|  | auto NodeBelow = InstantiatedValue{V.Val, V.DerefLevel + 1}; | 
|  | if (Graph.getNode(NodeBelow)) | 
|  | return NodeBelow; | 
|  | return None; | 
|  | } | 
|  |  | 
|  | static void processWorkListItem(const WorkListItem &Item, const CFLGraph &Graph, | 
|  | ReachabilitySet &ReachSet, AliasMemSet &MemSet, | 
|  | std::vector<WorkListItem> &WorkList) { | 
|  | auto FromNode = Item.From; | 
|  | auto ToNode = Item.To; | 
|  |  | 
|  | auto NodeInfo = Graph.getNode(ToNode); | 
|  | assert(NodeInfo != nullptr); | 
|  |  | 
|  | // TODO: propagate field offsets | 
|  |  | 
|  | // FIXME: Here is a neat trick we can do: since both ReachSet and MemSet holds | 
|  | // relations that are symmetric, we could actually cut the storage by half by | 
|  | // sorting FromNode and ToNode before insertion happens. | 
|  |  | 
|  | // The newly added value alias pair may potentially generate more memory | 
|  | // alias pairs. Check for them here. | 
|  | auto FromNodeBelow = getNodeBelow(Graph, FromNode); | 
|  | auto ToNodeBelow = getNodeBelow(Graph, ToNode); | 
|  | if (FromNodeBelow && ToNodeBelow && | 
|  | MemSet.insert(*FromNodeBelow, *ToNodeBelow)) { | 
|  | propagate(*FromNodeBelow, *ToNodeBelow, | 
|  | MatchState::FlowFromMemAliasNoReadWrite, ReachSet, WorkList); | 
|  | for (const auto &Mapping : ReachSet.reachableValueAliases(*FromNodeBelow)) { | 
|  | auto Src = Mapping.first; | 
|  | auto MemAliasPropagate = [&](MatchState FromState, MatchState ToState) { | 
|  | if (Mapping.second.test(static_cast<size_t>(FromState))) | 
|  | propagate(Src, *ToNodeBelow, ToState, ReachSet, WorkList); | 
|  | }; | 
|  |  | 
|  | MemAliasPropagate(MatchState::FlowFromReadOnly, | 
|  | MatchState::FlowFromMemAliasReadOnly); | 
|  | MemAliasPropagate(MatchState::FlowToWriteOnly, | 
|  | MatchState::FlowToMemAliasWriteOnly); | 
|  | MemAliasPropagate(MatchState::FlowToReadWrite, | 
|  | MatchState::FlowToMemAliasReadWrite); | 
|  | } | 
|  | } | 
|  |  | 
|  | // This is the core of the state machine walking algorithm. We expand ReachSet | 
|  | // based on which state we are at (which in turn dictates what edges we | 
|  | // should examine) | 
|  | // From a high-level point of view, the state machine here guarantees two | 
|  | // properties: | 
|  | // - If *X and *Y are memory aliases, then X and Y are value aliases | 
|  | // - If Y is an alias of X, then reverse assignment edges (if there is any) | 
|  | // should precede any assignment edges on the path from X to Y. | 
|  | auto NextAssignState = [&](MatchState State) { | 
|  | for (const auto &AssignEdge : NodeInfo->Edges) | 
|  | propagate(FromNode, AssignEdge.Other, State, ReachSet, WorkList); | 
|  | }; | 
|  | auto NextRevAssignState = [&](MatchState State) { | 
|  | for (const auto &RevAssignEdge : NodeInfo->ReverseEdges) | 
|  | propagate(FromNode, RevAssignEdge.Other, State, ReachSet, WorkList); | 
|  | }; | 
|  | auto NextMemState = [&](MatchState State) { | 
|  | if (auto AliasSet = MemSet.getMemoryAliases(ToNode)) { | 
|  | for (const auto &MemAlias : *AliasSet) | 
|  | propagate(FromNode, MemAlias, State, ReachSet, WorkList); | 
|  | } | 
|  | }; | 
|  |  | 
|  | switch (Item.State) { | 
|  | case MatchState::FlowFromReadOnly: | 
|  | NextRevAssignState(MatchState::FlowFromReadOnly); | 
|  | NextAssignState(MatchState::FlowToReadWrite); | 
|  | NextMemState(MatchState::FlowFromMemAliasReadOnly); | 
|  | break; | 
|  |  | 
|  | case MatchState::FlowFromMemAliasNoReadWrite: | 
|  | NextRevAssignState(MatchState::FlowFromReadOnly); | 
|  | NextAssignState(MatchState::FlowToWriteOnly); | 
|  | break; | 
|  |  | 
|  | case MatchState::FlowFromMemAliasReadOnly: | 
|  | NextRevAssignState(MatchState::FlowFromReadOnly); | 
|  | NextAssignState(MatchState::FlowToReadWrite); | 
|  | break; | 
|  |  | 
|  | case MatchState::FlowToWriteOnly: | 
|  | NextAssignState(MatchState::FlowToWriteOnly); | 
|  | NextMemState(MatchState::FlowToMemAliasWriteOnly); | 
|  | break; | 
|  |  | 
|  | case MatchState::FlowToReadWrite: | 
|  | NextAssignState(MatchState::FlowToReadWrite); | 
|  | NextMemState(MatchState::FlowToMemAliasReadWrite); | 
|  | break; | 
|  |  | 
|  | case MatchState::FlowToMemAliasWriteOnly: | 
|  | NextAssignState(MatchState::FlowToWriteOnly); | 
|  | break; | 
|  |  | 
|  | case MatchState::FlowToMemAliasReadWrite: | 
|  | NextAssignState(MatchState::FlowToReadWrite); | 
|  | break; | 
|  | } | 
|  | } | 
|  |  | 
|  | static AliasAttrMap buildAttrMap(const CFLGraph &Graph, | 
|  | const ReachabilitySet &ReachSet) { | 
|  | AliasAttrMap AttrMap; | 
|  | std::vector<InstantiatedValue> WorkList, NextList; | 
|  |  | 
|  | // Initialize each node with its original AliasAttrs in CFLGraph | 
|  | for (const auto &Mapping : Graph.value_mappings()) { | 
|  | auto Val = Mapping.first; | 
|  | auto &ValueInfo = Mapping.second; | 
|  | for (unsigned I = 0, E = ValueInfo.getNumLevels(); I < E; ++I) { | 
|  | auto Node = InstantiatedValue{Val, I}; | 
|  | AttrMap.add(Node, ValueInfo.getNodeInfoAtLevel(I).Attr); | 
|  | WorkList.push_back(Node); | 
|  | } | 
|  | } | 
|  |  | 
|  | while (!WorkList.empty()) { | 
|  | for (const auto &Dst : WorkList) { | 
|  | auto DstAttr = AttrMap.getAttrs(Dst); | 
|  | if (DstAttr.none()) | 
|  | continue; | 
|  |  | 
|  | // Propagate attr on the same level | 
|  | for (const auto &Mapping : ReachSet.reachableValueAliases(Dst)) { | 
|  | auto Src = Mapping.first; | 
|  | if (AttrMap.add(Src, DstAttr)) | 
|  | NextList.push_back(Src); | 
|  | } | 
|  |  | 
|  | // Propagate attr to the levels below | 
|  | auto DstBelow = getNodeBelow(Graph, Dst); | 
|  | while (DstBelow) { | 
|  | if (AttrMap.add(*DstBelow, DstAttr)) { | 
|  | NextList.push_back(*DstBelow); | 
|  | break; | 
|  | } | 
|  | DstBelow = getNodeBelow(Graph, *DstBelow); | 
|  | } | 
|  | } | 
|  | WorkList.swap(NextList); | 
|  | NextList.clear(); | 
|  | } | 
|  |  | 
|  | return AttrMap; | 
|  | } | 
|  |  | 
|  | CFLAndersAAResult::FunctionInfo | 
|  | CFLAndersAAResult::buildInfoFrom(const Function &Fn) { | 
|  | CFLGraphBuilder<CFLAndersAAResult> GraphBuilder( | 
|  | *this, GetTLI(const_cast<Function &>(Fn)), | 
|  | // Cast away the constness here due to GraphBuilder's API requirement | 
|  | const_cast<Function &>(Fn)); | 
|  | auto &Graph = GraphBuilder.getCFLGraph(); | 
|  |  | 
|  | ReachabilitySet ReachSet; | 
|  | AliasMemSet MemSet; | 
|  |  | 
|  | std::vector<WorkListItem> WorkList, NextList; | 
|  | initializeWorkList(WorkList, ReachSet, Graph); | 
|  | // TODO: make sure we don't stop before the fix point is reached | 
|  | while (!WorkList.empty()) { | 
|  | for (const auto &Item : WorkList) | 
|  | processWorkListItem(Item, Graph, ReachSet, MemSet, NextList); | 
|  |  | 
|  | NextList.swap(WorkList); | 
|  | NextList.clear(); | 
|  | } | 
|  |  | 
|  | // Now that we have all the reachability info, propagate AliasAttrs according | 
|  | // to it | 
|  | auto IValueAttrMap = buildAttrMap(Graph, ReachSet); | 
|  |  | 
|  | return FunctionInfo(Fn, GraphBuilder.getReturnValues(), ReachSet, | 
|  | std::move(IValueAttrMap)); | 
|  | } | 
|  |  | 
|  | void CFLAndersAAResult::scan(const Function &Fn) { | 
|  | auto InsertPair = Cache.insert(std::make_pair(&Fn, Optional<FunctionInfo>())); | 
|  | (void)InsertPair; | 
|  | assert(InsertPair.second && | 
|  | "Trying to scan a function that has already been cached"); | 
|  |  | 
|  | // Note that we can't do Cache[Fn] = buildSetsFrom(Fn) here: the function call | 
|  | // may get evaluated after operator[], potentially triggering a DenseMap | 
|  | // resize and invalidating the reference returned by operator[] | 
|  | auto FunInfo = buildInfoFrom(Fn); | 
|  | Cache[&Fn] = std::move(FunInfo); | 
|  | Handles.emplace_front(const_cast<Function *>(&Fn), this); | 
|  | } | 
|  |  | 
|  | void CFLAndersAAResult::evict(const Function *Fn) { Cache.erase(Fn); } | 
|  |  | 
|  | const Optional<CFLAndersAAResult::FunctionInfo> & | 
|  | CFLAndersAAResult::ensureCached(const Function &Fn) { | 
|  | auto Iter = Cache.find(&Fn); | 
|  | if (Iter == Cache.end()) { | 
|  | scan(Fn); | 
|  | Iter = Cache.find(&Fn); | 
|  | assert(Iter != Cache.end()); | 
|  | assert(Iter->second.hasValue()); | 
|  | } | 
|  | return Iter->second; | 
|  | } | 
|  |  | 
|  | const AliasSummary *CFLAndersAAResult::getAliasSummary(const Function &Fn) { | 
|  | auto &FunInfo = ensureCached(Fn); | 
|  | if (FunInfo.hasValue()) | 
|  | return &FunInfo->getAliasSummary(); | 
|  | else | 
|  | return nullptr; | 
|  | } | 
|  |  | 
|  | AliasResult CFLAndersAAResult::query(const MemoryLocation &LocA, | 
|  | const MemoryLocation &LocB) { | 
|  | auto *ValA = LocA.Ptr; | 
|  | auto *ValB = LocB.Ptr; | 
|  |  | 
|  | if (!ValA->getType()->isPointerTy() || !ValB->getType()->isPointerTy()) | 
|  | return NoAlias; | 
|  |  | 
|  | auto *Fn = parentFunctionOfValue(ValA); | 
|  | if (!Fn) { | 
|  | Fn = parentFunctionOfValue(ValB); | 
|  | if (!Fn) { | 
|  | // The only times this is known to happen are when globals + InlineAsm are | 
|  | // involved | 
|  | LLVM_DEBUG( | 
|  | dbgs() | 
|  | << "CFLAndersAA: could not extract parent function information.\n"); | 
|  | return MayAlias; | 
|  | } | 
|  | } else { | 
|  | assert(!parentFunctionOfValue(ValB) || parentFunctionOfValue(ValB) == Fn); | 
|  | } | 
|  |  | 
|  | assert(Fn != nullptr); | 
|  | auto &FunInfo = ensureCached(*Fn); | 
|  |  | 
|  | // AliasMap lookup | 
|  | if (FunInfo->mayAlias(ValA, LocA.Size, ValB, LocB.Size)) | 
|  | return MayAlias; | 
|  | return NoAlias; | 
|  | } | 
|  |  | 
|  | AliasResult CFLAndersAAResult::alias(const MemoryLocation &LocA, | 
|  | const MemoryLocation &LocB, | 
|  | AAQueryInfo &AAQI) { | 
|  | if (LocA.Ptr == LocB.Ptr) | 
|  | return MustAlias; | 
|  |  | 
|  | // Comparisons between global variables and other constants should be | 
|  | // handled by BasicAA. | 
|  | // CFLAndersAA may report NoAlias when comparing a GlobalValue and | 
|  | // ConstantExpr, but every query needs to have at least one Value tied to a | 
|  | // Function, and neither GlobalValues nor ConstantExprs are. | 
|  | if (isa<Constant>(LocA.Ptr) && isa<Constant>(LocB.Ptr)) | 
|  | return AAResultBase::alias(LocA, LocB, AAQI); | 
|  |  | 
|  | AliasResult QueryResult = query(LocA, LocB); | 
|  | if (QueryResult == MayAlias) | 
|  | return AAResultBase::alias(LocA, LocB, AAQI); | 
|  |  | 
|  | return QueryResult; | 
|  | } | 
|  |  | 
|  | AnalysisKey CFLAndersAA::Key; | 
|  |  | 
|  | CFLAndersAAResult CFLAndersAA::run(Function &F, FunctionAnalysisManager &AM) { | 
|  | auto GetTLI = [&AM](Function &F) -> TargetLibraryInfo & { | 
|  | return AM.getResult<TargetLibraryAnalysis>(F); | 
|  | }; | 
|  | return CFLAndersAAResult(GetTLI); | 
|  | } | 
|  |  | 
|  | char CFLAndersAAWrapperPass::ID = 0; | 
|  | INITIALIZE_PASS(CFLAndersAAWrapperPass, "cfl-anders-aa", | 
|  | "Inclusion-Based CFL Alias Analysis", false, true) | 
|  |  | 
|  | ImmutablePass *llvm::createCFLAndersAAWrapperPass() { | 
|  | return new CFLAndersAAWrapperPass(); | 
|  | } | 
|  |  | 
|  | CFLAndersAAWrapperPass::CFLAndersAAWrapperPass() : ImmutablePass(ID) { | 
|  | initializeCFLAndersAAWrapperPassPass(*PassRegistry::getPassRegistry()); | 
|  | } | 
|  |  | 
|  | void CFLAndersAAWrapperPass::initializePass() { | 
|  | auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { | 
|  | return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); | 
|  | }; | 
|  | Result.reset(new CFLAndersAAResult(GetTLI)); | 
|  | } | 
|  |  | 
|  | void CFLAndersAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { | 
|  | AU.setPreservesAll(); | 
|  | AU.addRequired<TargetLibraryInfoWrapperPass>(); | 
|  | } |