| //===- FunctionAttrs.cpp - Pass which marks functions attributes ----------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| /// \file |
| /// This file implements interprocedural passes which walk the |
| /// call-graph deducing and/or propagating function attributes. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "llvm/Transforms/IPO/FunctionAttrs.h" |
| #include "llvm/ADT/ArrayRef.h" |
| #include "llvm/ADT/DenseMap.h" |
| #include "llvm/ADT/PostOrderIterator.h" |
| #include "llvm/ADT/SCCIterator.h" |
| #include "llvm/ADT/STLExtras.h" |
| #include "llvm/ADT/SetVector.h" |
| #include "llvm/ADT/SmallPtrSet.h" |
| #include "llvm/ADT/SmallSet.h" |
| #include "llvm/ADT/SmallVector.h" |
| #include "llvm/ADT/Statistic.h" |
| #include "llvm/Analysis/AssumptionCache.h" |
| #include "llvm/Analysis/BasicAliasAnalysis.h" |
| #include "llvm/Analysis/CFG.h" |
| #include "llvm/Analysis/CGSCCPassManager.h" |
| #include "llvm/Analysis/CallGraph.h" |
| #include "llvm/Analysis/CallGraphSCCPass.h" |
| #include "llvm/Analysis/CaptureTracking.h" |
| #include "llvm/Analysis/LazyCallGraph.h" |
| #include "llvm/Analysis/MemoryLocation.h" |
| #include "llvm/Analysis/ValueTracking.h" |
| #include "llvm/IR/Argument.h" |
| #include "llvm/IR/Attributes.h" |
| #include "llvm/IR/BasicBlock.h" |
| #include "llvm/IR/Constant.h" |
| #include "llvm/IR/ConstantRangeList.h" |
| #include "llvm/IR/Constants.h" |
| #include "llvm/IR/Function.h" |
| #include "llvm/IR/InstIterator.h" |
| #include "llvm/IR/InstrTypes.h" |
| #include "llvm/IR/Instruction.h" |
| #include "llvm/IR/Instructions.h" |
| #include "llvm/IR/IntrinsicInst.h" |
| #include "llvm/IR/Metadata.h" |
| #include "llvm/IR/ModuleSummaryIndex.h" |
| #include "llvm/IR/PassManager.h" |
| #include "llvm/IR/Type.h" |
| #include "llvm/IR/Use.h" |
| #include "llvm/IR/User.h" |
| #include "llvm/IR/Value.h" |
| #include "llvm/Support/Casting.h" |
| #include "llvm/Support/CommandLine.h" |
| #include "llvm/Support/Compiler.h" |
| #include "llvm/Support/Debug.h" |
| #include "llvm/Support/ErrorHandling.h" |
| #include "llvm/Support/raw_ostream.h" |
| #include "llvm/Transforms/IPO.h" |
| #include "llvm/Transforms/Utils/Local.h" |
| #include <cassert> |
| #include <iterator> |
| #include <map> |
| #include <optional> |
| #include <vector> |
| |
| using namespace llvm; |
| |
| #define DEBUG_TYPE "function-attrs" |
| |
| STATISTIC(NumMemoryAttr, "Number of functions with improved memory attribute"); |
| STATISTIC(NumCapturesNone, "Number of arguments marked captures(none)"); |
| STATISTIC(NumCapturesPartial, "Number of arguments marked with captures " |
| "attribute other than captures(none)"); |
| STATISTIC(NumReturned, "Number of arguments marked returned"); |
| STATISTIC(NumReadNoneArg, "Number of arguments marked readnone"); |
| STATISTIC(NumReadOnlyArg, "Number of arguments marked readonly"); |
| STATISTIC(NumWriteOnlyArg, "Number of arguments marked writeonly"); |
| STATISTIC(NumNoAlias, "Number of function returns marked noalias"); |
| STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull"); |
| STATISTIC(NumNoUndefReturn, "Number of function returns marked noundef"); |
| STATISTIC(NumNoRecurse, "Number of functions marked as norecurse"); |
| STATISTIC(NumNoUnwind, "Number of functions marked as nounwind"); |
| STATISTIC(NumNoFree, "Number of functions marked as nofree"); |
| STATISTIC(NumWillReturn, "Number of functions marked as willreturn"); |
| STATISTIC(NumNoSync, "Number of functions marked as nosync"); |
| STATISTIC(NumCold, "Number of functions marked as cold"); |
| |
| STATISTIC(NumThinLinkNoRecurse, |
| "Number of functions marked as norecurse during thinlink"); |
| STATISTIC(NumThinLinkNoUnwind, |
| "Number of functions marked as nounwind during thinlink"); |
| |
| static cl::opt<bool> EnableNonnullArgPropagation( |
| "enable-nonnull-arg-prop", cl::init(true), cl::Hidden, |
| cl::desc("Try to propagate nonnull argument attributes from callsites to " |
| "caller functions.")); |
| |
| static cl::opt<bool> DisableNoUnwindInference( |
| "disable-nounwind-inference", cl::Hidden, |
| cl::desc("Stop inferring nounwind attribute during function-attrs pass")); |
| |
| static cl::opt<bool> DisableNoFreeInference( |
| "disable-nofree-inference", cl::Hidden, |
| cl::desc("Stop inferring nofree attribute during function-attrs pass")); |
| |
| static cl::opt<bool> DisableThinLTOPropagation( |
| "disable-thinlto-funcattrs", cl::init(true), cl::Hidden, |
| cl::desc("Don't propagate function-attrs in thinLTO")); |
| |
| static void addCapturesStat(CaptureInfo CI) { |
| if (capturesNothing(CI)) |
| ++NumCapturesNone; |
| else |
| ++NumCapturesPartial; |
| } |
| |
| namespace { |
| |
| using SCCNodeSet = SmallSetVector<Function *, 8>; |
| |
| } // end anonymous namespace |
| |
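// Record in ME the effect of accessing Loc with mod/ref info MR. Roughly (a
// sketch of the cases below): accesses to known-local memory (allocas) are
// ignored; accesses through an Argument are argmem; any other access may
// touch errno or "other" memory, plus argmem if the pointer is not an
// identified object (it could then alias a pointer argument).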
| static void addLocAccess(MemoryEffects &ME, const MemoryLocation &Loc, |
| ModRefInfo MR, AAResults &AAR) { |
| // Ignore accesses to known-invariant or local memory. |
| MR &= AAR.getModRefInfoMask(Loc, /*IgnoreLocal=*/true); |
| if (isNoModRef(MR)) |
| return; |
| |
| const Value *UO = getUnderlyingObjectAggressive(Loc.Ptr); |
| if (isa<AllocaInst>(UO)) |
| return; |
| if (isa<Argument>(UO)) { |
| ME |= MemoryEffects::argMemOnly(MR); |
| return; |
| } |
| |
| // If it's not an identified object, it might be an argument. |
| if (!isIdentifiedObject(UO)) |
| ME |= MemoryEffects::argMemOnly(MR); |
| ME |= MemoryEffects(IRMemLocation::ErrnoMem, MR); |
| ME |= MemoryEffects(IRMemLocation::Other, MR); |
| } |
| |
| static void addArgLocs(MemoryEffects &ME, const CallBase *Call, |
| ModRefInfo ArgMR, AAResults &AAR) { |
| for (const Value *Arg : Call->args()) { |
| if (!Arg->getType()->isPtrOrPtrVectorTy()) |
| continue; |
| |
| addLocAccess(ME, |
| MemoryLocation::getBeforeOrAfter(Arg, Call->getAAMetadata()), |
| ArgMR, AAR); |
| } |
| } |
| |
| /// Returns the memory access attribute for function F using AAR for AA results, |
| /// where SCCNodes is the current SCC. |
| /// |
| /// If ThisBody is true, this function may examine the function body and will |
| /// return a result pertaining to this copy of the function. If it is false, the |
| /// result will be based only on AA results for the function declaration; it |
| /// will be assumed that some other (perhaps less optimized) version of the |
| /// function may be selected at link time. |
| /// |
| /// The return value is split into two parts: Memory effects that always apply, |
| /// and additional memory effects that apply if any of the functions in the SCC |
| /// can access argmem. |
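///
/// For example (a hypothetical sketch): in an SCC member f(ptr %p) that
/// performs "store i32 0, ptr %p" and the recursive call "f(ptr @g)", the
/// store contributes an argmem write, while passing the global @g to the
/// recursive call is recorded in the second part of the result: if the SCC
/// turns out to access argmem, access to @g's location is added as well.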
| static std::pair<MemoryEffects, MemoryEffects> |
| checkFunctionMemoryAccess(Function &F, bool ThisBody, AAResults &AAR, |
| const SCCNodeSet &SCCNodes) { |
| MemoryEffects OrigME = AAR.getMemoryEffects(&F); |
| if (OrigME.doesNotAccessMemory()) |
| // Already perfect! |
| return {OrigME, MemoryEffects::none()}; |
| |
| if (!ThisBody) |
| return {OrigME, MemoryEffects::none()}; |
| |
| MemoryEffects ME = MemoryEffects::none(); |
| // Additional locations accessed if the SCC accesses argmem. |
| MemoryEffects RecursiveArgME = MemoryEffects::none(); |
| |
| // Inalloca and preallocated arguments are always clobbered by the call. |
| if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca) || |
| F.getAttributes().hasAttrSomewhere(Attribute::Preallocated)) |
| ME |= MemoryEffects::argMemOnly(ModRefInfo::ModRef); |
| |
| // Scan the function body for instructions that may read or write memory. |
| for (Instruction &I : instructions(F)) { |
| // Some instructions can be ignored even if they read or write memory. |
| // Detect these now, skipping to the next instruction if one is found. |
| if (auto *Call = dyn_cast<CallBase>(&I)) { |
| // We can optimistically ignore calls to functions in the same SCC, with |
| // two caveats: |
| // * Calls with operand bundles may have additional effects. |
| // * Argument memory accesses may imply additional effects depending on |
| // what the argument location is. |
| if (!Call->hasOperandBundles() && Call->getCalledFunction() && |
| SCCNodes.count(Call->getCalledFunction())) { |
| // Keep track of which additional locations are accessed if the SCC |
| // turns out to access argmem. |
| addArgLocs(RecursiveArgME, Call, ModRefInfo::ModRef, AAR); |
| continue; |
| } |
| |
| MemoryEffects CallME = AAR.getMemoryEffects(Call); |
| |
| // If the call doesn't access memory, we're done. |
| if (CallME.doesNotAccessMemory()) |
| continue; |
| |
      // A pseudo probe call shouldn't change any function attribute since it
      // doesn't translate to a real instruction. It comes with a memory
      // access tag that prevents it from being removed by optimizations
      // while not blocking the optimization of other instructions.
| if (isa<PseudoProbeInst>(I)) |
| continue; |
| |
| // Merge callee's memory effects into caller's ones, including |
| // inaccessible and errno memory, but excluding argument memory, which is |
| // handled separately. |
| ME |= CallME.getWithoutLoc(IRMemLocation::ArgMem); |
| |
| // If the call accesses captured memory (currently part of "other") and |
| // an argument is captured (currently not tracked), then it may also |
| // access argument memory. |
| ModRefInfo OtherMR = CallME.getModRef(IRMemLocation::Other); |
| ME |= MemoryEffects::argMemOnly(OtherMR); |
| |
| // Check whether all pointer arguments point to local memory, and |
| // ignore calls that only access local memory. |
| ModRefInfo ArgMR = CallME.getModRef(IRMemLocation::ArgMem); |
| if (ArgMR != ModRefInfo::NoModRef) |
| addArgLocs(ME, Call, ArgMR, AAR); |
| continue; |
| } |
| |
| ModRefInfo MR = ModRefInfo::NoModRef; |
| if (I.mayWriteToMemory()) |
| MR |= ModRefInfo::Mod; |
| if (I.mayReadFromMemory()) |
| MR |= ModRefInfo::Ref; |
| if (MR == ModRefInfo::NoModRef) |
| continue; |
| |
| std::optional<MemoryLocation> Loc = MemoryLocation::getOrNone(&I); |
| if (!Loc) { |
| // If no location is known, conservatively assume anything can be |
| // accessed. |
| ME |= MemoryEffects(MR); |
| continue; |
| } |
| |
| // Volatile operations may access inaccessible memory. |
| if (I.isVolatile()) |
| ME |= MemoryEffects::inaccessibleMemOnly(MR); |
| |
| addLocAccess(ME, *Loc, MR, AAR); |
| } |
| |
| return {OrigME & ME, RecursiveArgME}; |
| } |
| |
| MemoryEffects llvm::computeFunctionBodyMemoryAccess(Function &F, |
| AAResults &AAR) { |
| return checkFunctionMemoryAccess(F, /*ThisBody=*/true, AAR, {}).first; |
| } |
| |
| /// Deduce readonly/readnone/writeonly attributes for the SCC. |
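///
/// The SCC-wide effects are the union of every member's effects, and each
/// member's final attribute is that union intersected with its existing one.
/// For example (a sketch), if one member only reads argmem and another only
/// writes inaccessible memory, members can be annotated at best as
/// memory(argmem: read, inaccessiblemem: write).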
| template <typename AARGetterT> |
| static void addMemoryAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter, |
| SmallSet<Function *, 8> &Changed) { |
| MemoryEffects ME = MemoryEffects::none(); |
| MemoryEffects RecursiveArgME = MemoryEffects::none(); |
| for (Function *F : SCCNodes) { |
| // Call the callable parameter to look up AA results for this function. |
| AAResults &AAR = AARGetter(*F); |
| // Non-exact function definitions may not be selected at link time, and an |
| // alternative version that writes to memory may be selected. See the |
| // comment on GlobalValue::isDefinitionExact for more details. |
| auto [FnME, FnRecursiveArgME] = |
| checkFunctionMemoryAccess(*F, F->hasExactDefinition(), AAR, SCCNodes); |
| ME |= FnME; |
| RecursiveArgME |= FnRecursiveArgME; |
    // Reached the bottom of the lattice; we will not be able to improve the
    // result.
| if (ME == MemoryEffects::unknown()) |
| return; |
| } |
| |
| // If the SCC accesses argmem, add recursive accesses resulting from that. |
| ModRefInfo ArgMR = ME.getModRef(IRMemLocation::ArgMem); |
| if (ArgMR != ModRefInfo::NoModRef) |
| ME |= RecursiveArgME & MemoryEffects(ArgMR); |
| |
| for (Function *F : SCCNodes) { |
| MemoryEffects OldME = F->getMemoryEffects(); |
| MemoryEffects NewME = ME & OldME; |
| if (NewME != OldME) { |
| ++NumMemoryAttr; |
| F->setMemoryEffects(NewME); |
| // Remove conflicting writable attributes. |
| if (!isModSet(NewME.getModRef(IRMemLocation::ArgMem))) |
| for (Argument &A : F->args()) |
| A.removeAttr(Attribute::Writable); |
| Changed.insert(F); |
| } |
| } |
| } |
| |
// Compute definitive function attributes for a function, taking into account
// prevailing definitions and linkage types.
| static FunctionSummary *calculatePrevailingSummary( |
| ValueInfo VI, |
| DenseMap<ValueInfo, FunctionSummary *> &CachedPrevailingSummary, |
| function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> |
| IsPrevailing) { |
| |
| if (auto It = CachedPrevailingSummary.find(VI); |
| It != CachedPrevailingSummary.end()) |
| return It->second; |
| |
  /// At this point, prevailing symbols have been resolved. The following leads
  /// to returning a conservative result:
  /// - Multiple instances with local linkage. Normally local linkage would be
  ///   unique per module as the GUID includes the module path. We could have
  ///   a GUID alias if there wasn't any distinguishing path when each file
  ///   was compiled, but that should be rare so we'll punt on those.
| |
| /// These next 2 cases should not happen and will assert: |
| /// - Multiple instances with external linkage. This should be caught in |
| /// symbol resolution |
| /// - Non-existent FunctionSummary for Aliasee. This presents a hole in our |
| /// knowledge meaning we have to go conservative. |
| |
  /// Otherwise, we calculate attributes for a function as:
  /// 1. If we have a local linkage, take its attributes. If there are somehow
  ///    multiple copies, bail and go conservative.
  /// 2. If we have an external/WeakODR/LinkOnceODR linkage, check that it is
  ///    prevailing and take its attributes.
  /// 3. If we have a Weak/LinkOnce linkage the copies can have semantic
  ///    differences. However, if the prevailing copy is known it will be used,
  ///    so take its attributes. If the prevailing copy is in a native file,
  ///    all IR copies will be dead and propagation will go conservative.
| /// 4. AvailableExternally summaries without a prevailing copy are known to |
| /// occur in a couple of circumstances: |
| /// a. An internal function gets imported due to its caller getting |
| /// imported, it becomes AvailableExternally but no prevailing |
| /// definition exists. Because it has to get imported along with its |
| /// caller the attributes will be captured by propagating on its |
| /// caller. |
| /// b. C++11 [temp.explicit]p10 can generate AvailableExternally |
| /// definitions of explicitly instanced template declarations |
| /// for inlining which are ultimately dropped from the TU. Since this |
| /// is localized to the TU the attributes will have already made it to |
| /// the callers. |
| /// These are edge cases and already captured by their callers so we |
| /// ignore these for now. If they become relevant to optimize in the |
| /// future this can be revisited. |
| /// 5. Otherwise, go conservative. |
| |
| CachedPrevailingSummary[VI] = nullptr; |
| FunctionSummary *Local = nullptr; |
| FunctionSummary *Prevailing = nullptr; |
| |
| for (const auto &GVS : VI.getSummaryList()) { |
| if (!GVS->isLive()) |
| continue; |
| |
| FunctionSummary *FS = dyn_cast<FunctionSummary>(GVS->getBaseObject()); |
    // Virtual and Unknown (e.g. indirect) calls require going conservative.
| if (!FS || FS->fflags().HasUnknownCall) |
| return nullptr; |
| |
| const auto &Linkage = GVS->linkage(); |
| if (GlobalValue::isLocalLinkage(Linkage)) { |
| if (Local) { |
| LLVM_DEBUG( |
| dbgs() |
| << "ThinLTO FunctionAttrs: Multiple Local Linkage, bailing on " |
| "function " |
| << VI.name() << " from " << FS->modulePath() << ". Previous module " |
| << Local->modulePath() << "\n"); |
| return nullptr; |
| } |
| Local = FS; |
| } else if (GlobalValue::isExternalLinkage(Linkage)) { |
| assert(IsPrevailing(VI.getGUID(), GVS.get())); |
| Prevailing = FS; |
| break; |
| } else if (GlobalValue::isWeakODRLinkage(Linkage) || |
| GlobalValue::isLinkOnceODRLinkage(Linkage) || |
| GlobalValue::isWeakAnyLinkage(Linkage) || |
| GlobalValue::isLinkOnceAnyLinkage(Linkage)) { |
| if (IsPrevailing(VI.getGUID(), GVS.get())) { |
| Prevailing = FS; |
| break; |
| } |
| } else if (GlobalValue::isAvailableExternallyLinkage(Linkage)) { |
| // TODO: Handle these cases if they become meaningful |
| continue; |
| } |
| } |
| |
| if (Local) { |
| assert(!Prevailing); |
| CachedPrevailingSummary[VI] = Local; |
| } else if (Prevailing) { |
| assert(!Local); |
| CachedPrevailingSummary[VI] = Prevailing; |
| } |
| |
| return CachedPrevailingSummary[VI]; |
| } |
| |
| bool llvm::thinLTOPropagateFunctionAttrs( |
| ModuleSummaryIndex &Index, |
| function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> |
| IsPrevailing) { |
| // TODO: implement addNoAliasAttrs once |
| // there's more information about the return type in the summary |
| if (DisableThinLTOPropagation) |
| return false; |
| |
| DenseMap<ValueInfo, FunctionSummary *> CachedPrevailingSummary; |
| bool Changed = false; |
| |
| auto PropagateAttributes = [&](std::vector<ValueInfo> &SCCNodes) { |
| // Assume we can propagate unless we discover otherwise |
| FunctionSummary::FFlags InferredFlags; |
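    // A multi-node SCC is mutually recursive by construction, so norecurse
    // can only be inferred for singleton SCCs. A self-recursive singleton is
    // still rejected below, since its self-call edge leads to a callee
    // summary that is not (yet) norecurse.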
| InferredFlags.NoRecurse = (SCCNodes.size() == 1); |
| InferredFlags.NoUnwind = true; |
| |
| for (auto &V : SCCNodes) { |
| FunctionSummary *CallerSummary = |
| calculatePrevailingSummary(V, CachedPrevailingSummary, IsPrevailing); |
| |
      // Function summaries can fail to contain information, e.g. for
      // declarations.
| if (!CallerSummary) |
| return; |
| |
| if (CallerSummary->fflags().MayThrow) |
| InferredFlags.NoUnwind = false; |
| |
| for (const auto &Callee : CallerSummary->calls()) { |
| FunctionSummary *CalleeSummary = calculatePrevailingSummary( |
| Callee.first, CachedPrevailingSummary, IsPrevailing); |
| |
| if (!CalleeSummary) |
| return; |
| |
| if (!CalleeSummary->fflags().NoRecurse) |
| InferredFlags.NoRecurse = false; |
| |
| if (!CalleeSummary->fflags().NoUnwind) |
| InferredFlags.NoUnwind = false; |
| |
| if (!InferredFlags.NoUnwind && !InferredFlags.NoRecurse) |
| break; |
| } |
| } |
| |
| if (InferredFlags.NoUnwind || InferredFlags.NoRecurse) { |
| Changed = true; |
| for (auto &V : SCCNodes) { |
| if (InferredFlags.NoRecurse) { |
| LLVM_DEBUG(dbgs() << "ThinLTO FunctionAttrs: Propagated NoRecurse to " |
| << V.name() << "\n"); |
| ++NumThinLinkNoRecurse; |
| } |
| |
| if (InferredFlags.NoUnwind) { |
| LLVM_DEBUG(dbgs() << "ThinLTO FunctionAttrs: Propagated NoUnwind to " |
| << V.name() << "\n"); |
| ++NumThinLinkNoUnwind; |
| } |
| |
| for (const auto &S : V.getSummaryList()) { |
| if (auto *FS = dyn_cast<FunctionSummary>(S.get())) { |
| if (InferredFlags.NoRecurse) |
| FS->setNoRecurse(); |
| |
| if (InferredFlags.NoUnwind) |
| FS->setNoUnwind(); |
| } |
| } |
| } |
| } |
| }; |
| |
| // Call propagation functions on each SCC in the Index |
| for (scc_iterator<ModuleSummaryIndex *> I = scc_begin(&Index); !I.isAtEnd(); |
| ++I) { |
| std::vector<ValueInfo> Nodes(*I); |
| PropagateAttributes(Nodes); |
| } |
| return Changed; |
| } |
| |
| namespace { |
| |
| /// For a given pointer Argument, this retains a list of Arguments of functions |
| /// in the same SCC that the pointer data flows into. We use this to build an |
| /// SCC of the arguments. |
| struct ArgumentGraphNode { |
| Argument *Definition; |
| /// CaptureComponents for this argument, excluding captures via Uses. |
| /// We don't distinguish between other/return captures here. |
| CaptureComponents CC = CaptureComponents::None; |
| SmallVector<ArgumentGraphNode *, 4> Uses; |
| }; |
| |
| class ArgumentGraph { |
  // We store pointers to ArgumentGraphNode objects, so it's important that
  // they not move around upon insert.
| using ArgumentMapTy = std::map<Argument *, ArgumentGraphNode>; |
| |
| ArgumentMapTy ArgumentMap; |
| |
| // There is no root node for the argument graph, in fact: |
| // void f(int *x, int *y) { if (...) f(x, y); } |
| // is an example where the graph is disconnected. The SCCIterator requires a |
| // single entry point, so we maintain a fake ("synthetic") root node that |
| // uses every node. Because the graph is directed and nothing points into |
| // the root, it will not participate in any SCCs (except for its own). |
| ArgumentGraphNode SyntheticRoot; |
| |
| public: |
| ArgumentGraph() { SyntheticRoot.Definition = nullptr; } |
| |
| using iterator = SmallVectorImpl<ArgumentGraphNode *>::iterator; |
| |
| iterator begin() { return SyntheticRoot.Uses.begin(); } |
| iterator end() { return SyntheticRoot.Uses.end(); } |
| ArgumentGraphNode *getEntryNode() { return &SyntheticRoot; } |
| |
| ArgumentGraphNode *operator[](Argument *A) { |
| ArgumentGraphNode &Node = ArgumentMap[A]; |
| Node.Definition = A; |
| SyntheticRoot.Uses.push_back(&Node); |
| return &Node; |
| } |
| }; |
| |
| /// This tracker checks whether callees are in the SCC, and if so it does not |
| /// consider that a capture, instead adding it to the "Uses" list and |
| /// continuing with the analysis. |
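///
/// For example (a sketch): in "void f(ptr %p) { g(%p); }" where g is an
/// exactly-defined function in the same SCC, the use of %p is not treated as
/// a capture; g's corresponding argument is added to "Uses" instead, so the
/// later argument-SCC analysis can resolve it.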
| struct ArgumentUsesTracker : public CaptureTracker { |
| ArgumentUsesTracker(const SCCNodeSet &SCCNodes) : SCCNodes(SCCNodes) {} |
| |
| void tooManyUses() override { CI = CaptureInfo::all(); } |
| |
| Action captured(const Use *U, UseCaptureInfo UseCI) override { |
| if (updateCaptureInfo(U, UseCI.UseCC)) { |
| // Don't bother continuing if we already capture everything. |
| if (capturesAll(CI.getOtherComponents())) |
| return Stop; |
| return Continue; |
| } |
| |
| // For SCC argument tracking, we're not going to analyze other/ret |
| // components separately, so don't follow the return value. |
| return ContinueIgnoringReturn; |
| } |
| |
| bool updateCaptureInfo(const Use *U, CaptureComponents CC) { |
| CallBase *CB = dyn_cast<CallBase>(U->getUser()); |
| if (!CB) { |
| if (isa<ReturnInst>(U->getUser())) |
| CI |= CaptureInfo::retOnly(CC); |
| else |
| // Conservatively assume that the captured value might make its way |
| // into the return value as well. This could be made more precise. |
| CI |= CaptureInfo(CC); |
| return true; |
| } |
| |
| Function *F = CB->getCalledFunction(); |
| if (!F || !F->hasExactDefinition() || !SCCNodes.count(F)) { |
| CI |= CaptureInfo(CC); |
| return true; |
| } |
| |
| assert(!CB->isCallee(U) && "callee operand reported captured?"); |
| const unsigned UseIndex = CB->getDataOperandNo(U); |
| if (UseIndex >= CB->arg_size()) { |
      // Data operand, but not an argument operand -- must be a bundle operand.
| assert(CB->hasOperandBundles() && "Must be!"); |
| |
| // CaptureTracking told us that we're being captured by an operand bundle |
| // use. In this case it does not matter if the callee is within our SCC |
| // or not -- we've been captured in some unknown way, and we have to be |
| // conservative. |
| CI |= CaptureInfo(CC); |
| return true; |
| } |
| |
| if (UseIndex >= F->arg_size()) { |
| assert(F->isVarArg() && "More params than args in non-varargs call"); |
| CI |= CaptureInfo(CC); |
| return true; |
| } |
| |
| // TODO(captures): Could improve precision by remembering maximum |
| // capture components for the argument. |
| Uses.push_back(&*std::next(F->arg_begin(), UseIndex)); |
| return false; |
| } |
| |
| // Does not include potential captures via Uses in the SCC. |
| CaptureInfo CI = CaptureInfo::none(); |
| |
| // Uses within our SCC. |
| SmallVector<Argument *, 4> Uses; |
| |
| const SCCNodeSet &SCCNodes; |
| }; |
| |
/// A struct of argument use: a Use and the offset it accesses. This struct
/// tracks uses of an argument inside the function, including uses reached
/// through GEPs. If a GEP has a non-constant index, the Offset field is
/// nullopt.
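///
/// For example (a sketch): given "%q = getelementptr i8, ptr %arg, i64 4", a
/// use of %q is tracked as an ArgumentUse with Offset = 4; had the GEP index
/// been a runtime value, Offset would be std::nullopt.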
| struct ArgumentUse { |
| Use *U; |
| std::optional<int64_t> Offset; |
| }; |
| |
/// A struct of argument access info. "Unknown" accesses cover cases like
/// unrecognized instructions, instructions that have more than one use of
/// the argument, and volatile memory accesses. "WriteWithSideEffect" covers
/// call instructions that not only write to an argument but also capture it.
| struct ArgumentAccessInfo { |
| enum class AccessType : uint8_t { Write, WriteWithSideEffect, Read, Unknown }; |
| AccessType ArgAccessType; |
| ConstantRangeList AccessRanges; |
| }; |
| |
| /// A struct to wrap the argument use info per block. |
| struct UsesPerBlockInfo { |
| SmallDenseMap<Instruction *, ArgumentAccessInfo, 4> Insts; |
| bool HasWrites = false; |
| bool HasUnknownAccess = false; |
| }; |
| |
| /// A struct to summarize the argument use info in a function. |
| struct ArgumentUsesSummary { |
| bool HasAnyWrite = false; |
| bool HasWriteOutsideEntryBB = false; |
| SmallDenseMap<const BasicBlock *, UsesPerBlockInfo, 16> UsesPerBlock; |
| }; |
| |
| ArgumentAccessInfo getArgumentAccessInfo(const Instruction *I, |
| const ArgumentUse &ArgUse, |
| const DataLayout &DL) { |
| auto GetTypeAccessRange = |
| [&DL](Type *Ty, |
| std::optional<int64_t> Offset) -> std::optional<ConstantRange> { |
| auto TypeSize = DL.getTypeStoreSize(Ty); |
| if (!TypeSize.isScalable() && Offset) { |
| int64_t Size = TypeSize.getFixedValue(); |
| return ConstantRange(APInt(64, *Offset, true), |
| APInt(64, *Offset + Size, true)); |
| } |
| return std::nullopt; |
| }; |
| auto GetConstantIntRange = |
| [](Value *Length, |
| std::optional<int64_t> Offset) -> std::optional<ConstantRange> { |
| auto *ConstantLength = dyn_cast<ConstantInt>(Length); |
| if (ConstantLength && Offset && |
| ConstantLength->getValue().isStrictlyPositive()) { |
| return ConstantRange( |
| APInt(64, *Offset, true), |
| APInt(64, *Offset + ConstantLength->getSExtValue(), true)); |
| } |
| return std::nullopt; |
| }; |
| if (auto *SI = dyn_cast<StoreInst>(I)) { |
| if (SI->isSimple() && &SI->getOperandUse(1) == ArgUse.U) { |
      // Get the fixed type size of "SI". Since write access ranges are
      // unioned, if "SI" doesn't have a fixed type size we just leave the
      // access range empty.
| ConstantRangeList AccessRanges; |
| if (auto TypeAccessRange = |
| GetTypeAccessRange(SI->getAccessType(), ArgUse.Offset)) |
| AccessRanges.insert(*TypeAccessRange); |
| return {ArgumentAccessInfo::AccessType::Write, std::move(AccessRanges)}; |
| } |
| } else if (auto *LI = dyn_cast<LoadInst>(I)) { |
| if (LI->isSimple()) { |
| assert(&LI->getOperandUse(0) == ArgUse.U); |
      // Get the fixed type size of "LI". Unlike a write, if "LI" doesn't
      // have a fixed type size we conservatively treat it as a clobber with
      // an empty access range.
| if (auto TypeAccessRange = |
| GetTypeAccessRange(LI->getAccessType(), ArgUse.Offset)) |
| return {ArgumentAccessInfo::AccessType::Read, {*TypeAccessRange}}; |
| } |
| } else if (auto *MemSet = dyn_cast<MemSetInst>(I)) { |
| if (!MemSet->isVolatile()) { |
| ConstantRangeList AccessRanges; |
| if (auto AccessRange = |
| GetConstantIntRange(MemSet->getLength(), ArgUse.Offset)) |
| AccessRanges.insert(*AccessRange); |
| return {ArgumentAccessInfo::AccessType::Write, AccessRanges}; |
| } |
| } else if (auto *MTI = dyn_cast<MemTransferInst>(I)) { |
| if (!MTI->isVolatile()) { |
| if (&MTI->getOperandUse(0) == ArgUse.U) { |
| ConstantRangeList AccessRanges; |
| if (auto AccessRange = |
| GetConstantIntRange(MTI->getLength(), ArgUse.Offset)) |
| AccessRanges.insert(*AccessRange); |
| return {ArgumentAccessInfo::AccessType::Write, AccessRanges}; |
| } else if (&MTI->getOperandUse(1) == ArgUse.U) { |
| if (auto AccessRange = |
| GetConstantIntRange(MTI->getLength(), ArgUse.Offset)) |
| return {ArgumentAccessInfo::AccessType::Read, {*AccessRange}}; |
| } |
| } |
| } else if (auto *CB = dyn_cast<CallBase>(I)) { |
| if (CB->isArgOperand(ArgUse.U) && |
| !CB->isByValArgument(CB->getArgOperandNo(ArgUse.U))) { |
| unsigned ArgNo = CB->getArgOperandNo(ArgUse.U); |
| bool IsInitialize = CB->paramHasAttr(ArgNo, Attribute::Initializes); |
| if (IsInitialize && ArgUse.Offset) { |
| // Argument is a Write when parameter is writeonly/readnone |
| // and nocapture. Otherwise, it's a WriteWithSideEffect. |
| auto Access = CB->onlyWritesMemory(ArgNo) && CB->doesNotCapture(ArgNo) |
| ? ArgumentAccessInfo::AccessType::Write |
| : ArgumentAccessInfo::AccessType::WriteWithSideEffect; |
| ConstantRangeList AccessRanges; |
| Attribute Attr = CB->getParamAttr(ArgNo, Attribute::Initializes); |
| ConstantRangeList CBCRL = Attr.getValueAsConstantRangeList(); |
| for (ConstantRange &CR : CBCRL) |
| AccessRanges.insert(ConstantRange(CR.getLower() + *ArgUse.Offset, |
| CR.getUpper() + *ArgUse.Offset)); |
| return {Access, AccessRanges}; |
| } |
| } |
| } |
| // Other unrecognized instructions are considered as unknown. |
| return {ArgumentAccessInfo::AccessType::Unknown, {}}; |
| } |
| |
| // Collect the uses of argument "A" in "F". |
| ArgumentUsesSummary collectArgumentUsesPerBlock(Argument &A, Function &F) { |
| auto &DL = F.getParent()->getDataLayout(); |
| unsigned PointerSize = |
| DL.getIndexSizeInBits(A.getType()->getPointerAddressSpace()); |
| ArgumentUsesSummary Result; |
| |
| BasicBlock &EntryBB = F.getEntryBlock(); |
| SmallVector<ArgumentUse, 4> Worklist; |
| for (Use &U : A.uses()) |
| Worklist.push_back({&U, 0}); |
| |
| // Update "UsesPerBlock" with the block of "I" as key and "Info" as value. |
| // Return true if the block of "I" has write accesses after updating. |
| auto UpdateUseInfo = [&Result](Instruction *I, ArgumentAccessInfo Info) { |
| auto *BB = I->getParent(); |
| auto &BBInfo = Result.UsesPerBlock[BB]; |
| bool AlreadyVisitedInst = BBInfo.Insts.contains(I); |
| auto &IInfo = BBInfo.Insts[I]; |
| |
| // Instructions that have more than one use of the argument are considered |
| // as clobbers. |
| if (AlreadyVisitedInst) { |
| IInfo = {ArgumentAccessInfo::AccessType::Unknown, {}}; |
| BBInfo.HasUnknownAccess = true; |
| return false; |
| } |
| |
| IInfo = std::move(Info); |
| BBInfo.HasUnknownAccess |= |
| IInfo.ArgAccessType == ArgumentAccessInfo::AccessType::Unknown; |
| bool InfoHasWrites = |
| (IInfo.ArgAccessType == ArgumentAccessInfo::AccessType::Write || |
| IInfo.ArgAccessType == |
| ArgumentAccessInfo::AccessType::WriteWithSideEffect) && |
| !IInfo.AccessRanges.empty(); |
| BBInfo.HasWrites |= InfoHasWrites; |
| return InfoHasWrites; |
| }; |
| |
| // No need for a visited set because we don't look through phis, so there are |
| // no cycles. |
| while (!Worklist.empty()) { |
| ArgumentUse ArgUse = Worklist.pop_back_val(); |
| User *U = ArgUse.U->getUser(); |
| // Add GEP uses to worklist. |
| // If the GEP is not a constant GEP, set the ArgumentUse::Offset to nullopt. |
| if (auto *GEP = dyn_cast<GEPOperator>(U)) { |
| std::optional<int64_t> NewOffset = std::nullopt; |
| if (ArgUse.Offset) { |
| APInt Offset(PointerSize, 0); |
| if (GEP->accumulateConstantOffset(DL, Offset)) |
| NewOffset = *ArgUse.Offset + Offset.getSExtValue(); |
| } |
| for (Use &U : GEP->uses()) |
| Worklist.push_back({&U, NewOffset}); |
| continue; |
| } |
| |
| auto *I = cast<Instruction>(U); |
| bool HasWrite = UpdateUseInfo(I, getArgumentAccessInfo(I, ArgUse, DL)); |
| |
| Result.HasAnyWrite |= HasWrite; |
| |
| if (HasWrite && I->getParent() != &EntryBB) |
| Result.HasWriteOutsideEntryBB = true; |
| } |
| return Result; |
| } |
| |
| } // end anonymous namespace |
| |
| namespace llvm { |
| |
| template <> struct GraphTraits<ArgumentGraphNode *> { |
| using NodeRef = ArgumentGraphNode *; |
| using ChildIteratorType = SmallVectorImpl<ArgumentGraphNode *>::iterator; |
| |
| static NodeRef getEntryNode(NodeRef A) { return A; } |
| static ChildIteratorType child_begin(NodeRef N) { return N->Uses.begin(); } |
| static ChildIteratorType child_end(NodeRef N) { return N->Uses.end(); } |
| }; |
| |
| template <> |
| struct GraphTraits<ArgumentGraph *> : public GraphTraits<ArgumentGraphNode *> { |
| static NodeRef getEntryNode(ArgumentGraph *AG) { return AG->getEntryNode(); } |
| |
| static ChildIteratorType nodes_begin(ArgumentGraph *AG) { |
| return AG->begin(); |
| } |
| |
| static ChildIteratorType nodes_end(ArgumentGraph *AG) { return AG->end(); } |
| }; |
| |
| } // end namespace llvm |
| |
/// Returns Attribute::None, Attribute::ReadOnly, Attribute::WriteOnly or
/// Attribute::ReadNone.
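///
/// For example (a sketch): an argument only used by "%v = load i32, ptr %A"
/// yields ReadOnly; one only stored through yields WriteOnly; one that is
/// both loaded and stored through yields Attribute::None.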
| static Attribute::AttrKind |
| determinePointerAccessAttrs(Argument *A, |
| const SmallPtrSet<Argument *, 8> &SCCNodes) { |
| SmallVector<Use *, 32> Worklist; |
| SmallPtrSet<Use *, 32> Visited; |
| |
| // inalloca arguments are always clobbered by the call. |
| if (A->hasInAllocaAttr() || A->hasPreallocatedAttr()) |
| return Attribute::None; |
| |
| bool IsRead = false; |
| bool IsWrite = false; |
| |
| for (Use &U : A->uses()) { |
| Visited.insert(&U); |
| Worklist.push_back(&U); |
| } |
| |
| while (!Worklist.empty()) { |
| if (IsWrite && IsRead) |
      // No point in searching further.
| return Attribute::None; |
| |
| Use *U = Worklist.pop_back_val(); |
| Instruction *I = cast<Instruction>(U->getUser()); |
| |
| switch (I->getOpcode()) { |
| case Instruction::BitCast: |
| case Instruction::GetElementPtr: |
| case Instruction::PHI: |
| case Instruction::Select: |
| case Instruction::AddrSpaceCast: |
| // The original value is not read/written via this if the new value isn't. |
| for (Use &UU : I->uses()) |
| if (Visited.insert(&UU).second) |
| Worklist.push_back(&UU); |
| break; |
| |
| case Instruction::Call: |
| case Instruction::Invoke: { |
| CallBase &CB = cast<CallBase>(*I); |
| if (CB.isCallee(U)) { |
| IsRead = true; |
| // Note that indirect calls do not capture, see comment in |
| // CaptureTracking for context |
| continue; |
| } |
| |
| // Given we've explicitly handled the callee operand above, what's left |
| // must be a data operand (e.g. argument or operand bundle) |
| const unsigned UseIndex = CB.getDataOperandNo(U); |
| |
      // Some intrinsics (for instance ptrmask) do not capture their results,
      // but return results that alias their pointer argument, and thus should
      // be handled like GEP or addrspacecast above.
| if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing( |
| &CB, /*MustPreserveNullness=*/false)) { |
| for (Use &UU : CB.uses()) |
| if (Visited.insert(&UU).second) |
| Worklist.push_back(&UU); |
| } else if (!CB.doesNotCapture(UseIndex)) { |
| if (!CB.onlyReadsMemory()) |
| // If the callee can save a copy into other memory, then simply |
| // scanning uses of the call is insufficient. We have no way |
| // of tracking copies of the pointer through memory to see |
| // if a reloaded copy is written to, thus we must give up. |
| return Attribute::None; |
| // Push users for processing once we finish this one |
| if (!I->getType()->isVoidTy()) |
| for (Use &UU : I->uses()) |
| if (Visited.insert(&UU).second) |
| Worklist.push_back(&UU); |
| } |
| |
| ModRefInfo ArgMR = CB.getMemoryEffects().getModRef(IRMemLocation::ArgMem); |
| if (isNoModRef(ArgMR)) |
| continue; |
| |
| if (Function *F = CB.getCalledFunction()) |
| if (CB.isArgOperand(U) && UseIndex < F->arg_size() && |
| SCCNodes.count(F->getArg(UseIndex))) |
| // This is an argument which is part of the speculative SCC. Note |
| // that only operands corresponding to formal arguments of the callee |
| // can participate in the speculation. |
| break; |
| |
| // The accessors used on call site here do the right thing for calls and |
| // invokes with operand bundles. |
| if (CB.doesNotAccessMemory(UseIndex)) { |
| /* nop */ |
| } else if (!isModSet(ArgMR) || CB.onlyReadsMemory(UseIndex)) { |
| IsRead = true; |
| } else if (!isRefSet(ArgMR) || |
| CB.dataOperandHasImpliedAttr(UseIndex, Attribute::WriteOnly)) { |
| IsWrite = true; |
| } else { |
| return Attribute::None; |
| } |
| break; |
| } |
| |
| case Instruction::Load: |
| // A volatile load has side effects beyond what readonly can be relied |
| // upon. |
| if (cast<LoadInst>(I)->isVolatile()) |
| return Attribute::None; |
| |
| IsRead = true; |
| break; |
| |
| case Instruction::Store: |
| if (cast<StoreInst>(I)->getValueOperand() == *U) |
| // untrackable capture |
| return Attribute::None; |
| |
| // A volatile store has side effects beyond what writeonly can be relied |
| // upon. |
| if (cast<StoreInst>(I)->isVolatile()) |
| return Attribute::None; |
| |
| IsWrite = true; |
| break; |
| |
| case Instruction::ICmp: |
| case Instruction::Ret: |
| break; |
| |
| default: |
| return Attribute::None; |
| } |
| } |
| |
| if (IsWrite && IsRead) |
| return Attribute::None; |
| else if (IsRead) |
| return Attribute::ReadOnly; |
| else if (IsWrite) |
| return Attribute::WriteOnly; |
| else |
| return Attribute::ReadNone; |
| } |
| |
| /// Deduce returned attributes for the SCC. |
| static void addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes, |
| SmallSet<Function *, 8> &Changed) { |
| // Check each function in turn, determining if an argument is always returned. |
| for (Function *F : SCCNodes) { |
| // We can infer and propagate function attributes only when we know that the |
| // definition we'll get at link time is *exactly* the definition we see now. |
| // For more details, see GlobalValue::mayBeDerefined. |
| if (!F->hasExactDefinition()) |
| continue; |
| |
| if (F->getReturnType()->isVoidTy()) |
| continue; |
| |
| // There is nothing to do if an argument is already marked as 'returned'. |
| if (F->getAttributes().hasAttrSomewhere(Attribute::Returned)) |
| continue; |
| |
| auto FindRetArg = [&]() -> Argument * { |
| Argument *RetArg = nullptr; |
| for (BasicBlock &BB : *F) |
| if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) { |
| // Note that stripPointerCasts should look through functions with |
| // returned arguments. |
| auto *RetVal = |
| dyn_cast<Argument>(Ret->getReturnValue()->stripPointerCasts()); |
| if (!RetVal || RetVal->getType() != F->getReturnType()) |
| return nullptr; |
| |
| if (!RetArg) |
| RetArg = RetVal; |
| else if (RetArg != RetVal) |
| return nullptr; |
| } |
| |
| return RetArg; |
| }; |
| |
| if (Argument *RetArg = FindRetArg()) { |
| RetArg->addAttr(Attribute::Returned); |
| ++NumReturned; |
| Changed.insert(F); |
| } |
| } |
| } |
| |
| /// If a callsite has arguments that are also arguments to the parent function, |
| /// try to propagate attributes from the callsite's arguments to the parent's |
| /// arguments. This may be important because inlining can cause information loss |
| /// when attribute knowledge disappears with the inlined call. |
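///
/// For example (a sketch): given
///   void callee(ptr nonnull %p);
///   void caller(ptr %q) { callee(%q); ... }
/// if the call is guaranteed to execute on entry to caller, %q must be
/// nonnull throughout caller, so nonnull can be added to caller's %q.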
| static bool addArgumentAttrsFromCallsites(Function &F) { |
| if (!EnableNonnullArgPropagation) |
| return false; |
| |
| bool Changed = false; |
| |
| // For an argument attribute to transfer from a callsite to the parent, the |
| // call must be guaranteed to execute every time the parent is called. |
| // Conservatively, just check for calls in the entry block that are guaranteed |
| // to execute. |
| // TODO: This could be enhanced by testing if the callsite post-dominates the |
| // entry block or by doing simple forward walks or backward walks to the |
| // callsite. |
| BasicBlock &Entry = F.getEntryBlock(); |
| for (Instruction &I : Entry) { |
| if (auto *CB = dyn_cast<CallBase>(&I)) { |
| if (auto *CalledFunc = CB->getCalledFunction()) { |
| for (auto &CSArg : CalledFunc->args()) { |
| if (!CSArg.hasNonNullAttr(/* AllowUndefOrPoison */ false)) |
| continue; |
| |
| // If the non-null callsite argument operand is an argument to 'F' |
| // (the caller) and the call is guaranteed to execute, then the value |
| // must be non-null throughout 'F'. |
| auto *FArg = dyn_cast<Argument>(CB->getArgOperand(CSArg.getArgNo())); |
| if (FArg && !FArg->hasNonNullAttr()) { |
| FArg->addAttr(Attribute::NonNull); |
| Changed = true; |
| } |
| } |
| } |
| } |
| if (!isGuaranteedToTransferExecutionToSuccessor(&I)) |
| break; |
| } |
| |
| return Changed; |
| } |
| |
| static bool addAccessAttr(Argument *A, Attribute::AttrKind R) { |
| assert((R == Attribute::ReadOnly || R == Attribute::ReadNone || |
| R == Attribute::WriteOnly) |
| && "Must be an access attribute."); |
| assert(A && "Argument must not be null."); |
| |
| // If the argument already has the attribute, nothing needs to be done. |
| if (A->hasAttribute(R)) |
| return false; |
| |
| // Otherwise, remove potentially conflicting attribute, add the new one, |
| // and update statistics. |
| A->removeAttr(Attribute::WriteOnly); |
| A->removeAttr(Attribute::ReadOnly); |
| A->removeAttr(Attribute::ReadNone); |
| // Remove conflicting writable attribute. |
| if (R == Attribute::ReadNone || R == Attribute::ReadOnly) |
| A->removeAttr(Attribute::Writable); |
| A->addAttr(R); |
| if (R == Attribute::ReadOnly) |
| ++NumReadOnlyArg; |
| else if (R == Attribute::WriteOnly) |
| ++NumWriteOnlyArg; |
| else |
| ++NumReadNoneArg; |
| return true; |
| } |
| |
| static bool inferInitializes(Argument &A, Function &F) { |
| auto ArgumentUses = collectArgumentUsesPerBlock(A, F); |
| // No write anywhere in the function, bail. |
| if (!ArgumentUses.HasAnyWrite) |
| return false; |
| |
| auto &UsesPerBlock = ArgumentUses.UsesPerBlock; |
| BasicBlock &EntryBB = F.getEntryBlock(); |
| // A map to store the argument ranges initialized by a BasicBlock (including |
| // its successors). |
| DenseMap<const BasicBlock *, ConstantRangeList> Initialized; |
  // Visit the successors of block "BB" and the instructions in "BB" (in
  // post-order) to get the argument ranges initialized by "BB" (including
  // its successors). The result will be cached in "Initialized".
| auto VisitBlock = [&](const BasicBlock *BB) -> ConstantRangeList { |
| auto UPB = UsesPerBlock.find(BB); |
| ConstantRangeList CRL; |
| |
| // Start with intersection of successors. |
| // If this block has any clobbering use, we're going to clear out the |
| // ranges at some point in this block anyway, so don't bother looking at |
| // successors. |
| if (UPB == UsesPerBlock.end() || !UPB->second.HasUnknownAccess) { |
| bool HasAddedSuccessor = false; |
| for (auto *Succ : successors(BB)) { |
| if (auto SuccI = Initialized.find(Succ); SuccI != Initialized.end()) { |
| if (HasAddedSuccessor) { |
| CRL = CRL.intersectWith(SuccI->second); |
| } else { |
| CRL = SuccI->second; |
| HasAddedSuccessor = true; |
| } |
| } else { |
| CRL = ConstantRangeList(); |
| break; |
| } |
| } |
| } |
| |
| if (UPB != UsesPerBlock.end()) { |
| // Sort uses in this block by instruction order. |
| SmallVector<std::pair<Instruction *, ArgumentAccessInfo>, 2> Insts; |
| append_range(Insts, UPB->second.Insts); |
| sort(Insts, [](std::pair<Instruction *, ArgumentAccessInfo> &LHS, |
| std::pair<Instruction *, ArgumentAccessInfo> &RHS) { |
| return LHS.first->comesBefore(RHS.first); |
| }); |
| |
| // From the end of the block to the beginning of the block, set |
| // initializes ranges. |
| for (auto &[_, Info] : reverse(Insts)) { |
| if (Info.ArgAccessType == ArgumentAccessInfo::AccessType::Unknown || |
| Info.ArgAccessType == |
| ArgumentAccessInfo::AccessType::WriteWithSideEffect) |
| CRL = ConstantRangeList(); |
| if (!Info.AccessRanges.empty()) { |
| if (Info.ArgAccessType == ArgumentAccessInfo::AccessType::Write || |
| Info.ArgAccessType == |
| ArgumentAccessInfo::AccessType::WriteWithSideEffect) { |
| CRL = CRL.unionWith(Info.AccessRanges); |
| } else { |
| assert(Info.ArgAccessType == ArgumentAccessInfo::AccessType::Read); |
| for (const auto &ReadRange : Info.AccessRanges) |
| CRL.subtract(ReadRange); |
| } |
| } |
| } |
| } |
| return CRL; |
| }; |
| |
| ConstantRangeList EntryCRL; |
| // If all write instructions are in the EntryBB, or if the EntryBB has |
| // a clobbering use, we only need to look at EntryBB. |
| bool OnlyScanEntryBlock = !ArgumentUses.HasWriteOutsideEntryBB; |
| if (!OnlyScanEntryBlock) |
| if (auto EntryUPB = UsesPerBlock.find(&EntryBB); |
| EntryUPB != UsesPerBlock.end()) |
| OnlyScanEntryBlock = EntryUPB->second.HasUnknownAccess; |
| if (OnlyScanEntryBlock) { |
| EntryCRL = VisitBlock(&EntryBB); |
| if (EntryCRL.empty()) |
| return false; |
| } else { |
| // Now we have to go through CFG to get the initialized argument ranges |
| // across blocks. With dominance and post-dominance, the initialized ranges |
| // by a block include both accesses inside this block and accesses in its |
| // (transitive) successors. So visit successors before predecessors with a |
    // post-order walk of the blocks and memoize the results in "Initialized".
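    // For example (a sketch): if both successors of a block are known to
    // initialize [0, 4) and the block itself writes [4, 8), then the block
    // (together with its successors) initializes [0, 8).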
| for (const BasicBlock *BB : post_order(&F)) { |
| ConstantRangeList CRL = VisitBlock(BB); |
| if (!CRL.empty()) |
| Initialized[BB] = CRL; |
| } |
| |
| auto EntryCRLI = Initialized.find(&EntryBB); |
| if (EntryCRLI == Initialized.end()) |
| return false; |
| |
| EntryCRL = EntryCRLI->second; |
| } |
| |
| assert(!EntryCRL.empty() && |
| "should have bailed already if EntryCRL is empty"); |
| |
| if (A.hasAttribute(Attribute::Initializes)) { |
| ConstantRangeList PreviousCRL = |
| A.getAttribute(Attribute::Initializes).getValueAsConstantRangeList(); |
| if (PreviousCRL == EntryCRL) |
| return false; |
| EntryCRL = EntryCRL.unionWith(PreviousCRL); |
| } |
| |
| A.addAttr(Attribute::get(A.getContext(), Attribute::Initializes, |
| EntryCRL.rangesRef())); |
| |
| return true; |
| } |
| |
| /// Deduce nocapture attributes for the SCC. |
| static void addArgumentAttrs(const SCCNodeSet &SCCNodes, |
| SmallSet<Function *, 8> &Changed, |
| bool SkipInitializes) { |
| ArgumentGraph AG; |
| |
| auto DetermineAccessAttrsForSingleton = [](Argument *A) { |
| SmallPtrSet<Argument *, 8> Self; |
| Self.insert(A); |
| Attribute::AttrKind R = determinePointerAccessAttrs(A, Self); |
| if (R != Attribute::None) |
| return addAccessAttr(A, R); |
| return false; |
| }; |
| |
| // Check each function in turn, determining which pointer arguments are not |
| // captured. |
| for (Function *F : SCCNodes) { |
| // We can infer and propagate function attributes only when we know that the |
| // definition we'll get at link time is *exactly* the definition we see now. |
| // For more details, see GlobalValue::mayBeDerefined. |
| if (!F->hasExactDefinition()) |
| continue; |
| |
| if (addArgumentAttrsFromCallsites(*F)) |
| Changed.insert(F); |
| |
    // Functions that are readonly (or readnone), nounwind, willreturn and
    // don't return a value can't capture arguments. Don't analyze them.
| if (F->onlyReadsMemory() && F->doesNotThrow() && F->willReturn() && |
| F->getReturnType()->isVoidTy()) { |
| for (Argument &A : F->args()) { |
| if (A.getType()->isPointerTy() && !A.hasNoCaptureAttr()) { |
| A.addAttr(Attribute::getWithCaptureInfo(A.getContext(), |
| CaptureInfo::none())); |
| ++NumCapturesNone; |
| Changed.insert(F); |
| } |
| } |
| continue; |
| } |
| |
| for (Argument &A : F->args()) { |
| if (!A.getType()->isPointerTy()) |
| continue; |
| bool HasNonLocalUses = false; |
| CaptureInfo OrigCI = A.getAttributes().getCaptureInfo(); |
| if (!capturesNothing(OrigCI)) { |
| ArgumentUsesTracker Tracker(SCCNodes); |
| PointerMayBeCaptured(&A, &Tracker); |
| CaptureInfo NewCI = Tracker.CI & OrigCI; |
| if (NewCI != OrigCI) { |
| if (Tracker.Uses.empty()) { |
| // If the information is complete, add the attribute now. |
| A.addAttr(Attribute::getWithCaptureInfo(A.getContext(), NewCI)); |
| addCapturesStat(NewCI); |
| Changed.insert(F); |
| } else { |
| // If it's not trivially captured and not trivially not captured, |
| // then it must be calling into another function in our SCC. Save |
| // its particulars for Argument-SCC analysis later. |
| ArgumentGraphNode *Node = AG[&A]; |
| Node->CC = CaptureComponents(NewCI); |
| for (Argument *Use : Tracker.Uses) { |
| Node->Uses.push_back(AG[Use]); |
| if (Use != &A) |
| HasNonLocalUses = true; |
| } |
| } |
| } |
| // Otherwise, it's captured. Don't bother doing SCC analysis on it. |
| } |
| if (!HasNonLocalUses && !A.onlyReadsMemory()) { |
| // Can we determine that it's readonly/readnone/writeonly without doing |
| // an SCC? Note that we don't allow any calls at all here, or else our |
| // result will be dependent on the iteration order through the |
| // functions in the SCC. |
| if (DetermineAccessAttrsForSingleton(&A)) |
| Changed.insert(F); |
| } |
| if (!SkipInitializes && !A.onlyReadsMemory()) { |
| if (inferInitializes(A, *F)) |
| Changed.insert(F); |
| } |
| } |
| } |
| |
| // The graph we've collected is partial because we stopped scanning for |
| // argument uses once we solved the argument trivially. These partial nodes |
| // show up as ArgumentGraphNode objects with an empty Uses list, and for |
| // these nodes the final decision about whether they capture has already been |
| // made. If the definition doesn't have a 'nocapture' attribute by now, it |
| // captures. |
| |
| for (scc_iterator<ArgumentGraph *> I = scc_begin(&AG); !I.isAtEnd(); ++I) { |
| const std::vector<ArgumentGraphNode *> &ArgumentSCC = *I; |
| if (ArgumentSCC.size() == 1) { |
| if (!ArgumentSCC[0]->Definition) |
| continue; // synthetic root node |
| |
      // e.g. "void f(int* x) { if (...) f(x); }"
| if (ArgumentSCC[0]->Uses.size() == 1 && |
| ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) { |
| Argument *A = ArgumentSCC[0]->Definition; |
| CaptureInfo OrigCI = A->getAttributes().getCaptureInfo(); |
| CaptureInfo NewCI = CaptureInfo(ArgumentSCC[0]->CC) & OrigCI; |
| if (NewCI != OrigCI) { |
| A->addAttr(Attribute::getWithCaptureInfo(A->getContext(), NewCI)); |
| addCapturesStat(NewCI); |
| Changed.insert(A->getParent()); |
| } |
| |
      // Infer the access attributes given the new capture information.
| if (DetermineAccessAttrsForSingleton(A)) |
| Changed.insert(A->getParent()); |
| } |
| continue; |
| } |
| |
| SmallPtrSet<Argument *, 8> ArgumentSCCNodes; |
| // Fill ArgumentSCCNodes with the elements of the ArgumentSCC. Used for |
| // quickly looking up whether a given Argument is in this ArgumentSCC. |
| for (ArgumentGraphNode *I : ArgumentSCC) { |
| ArgumentSCCNodes.insert(I->Definition); |
| } |
| |
| // At the SCC level, only track merged CaptureComponents. We're not |
| // currently prepared to handle propagation of return-only captures across |
| // the SCC. |
| CaptureComponents CC = CaptureComponents::None; |
| for (ArgumentGraphNode *N : ArgumentSCC) { |
| for (ArgumentGraphNode *Use : N->Uses) { |
| Argument *A = Use->Definition; |
| if (ArgumentSCCNodes.count(A)) |
| CC |= Use->CC; |
| else |
| CC |= CaptureComponents(A->getAttributes().getCaptureInfo()); |
        if (capturesAll(CC))
          break;
| } |
| if (capturesAll(CC)) |
| break; |
| } |
| |
| if (!capturesAll(CC)) { |
| for (ArgumentGraphNode *N : ArgumentSCC) { |
| Argument *A = N->Definition; |
| CaptureInfo OrigCI = A->getAttributes().getCaptureInfo(); |
| CaptureInfo NewCI = CaptureInfo(N->CC | CC) & OrigCI; |
| if (NewCI != OrigCI) { |
| A->addAttr(Attribute::getWithCaptureInfo(A->getContext(), NewCI)); |
| addCapturesStat(NewCI); |
| Changed.insert(A->getParent()); |
| } |
| } |
| } |
| |
| // TODO(captures): Ignore address-only captures. |
| if (capturesAnything(CC)) { |
| // As the pointer may be captured, determine the pointer attributes |
| // looking at each argument individually. |
| for (ArgumentGraphNode *N : ArgumentSCC) { |
| if (DetermineAccessAttrsForSingleton(N->Definition)) |
| Changed.insert(N->Definition->getParent()); |
| } |
| continue; |
| } |
| |
| // We also want to compute readonly/readnone/writeonly. With a small number |
| // of false negatives, we can assume that any pointer which is captured |
| // isn't going to be provably readonly or readnone, since by definition |
| // we can't analyze all uses of a captured pointer. |
| // |
| // The false negatives happen when the pointer is captured by a function |
| // that promises readonly/readnone behaviour on the pointer, then the |
| // pointer's lifetime ends before anything that writes to arbitrary memory. |
| // Also, a readonly/readnone pointer may be returned, but returning a |
| // pointer is capturing it. |
| |
| auto meetAccessAttr = [](Attribute::AttrKind A, Attribute::AttrKind B) { |
| if (A == B) |
| return A; |
| if (A == Attribute::ReadNone) |
| return B; |
| if (B == Attribute::ReadNone) |
| return A; |
| return Attribute::None; |
| }; |
| |
| Attribute::AttrKind AccessAttr = Attribute::ReadNone; |
| for (ArgumentGraphNode *N : ArgumentSCC) { |
| Argument *A = N->Definition; |
| Attribute::AttrKind K = determinePointerAccessAttrs(A, ArgumentSCCNodes); |
| AccessAttr = meetAccessAttr(AccessAttr, K); |
| if (AccessAttr == Attribute::None) |
| break; |
| } |
| |
| if (AccessAttr != Attribute::None) { |
| for (ArgumentGraphNode *N : ArgumentSCC) { |
| Argument *A = N->Definition; |
| if (addAccessAttr(A, AccessAttr)) |
| Changed.insert(A->getParent()); |
| } |
| } |
| } |
| } |
| |
| /// Tests whether a function is "malloc-like". |
| /// |
| /// A function is "malloc-like" if it returns either null or a pointer that |
| /// doesn't alias any other pointer visible to the caller. |
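///
/// For example (a sketch): a function that on every path returns either null
/// or the result of a call with a noalias return (e.g. malloc) is
/// malloc-like; a function that may return one of its own arguments is not.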
| static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) { |
| SmallSetVector<Value *, 8> FlowsToReturn; |
| for (BasicBlock &BB : *F) |
| if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) |
| FlowsToReturn.insert(Ret->getReturnValue()); |
| |
| for (unsigned i = 0; i != FlowsToReturn.size(); ++i) { |
| Value *RetVal = FlowsToReturn[i]; |
| |
| if (Constant *C = dyn_cast<Constant>(RetVal)) { |
| if (!C->isNullValue() && !isa<UndefValue>(C)) |
| return false; |
| |
| continue; |
| } |
| |
| if (isa<Argument>(RetVal)) |
| return false; |
| |
| if (Instruction *RVI = dyn_cast<Instruction>(RetVal)) |
| switch (RVI->getOpcode()) { |
| // Extend the analysis by looking upwards. |
| case Instruction::BitCast: |
| case Instruction::GetElementPtr: |
| case Instruction::AddrSpaceCast: |
| FlowsToReturn.insert(RVI->getOperand(0)); |
| continue; |
| case Instruction::Select: { |
| SelectInst *SI = cast<SelectInst>(RVI); |
| FlowsToReturn.insert(SI->getTrueValue()); |
| FlowsToReturn.insert(SI->getFalseValue()); |
| continue; |
| } |
| case Instruction::PHI: { |
| PHINode *PN = cast<PHINode>(RVI); |
| for (Value *IncValue : PN->incoming_values()) |
| FlowsToReturn.insert(IncValue); |
| continue; |
| } |
| |
| // Check whether the pointer came from an allocation. |
| case Instruction::Alloca: |
| break; |
| case Instruction::Call: |
| case Instruction::Invoke: { |
| CallBase &CB = cast<CallBase>(*RVI); |
| if (CB.hasRetAttr(Attribute::NoAlias)) |
| break; |
| if (CB.getCalledFunction() && SCCNodes.count(CB.getCalledFunction())) |
| break; |
| [[fallthrough]]; |
| } |
| default: |
| return false; // Did not come from an allocation. |
| } |
| |
| if (PointerMayBeCaptured(RetVal, /*ReturnCaptures=*/false)) |
| return false; |
| } |
| |
| return true; |
| } |
| |
| /// Deduce noalias attributes for the SCC. |
| static void addNoAliasAttrs(const SCCNodeSet &SCCNodes, |
| SmallSet<Function *, 8> &Changed) { |
| // Check each function in turn, determining which functions return noalias |
| // pointers. |
| for (Function *F : SCCNodes) { |
| // Already noalias. |
| if (F->returnDoesNotAlias()) |
| continue; |
| |
| // We can infer and propagate function attributes only when we know that the |
| // definition we'll get at link time is *exactly* the definition we see now. |
| // For more details, see GlobalValue::mayBeDerefined. |
| if (!F->hasExactDefinition()) |
| return; |
| |
| // We annotate noalias return values, which are only applicable to |
| // pointer types. |
| if (!F->getReturnType()->isPointerTy()) |
| continue; |
| |
| if (!isFunctionMallocLike(F, SCCNodes)) |
| return; |
| } |
| |
| for (Function *F : SCCNodes) { |
| if (F->returnDoesNotAlias() || |
| !F->getReturnType()->isPointerTy()) |
| continue; |
| |
| F->setReturnDoesNotAlias(); |
| ++NumNoAlias; |
| Changed.insert(F); |
| } |
| } |
| |
| /// Tests whether this function is known to not return null. |
| /// |
| /// Requires that the function returns a pointer. |
| /// |
| /// Returns true if it believes the function will not return a null, and sets |
| /// \p Speculative based on whether the returned conclusion is a speculative |
| /// conclusion due to SCC calls. |
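///
/// For example (a sketch): in "ptr @f() { return c ? @f() : @g }" the
/// recursive call is optimistically treated as nonnull and \p Speculative is
/// set; the caller must then confirm the conclusion once the whole SCC has
/// been analyzed.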
| static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes, |
| bool &Speculative) { |
| assert(F->getReturnType()->isPointerTy() && |
| "nonnull only meaningful on pointer types"); |
| Speculative = false; |
| |
| SmallSetVector<Value *, 8> FlowsToReturn; |
| for (BasicBlock &BB : *F) |
| if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) |
| FlowsToReturn.insert(Ret->getReturnValue()); |
| |
| auto &DL = F->getDataLayout(); |
| |
| for (unsigned i = 0; i != FlowsToReturn.size(); ++i) { |
| Value *RetVal = FlowsToReturn[i]; |
| |
| // If this value is locally known to be non-null, we're good |
| if (isKnownNonZero(RetVal, DL)) |
| continue; |
| |
| // Otherwise, we need to look upwards since we can't make any local |
| // conclusions. |
| Instruction *RVI = dyn_cast<Instruction>(RetVal); |
| if (!RVI) |
| return false; |
| switch (RVI->getOpcode()) { |
| // Extend the analysis by looking upwards. |
| case Instruction::BitCast: |
| case Instruction::AddrSpaceCast: |
| FlowsToReturn.insert(RVI->getOperand(0)); |
| continue; |
| case Instruction::GetElementPtr: |
| if (cast<GEPOperator>(RVI)->isInBounds()) { |
| FlowsToReturn.insert(RVI->getOperand(0)); |
| continue; |
| } |
| return false; |
| case Instruction::Select: { |
| SelectInst *SI = cast<SelectInst>(RVI); |
| FlowsToReturn.insert(SI->getTrueValue()); |
| FlowsToReturn.insert(SI->getFalseValue()); |
| continue; |
| } |
| case Instruction::PHI: { |
| PHINode *PN = cast<PHINode>(RVI); |
| for (Value *IncValue : PN->incoming_values()) |
| FlowsToReturn.insert(IncValue); |
| continue; |
| } |
| case Instruction::Call: |
| case Instruction::Invoke: { |
| CallBase &CB = cast<CallBase>(*RVI); |
| Function *Callee = CB.getCalledFunction(); |
| // A call to a function within the SCC is speculatively assumed to |
| // return nonnull; the conclusion only stands if the whole SCC proves |
| // to return nonnull. |
| if (Callee && SCCNodes.count(Callee)) { |
| Speculative = true; |
| continue; |
| } |
| return false; |
| } |
| default: |
| return false; // Unknown source, may be null |
| } |
| llvm_unreachable("should have either continued or returned"); |
| } |
| |
| return true; |
| } |
| |
| /// Deduce nonnull attributes for the SCC. |
| static void addNonNullAttrs(const SCCNodeSet &SCCNodes, |
| SmallSet<Function *, 8> &Changed) { |
| // Speculate that all functions in the SCC return only nonnull |
| // pointers. We may refute this as we analyze functions. |
| bool SCCReturnsNonNull = true; |
| |
| // Check each function in turn, determining which functions return nonnull |
| // pointers. |
| for (Function *F : SCCNodes) { |
| // Already nonnull. |
| if (F->getAttributes().hasRetAttr(Attribute::NonNull)) |
| continue; |
| |
| // We can infer and propagate function attributes only when we know that the |
| // definition we'll get at link time is *exactly* the definition we see now. |
| // For more details, see GlobalValue::mayBeDerefined. |
| if (!F->hasExactDefinition()) |
| return; |
| |
| // We annotate nonnull return values, which are only applicable to |
| // pointer types. |
| if (!F->getReturnType()->isPointerTy()) |
| continue; |
| |
| bool Speculative = false; |
| if (isReturnNonNull(F, SCCNodes, Speculative)) { |
| if (!Speculative) { |
| // Mark the function eagerly since we may discover a function |
| // which prevents us from speculating about the entire SCC |
| LLVM_DEBUG(dbgs() << "Eagerly marking " << F->getName() |
| << " as nonnull\n"); |
| F->addRetAttr(Attribute::NonNull); |
| ++NumNonNullReturn; |
| Changed.insert(F); |
| } |
| continue; |
| } |
| // At least one function returns something which could be null; we can't |
| // speculate any further. |
| SCCReturnsNonNull = false; |
| } |
| |
| if (SCCReturnsNonNull) { |
| for (Function *F : SCCNodes) { |
| if (F->getAttributes().hasRetAttr(Attribute::NonNull) || |
| !F->getReturnType()->isPointerTy()) |
| continue; |
| |
| LLVM_DEBUG(dbgs() << "SCC marking " << F->getName() << " as nonnull\n"); |
| F->addRetAttr(Attribute::NonNull); |
| ++NumNonNullReturn; |
| Changed.insert(F); |
| } |
| } |
| } |
| |
| /// Deduce noundef attributes for the SCC. |
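| /// |
| /// For illustration, a minimal sketch (hypothetical IR): a function such as |
| /// |
| ///   define i32 @forty_two() { |
| ///     ret i32 42 |
| ///   } |
| /// |
| /// returns a constant that is trivially neither undef nor poison, so its |
| /// return can be annotated noundef. |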
| static void addNoUndefAttrs(const SCCNodeSet &SCCNodes, |
| SmallSet<Function *, 8> &Changed) { |
| // Check each function in turn, determining which functions return noundef |
| // values. |
| for (Function *F : SCCNodes) { |
| // Already noundef. |
| AttributeList Attrs = F->getAttributes(); |
| if (Attrs.hasRetAttr(Attribute::NoUndef)) |
| continue; |
| |
| // We can infer and propagate function attributes only when we know that the |
| // definition we'll get at link time is *exactly* the definition we see now. |
| // For more details, see GlobalValue::mayBeDerefined. |
| if (!F->hasExactDefinition()) |
| return; |
| |
| // MemorySanitizer assumes that the definition and declaration of a |
| // function are consistent, so skip inference for functions carrying the |
| // sanitize_memory attribute. |
| if (F->hasFnAttribute(Attribute::SanitizeMemory)) |
| continue; |
| |
| if (F->getReturnType()->isVoidTy()) |
| continue; |
| |
| const DataLayout &DL = F->getDataLayout(); |
| if (all_of(*F, [&](BasicBlock &BB) { |
| if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) { |
| // TODO: perform context-sensitive analysis? |
| Value *RetVal = Ret->getReturnValue(); |
| if (!isGuaranteedNotToBeUndefOrPoison(RetVal)) |
| return false; |
| |
| // We know the original return value is neither undef nor poison now, but |
| // it could still be converted to poison by another return attribute. |
| // Try to explicitly re-prove the relevant attributes. |
| if (Attrs.hasRetAttr(Attribute::NonNull) && |
| !isKnownNonZero(RetVal, DL)) |
| return false; |
| |
| if (MaybeAlign Align = Attrs.getRetAlignment()) |
| if (RetVal->getPointerAlignment(DL) < *Align) |
| return false; |
| |
| Attribute Attr = Attrs.getRetAttr(Attribute::Range); |
| if (Attr.isValid() && |
| !Attr.getRange().contains( |
| computeConstantRange(RetVal, /*ForSigned=*/false))) |
| return false; |
| } |
| return true; |
| })) { |
| F->addRetAttr(Attribute::NoUndef); |
| ++NumNoUndefReturn; |
| Changed.insert(F); |
| } |
| } |
| } |
| |
| namespace { |
| |
| /// Collects a set of attribute inference requests and performs them all in |
| /// one pass over the functions of a single SCC. Inference involves scanning |
| /// function bodies looking for instructions that violate attribute |
| /// assumptions. As soon as all the bodies are found to be fine we are free |
| /// to set the attribute. Customization of inference for individual |
| /// attributes is performed by providing a handful of predicates for each |
| /// attribute. |
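| /// |
| /// A registration is sketched informally below (see inferConvergent() later |
| /// in this file for a real one); the lambdas here are simplified |
| /// placeholders, not the exact predicates any pass uses: |
| /// |
| ///   AttributeInferer AI; |
| ///   AI.registerAttrInference( |
| ///       {Attribute::NoUnwind, |
| ///        [](const Function &F) { return F.doesNotThrow(); }, |
| ///        [](Instruction &I) { return I.mayThrow(); }, |
| ///        [](Function &F) { F.setDoesNotThrow(); }, |
| ///        /*RequiresExactDefinition=*/true}); |
| ///   AI.run(SCCNodes, Changed); |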
| class AttributeInferer { |
| public: |
| /// Describes a request for inference of a single attribute. |
| struct InferenceDescriptor { |
| |
| /// Returns true if this function does not have to be handled. The general |
| /// intent of this predicate is to short-circuit inference for functions |
| /// that do not need it at all (say, functions that already have the |
| /// attribute). |
| std::function<bool(const Function &)> SkipFunction; |
| |
| /// Returns true if this instruction violates attribute assumptions. |
| std::function<bool(Instruction &)> InstrBreaksAttribute; |
| |
| /// Sets the inferred attribute for this function. |
| std::function<void(Function &)> SetAttribute; |
| |
| /// Attribute we derive. |
| Attribute::AttrKind AKind; |
| |
| /// If true, only "exact" definitions can be used to infer this attribute. |
| /// See GlobalValue::isDefinitionExact. |
| bool RequiresExactDefinition; |
| |
| InferenceDescriptor(Attribute::AttrKind AK, |
| std::function<bool(const Function &)> SkipFunc, |
| std::function<bool(Instruction &)> InstrScan, |
| std::function<void(Function &)> SetAttr, |
| bool ReqExactDef) |
| : SkipFunction(SkipFunc), InstrBreaksAttribute(InstrScan), |
| SetAttribute(SetAttr), AKind(AK), |
| RequiresExactDefinition(ReqExactDef) {} |
| }; |
| |
| private: |
| SmallVector<InferenceDescriptor, 4> InferenceDescriptors; |
| |
| public: |
| void registerAttrInference(InferenceDescriptor AttrInference) { |
| InferenceDescriptors.push_back(AttrInference); |
| } |
| |
| void run(const SCCNodeSet &SCCNodes, SmallSet<Function *, 8> &Changed); |
| }; |
| |
| /// Perform all the requested attribute inference actions according to the |
| /// attribute predicates stored before. |
| void AttributeInferer::run(const SCCNodeSet &SCCNodes, |
| SmallSet<Function *, 8> &Changed) { |
| SmallVector<InferenceDescriptor, 4> InferInSCC = InferenceDescriptors; |
| // Go through all the functions in SCC and check corresponding attribute |
| // assumptions for each of them. Attributes that are invalid for this SCC |
| // will be removed from InferInSCC. |
| for (Function *F : SCCNodes) { |
| |
| // No attributes whose assumptions are still valid - done. |
| if (InferInSCC.empty()) |
| return; |
| |
| // Drop attributes whose inference is invalidated here: visiting a |
| // function we cannot scan (a declaration, or an unsuitable definition) |
| // invalidates an attribute unless that function is explicitly skipped. |
| llvm::erase_if(InferInSCC, [F](const InferenceDescriptor &ID) { |
| if (ID.SkipFunction(*F)) |
| return false; |
| |
| // Remove from further inference (invalidate) when visiting a function |
| // that has no instructions to scan/has an unsuitable definition. |
| return F->isDeclaration() || |
| (ID.RequiresExactDefinition && !F->hasExactDefinition()); |
| }); |
| |
| // For each attribute still in InferInSCC that doesn't explicitly skip F, |
| // set up a scan of F's instructions to verify the attribute's assumptions. |
| SmallVector<InferenceDescriptor, 4> InferInThisFunc; |
| llvm::copy_if( |
| InferInSCC, std::back_inserter(InferInThisFunc), |
| [F](const InferenceDescriptor &ID) { return !ID.SkipFunction(*F); }); |
| |
| if (InferInThisFunc.empty()) |
| continue; |
| |
| // Start instruction scan. |
| for (Instruction &I : instructions(*F)) { |
| llvm::erase_if(InferInThisFunc, [&](const InferenceDescriptor &ID) { |
| if (!ID.InstrBreaksAttribute(I)) |
| return false; |
| // Remove attribute from further inference on any other functions |
| // because attribute assumptions have just been violated. |
| llvm::erase_if(InferInSCC, [&ID](const InferenceDescriptor &D) { |
| return D.AKind == ID.AKind; |
| }); |
| // Remove the attribute from the rest of the current instruction scan. |
| return true; |
| }); |
| |
| if (InferInThisFunc.empty()) |
| break; |
| } |
| } |
| |
| if (InferInSCC.empty()) |
| return; |
| |
| for (Function *F : SCCNodes) |
| // At this point InferInSCC contains only attributes whose assumptions |
| // held across the SCC: every function was either |
| // - explicitly skipped from scan/inference, or |
| // - verified to have no instructions that break attribute assumptions. |
| // Hence we just go and force the attribute for all non-skipped functions. |
| for (auto &ID : InferInSCC) { |
| if (ID.SkipFunction(*F)) |
| continue; |
| Changed.insert(F); |
| ID.SetAttribute(*F); |
| } |
| } |
| |
| struct SCCNodesResult { |
| SCCNodeSet SCCNodes; |
| bool HasUnknownCall; |
| }; |
| |
| } // end anonymous namespace |
| |
| /// Helper for non-Convergent inference predicate InstrBreaksAttribute. |
| static bool InstrBreaksNonConvergent(Instruction &I, |
| const SCCNodeSet &SCCNodes) { |
| const CallBase *CB = dyn_cast<CallBase>(&I); |
| // Breaks the non-convergent assumption if CB is a convergent call to a |
| // function not in the SCC. |
| return CB && CB->isConvergent() && |
| !SCCNodes.contains(CB->getCalledFunction()); |
| } |
| |
| /// Helper for NoUnwind inference predicate InstrBreaksAttribute. |
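| // For illustration (hypothetical IR): a may-throw call such as |
| //   call void @may_throw() |
| // breaks the assumption when @may_throw is outside the SCC; the same call |
| // to a function inside the SCC merely defers the question to that |
| // function's own scan. |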
| static bool InstrBreaksNonThrowing(Instruction &I, const SCCNodeSet &SCCNodes) { |
| if (!I.mayThrow(/* IncludePhaseOneUnwind */ true)) |
| return false; |
| if (const auto *CI = dyn_cast<CallInst>(&I)) { |
| if (Function *Callee = CI->getCalledFunction()) { |
| // I is a may-throw call to a function inside our SCC. This doesn't |
| // invalidate our current working assumption that the SCC is no-throw; we |
| // just have to scan that other function. |
| if (SCCNodes.contains(Callee)) |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| /// Helper for NoFree inference predicate InstrBreaksAttribute. |
| static bool InstrBreaksNoFree(Instruction &I, const SCCNodeSet &SCCNodes) { |
| CallBase *CB = dyn_cast<CallBase>(&I); |
| if (!CB) |
| return false; |
| |
| if (CB->hasFnAttr(Attribute::NoFree)) |
| return false; |
| |
| // Speculatively assume that callees within the SCC do not free memory. |
| if (Function *Callee = CB->getCalledFunction()) |
| if (SCCNodes.contains(Callee)) |
| return false; |
| |
| return true; |
| } |
| |
| // Return true if this is an atomic operation with an ordering stronger than |
| // unordered. Note that this is different from the predicate we use in |
| // Attributor. Here we choose to be conservative and consider monotonic |
| // operations potentially synchronizing. We generally don't do much with |
| // monotonic operations, so this is simply risk reduction. |
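| // For illustration (not exhaustive), the classification this implies: |
| //   fence seq_cst                                 -> ordered |
| //   fence syncscope("singlethread") seq_cst       -> not ordered |
| //   store atomic i32 0, ptr %p monotonic, align 4 -> ordered (conservative) |
| //   load atomic i32, ptr %p unordered, align 4    -> not ordered |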
| static bool isOrderedAtomic(Instruction *I) { |
| if (!I->isAtomic()) |
| return false; |
| |
| if (auto *FI = dyn_cast<FenceInst>(I)) |
| // All legal orderings for fence are stronger than monotonic. |
| return FI->getSyncScopeID() != SyncScope::SingleThread; |
| if (isa<AtomicCmpXchgInst>(I) || isa<AtomicRMWInst>(I)) |
| return true; |
| if (auto *SI = dyn_cast<StoreInst>(I)) |
| return !SI->isUnordered(); |
| if (auto *LI = dyn_cast<LoadInst>(I)) |
| return !LI->isUnordered(); |
| llvm_unreachable("unknown atomic instruction?"); |
| } |
| |
| static bool InstrBreaksNoSync(Instruction &I, const SCCNodeSet &SCCNodes) { |
| // Volatile accesses may synchronize. |
| if (I.isVolatile()) |
| return true; |
| |
| // An ordered atomic may synchronize. (See the comment above on monotonic.) |
| if (isOrderedAtomic(&I)) |
| return true; |
| |
| auto *CB = dyn_cast<CallBase>(&I); |
| if (!CB) |
| // Non-call-site cases are covered by the two checks above. |
| return false; |
| |
| if (CB->hasFnAttr(Attribute::NoSync)) |
| return false; |
| |
| // Non-volatile memset/memcpy/memmove intrinsics are nosync. |
| // NOTE: Only intrinsics with volatile flags should be handled here. All |
| // others should be marked in Intrinsics.td. |
| if (auto *MI = dyn_cast<MemIntrinsic>(&I)) |
| if (!MI->isVolatile()) |
| return false; |
| |
| // Speculatively assume that callees within the SCC are nosync. |
| if (Function *Callee = CB->getCalledFunction()) |
| if (SCCNodes.contains(Callee)) |
| return false; |
| |
| return true; |
| } |
| |
| /// Attempt to remove convergent function attribute when possible. |
| /// |
| /// Any functions that are changed are added to \p Changed. |
| static void inferConvergent(const SCCNodeSet &SCCNodes, |
| SmallSet<Function *, 8> &Changed) { |
| AttributeInferer AI; |
| |
| // Request to remove the convergent attribute from all functions in the SCC |
| // if every callsite within the SCC is not convergent (except for calls |
| // to functions within the SCC). |
| // Note: Removal of the attr from the callsites will happen in |
| // InstCombineCalls separately. |
| AI.registerAttrInference(AttributeInferer::InferenceDescriptor{ |
| Attribute::Convergent, |
| // Skip non-convergent functions. |
| [](const Function &F) { return !F.isConvergent(); }, |
| // Instructions that break non-convergent assumption. |
| [SCCNodes](Instruction &I) { |
| return InstrBreaksNonConvergent(I, SCCNodes); |
| }, |
| [](Function &F) { |
| LLVM_DEBUG(dbgs() << "Removing convergent attr from fn " << F.getName() |
| << "\n"); |
| F.setNotConvergent(); |
| }, |
| /* RequiresExactDefinition= */ false}); |
| // Perform all the requested attribute inference actions. |
| AI.run(SCCNodes, Changed); |
| } |
| |
| /// Infer attributes from all functions in the SCC by scanning every |
| /// instruction for compliance to the attribute assumptions. |
| /// |
| /// Any functions that are changed are added to \p Changed. |
| static void inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes, |
| SmallSet<Function *, 8> &Changed) { |
| AttributeInferer AI; |
| |
| if (!DisableNoUnwindInference) |
| // Request to infer nounwind attribute for all the functions in the SCC if |
| // every callsite within the SCC is not throwing (except for calls to |
| // functions within the SCC). Note that nounwind attribute suffers from |
| // derefinement - results may change depending on how functions are |
| // optimized. Thus it can be inferred only from exact definitions. |
| AI.registerAttrInference(AttributeInferer::InferenceDescriptor{ |
| Attribute::NoUnwind, |
| // Skip non-throwing functions. |
| [](const Function &F) { return F.doesNotThrow(); }, |
| // Instructions that break non-throwing assumption. |
| [&SCCNodes](Instruction &I) { |
| return InstrBreaksNonThrowing(I, SCCNodes); |
| }, |
| [](Function &F) { |
| LLVM_DEBUG(dbgs() |
| << "Adding nounwind attr to fn " << F.getName() << "\n"); |
| F.setDoesNotThrow(); |
| ++NumNoUnwind; |
| }, |
| /* RequiresExactDefinition= */ true}); |
| |
| if (!DisableNoFreeInference) |
| // Request to infer nofree attribute for all the functions in the SCC if |
| // every callsite within the SCC does not directly or indirectly free |
| // memory (except for calls to functions within the SCC). Note that nofree |
| // attribute suffers from derefinement - results may change depending on |
| // how functions are optimized. Thus it can be inferred only from exact |
| // definitions. |
| AI.registerAttrInference(AttributeInferer::InferenceDescriptor{ |
| Attribute::NoFree, |
| // Skip functions known not to free memory. |
| [](const Function &F) { return F.doesNotFreeMemory(); }, |
| // Instructions that break non-deallocating assumption. |
| [&SCCNodes](Instruction &I) { |
| return InstrBreaksNoFree(I, SCCNodes); |
| }, |
| [](Function &F) { |
| LLVM_DEBUG(dbgs() |
| << "Adding nofree attr to fn " << F.getName() << "\n"); |
| F.setDoesNotFreeMemory(); |
| ++NumNoFree; |
| }, |
| /* RequiresExactDefinition= */ true}); |
| |
| AI.registerAttrInference(AttributeInferer::InferenceDescriptor{ |
| Attribute::NoSync, |
| // Skip already marked functions. |
| [](const Function &F) { return F.hasNoSync(); }, |
| // Instructions that break nosync assumption. |
| [&SCCNodes](Instruction &I) { |
| return InstrBreaksNoSync(I, SCCNodes); |
| }, |
| [](Function &F) { |
| LLVM_DEBUG(dbgs() |
| << "Adding nosync attr to fn " << F.getName() << "\n"); |
| F.setNoSync(); |
| ++NumNoSync; |
| }, |
| /* RequiresExactDefinition= */ true}); |
| |
| // Perform all the requested attribute inference actions. |
| AI.run(SCCNodes, Changed); |
| } |
| |
| static void addNoRecurseAttrs(const SCCNodeSet &SCCNodes, |
| SmallSet<Function *, 8> &Changed) { |
| // Try to identify functions that do not recurse. |
| |
| // If the SCC contains multiple nodes we know for sure there is recursion. |
| if (SCCNodes.size() != 1) |
| return; |
| |
| Function *F = *SCCNodes.begin(); |
| if (!F || !F->hasExactDefinition() || F->doesNotRecurse()) |
| return; |
| |
| // If all of the calls in F are identifiable and are to norecurse functions, |
| // F is norecurse. This also detects self-recursion: F is not currently |
| // marked norecurse, and a direct call from F to F is rejected explicitly. |
| for (auto &BB : *F) |
| for (auto &I : BB.instructionsWithoutDebug()) |
| if (auto *CB = dyn_cast<CallBase>(&I)) { |
| Function *Callee = CB->getCalledFunction(); |
| if (!Callee || Callee == F || |
| (!Callee->doesNotRecurse() && |
| !(Callee->isDeclaration() && |
| Callee->hasFnAttribute(Attribute::NoCallback)))) |
| // Function calls a potentially recursive function. |
| return; |
| } |
| |
| // Every call was to a non-recursive function other than this function, and |
| // we have no indirect recursion as the SCC size is one. This function cannot |
| // recurse. |
| F->setDoesNotRecurse(); |
| ++NumNoRecurse; |
| Changed.insert(F); |
| } |
| |
| static bool instructionDoesNotReturn(Instruction &I) { |
| if (auto *CB = dyn_cast<CallBase>(&I)) |
| return CB->hasFnAttr(Attribute::NoReturn); |
| return false; |
| } |
| |
| // A basic block can only return if it terminates with a ReturnInst and does not |
| // contain calls to noreturn functions. |
| static bool basicBlockCanReturn(BasicBlock &BB) { |
| if (!isa<ReturnInst>(BB.getTerminator())) |
| return false; |
| return none_of(BB, instructionDoesNotReturn); |
| } |
| |
| // FIXME: this doesn't handle recursion. |
| static bool canReturn(Function &F) { |
| SmallVector<BasicBlock *, 16> Worklist; |
| SmallPtrSet<BasicBlock *, 16> Visited; |
| |
| Visited.insert(&F.front()); |
| Worklist.push_back(&F.front()); |
| |
| do { |
| BasicBlock *BB = Worklist.pop_back_val(); |
| if (basicBlockCanReturn(*BB)) |
| return true; |
| for (BasicBlock *Succ : successors(BB)) |
| if (Visited.insert(Succ).second) |
| Worklist.push_back(Succ); |
| } while (!Worklist.empty()); |
| |
| return false; |
| } |
| |
| // Set the noreturn function attribute if possible. |
| static void addNoReturnAttrs(const SCCNodeSet &SCCNodes, |
| SmallSet<Function *, 8> &Changed) { |
| for (Function *F : SCCNodes) { |
| if (!F || !F->hasExactDefinition() || F->hasFnAttribute(Attribute::Naked) || |
| F->doesNotReturn()) |
| continue; |
| |
| if (!canReturn(*F)) { |
| F->setDoesNotReturn(); |
| Changed.insert(F); |
| } |
| } |
| } |
| |
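| // Returns true if every path from the entry block to a function exit passes |
| // through a call site marked cold. As a minimal sketch (hypothetical CFG): |
| // |
| //   entry -> then -> exit |
| //   entry -> else -> exit |
| // |
| // if both `then` and `else` contain a call annotated cold, every path is |
| // cold and addColdAttrs below may mark the function cold; if either branch |
| // reaches `exit` without one, we return false. |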
| static bool allPathsGoThroughCold(Function &F) { |
| SmallDenseMap<BasicBlock *, bool, 16> ColdPaths; |
| ColdPaths[&F.front()] = false; |
| SmallVector<BasicBlock *> Jobs; |
| Jobs.push_back(&F.front()); |
| |
| while (!Jobs.empty()) { |
| BasicBlock *BB = Jobs.pop_back_val(); |
| |
| // If the block contains a cold callsite, this path through the CFG is |
| // cold. Ignore whether the instructions are actually guaranteed to |
| // transfer execution; divergent behavior is considered unlikely. |
| if (any_of(*BB, [](Instruction &I) { |
| if (auto *CB = dyn_cast<CallBase>(&I)) |
| return CB->hasFnAttr(Attribute::Cold); |
| return false; |
| })) { |
| ColdPaths[BB] = true; |
| continue; |
| } |
| |
| auto Succs = successors(BB); |
| // We found a path that doesn't go through any cold callsite. |
| if (Succs.empty()) |
| return false; |
| |
| // We didn't find a cold callsite in this BB, so check that all successors |
| // contain a cold callsite (or that their successors do). |
| // Potential TODO: We could use static branch hints to assume certain |
| // successor paths are inherently cold, irrespective of if they contain a |
| // cold callsite. |
| for (BasicBlock *Succ : Succs) { |
| // Seed with false; this is necessary to ensure we don't consider loops |
| // cold. |
| auto [Iter, Inserted] = ColdPaths.try_emplace(Succ, false); |
| if (!Inserted) { |
| if (Iter->second) |
| continue; |
| return false; |
| } |
| Jobs.push_back(Succ); |
| } |
| } |
| return true; |
| } |
| |
| // Set the cold function attribute if possible. |
| static void addColdAttrs(const SCCNodeSet &SCCNodes, |
| SmallSet<Function *, 8> &Changed) { |
| for (Function *F : SCCNodes) { |
| if (!F || !F->hasExactDefinition() || F->hasFnAttribute(Attribute::Naked) || |
| F->hasFnAttribute(Attribute::Cold) || F->hasFnAttribute(Attribute::Hot)) |
| continue; |
| |
| // Potential TODO: We could add attribute `cold` on functions with `coldcc`. |
| if (allPathsGoThroughCold(*F)) { |
| F->addFnAttr(Attribute::Cold); |
| ++NumCold; |
| Changed.insert(F); |
| continue; |
| } |
| } |
| } |
| |
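| // Conservatively determine whether F is guaranteed to return. As an |
| // illustrative sketch (hypothetical IR), a loop-free function whose calls |
| // all return qualifies: |
| // |
| //   define i32 @sum2(i32 %a, i32 %b) { |
| //     %r = add i32 %a, %b |
| //     ret i32 %r |
| //   } |
| // |
| // whereas any backedge (a potential infinite loop) makes us bail out. |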
| static bool functionWillReturn(const Function &F) { |
| // We can infer and propagate function attributes only when we know that the |
| // definition we'll get at link time is *exactly* the definition we see now. |
| // For more details, see GlobalValue::mayBeDerefined. |
| if (!F.hasExactDefinition()) |
| return false; |
| |
| // A must-progress function without side effects must return. |
| if (F.mustProgress() && F.onlyReadsMemory()) |
| return true; |
| |
| // Can only analyze functions with a definition. |
| if (F.isDeclaration()) |
| return false; |
| |
| // Functions with loops require more sophisticated analysis, as the loop |
| // may be infinite. For now, don't try to handle them. |
| SmallVector<std::pair<const BasicBlock *, const BasicBlock *>> Backedges; |
| FindFunctionBackedges(F, Backedges); |
| if (!Backedges.empty()) |
| return false; |
| |
| // If there are no loops, then the function is willreturn if all calls in |
| // it are willreturn. |
| return all_of(instructions(F), [](const Instruction &I) { |
| return I.willReturn(); |
| }); |
| } |
| |
| // Set the willreturn function attribute if possible. |
| static void addWillReturn(const SCCNodeSet &SCCNodes, |
| SmallSet<Function *, 8> &Changed) { |
| for (Function *F : SCCNodes) { |
| if (!F || F->willReturn() || !functionWillReturn(*F)) |
| continue; |
| |
| F->setWillReturn(); |
| NumWillReturn++; |
| Changed.insert(F); |
| } |
| } |
| |
| static SCCNodesResult createSCCNodeSet(ArrayRef<Function *> Functions) { |
| SCCNodesResult Res; |
| Res.HasUnknownCall = false; |
| for (Function *F : Functions) { |
| if (!F || F->hasOptNone() || F->hasFnAttribute(Attribute::Naked) || |
| F->isPresplitCoroutine()) { |
| // Treat any function we're trying not to optimize as if it were an |
| // indirect call and omit it from the node set used below. |
| Res.HasUnknownCall = true; |
| continue; |
| } |
| // Track whether any functions in this SCC have an unknown call edge. |
| // Note: if this is ever a performance hit, we can common it with |
| // subsequent routines which also do scans over the instructions of the |
| // function. |
| if (!Res.HasUnknownCall) { |
| for (Instruction &I : instructions(*F)) { |
| if (auto *CB = dyn_cast<CallBase>(&I)) { |
| if (!CB->getCalledFunction()) { |
| Res.HasUnknownCall = true; |
| break; |
| } |
| } |
| } |
| } |
| Res.SCCNodes.insert(F); |
| } |
| return Res; |
| } |
| |
| template <typename AARGetterT> |
| static SmallSet<Function *, 8> |
| deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter, |
| bool ArgAttrsOnly) { |
| SCCNodesResult Nodes = createSCCNodeSet(Functions); |
| |
| // Bail if the SCC only contained functions we skip (optnone, naked, or |
| // pre-split coroutines). |
| if (Nodes.SCCNodes.empty()) |
| return {}; |
| |
| SmallSet<Function *, 8> Changed; |
| if (ArgAttrsOnly) { |
| // ArgAttrsOnly means to only infer attributes that may aid optimizations |
| // on the *current* function. The "initializes" attribute aids |
| // optimizations (like DSE) in callers, so skip "initializes" here. |
| addArgumentAttrs(Nodes.SCCNodes, Changed, /*SkipInitializes=*/true); |
| return Changed; |
| } |
| |
| addArgumentReturnedAttrs(Nodes.SCCNodes, Changed); |
| addMemoryAttrs(Nodes.SCCNodes, AARGetter, Changed); |
| addArgumentAttrs(Nodes.SCCNodes, Changed, /*SkipInitializes=*/false); |
| inferConvergent(Nodes.SCCNodes, Changed); |
| addNoReturnAttrs(Nodes.SCCNodes, Changed); |
| addColdAttrs(Nodes.SCCNodes, Changed); |
| addWillReturn(Nodes.SCCNodes, Changed); |
| addNoUndefAttrs(Nodes.SCCNodes, Changed); |
| |
| // If we have no external nodes participating in the SCC, we can deduce some |
| // more precise attributes as well. |
| if (!Nodes.HasUnknownCall) { |
| addNoAliasAttrs(Nodes.SCCNodes, Changed); |
| addNonNullAttrs(Nodes.SCCNodes, Changed); |
| inferAttrsFromFunctionBodies(Nodes.SCCNodes, Changed); |
| addNoRecurseAttrs(Nodes.SCCNodes, Changed); |
| } |
| |
| // Finally, infer the maximal set of attributes from the ones we've inferred |
| // above. This handles the cases where one attribute on a signature implies |
| // another, but for implementation reasons the inference rule for the latter |
| // is missing (or simply less sophisticated). |
| for (Function *F : Nodes.SCCNodes) |
| if (F) |
| if (inferAttributesFromOthers(*F)) |
| Changed.insert(F); |
| |
| return Changed; |
| } |
| |
| PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C, |
| CGSCCAnalysisManager &AM, |
| LazyCallGraph &CG, |
| CGSCCUpdateResult &) { |
| // Skip non-recursive functions if requested. |
| // Only infer argument attributes for non-recursive functions, because |
| // it can affect optimization behavior in conjunction with noalias. |
| bool ArgAttrsOnly = false; |
| if (C.size() == 1 && SkipNonRecursive) { |
| LazyCallGraph::Node &N = *C.begin(); |
| if (!N->lookup(N)) |
| ArgAttrsOnly = true; |
| } |
| |
| FunctionAnalysisManager &FAM = |
| AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager(); |
| |
| // We pass a lambda into functions to wire them up to the analysis manager |
| // for getting function analyses. |
| auto AARGetter = [&](Function &F) -> AAResults & { |
| return FAM.getResult<AAManager>(F); |
| }; |
| |
| SmallVector<Function *, 8> Functions; |
| for (LazyCallGraph::Node &N : C) { |
| Functions.push_back(&N.getFunction()); |
| } |
| |
| auto ChangedFunctions = |
| deriveAttrsInPostOrder(Functions, AARGetter, ArgAttrsOnly); |
| if (ChangedFunctions.empty()) |
| return PreservedAnalyses::all(); |
| |
| // Invalidate analyses for modified functions so that we don't have to |
| // invalidate all analyses for all functions in this SCC. |
| PreservedAnalyses FuncPA; |
| // We haven't changed the CFG for modified functions. |
| FuncPA.preserveSet<CFGAnalyses>(); |
| for (Function *Changed : ChangedFunctions) { |
| FAM.invalidate(*Changed, FuncPA); |
| // Also invalidate any direct callers of changed functions since analyses |
| // may care about attributes of direct callees. For example, MemorySSA cares |
| // about whether or not a call's callee modifies memory and queries that |
| // through function attributes. |
| for (auto *U : Changed->users()) { |
| if (auto *Call = dyn_cast<CallBase>(U)) { |
| if (Call->getCalledFunction() == Changed) |
| FAM.invalidate(*Call->getFunction(), FuncPA); |
| } |
| } |
| } |
| |
| PreservedAnalyses PA; |
| // We have not added or removed functions. |
| PA.preserve<FunctionAnalysisManagerCGSCCProxy>(); |
| // We already invalidated all relevant function analyses above. |
| PA.preserveSet<AllAnalysesOn<Function>>(); |
| return PA; |
| } |
| |
| void PostOrderFunctionAttrsPass::printPipeline( |
| raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { |
| static_cast<PassInfoMixin<PostOrderFunctionAttrsPass> *>(this)->printPipeline( |
| OS, MapClassName2PassName); |
| if (SkipNonRecursive) |
| OS << "<skip-non-recursive-function-attrs>"; |
| } |
| |
| template <typename AARGetterT> |
| static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) { |
| SmallVector<Function *, 8> Functions; |
| for (CallGraphNode *I : SCC) { |
| Functions.push_back(I->getFunction()); |
| } |
| |
| return !deriveAttrsInPostOrder(Functions, AARGetter, /*ArgAttrsOnly=*/false) |
| .empty(); |
| } |
| |
| static bool addNoRecurseAttrsTopDown(Function &F) { |
| // We check the preconditions for the function prior to calling this to avoid |
| // the cost of building up a reversible post-order list. We assert them here |
| // to make sure none of the invariants this relies on were violated. |
| assert(!F.isDeclaration() && "Cannot deduce norecurse without a definition!"); |
| assert(!F.doesNotRecurse() && |
| "This function has already been deduced as norecurse!"); |
| assert(F.hasInternalLinkage() && |
| "Can only do top-down deduction for internal linkage functions!"); |
| |
| // If F is internal and all of its uses are calls from non-recursive |
| // functions, then none of its calls could in fact recurse without going |
| // through a function marked norecurse, and so we can mark this function too |
| // as norecurse. Note that the uses must actually be calls -- otherwise |
| // a pointer to this function could be returned from a norecurse function |
| // but this function could still be recursively (indirectly) called. Note |
| // that this also detects if F is directly recursive as F is not yet marked |
| // as a norecurse function. |
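| // For illustration (hypothetical IR): if the only use of @helper is a |
| // direct call |
| //   call void @helper() |
| // inside callers already marked norecurse, @helper may be marked norecurse |
| // as well; but if its address escapes, e.g. |
| //   store ptr @helper, ptr @slot |
| // we must give up, since an indirect call could re-enter it. |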
| for (auto &U : F.uses()) { |
| auto *I = dyn_cast<Instruction>(U.getUser()); |
| if (!I) |
| return false; |
| CallBase *CB = dyn_cast<CallBase>(I); |
| if (!CB || !CB->isCallee(&U) || |
| !CB->getParent()->getParent()->doesNotRecurse()) |
| return false; |
| } |
| F.setDoesNotRecurse(); |
| ++NumNoRecurse; |
| return true; |
| } |
| |
| static bool deduceFunctionAttributeInRPO(Module &M, LazyCallGraph &CG) { |
| // We only have a post-order SCC traversal (because SCCs are inherently |
| // discovered in post-order), so we accumulate them in a vector and then walk |
| // it in reverse. This is simpler than using the RPO iterator infrastructure |
| // because we need to combine SCC detection and the PO walk of the call |
| // graph. We can also cheat egregiously because we're primarily interested in |
| // synthesizing norecurse, so we only need to save the singleton SCCs; SCCs |
| // with multiple functions in them will clearly be recursive. |
| |
| SmallVector<Function *, 16> Worklist; |
| CG.buildRefSCCs(); |
| for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) { |
| for (LazyCallGraph::SCC &SCC : RC) { |
| if (SCC.size() != 1) |
| continue; |
| Function &F = SCC.begin()->getFunction(); |
| if (!F.isDeclaration() && !F.doesNotRecurse() && F.hasInternalLinkage()) |
| Worklist.push_back(&F); |
| } |
| } |
| bool Changed = false; |
| for (auto *F : llvm::reverse(Worklist)) |
| Changed |= addNoRecurseAttrsTopDown(*F); |
| |
| return Changed; |
| } |
| |
| PreservedAnalyses |
| ReversePostOrderFunctionAttrsPass::run(Module &M, ModuleAnalysisManager &AM) { |
| auto &CG = AM.getResult<LazyCallGraphAnalysis>(M); |
| |
| if (!deduceFunctionAttributeInRPO(M, CG)) |
| return PreservedAnalyses::all(); |
| |
| PreservedAnalyses PA; |
| PA.preserve<LazyCallGraphAnalysis>(); |
| return PA; |
| } |