| //===- SpeculateAroundPHIs.cpp --------------------------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "llvm/Transforms/Scalar/SpeculateAroundPHIs.h" |
| #include "llvm/ADT/PostOrderIterator.h" |
| #include "llvm/ADT/Sequence.h" |
| #include "llvm/ADT/SetVector.h" |
| #include "llvm/ADT/Statistic.h" |
| #include "llvm/Analysis/TargetTransformInfo.h" |
| #include "llvm/Analysis/ValueTracking.h" |
| #include "llvm/IR/BasicBlock.h" |
| #include "llvm/IR/IRBuilder.h" |
| #include "llvm/IR/Instructions.h" |
| #include "llvm/IR/IntrinsicInst.h" |
| #include "llvm/Support/Debug.h" |
| #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
| |
| using namespace llvm; |
| |
| #define DEBUG_TYPE "spec-phis" |
| |
| STATISTIC(NumPHIsSpeculated, "Number of PHI nodes we speculated around"); |
| STATISTIC(NumEdgesSplit, |
| "Number of critical edges which were split for speculation"); |
| STATISTIC(NumSpeculatedInstructions, |
| "Number of instructions we speculated around the PHI nodes"); |
| STATISTIC(NumNewRedundantInstructions, |
| "Number of new, redundant instructions inserted"); |
| |
| /// Check whether speculating the users of a PHI node around the PHI |
| /// will be safe. |
| /// |
| /// This checks both that all of the users are safe and also that all of their |
| /// operands are either recursively safe or already available along an incoming |
| /// edge to the PHI. |
| /// |
| /// This routine caches both all the safe nodes explored in `PotentialSpecSet` |
| /// and the chain of nodes that definitively reach any unsafe node in |
| /// `UnsafeSet`. By preserving these between repeated calls to this routine for |
| /// PHIs in the same basic block, the exploration here can be reused. However, |
| /// these caches must no be reused for PHIs in a different basic block as they |
| /// reflect what is available along incoming edges. |
| static bool |
| isSafeToSpeculatePHIUsers(PHINode &PN, DominatorTree &DT, |
| SmallPtrSetImpl<Instruction *> &PotentialSpecSet, |
| SmallPtrSetImpl<Instruction *> &UnsafeSet) { |
| auto *PhiBB = PN.getParent(); |
| SmallPtrSet<Instruction *, 4> Visited; |
| SmallVector<std::pair<Instruction *, User::value_op_iterator>, 16> DFSStack; |
| |
| // Walk each user of the PHI node. |
| for (Use &U : PN.uses()) { |
| auto *UI = cast<Instruction>(U.getUser()); |
| |
| // Ensure the use post-dominates the PHI node. This ensures that, in the |
| // absence of unwinding, the use will actually be reached. |
| // FIXME: We use a blunt hammer of requiring them to be in the same basic |
| // block. We should consider using actual post-dominance here in the |
| // future. |
| if (UI->getParent() != PhiBB) { |
| LLVM_DEBUG(dbgs() << " Unsafe: use in a different BB: " << *UI << "\n"); |
| return false; |
| } |
| |
| // FIXME: This check is much too conservative. We're not going to move these |
| // instructions onto new dynamic paths through the program unless there is |
| // a call instruction between the use and the PHI node. And memory isn't |
| // changing unless there is a store in that same sequence. We should |
| // probably change this to do at least a limited scan of the intervening |
| // instructions and allow handling stores in easily proven safe cases. |
| if (mayBeMemoryDependent(*UI)) { |
| LLVM_DEBUG(dbgs() << " Unsafe: can't speculate use: " << *UI << "\n"); |
| return false; |
| } |
| |
| // Now do a depth-first search of everything these users depend on to make |
| // sure they are transitively safe. This is a depth-first search, but we |
| // check nodes in preorder to minimize the amount of checking. |
| Visited.insert(UI); |
| DFSStack.push_back({UI, UI->value_op_begin()}); |
| do { |
| User::value_op_iterator OpIt; |
| std::tie(UI, OpIt) = DFSStack.pop_back_val(); |
| |
| while (OpIt != UI->value_op_end()) { |
| auto *OpI = dyn_cast<Instruction>(*OpIt); |
| // Increment to the next operand for whenever we continue. |
| ++OpIt; |
| // No need to visit non-instructions, which can't form dependencies. |
| if (!OpI) |
| continue; |
| |
| // Now do the main pre-order checks that this operand is a viable |
| // dependency of something we want to speculate. |
| |
| // First do a few checks for instructions that won't require |
| // speculation at all because they are trivially available on the |
| // incoming edge (either through dominance or through an incoming value |
| // to a PHI). |
| // |
| // The cases in the current block will be trivially dominated by the |
| // edge. |
| auto *ParentBB = OpI->getParent(); |
| if (ParentBB == PhiBB) { |
| if (isa<PHINode>(OpI)) { |
| // We can trivially map through phi nodes in the same block. |
| continue; |
| } |
| } else if (DT.dominates(ParentBB, PhiBB)) { |
| // Instructions from dominating blocks are already available. |
| continue; |
| } |
| |
| // Once we know that we're considering speculating the operand, check |
| // if we've already explored this subgraph and found it to be safe. |
| if (PotentialSpecSet.count(OpI)) |
| continue; |
| |
| // If we've already explored this subgraph and found it unsafe, bail. |
| // If when we directly test whether this is safe it fails, bail. |
| if (UnsafeSet.count(OpI) || ParentBB != PhiBB || |
| mayBeMemoryDependent(*OpI)) { |
| LLVM_DEBUG(dbgs() << " Unsafe: can't speculate transitive use: " |
| << *OpI << "\n"); |
| // Record the stack of instructions which reach this node as unsafe |
| // so we prune subsequent searches. |
| UnsafeSet.insert(OpI); |
| for (auto &StackPair : DFSStack) { |
| Instruction *I = StackPair.first; |
| UnsafeSet.insert(I); |
| } |
| return false; |
| } |
| |
| // Skip any operands we're already recursively checking. |
| if (!Visited.insert(OpI).second) |
| continue; |
| |
| // Push onto the stack and descend. We can directly continue this |
| // loop when ascending. |
| DFSStack.push_back({UI, OpIt}); |
| UI = OpI; |
| OpIt = OpI->value_op_begin(); |
| } |
| |
| // This node and all its operands are safe. Go ahead and cache that for |
| // reuse later. |
| PotentialSpecSet.insert(UI); |
| |
| // Continue with the next node on the stack. |
| } while (!DFSStack.empty()); |
| } |
| |
| #ifndef NDEBUG |
| // Every visited operand should have been marked as safe for speculation at |
| // this point. Verify this and return success. |
| for (auto *I : Visited) |
| assert(PotentialSpecSet.count(I) && |
| "Failed to mark a visited instruction as safe!"); |
| #endif |
| return true; |
| } |
| |
| /// Check whether, in isolation, a given PHI node is both safe and profitable |
| /// to speculate users around. |
| /// |
| /// This handles checking whether there are any constant operands to a PHI |
| /// which could represent a useful speculation candidate, whether the users of |
| /// the PHI are safe to speculate including all their transitive dependencies, |
| /// and whether after speculation there will be some cost savings (profit) to |
| /// folding the operands into the users of the PHI node. Returns true if both |
| /// safe and profitable with relevant cost savings updated in the map and with |
| /// an update to the `PotentialSpecSet`. Returns false if either safety or |
| /// profitability are absent. Some new entries may be made to the |
| /// `PotentialSpecSet` even when this routine returns false, but they remain |
| /// conservatively correct. |
| /// |
| /// The profitability check here is a local one, but it checks this in an |
| /// interesting way. Beyond checking that the total cost of materializing the |
| /// constants will be less than the cost of folding them into their users, it |
| /// also checks that no one incoming constant will have a higher cost when |
| /// folded into its users rather than materialized. This higher cost could |
| /// result in a dynamic *path* that is more expensive even when the total cost |
| /// is lower. Currently, all of the interesting cases where this optimization |
| /// should fire are ones where it is a no-loss operation in this sense. If we |
| /// ever want to be more aggressive here, we would need to balance the |
| /// different incoming edges' cost by looking at their respective |
| /// probabilities. |
| static bool isSafeAndProfitableToSpeculateAroundPHI( |
| PHINode &PN, SmallDenseMap<PHINode *, int, 16> &CostSavingsMap, |
| SmallPtrSetImpl<Instruction *> &PotentialSpecSet, |
| SmallPtrSetImpl<Instruction *> &UnsafeSet, DominatorTree &DT, |
| TargetTransformInfo &TTI) { |
| // First see whether there is any cost savings to speculating around this |
| // PHI, and build up a map of the constant inputs to how many times they |
| // occur. |
| bool NonFreeMat = false; |
| struct CostsAndCount { |
| int MatCost = TargetTransformInfo::TCC_Free; |
| int FoldedCost = TargetTransformInfo::TCC_Free; |
| int Count = 0; |
| }; |
| SmallDenseMap<ConstantInt *, CostsAndCount, 16> CostsAndCounts; |
| SmallPtrSet<BasicBlock *, 16> IncomingConstantBlocks; |
| for (int i : llvm::seq<int>(0, PN.getNumIncomingValues())) { |
| auto *IncomingC = dyn_cast<ConstantInt>(PN.getIncomingValue(i)); |
| if (!IncomingC) |
| continue; |
| |
| // Only visit each incoming edge with a constant input once. |
| if (!IncomingConstantBlocks.insert(PN.getIncomingBlock(i)).second) |
| continue; |
| |
| auto InsertResult = CostsAndCounts.insert({IncomingC, {}}); |
| // Count how many edges share a given incoming costant. |
| ++InsertResult.first->second.Count; |
| // Only compute the cost the first time we see a particular constant. |
| if (!InsertResult.second) |
| continue; |
| |
| int &MatCost = InsertResult.first->second.MatCost; |
| MatCost = TTI.getIntImmCost(IncomingC->getValue(), IncomingC->getType()); |
| NonFreeMat |= MatCost != TTI.TCC_Free; |
| } |
| if (!NonFreeMat) { |
| LLVM_DEBUG(dbgs() << " Free: " << PN << "\n"); |
| // No profit in free materialization. |
| return false; |
| } |
| |
| // Now check that the uses of this PHI can actually be speculated, |
| // otherwise we'll still have to materialize the PHI value. |
| if (!isSafeToSpeculatePHIUsers(PN, DT, PotentialSpecSet, UnsafeSet)) { |
| LLVM_DEBUG(dbgs() << " Unsafe PHI: " << PN << "\n"); |
| return false; |
| } |
| |
| // Compute how much (if any) savings are available by speculating around this |
| // PHI. |
| for (Use &U : PN.uses()) { |
| auto *UserI = cast<Instruction>(U.getUser()); |
| // Now check whether there is any savings to folding the incoming constants |
| // into this use. |
| unsigned Idx = U.getOperandNo(); |
| |
| // If we have a binary operator that is commutative, an actual constant |
| // operand would end up on the RHS, so pretend the use of the PHI is on the |
| // RHS. |
| // |
| // Technically, this is a bit weird if *both* operands are PHIs we're |
| // speculating. But if that is the case, giving an "optimistic" cost isn't |
| // a bad thing because after speculation it will constant fold. And |
| // moreover, such cases should likely have been constant folded already by |
| // some other pass, so we shouldn't worry about "modeling" them terribly |
| // accurately here. Similarly, if the other operand is a constant, it still |
| // seems fine to be "optimistic" in our cost modeling, because when the |
| // incoming operand from the PHI node is also a constant, we will end up |
| // constant folding. |
| if (UserI->isBinaryOp() && UserI->isCommutative() && Idx != 1) |
| // Assume we will commute the constant to the RHS to be canonical. |
| Idx = 1; |
| |
| // Get the intrinsic ID if this user is an intrinsic. |
| Intrinsic::ID IID = Intrinsic::not_intrinsic; |
| if (auto *UserII = dyn_cast<IntrinsicInst>(UserI)) |
| IID = UserII->getIntrinsicID(); |
| |
| for (auto &IncomingConstantAndCostsAndCount : CostsAndCounts) { |
| ConstantInt *IncomingC = IncomingConstantAndCostsAndCount.first; |
| int MatCost = IncomingConstantAndCostsAndCount.second.MatCost; |
| int &FoldedCost = IncomingConstantAndCostsAndCount.second.FoldedCost; |
| if (IID) |
| FoldedCost += TTI.getIntImmCost(IID, Idx, IncomingC->getValue(), |
| IncomingC->getType()); |
| else |
| FoldedCost += |
| TTI.getIntImmCost(UserI->getOpcode(), Idx, IncomingC->getValue(), |
| IncomingC->getType()); |
| |
| // If we accumulate more folded cost for this incoming constant than |
| // materialized cost, then we'll regress any edge with this constant so |
| // just bail. We're only interested in cases where folding the incoming |
| // constants is at least break-even on all paths. |
| if (FoldedCost > MatCost) { |
| LLVM_DEBUG(dbgs() << " Not profitable to fold imm: " << *IncomingC |
| << "\n" |
| " Materializing cost: " |
| << MatCost |
| << "\n" |
| " Accumulated folded cost: " |
| << FoldedCost << "\n"); |
| return false; |
| } |
| } |
| } |
| |
| // Compute the total cost savings afforded by this PHI node. |
| int TotalMatCost = TTI.TCC_Free, TotalFoldedCost = TTI.TCC_Free; |
| for (auto IncomingConstantAndCostsAndCount : CostsAndCounts) { |
| int MatCost = IncomingConstantAndCostsAndCount.second.MatCost; |
| int FoldedCost = IncomingConstantAndCostsAndCount.second.FoldedCost; |
| int Count = IncomingConstantAndCostsAndCount.second.Count; |
| |
| TotalMatCost += MatCost * Count; |
| TotalFoldedCost += FoldedCost * Count; |
| } |
| assert(TotalFoldedCost <= TotalMatCost && "If each constant's folded cost is " |
| "less that its materialized cost, " |
| "the sum must be as well."); |
| |
| LLVM_DEBUG(dbgs() << " Cost savings " << (TotalMatCost - TotalFoldedCost) |
| << ": " << PN << "\n"); |
| CostSavingsMap[&PN] = TotalMatCost - TotalFoldedCost; |
| return true; |
| } |
| |
| /// Simple helper to walk all the users of a list of phis depth first, and call |
| /// a visit function on each one in post-order. |
| /// |
| /// All of the PHIs should be in the same basic block, and this is primarily |
| /// used to make a single depth-first walk across their collective users |
| /// without revisiting any subgraphs. Callers should provide a fast, idempotent |
| /// callable to test whether a node has been visited and the more important |
| /// callable to actually visit a particular node. |
| /// |
| /// Depth-first and postorder here refer to the *operand* graph -- we start |
| /// from a collection of users of PHI nodes and walk "up" the operands |
| /// depth-first. |
| template <typename IsVisitedT, typename VisitT> |
| static void visitPHIUsersAndDepsInPostOrder(ArrayRef<PHINode *> PNs, |
| IsVisitedT IsVisited, |
| VisitT Visit) { |
| SmallVector<std::pair<Instruction *, User::value_op_iterator>, 16> DFSStack; |
| for (auto *PN : PNs) |
| for (Use &U : PN->uses()) { |
| auto *UI = cast<Instruction>(U.getUser()); |
| if (IsVisited(UI)) |
| // Already visited this user, continue across the roots. |
| continue; |
| |
| // Otherwise, walk the operand graph depth-first and visit each |
| // dependency in postorder. |
| DFSStack.push_back({UI, UI->value_op_begin()}); |
| do { |
| User::value_op_iterator OpIt; |
| std::tie(UI, OpIt) = DFSStack.pop_back_val(); |
| while (OpIt != UI->value_op_end()) { |
| auto *OpI = dyn_cast<Instruction>(*OpIt); |
| // Increment to the next operand for whenever we continue. |
| ++OpIt; |
| // No need to visit non-instructions, which can't form dependencies, |
| // or instructions outside of our potential dependency set that we |
| // were given. Finally, if we've already visited the node, continue |
| // to the next. |
| if (!OpI || IsVisited(OpI)) |
| continue; |
| |
| // Push onto the stack and descend. We can directly continue this |
| // loop when ascending. |
| DFSStack.push_back({UI, OpIt}); |
| UI = OpI; |
| OpIt = OpI->value_op_begin(); |
| } |
| |
| // Finished visiting children, visit this node. |
| assert(!IsVisited(UI) && "Should not have already visited a node!"); |
| Visit(UI); |
| } while (!DFSStack.empty()); |
| } |
| } |
| |
| /// Find profitable PHIs to speculate. |
| /// |
| /// For a PHI node to be profitable, we need the cost of speculating its users |
| /// (and their dependencies) to not exceed the savings of folding the PHI's |
| /// constant operands into the speculated users. |
| /// |
| /// Computing this is surprisingly challenging. Because users of two different |
| /// PHI nodes can depend on each other or on common other instructions, it may |
| /// be profitable to speculate two PHI nodes together even though neither one |
| /// in isolation is profitable. The straightforward way to find all the |
| /// profitable PHIs would be to check each combination of PHIs' cost, but this |
| /// is exponential in complexity. |
| /// |
| /// Even if we assume that we only care about cases where we can consider each |
| /// PHI node in isolation (rather than considering cases where none are |
| /// profitable in isolation but some subset are profitable as a set), we still |
| /// have a challenge. The obvious way to find all individually profitable PHIs |
| /// is to iterate until reaching a fixed point, but this will be quadratic in |
| /// complexity. =/ |
| /// |
| /// This code currently uses a linear-to-compute order for a greedy approach. |
| /// It won't find cases where a set of PHIs must be considered together, but it |
| /// handles most cases of order dependence without quadratic iteration. The |
| /// specific order used is the post-order across the operand DAG. When the last |
| /// user of a PHI is visited in this postorder walk, we check it for |
| /// profitability. |
| /// |
| /// There is an orthogonal extra complexity to all of this: computing the cost |
| /// itself can easily become a linear computation making everything again (at |
| /// best) quadratic. Using a postorder over the operand graph makes it |
| /// particularly easy to avoid this through dynamic programming. As we do the |
| /// postorder walk, we build the transitive cost of that subgraph. It is also |
| /// straightforward to then update these costs when we mark a PHI for |
| /// speculation so that subsequent PHIs don't re-pay the cost of already |
| /// speculated instructions. |
| static SmallVector<PHINode *, 16> |
| findProfitablePHIs(ArrayRef<PHINode *> PNs, |
| const SmallDenseMap<PHINode *, int, 16> &CostSavingsMap, |
| const SmallPtrSetImpl<Instruction *> &PotentialSpecSet, |
| int NumPreds, DominatorTree &DT, TargetTransformInfo &TTI) { |
| SmallVector<PHINode *, 16> SpecPNs; |
| |
| // First, establish a reverse mapping from immediate users of the PHI nodes |
| // to the nodes themselves, and count how many users each PHI node has in |
| // a way we can update while processing them. |
| SmallDenseMap<Instruction *, TinyPtrVector<PHINode *>, 16> UserToPNMap; |
| SmallDenseMap<PHINode *, int, 16> PNUserCountMap; |
| SmallPtrSet<Instruction *, 16> UserSet; |
| for (auto *PN : PNs) { |
| assert(UserSet.empty() && "Must start with an empty user set!"); |
| for (Use &U : PN->uses()) |
| UserSet.insert(cast<Instruction>(U.getUser())); |
| PNUserCountMap[PN] = UserSet.size(); |
| for (auto *UI : UserSet) |
| UserToPNMap.insert({UI, {}}).first->second.push_back(PN); |
| UserSet.clear(); |
| } |
| |
| // Now do a DFS across the operand graph of the users, computing cost as we |
| // go and when all costs for a given PHI are known, checking that PHI for |
| // profitability. |
| SmallDenseMap<Instruction *, int, 16> SpecCostMap; |
| visitPHIUsersAndDepsInPostOrder( |
| PNs, |
| /*IsVisited*/ |
| [&](Instruction *I) { |
| // We consider anything that isn't potentially speculated to be |
| // "visited" as it is already handled. Similarly, anything that *is* |
| // potentially speculated but for which we have an entry in our cost |
| // map, we're done. |
| return !PotentialSpecSet.count(I) || SpecCostMap.count(I); |
| }, |
| /*Visit*/ |
| [&](Instruction *I) { |
| // We've fully visited the operands, so sum their cost with this node |
| // and update the cost map. |
| int Cost = TTI.TCC_Free; |
| for (Value *OpV : I->operand_values()) |
| if (auto *OpI = dyn_cast<Instruction>(OpV)) { |
| auto CostMapIt = SpecCostMap.find(OpI); |
| if (CostMapIt != SpecCostMap.end()) |
| Cost += CostMapIt->second; |
| } |
| Cost += TTI.getUserCost(I); |
| bool Inserted = SpecCostMap.insert({I, Cost}).second; |
| (void)Inserted; |
| assert(Inserted && "Must not re-insert a cost during the DFS!"); |
| |
| // Now check if this node had a corresponding PHI node using it. If so, |
| // we need to decrement the outstanding user count for it. |
| auto UserPNsIt = UserToPNMap.find(I); |
| if (UserPNsIt == UserToPNMap.end()) |
| return; |
| auto &UserPNs = UserPNsIt->second; |
| auto UserPNsSplitIt = std::stable_partition( |
| UserPNs.begin(), UserPNs.end(), [&](PHINode *UserPN) { |
| int &PNUserCount = PNUserCountMap.find(UserPN)->second; |
| assert( |
| PNUserCount > 0 && |
| "Should never re-visit a PN after its user count hits zero!"); |
| --PNUserCount; |
| return PNUserCount != 0; |
| }); |
| |
| // FIXME: Rather than one at a time, we should sum the savings as the |
| // cost will be completely shared. |
| SmallVector<Instruction *, 16> SpecWorklist; |
| for (auto *PN : llvm::make_range(UserPNsSplitIt, UserPNs.end())) { |
| int SpecCost = TTI.TCC_Free; |
| for (Use &U : PN->uses()) |
| SpecCost += |
| SpecCostMap.find(cast<Instruction>(U.getUser()))->second; |
| SpecCost *= (NumPreds - 1); |
| // When the user count of a PHI node hits zero, we should check its |
| // profitability. If profitable, we should mark it for speculation |
| // and zero out the cost of everything it depends on. |
| int CostSavings = CostSavingsMap.find(PN)->second; |
| if (SpecCost > CostSavings) { |
| LLVM_DEBUG(dbgs() << " Not profitable, speculation cost: " << *PN |
| << "\n" |
| " Cost savings: " |
| << CostSavings |
| << "\n" |
| " Speculation cost: " |
| << SpecCost << "\n"); |
| continue; |
| } |
| |
| // We're going to speculate this user-associated PHI. Copy it out and |
| // add its users to the worklist to update their cost. |
| SpecPNs.push_back(PN); |
| for (Use &U : PN->uses()) { |
| auto *UI = cast<Instruction>(U.getUser()); |
| auto CostMapIt = SpecCostMap.find(UI); |
| if (CostMapIt->second == 0) |
| continue; |
| // Zero out this cost entry to avoid duplicates. |
| CostMapIt->second = 0; |
| SpecWorklist.push_back(UI); |
| } |
| } |
| |
| // Now walk all the operands of the users in the worklist transitively |
| // to zero out all the memoized costs. |
| while (!SpecWorklist.empty()) { |
| Instruction *SpecI = SpecWorklist.pop_back_val(); |
| assert(SpecCostMap.find(SpecI)->second == 0 && |
| "Didn't zero out a cost!"); |
| |
| // Walk the operands recursively to zero out their cost as well. |
| for (auto *OpV : SpecI->operand_values()) { |
| auto *OpI = dyn_cast<Instruction>(OpV); |
| if (!OpI) |
| continue; |
| auto CostMapIt = SpecCostMap.find(OpI); |
| if (CostMapIt == SpecCostMap.end() || CostMapIt->second == 0) |
| continue; |
| CostMapIt->second = 0; |
| SpecWorklist.push_back(OpI); |
| } |
| } |
| }); |
| |
| return SpecPNs; |
| } |
| |
| /// Speculate users around a set of PHI nodes. |
| /// |
| /// This routine does the actual speculation around a set of PHI nodes where we |
| /// have determined this to be both safe and profitable. |
| /// |
| /// This routine handles any spliting of critical edges necessary to create |
| /// a safe block to speculate into as well as cloning the instructions and |
| /// rewriting all uses. |
| static void speculatePHIs(ArrayRef<PHINode *> SpecPNs, |
| SmallPtrSetImpl<Instruction *> &PotentialSpecSet, |
| SmallSetVector<BasicBlock *, 16> &PredSet, |
| DominatorTree &DT) { |
| LLVM_DEBUG(dbgs() << " Speculating around " << SpecPNs.size() << " PHIs!\n"); |
| NumPHIsSpeculated += SpecPNs.size(); |
| |
| // Split any critical edges so that we have a block to hoist into. |
| auto *ParentBB = SpecPNs[0]->getParent(); |
| SmallVector<BasicBlock *, 16> SpecPreds; |
| SpecPreds.reserve(PredSet.size()); |
| for (auto *PredBB : PredSet) { |
| auto *NewPredBB = SplitCriticalEdge( |
| PredBB, ParentBB, |
| CriticalEdgeSplittingOptions(&DT).setMergeIdenticalEdges()); |
| if (NewPredBB) { |
| ++NumEdgesSplit; |
| LLVM_DEBUG(dbgs() << " Split critical edge from: " << PredBB->getName() |
| << "\n"); |
| SpecPreds.push_back(NewPredBB); |
| } else { |
| assert(PredBB->getSingleSuccessor() == ParentBB && |
| "We need a non-critical predecessor to speculate into."); |
| assert(!isa<InvokeInst>(PredBB->getTerminator()) && |
| "Cannot have a non-critical invoke!"); |
| |
| // Already non-critical, use existing pred. |
| SpecPreds.push_back(PredBB); |
| } |
| } |
| |
| SmallPtrSet<Instruction *, 16> SpecSet; |
| SmallVector<Instruction *, 16> SpecList; |
| visitPHIUsersAndDepsInPostOrder(SpecPNs, |
| /*IsVisited*/ |
| [&](Instruction *I) { |
| // This is visited if we don't need to |
| // speculate it or we already have |
| // speculated it. |
| return !PotentialSpecSet.count(I) || |
| SpecSet.count(I); |
| }, |
| /*Visit*/ |
| [&](Instruction *I) { |
| // All operands scheduled, schedule this |
| // node. |
| SpecSet.insert(I); |
| SpecList.push_back(I); |
| }); |
| |
| int NumSpecInsts = SpecList.size() * SpecPreds.size(); |
| int NumRedundantInsts = NumSpecInsts - SpecList.size(); |
| LLVM_DEBUG(dbgs() << " Inserting " << NumSpecInsts |
| << " speculated instructions, " << NumRedundantInsts |
| << " redundancies\n"); |
| NumSpeculatedInstructions += NumSpecInsts; |
| NumNewRedundantInstructions += NumRedundantInsts; |
| |
| // Each predecessor is numbered by its index in `SpecPreds`, so for each |
| // instruction we speculate, the speculated instruction is stored in that |
| // index of the vector associated with the original instruction. We also |
| // store the incoming values for each predecessor from any PHIs used. |
| SmallDenseMap<Instruction *, SmallVector<Value *, 2>, 16> SpeculatedValueMap; |
| |
| // Inject the synthetic mappings to rewrite PHIs to the appropriate incoming |
| // value. This handles both the PHIs we are speculating around and any other |
| // PHIs that happen to be used. |
| for (auto *OrigI : SpecList) |
| for (auto *OpV : OrigI->operand_values()) { |
| auto *OpPN = dyn_cast<PHINode>(OpV); |
| if (!OpPN || OpPN->getParent() != ParentBB) |
| continue; |
| |
| auto InsertResult = SpeculatedValueMap.insert({OpPN, {}}); |
| if (!InsertResult.second) |
| continue; |
| |
| auto &SpeculatedVals = InsertResult.first->second; |
| |
| // Populating our structure for mapping is particularly annoying because |
| // finding an incoming value for a particular predecessor block in a PHI |
| // node is a linear time operation! To avoid quadratic behavior, we build |
| // a map for this PHI node's incoming values and then translate it into |
| // the more compact representation used below. |
| SmallDenseMap<BasicBlock *, Value *, 16> IncomingValueMap; |
| for (int i : llvm::seq<int>(0, OpPN->getNumIncomingValues())) |
| IncomingValueMap[OpPN->getIncomingBlock(i)] = OpPN->getIncomingValue(i); |
| |
| for (auto *PredBB : SpecPreds) |
| SpeculatedVals.push_back(IncomingValueMap.find(PredBB)->second); |
| } |
| |
| // Speculate into each predecessor. |
| for (int PredIdx : llvm::seq<int>(0, SpecPreds.size())) { |
| auto *PredBB = SpecPreds[PredIdx]; |
| assert(PredBB->getSingleSuccessor() == ParentBB && |
| "We need a non-critical predecessor to speculate into."); |
| |
| for (auto *OrigI : SpecList) { |
| auto *NewI = OrigI->clone(); |
| NewI->setName(Twine(OrigI->getName()) + "." + Twine(PredIdx)); |
| NewI->insertBefore(PredBB->getTerminator()); |
| |
| // Rewrite all the operands to the previously speculated instructions. |
| // Because we're walking in-order, the defs must precede the uses and we |
| // should already have these mappings. |
| for (Use &U : NewI->operands()) { |
| auto *OpI = dyn_cast<Instruction>(U.get()); |
| if (!OpI) |
| continue; |
| auto MapIt = SpeculatedValueMap.find(OpI); |
| if (MapIt == SpeculatedValueMap.end()) |
| continue; |
| const auto &SpeculatedVals = MapIt->second; |
| assert(SpeculatedVals[PredIdx] && |
| "Must have a speculated value for this predecessor!"); |
| assert(SpeculatedVals[PredIdx]->getType() == OpI->getType() && |
| "Speculated value has the wrong type!"); |
| |
| // Rewrite the use to this predecessor's speculated instruction. |
| U.set(SpeculatedVals[PredIdx]); |
| } |
| |
| // Commute instructions which now have a constant in the LHS but not the |
| // RHS. |
| if (NewI->isBinaryOp() && NewI->isCommutative() && |
| isa<Constant>(NewI->getOperand(0)) && |
| !isa<Constant>(NewI->getOperand(1))) |
| NewI->getOperandUse(0).swap(NewI->getOperandUse(1)); |
| |
| SpeculatedValueMap[OrigI].push_back(NewI); |
| assert(SpeculatedValueMap[OrigI][PredIdx] == NewI && |
| "Mismatched speculated instruction index!"); |
| } |
| } |
| |
| // Walk the speculated instruction list and if they have uses, insert a PHI |
| // for them from the speculated versions, and replace the uses with the PHI. |
| // Then erase the instructions as they have been fully speculated. The walk |
| // needs to be in reverse so that we don't think there are users when we'll |
| // actually eventually remove them later. |
| IRBuilder<> IRB(SpecPNs[0]); |
| for (auto *OrigI : llvm::reverse(SpecList)) { |
| // Check if we need a PHI for any remaining users and if so, insert it. |
| if (!OrigI->use_empty()) { |
| auto *SpecIPN = IRB.CreatePHI(OrigI->getType(), SpecPreds.size(), |
| Twine(OrigI->getName()) + ".phi"); |
| // Add the incoming values we speculated. |
| auto &SpeculatedVals = SpeculatedValueMap.find(OrigI)->second; |
| for (int PredIdx : llvm::seq<int>(0, SpecPreds.size())) |
| SpecIPN->addIncoming(SpeculatedVals[PredIdx], SpecPreds[PredIdx]); |
| |
| // And replace the uses with the PHI node. |
| OrigI->replaceAllUsesWith(SpecIPN); |
| } |
| |
| // It is important to immediately erase this so that it stops using other |
| // instructions. This avoids inserting needless PHIs of them. |
| OrigI->eraseFromParent(); |
| } |
| |
| // All of the uses of the speculated phi nodes should be removed at this |
| // point, so erase them. |
| for (auto *SpecPN : SpecPNs) { |
| assert(SpecPN->use_empty() && "All users should have been speculated!"); |
| SpecPN->eraseFromParent(); |
| } |
| } |
| |
| /// Try to speculate around a series of PHIs from a single basic block. |
| /// |
| /// This routine checks whether any of these PHIs are profitable to speculate |
| /// users around. If safe and profitable, it does the speculation. It returns |
| /// true when at least some speculation occurs. |
| static bool tryToSpeculatePHIs(SmallVectorImpl<PHINode *> &PNs, |
| DominatorTree &DT, TargetTransformInfo &TTI) { |
| LLVM_DEBUG(dbgs() << "Evaluating phi nodes for speculation:\n"); |
| |
| // Savings in cost from speculating around a PHI node. |
| SmallDenseMap<PHINode *, int, 16> CostSavingsMap; |
| |
| // Remember the set of instructions that are candidates for speculation so |
| // that we can quickly walk things within that space. This prunes out |
| // instructions already available along edges, etc. |
| SmallPtrSet<Instruction *, 16> PotentialSpecSet; |
| |
| // Remember the set of instructions that are (transitively) unsafe to |
| // speculate into the incoming edges of this basic block. This avoids |
| // recomputing them for each PHI node we check. This set is specific to this |
| // block though as things are pruned out of it based on what is available |
| // along incoming edges. |
| SmallPtrSet<Instruction *, 16> UnsafeSet; |
| |
| // For each PHI node in this block, check whether there are immediate folding |
| // opportunities from speculation, and whether that speculation will be |
| // valid. This determise the set of safe PHIs to speculate. |
| PNs.erase(llvm::remove_if(PNs, |
| [&](PHINode *PN) { |
| return !isSafeAndProfitableToSpeculateAroundPHI( |
| *PN, CostSavingsMap, PotentialSpecSet, |
| UnsafeSet, DT, TTI); |
| }), |
| PNs.end()); |
| // If no PHIs were profitable, skip. |
| if (PNs.empty()) { |
| LLVM_DEBUG(dbgs() << " No safe and profitable PHIs found!\n"); |
| return false; |
| } |
| |
| // We need to know how much speculation will cost which is determined by how |
| // many incoming edges will need a copy of each speculated instruction. |
| SmallSetVector<BasicBlock *, 16> PredSet; |
| for (auto *PredBB : PNs[0]->blocks()) { |
| if (!PredSet.insert(PredBB)) |
| continue; |
| |
| // We cannot speculate when a predecessor is an indirect branch. |
| // FIXME: We also can't reliably create a non-critical edge block for |
| // speculation if the predecessor is an invoke. This doesn't seem |
| // fundamental and we should probably be splitting critical edges |
| // differently. |
| if (isa<IndirectBrInst>(PredBB->getTerminator()) || |
| isa<InvokeInst>(PredBB->getTerminator())) { |
| LLVM_DEBUG(dbgs() << " Invalid: predecessor terminator: " |
| << PredBB->getName() << "\n"); |
| return false; |
| } |
| } |
| if (PredSet.size() < 2) { |
| LLVM_DEBUG(dbgs() << " Unimportant: phi with only one predecessor\n"); |
| return false; |
| } |
| |
| SmallVector<PHINode *, 16> SpecPNs = findProfitablePHIs( |
| PNs, CostSavingsMap, PotentialSpecSet, PredSet.size(), DT, TTI); |
| if (SpecPNs.empty()) |
| // Nothing to do. |
| return false; |
| |
| speculatePHIs(SpecPNs, PotentialSpecSet, PredSet, DT); |
| return true; |
| } |
| |
| PreservedAnalyses SpeculateAroundPHIsPass::run(Function &F, |
| FunctionAnalysisManager &AM) { |
| auto &DT = AM.getResult<DominatorTreeAnalysis>(F); |
| auto &TTI = AM.getResult<TargetIRAnalysis>(F); |
| |
| bool Changed = false; |
| for (auto *BB : ReversePostOrderTraversal<Function *>(&F)) { |
| SmallVector<PHINode *, 16> PNs; |
| auto BBI = BB->begin(); |
| while (auto *PN = dyn_cast<PHINode>(&*BBI)) { |
| PNs.push_back(PN); |
| ++BBI; |
| } |
| |
| if (PNs.empty()) |
| continue; |
| |
| Changed |= tryToSpeculatePHIs(PNs, DT, TTI); |
| } |
| |
| if (!Changed) |
| return PreservedAnalyses::all(); |
| |
| PreservedAnalyses PA; |
| return PA; |
| } |