|  | //===- DFAJumpThreading.cpp - Threads a switch statement inside a loop ----===// | 
|  | // | 
|  | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|  | // See https://llvm.org/LICENSE.txt for license information. | 
|  | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  | // | 
|  | // Transform each threading path to effectively jump thread the DFA. For | 
|  | // example, the CFG below could be transformed as follows, where the cloned | 
|  | // blocks unconditionally branch to the next correct case based on what is | 
|  | // identified in the analysis. | 
|  | // | 
|  | //          sw.bb                        sw.bb | 
|  | //        /   |   \                    /   |   \ | 
|  | //   case1  case2  case3          case1  case2  case3 | 
|  | //        \   |   /                 |      |      | | 
|  | //       determinator            det.2   det.3  det.1 | 
|  | //        br sw.bb                /        |        \ | 
|  | //                          sw.bb.2     sw.bb.3     sw.bb.1 | 
|  | //                           br case2    br case3    br case1ยง | 
|  | // | 
|  | // Definitions and Terminology: | 
|  | // | 
|  | // * Threading path: | 
|  | //   a list of basic blocks, the exit state, and the block that determines | 
|  | //   the next state, for which the following notation will be used: | 
|  | //   < path of BBs that form a cycle > [ state, determinator ] | 
|  | // | 
|  | // * Predictable switch: | 
|  | //   The switch variable is always a known constant so that all conditional | 
|  | //   jumps based on switch variable can be converted to unconditional jump. | 
|  | // | 
|  | // * Determinator: | 
|  | //   The basic block that determines the next state of the DFA. | 
|  | // | 
|  | // Representing the optimization in C-like pseudocode: the code pattern on the | 
|  | // left could functionally be transformed to the right pattern if the switch | 
|  | // condition is predictable. | 
|  | // | 
|  | //  X = A                       goto A | 
|  | //  for (...)                   A: | 
|  | //    switch (X)                  ... | 
|  | //      case A                    goto B | 
|  | //        X = B                 B: | 
|  | //      case B                    ... | 
|  | //        X = C                   goto C | 
|  | // | 
|  | // The pass first checks that switch variable X is decided by the control flow | 
|  | // path taken in the loop; for example, in case B, the next value of X is | 
|  | // decided to be C. It then enumerates through all paths in the loop and labels | 
|  | // the basic blocks where the next state is decided. | 
|  | // | 
|  | // Using this information it creates new paths that unconditionally branch to | 
|  | // the next case. This involves cloning code, so it only gets triggered if the | 
|  | // amount of code duplicated is below a threshold. | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | #include "llvm/Transforms/Scalar/DFAJumpThreading.h" | 
|  | #include "llvm/ADT/APInt.h" | 
|  | #include "llvm/ADT/DenseMap.h" | 
|  | #include "llvm/ADT/SmallSet.h" | 
|  | #include "llvm/ADT/Statistic.h" | 
|  | #include "llvm/Analysis/AssumptionCache.h" | 
|  | #include "llvm/Analysis/CodeMetrics.h" | 
|  | #include "llvm/Analysis/DomTreeUpdater.h" | 
|  | #include "llvm/Analysis/LoopInfo.h" | 
|  | #include "llvm/Analysis/OptimizationRemarkEmitter.h" | 
|  | #include "llvm/Analysis/TargetTransformInfo.h" | 
|  | #include "llvm/IR/CFG.h" | 
|  | #include "llvm/IR/Constants.h" | 
|  | #include "llvm/IR/IntrinsicInst.h" | 
|  | #include "llvm/Support/CommandLine.h" | 
|  | #include "llvm/Support/Debug.h" | 
|  | #include "llvm/Transforms/Utils/Cloning.h" | 
|  | #include "llvm/Transforms/Utils/SSAUpdaterBulk.h" | 
|  | #include "llvm/Transforms/Utils/ValueMapper.h" | 
|  | #include <deque> | 
|  |  | 
|  | #ifdef EXPENSIVE_CHECKS | 
|  | #include "llvm/IR/Verifier.h" | 
|  | #endif | 
|  |  | 
|  | using namespace llvm; | 
|  |  | 
|  | #define DEBUG_TYPE "dfa-jump-threading" | 
|  |  | 
|  | STATISTIC(NumTransforms, "Number of transformations done"); | 
|  | STATISTIC(NumCloned, "Number of blocks cloned"); | 
|  | STATISTIC(NumPaths, "Number of individual paths threaded"); | 
|  |  | 
|  | static cl::opt<bool> | 
|  | ClViewCfgBefore("dfa-jump-view-cfg-before", | 
|  | cl::desc("View the CFG before DFA Jump Threading"), | 
|  | cl::Hidden, cl::init(false)); | 
|  |  | 
|  | static cl::opt<bool> EarlyExitHeuristic( | 
|  | "dfa-early-exit-heuristic", | 
|  | cl::desc("Exit early if an unpredictable value come from the same loop"), | 
|  | cl::Hidden, cl::init(true)); | 
|  |  | 
|  | static cl::opt<unsigned> MaxPathLength( | 
|  | "dfa-max-path-length", | 
|  | cl::desc("Max number of blocks searched to find a threading path"), | 
|  | cl::Hidden, cl::init(20)); | 
|  |  | 
|  | static cl::opt<unsigned> MaxNumVisitiedPaths( | 
|  | "dfa-max-num-visited-paths", | 
|  | cl::desc( | 
|  | "Max number of blocks visited while enumerating paths around a switch"), | 
|  | cl::Hidden, cl::init(2500)); | 
|  |  | 
|  | static cl::opt<unsigned> | 
|  | MaxNumPaths("dfa-max-num-paths", | 
|  | cl::desc("Max number of paths enumerated around a switch"), | 
|  | cl::Hidden, cl::init(200)); | 
|  |  | 
|  | static cl::opt<unsigned> | 
|  | CostThreshold("dfa-cost-threshold", | 
|  | cl::desc("Maximum cost accepted for the transformation"), | 
|  | cl::Hidden, cl::init(50)); | 
|  |  | 
|  | namespace { | 
|  |  | 
|  | class SelectInstToUnfold { | 
|  | SelectInst *SI; | 
|  | PHINode *SIUse; | 
|  |  | 
|  | public: | 
|  | SelectInstToUnfold(SelectInst *SI, PHINode *SIUse) : SI(SI), SIUse(SIUse) {} | 
|  |  | 
|  | SelectInst *getInst() { return SI; } | 
|  | PHINode *getUse() { return SIUse; } | 
|  |  | 
|  | explicit operator bool() const { return SI && SIUse; } | 
|  | }; | 
|  |  | 
|  | void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold, | 
|  | std::vector<SelectInstToUnfold> *NewSIsToUnfold, | 
|  | std::vector<BasicBlock *> *NewBBs); | 
|  |  | 
|  | class DFAJumpThreading { | 
|  | public: | 
|  | DFAJumpThreading(AssumptionCache *AC, DominatorTree *DT, LoopInfo *LI, | 
|  | TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE) | 
|  | : AC(AC), DT(DT), LI(LI), TTI(TTI), ORE(ORE) {} | 
|  |  | 
|  | bool run(Function &F); | 
|  | bool LoopInfoBroken; | 
|  |  | 
|  | private: | 
|  | void | 
|  | unfoldSelectInstrs(DominatorTree *DT, | 
|  | const SmallVector<SelectInstToUnfold, 4> &SelectInsts) { | 
|  | DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); | 
|  | SmallVector<SelectInstToUnfold, 4> Stack(SelectInsts); | 
|  |  | 
|  | while (!Stack.empty()) { | 
|  | SelectInstToUnfold SIToUnfold = Stack.pop_back_val(); | 
|  |  | 
|  | std::vector<SelectInstToUnfold> NewSIsToUnfold; | 
|  | std::vector<BasicBlock *> NewBBs; | 
|  | unfold(&DTU, LI, SIToUnfold, &NewSIsToUnfold, &NewBBs); | 
|  |  | 
|  | // Put newly discovered select instructions into the work list. | 
|  | for (const SelectInstToUnfold &NewSIToUnfold : NewSIsToUnfold) | 
|  | Stack.push_back(NewSIToUnfold); | 
|  | } | 
|  | } | 
|  |  | 
|  | AssumptionCache *AC; | 
|  | DominatorTree *DT; | 
|  | LoopInfo *LI; | 
|  | TargetTransformInfo *TTI; | 
|  | OptimizationRemarkEmitter *ORE; | 
|  | }; | 
|  |  | 
|  | } // end anonymous namespace | 
|  |  | 
|  | namespace { | 
|  |  | 
|  | /// Unfold the select instruction held in \p SIToUnfold by replacing it with | 
|  | /// control flow. | 
|  | /// | 
|  | /// Put newly discovered select instructions into \p NewSIsToUnfold. Put newly | 
|  | /// created basic blocks into \p NewBBs. | 
|  | /// | 
|  | /// TODO: merge it with CodeGenPrepare::optimizeSelectInst() if possible. | 
|  | void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold, | 
|  | std::vector<SelectInstToUnfold> *NewSIsToUnfold, | 
|  | std::vector<BasicBlock *> *NewBBs) { | 
|  | SelectInst *SI = SIToUnfold.getInst(); | 
|  | PHINode *SIUse = SIToUnfold.getUse(); | 
|  | BasicBlock *StartBlock = SI->getParent(); | 
|  | BranchInst *StartBlockTerm = | 
|  | dyn_cast<BranchInst>(StartBlock->getTerminator()); | 
|  |  | 
|  | assert(StartBlockTerm); | 
|  | assert(SI->hasOneUse()); | 
|  |  | 
|  | if (StartBlockTerm->isUnconditional()) { | 
|  | BasicBlock *EndBlock = StartBlock->getUniqueSuccessor(); | 
|  | // Arbitrarily choose the 'false' side for a new input value to the PHI. | 
|  | BasicBlock *NewBlock = BasicBlock::Create( | 
|  | SI->getContext(), Twine(SI->getName(), ".si.unfold.false"), | 
|  | EndBlock->getParent(), EndBlock); | 
|  | NewBBs->push_back(NewBlock); | 
|  | BranchInst::Create(EndBlock, NewBlock); | 
|  | DTU->applyUpdates({{DominatorTree::Insert, NewBlock, EndBlock}}); | 
|  |  | 
|  | // StartBlock | 
|  | //   |  \ | 
|  | //   |  NewBlock | 
|  | //   |  / | 
|  | // EndBlock | 
|  | Value *SIOp1 = SI->getTrueValue(); | 
|  | Value *SIOp2 = SI->getFalseValue(); | 
|  |  | 
|  | PHINode *NewPhi = PHINode::Create(SIUse->getType(), 1, | 
|  | Twine(SIOp2->getName(), ".si.unfold.phi"), | 
|  | NewBlock->getFirstInsertionPt()); | 
|  | NewPhi->addIncoming(SIOp2, StartBlock); | 
|  |  | 
|  | // Update any other PHI nodes in EndBlock. | 
|  | for (PHINode &Phi : EndBlock->phis()) { | 
|  | if (SIUse == &Phi) | 
|  | continue; | 
|  | Phi.addIncoming(Phi.getIncomingValueForBlock(StartBlock), NewBlock); | 
|  | } | 
|  |  | 
|  | // Update the phi node of SI, which is its only use. | 
|  | if (EndBlock == SIUse->getParent()) { | 
|  | SIUse->addIncoming(NewPhi, NewBlock); | 
|  | SIUse->replaceUsesOfWith(SI, SIOp1); | 
|  | } else { | 
|  | PHINode *EndPhi = PHINode::Create(SIUse->getType(), pred_size(EndBlock), | 
|  | Twine(SI->getName(), ".si.unfold.phi"), | 
|  | EndBlock->getFirstInsertionPt()); | 
|  | for (BasicBlock *Pred : predecessors(EndBlock)) { | 
|  | if (Pred != StartBlock && Pred != NewBlock) | 
|  | EndPhi->addIncoming(EndPhi, Pred); | 
|  | } | 
|  |  | 
|  | EndPhi->addIncoming(SIOp1, StartBlock); | 
|  | EndPhi->addIncoming(NewPhi, NewBlock); | 
|  | SIUse->replaceUsesOfWith(SI, EndPhi); | 
|  | SIUse = EndPhi; | 
|  | } | 
|  |  | 
|  | if (auto *OpSi = dyn_cast<SelectInst>(SIOp1)) | 
|  | NewSIsToUnfold->push_back(SelectInstToUnfold(OpSi, SIUse)); | 
|  | if (auto *OpSi = dyn_cast<SelectInst>(SIOp2)) | 
|  | NewSIsToUnfold->push_back(SelectInstToUnfold(OpSi, NewPhi)); | 
|  |  | 
|  | // Insert the real conditional branch based on the original condition. | 
|  | StartBlockTerm->eraseFromParent(); | 
|  | BranchInst::Create(EndBlock, NewBlock, SI->getCondition(), StartBlock); | 
|  | DTU->applyUpdates({{DominatorTree::Insert, StartBlock, EndBlock}, | 
|  | {DominatorTree::Insert, StartBlock, NewBlock}}); | 
|  | } else { | 
|  | BasicBlock *EndBlock = SIUse->getParent(); | 
|  | BasicBlock *NewBlockT = BasicBlock::Create( | 
|  | SI->getContext(), Twine(SI->getName(), ".si.unfold.true"), | 
|  | EndBlock->getParent(), EndBlock); | 
|  | BasicBlock *NewBlockF = BasicBlock::Create( | 
|  | SI->getContext(), Twine(SI->getName(), ".si.unfold.false"), | 
|  | EndBlock->getParent(), EndBlock); | 
|  |  | 
|  | NewBBs->push_back(NewBlockT); | 
|  | NewBBs->push_back(NewBlockF); | 
|  |  | 
|  | // Def only has one use in EndBlock. | 
|  | // Before transformation: | 
|  | // StartBlock(Def) | 
|  | //   |      \ | 
|  | // EndBlock  OtherBlock | 
|  | //  (Use) | 
|  | // | 
|  | // After transformation: | 
|  | // StartBlock(Def) | 
|  | //   |      \ | 
|  | //   |       OtherBlock | 
|  | // NewBlockT | 
|  | //   |     \ | 
|  | //   |   NewBlockF | 
|  | //   |      / | 
|  | //   |     / | 
|  | // EndBlock | 
|  | //  (Use) | 
|  | BranchInst::Create(EndBlock, NewBlockF); | 
|  | // Insert the real conditional branch based on the original condition. | 
|  | BranchInst::Create(EndBlock, NewBlockF, SI->getCondition(), NewBlockT); | 
|  | DTU->applyUpdates({{DominatorTree::Insert, NewBlockT, NewBlockF}, | 
|  | {DominatorTree::Insert, NewBlockT, EndBlock}, | 
|  | {DominatorTree::Insert, NewBlockF, EndBlock}}); | 
|  |  | 
|  | Value *TrueVal = SI->getTrueValue(); | 
|  | Value *FalseVal = SI->getFalseValue(); | 
|  |  | 
|  | PHINode *NewPhiT = PHINode::Create( | 
|  | SIUse->getType(), 1, Twine(TrueVal->getName(), ".si.unfold.phi"), | 
|  | NewBlockT->getFirstInsertionPt()); | 
|  | PHINode *NewPhiF = PHINode::Create( | 
|  | SIUse->getType(), 1, Twine(FalseVal->getName(), ".si.unfold.phi"), | 
|  | NewBlockF->getFirstInsertionPt()); | 
|  | NewPhiT->addIncoming(TrueVal, StartBlock); | 
|  | NewPhiF->addIncoming(FalseVal, NewBlockT); | 
|  |  | 
|  | if (auto *TrueSI = dyn_cast<SelectInst>(TrueVal)) | 
|  | NewSIsToUnfold->push_back(SelectInstToUnfold(TrueSI, NewPhiT)); | 
|  | if (auto *FalseSi = dyn_cast<SelectInst>(FalseVal)) | 
|  | NewSIsToUnfold->push_back(SelectInstToUnfold(FalseSi, NewPhiF)); | 
|  |  | 
|  | SIUse->addIncoming(NewPhiT, NewBlockT); | 
|  | SIUse->addIncoming(NewPhiF, NewBlockF); | 
|  | SIUse->removeIncomingValue(StartBlock); | 
|  |  | 
|  | // Update any other PHI nodes in EndBlock. | 
|  | for (PHINode &Phi : EndBlock->phis()) { | 
|  | if (SIUse == &Phi) | 
|  | continue; | 
|  | Phi.addIncoming(Phi.getIncomingValueForBlock(StartBlock), NewBlockT); | 
|  | Phi.addIncoming(Phi.getIncomingValueForBlock(StartBlock), NewBlockF); | 
|  | Phi.removeIncomingValue(StartBlock); | 
|  | } | 
|  |  | 
|  | // Update the appropriate successor of the start block to point to the new | 
|  | // unfolded block. | 
|  | unsigned SuccNum = StartBlockTerm->getSuccessor(1) == EndBlock ? 1 : 0; | 
|  | StartBlockTerm->setSuccessor(SuccNum, NewBlockT); | 
|  | DTU->applyUpdates({{DominatorTree::Delete, StartBlock, EndBlock}, | 
|  | {DominatorTree::Insert, StartBlock, NewBlockT}}); | 
|  | } | 
|  |  | 
|  | // Preserve loop info | 
|  | if (Loop *L = LI->getLoopFor(SI->getParent())) { | 
|  | for (BasicBlock *NewBB : *NewBBs) | 
|  | L->addBasicBlockToLoop(NewBB, *LI); | 
|  | } | 
|  |  | 
|  | // The select is now dead. | 
|  | assert(SI->use_empty() && "Select must be dead now"); | 
|  | SI->eraseFromParent(); | 
|  | } | 
|  |  | 
|  | struct ClonedBlock { | 
|  | BasicBlock *BB; | 
|  | APInt State; ///< \p State corresponds to the next value of a switch stmnt. | 
|  | }; | 
|  |  | 
|  | typedef std::deque<BasicBlock *> PathType; | 
|  | typedef std::vector<PathType> PathsType; | 
|  | typedef SmallPtrSet<const BasicBlock *, 8> VisitedBlocks; | 
|  | typedef std::vector<ClonedBlock> CloneList; | 
|  |  | 
|  | // This data structure keeps track of all blocks that have been cloned.  If two | 
|  | // different ThreadingPaths clone the same block for a certain state it should | 
|  | // be reused, and it can be looked up in this map. | 
|  | typedef DenseMap<BasicBlock *, CloneList> DuplicateBlockMap; | 
|  |  | 
|  | // This map keeps track of all the new definitions for an instruction. This | 
|  | // information is needed when restoring SSA form after cloning blocks. | 
|  | typedef MapVector<Instruction *, std::vector<Instruction *>> DefMap; | 
|  |  | 
|  | inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) { | 
|  | OS << "< "; | 
|  | for (const BasicBlock *BB : Path) { | 
|  | std::string BBName; | 
|  | if (BB->hasName()) | 
|  | raw_string_ostream(BBName) << BB->getName(); | 
|  | else | 
|  | raw_string_ostream(BBName) << BB; | 
|  | OS << BBName << " "; | 
|  | } | 
|  | OS << ">"; | 
|  | return OS; | 
|  | } | 
|  |  | 
|  | /// ThreadingPath is a path in the control flow of a loop that can be threaded | 
|  | /// by cloning necessary basic blocks and replacing conditional branches with | 
|  | /// unconditional ones. A threading path includes a list of basic blocks, the | 
|  | /// exit state, and the block that determines the next state. | 
|  | struct ThreadingPath { | 
|  | /// Exit value is DFA's exit state for the given path. | 
|  | APInt getExitValue() const { return ExitVal; } | 
|  | void setExitValue(const ConstantInt *V) { | 
|  | ExitVal = V->getValue(); | 
|  | IsExitValSet = true; | 
|  | } | 
|  | bool isExitValueSet() const { return IsExitValSet; } | 
|  |  | 
|  | /// Determinator is the basic block that determines the next state of the DFA. | 
|  | const BasicBlock *getDeterminatorBB() const { return DBB; } | 
|  | void setDeterminator(const BasicBlock *BB) { DBB = BB; } | 
|  |  | 
|  | /// Path is a list of basic blocks. | 
|  | const PathType &getPath() const { return Path; } | 
|  | void setPath(const PathType &NewPath) { Path = NewPath; } | 
|  | void push_back(BasicBlock *BB) { Path.push_back(BB); } | 
|  | void push_front(BasicBlock *BB) { Path.push_front(BB); } | 
|  | void appendExcludingFirst(const PathType &OtherPath) { | 
|  | Path.insert(Path.end(), OtherPath.begin() + 1, OtherPath.end()); | 
|  | } | 
|  |  | 
|  | void print(raw_ostream &OS) const { | 
|  | OS << Path << " [ " << ExitVal << ", " << DBB->getName() << " ]"; | 
|  | } | 
|  |  | 
|  | private: | 
|  | PathType Path; | 
|  | APInt ExitVal; | 
|  | const BasicBlock *DBB = nullptr; | 
|  | bool IsExitValSet = false; | 
|  | }; | 
|  |  | 
|  | #ifndef NDEBUG | 
|  | inline raw_ostream &operator<<(raw_ostream &OS, const ThreadingPath &TPath) { | 
|  | TPath.print(OS); | 
|  | return OS; | 
|  | } | 
|  | #endif | 
|  |  | 
|  | struct MainSwitch { | 
|  | MainSwitch(SwitchInst *SI, LoopInfo *LI, OptimizationRemarkEmitter *ORE) | 
|  | : LI(LI) { | 
|  | if (isCandidate(SI)) { | 
|  | Instr = SI; | 
|  | } else { | 
|  | ORE->emit([&]() { | 
|  | return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable", SI) | 
|  | << "Switch instruction is not predictable."; | 
|  | }); | 
|  | } | 
|  | } | 
|  |  | 
|  | virtual ~MainSwitch() = default; | 
|  |  | 
|  | SwitchInst *getInstr() const { return Instr; } | 
|  | const SmallVector<SelectInstToUnfold, 4> getSelectInsts() { | 
|  | return SelectInsts; | 
|  | } | 
|  |  | 
|  | private: | 
|  | /// Do a use-def chain traversal starting from the switch condition to see if | 
|  | /// \p SI is a potential condidate. | 
|  | /// | 
|  | /// Also, collect select instructions to unfold. | 
|  | bool isCandidate(const SwitchInst *SI) { | 
|  | std::deque<std::pair<Value *, BasicBlock *>> Q; | 
|  | SmallSet<Value *, 16> SeenValues; | 
|  | SelectInsts.clear(); | 
|  |  | 
|  | Value *SICond = SI->getCondition(); | 
|  | LLVM_DEBUG(dbgs() << "\tSICond: " << *SICond << "\n"); | 
|  | if (!isa<PHINode>(SICond)) | 
|  | return false; | 
|  |  | 
|  | // The switch must be in a loop. | 
|  | const Loop *L = LI->getLoopFor(SI->getParent()); | 
|  | if (!L) | 
|  | return false; | 
|  |  | 
|  | addToQueue(SICond, nullptr, Q, SeenValues); | 
|  |  | 
|  | while (!Q.empty()) { | 
|  | Value *Current = Q.front().first; | 
|  | BasicBlock *CurrentIncomingBB = Q.front().second; | 
|  | Q.pop_front(); | 
|  |  | 
|  | if (auto *Phi = dyn_cast<PHINode>(Current)) { | 
|  | for (BasicBlock *IncomingBB : Phi->blocks()) { | 
|  | Value *Incoming = Phi->getIncomingValueForBlock(IncomingBB); | 
|  | addToQueue(Incoming, IncomingBB, Q, SeenValues); | 
|  | } | 
|  | LLVM_DEBUG(dbgs() << "\tphi: " << *Phi << "\n"); | 
|  | } else if (SelectInst *SelI = dyn_cast<SelectInst>(Current)) { | 
|  | if (!isValidSelectInst(SelI)) | 
|  | return false; | 
|  | addToQueue(SelI->getTrueValue(), CurrentIncomingBB, Q, SeenValues); | 
|  | addToQueue(SelI->getFalseValue(), CurrentIncomingBB, Q, SeenValues); | 
|  | LLVM_DEBUG(dbgs() << "\tselect: " << *SelI << "\n"); | 
|  | if (auto *SelIUse = dyn_cast<PHINode>(SelI->user_back())) | 
|  | SelectInsts.push_back(SelectInstToUnfold(SelI, SelIUse)); | 
|  | } else if (isa<Constant>(Current)) { | 
|  | LLVM_DEBUG(dbgs() << "\tconst: " << *Current << "\n"); | 
|  | continue; | 
|  | } else { | 
|  | LLVM_DEBUG(dbgs() << "\tother: " << *Current << "\n"); | 
|  | // Allow unpredictable values. The hope is that those will be the | 
|  | // initial switch values that can be ignored (they will hit the | 
|  | // unthreaded switch) but this assumption will get checked later after | 
|  | // paths have been enumerated (in function getStateDefMap). | 
|  |  | 
|  | // If the unpredictable value comes from the same inner loop it is | 
|  | // likely that it will also be on the enumerated paths, causing us to | 
|  | // exit after we have enumerated all the paths. This heuristic save | 
|  | // compile time because a search for all the paths can become expensive. | 
|  | if (EarlyExitHeuristic && | 
|  | L->contains(LI->getLoopFor(CurrentIncomingBB))) { | 
|  | LLVM_DEBUG(dbgs() | 
|  | << "\tExiting early due to unpredictability heuristic.\n"); | 
|  | return false; | 
|  | } | 
|  |  | 
|  | continue; | 
|  | } | 
|  | } | 
|  |  | 
|  | return true; | 
|  | } | 
|  |  | 
|  | void addToQueue(Value *Val, BasicBlock *BB, | 
|  | std::deque<std::pair<Value *, BasicBlock *>> &Q, | 
|  | SmallSet<Value *, 16> &SeenValues) { | 
|  | if (SeenValues.insert(Val).second) | 
|  | Q.push_back({Val, BB}); | 
|  | } | 
|  |  | 
|  | bool isValidSelectInst(SelectInst *SI) { | 
|  | if (!SI->hasOneUse()) | 
|  | return false; | 
|  |  | 
|  | Instruction *SIUse = dyn_cast<Instruction>(SI->user_back()); | 
|  | // The use of the select inst should be either a phi or another select. | 
|  | if (!SIUse && !(isa<PHINode>(SIUse) || isa<SelectInst>(SIUse))) | 
|  | return false; | 
|  |  | 
|  | BasicBlock *SIBB = SI->getParent(); | 
|  |  | 
|  | // Currently, we can only expand select instructions in basic blocks with | 
|  | // one successor. | 
|  | BranchInst *SITerm = dyn_cast<BranchInst>(SIBB->getTerminator()); | 
|  | if (!SITerm || !SITerm->isUnconditional()) | 
|  | return false; | 
|  |  | 
|  | // Only fold the select coming from directly where it is defined. | 
|  | PHINode *PHIUser = dyn_cast<PHINode>(SIUse); | 
|  | if (PHIUser && PHIUser->getIncomingBlock(*SI->use_begin()) != SIBB) | 
|  | return false; | 
|  |  | 
|  | // If select will not be sunk during unfolding, and it is in the same basic | 
|  | // block as another state defining select, then cannot unfold both. | 
|  | for (SelectInstToUnfold SIToUnfold : SelectInsts) { | 
|  | SelectInst *PrevSI = SIToUnfold.getInst(); | 
|  | if (PrevSI->getTrueValue() != SI && PrevSI->getFalseValue() != SI && | 
|  | PrevSI->getParent() == SI->getParent()) | 
|  | return false; | 
|  | } | 
|  |  | 
|  | return true; | 
|  | } | 
|  |  | 
|  | LoopInfo *LI; | 
|  | SwitchInst *Instr = nullptr; | 
|  | SmallVector<SelectInstToUnfold, 4> SelectInsts; | 
|  | }; | 
|  |  | 
|  | struct AllSwitchPaths { | 
|  | AllSwitchPaths(const MainSwitch *MSwitch, OptimizationRemarkEmitter *ORE, | 
|  | LoopInfo *LI, Loop *L) | 
|  | : Switch(MSwitch->getInstr()), SwitchBlock(Switch->getParent()), ORE(ORE), | 
|  | LI(LI), SwitchOuterLoop(L) {} | 
|  |  | 
|  | std::vector<ThreadingPath> &getThreadingPaths() { return TPaths; } | 
|  | unsigned getNumThreadingPaths() { return TPaths.size(); } | 
|  | SwitchInst *getSwitchInst() { return Switch; } | 
|  | BasicBlock *getSwitchBlock() { return SwitchBlock; } | 
|  |  | 
|  | void run() { | 
|  | StateDefMap StateDef = getStateDefMap(); | 
|  | if (StateDef.empty()) { | 
|  | ORE->emit([&]() { | 
|  | return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable", | 
|  | Switch) | 
|  | << "Switch instruction is not predictable."; | 
|  | }); | 
|  | return; | 
|  | } | 
|  |  | 
|  | auto *SwitchPhi = cast<PHINode>(Switch->getOperand(0)); | 
|  | auto *SwitchPhiDefBB = SwitchPhi->getParent(); | 
|  | VisitedBlocks VB; | 
|  | // Get paths from the determinator BBs to SwitchPhiDefBB | 
|  | std::vector<ThreadingPath> PathsToPhiDef = | 
|  | getPathsFromStateDefMap(StateDef, SwitchPhi, VB); | 
|  | if (SwitchPhiDefBB == SwitchBlock) { | 
|  | TPaths = std::move(PathsToPhiDef); | 
|  | return; | 
|  | } | 
|  |  | 
|  | // Find and append paths from SwitchPhiDefBB to SwitchBlock. | 
|  | PathsType PathsToSwitchBB = | 
|  | paths(SwitchPhiDefBB, SwitchBlock, VB, /* PathDepth = */ 1); | 
|  | if (PathsToSwitchBB.empty()) | 
|  | return; | 
|  |  | 
|  | std::vector<ThreadingPath> TempList; | 
|  | for (const ThreadingPath &Path : PathsToPhiDef) { | 
|  | for (const PathType &PathToSw : PathsToSwitchBB) { | 
|  | ThreadingPath PathCopy(Path); | 
|  | PathCopy.appendExcludingFirst(PathToSw); | 
|  | TempList.push_back(PathCopy); | 
|  | } | 
|  | } | 
|  | TPaths = std::move(TempList); | 
|  | } | 
|  |  | 
|  | private: | 
|  | // Value: an instruction that defines a switch state; | 
|  | // Key: the parent basic block of that instruction. | 
|  | typedef DenseMap<const BasicBlock *, const PHINode *> StateDefMap; | 
|  | std::vector<ThreadingPath> getPathsFromStateDefMap(StateDefMap &StateDef, | 
|  | PHINode *Phi, | 
|  | VisitedBlocks &VB) { | 
|  | std::vector<ThreadingPath> Res; | 
|  | auto *PhiBB = Phi->getParent(); | 
|  | VB.insert(PhiBB); | 
|  |  | 
|  | VisitedBlocks UniqueBlocks; | 
|  | for (auto *IncomingBB : Phi->blocks()) { | 
|  | if (!UniqueBlocks.insert(IncomingBB).second) | 
|  | continue; | 
|  | if (!SwitchOuterLoop->contains(IncomingBB)) | 
|  | continue; | 
|  |  | 
|  | Value *IncomingValue = Phi->getIncomingValueForBlock(IncomingBB); | 
|  | // We found the determinator. This is the start of our path. | 
|  | if (auto *C = dyn_cast<ConstantInt>(IncomingValue)) { | 
|  | // SwitchBlock is the determinator, unsupported unless its also the def. | 
|  | if (PhiBB == SwitchBlock && | 
|  | SwitchBlock != cast<PHINode>(Switch->getOperand(0))->getParent()) | 
|  | continue; | 
|  | ThreadingPath NewPath; | 
|  | NewPath.setDeterminator(PhiBB); | 
|  | NewPath.setExitValue(C); | 
|  | // Don't add SwitchBlock at the start, this is handled later. | 
|  | if (IncomingBB != SwitchBlock) | 
|  | NewPath.push_back(IncomingBB); | 
|  | NewPath.push_back(PhiBB); | 
|  | Res.push_back(NewPath); | 
|  | continue; | 
|  | } | 
|  | // Don't get into a cycle. | 
|  | if (VB.contains(IncomingBB) || IncomingBB == SwitchBlock) | 
|  | continue; | 
|  | // Recurse up the PHI chain. | 
|  | auto *IncomingPhi = dyn_cast<PHINode>(IncomingValue); | 
|  | if (!IncomingPhi) | 
|  | continue; | 
|  | auto *IncomingPhiDefBB = IncomingPhi->getParent(); | 
|  | if (!StateDef.contains(IncomingPhiDefBB)) | 
|  | continue; | 
|  |  | 
|  | // Direct predecessor, just add to the path. | 
|  | if (IncomingPhiDefBB == IncomingBB) { | 
|  | std::vector<ThreadingPath> PredPaths = | 
|  | getPathsFromStateDefMap(StateDef, IncomingPhi, VB); | 
|  | for (ThreadingPath &Path : PredPaths) { | 
|  | Path.push_back(PhiBB); | 
|  | Res.push_back(std::move(Path)); | 
|  | } | 
|  | continue; | 
|  | } | 
|  | // Not a direct predecessor, find intermediate paths to append to the | 
|  | // existing path. | 
|  | if (VB.contains(IncomingPhiDefBB)) | 
|  | continue; | 
|  |  | 
|  | PathsType IntermediatePaths; | 
|  | IntermediatePaths = | 
|  | paths(IncomingPhiDefBB, IncomingBB, VB, /* PathDepth = */ 1); | 
|  | if (IntermediatePaths.empty()) | 
|  | continue; | 
|  |  | 
|  | std::vector<ThreadingPath> PredPaths = | 
|  | getPathsFromStateDefMap(StateDef, IncomingPhi, VB); | 
|  | for (const ThreadingPath &Path : PredPaths) { | 
|  | for (const PathType &IPath : IntermediatePaths) { | 
|  | ThreadingPath NewPath(Path); | 
|  | NewPath.appendExcludingFirst(IPath); | 
|  | NewPath.push_back(PhiBB); | 
|  | Res.push_back(NewPath); | 
|  | } | 
|  | } | 
|  | } | 
|  | VB.erase(PhiBB); | 
|  | return Res; | 
|  | } | 
|  |  | 
|  | PathsType paths(BasicBlock *BB, BasicBlock *ToBB, VisitedBlocks &Visited, | 
|  | unsigned PathDepth) { | 
|  | PathsType Res; | 
|  |  | 
|  | // Stop exploring paths after visiting MaxPathLength blocks | 
|  | if (PathDepth > MaxPathLength) { | 
|  | ORE->emit([&]() { | 
|  | return OptimizationRemarkAnalysis(DEBUG_TYPE, "MaxPathLengthReached", | 
|  | Switch) | 
|  | << "Exploration stopped after visiting MaxPathLength=" | 
|  | << ore::NV("MaxPathLength", MaxPathLength) << " blocks."; | 
|  | }); | 
|  | return Res; | 
|  | } | 
|  |  | 
|  | Visited.insert(BB); | 
|  | if (++NumVisited > MaxNumVisitiedPaths) | 
|  | return Res; | 
|  |  | 
|  | // Stop if we have reached the BB out of loop, since its successors have no | 
|  | // impact on the DFA. | 
|  | if (!SwitchOuterLoop->contains(BB)) | 
|  | return Res; | 
|  |  | 
|  | // Some blocks have multiple edges to the same successor, and this set | 
|  | // is used to prevent a duplicate path from being generated | 
|  | SmallSet<BasicBlock *, 4> Successors; | 
|  | for (BasicBlock *Succ : successors(BB)) { | 
|  | if (!Successors.insert(Succ).second) | 
|  | continue; | 
|  |  | 
|  | // Found a cycle through the final block. | 
|  | if (Succ == ToBB) { | 
|  | Res.push_back({BB, ToBB}); | 
|  | continue; | 
|  | } | 
|  |  | 
|  | // We have encountered a cycle, do not get caught in it | 
|  | if (Visited.contains(Succ)) | 
|  | continue; | 
|  |  | 
|  | auto *CurrLoop = LI->getLoopFor(BB); | 
|  | // Unlikely to be beneficial. | 
|  | if (Succ == CurrLoop->getHeader()) | 
|  | continue; | 
|  | // Skip for now, revisit this condition later to see the impact on | 
|  | // coverage and compile time. | 
|  | if (LI->getLoopFor(Succ) != CurrLoop) | 
|  | continue; | 
|  |  | 
|  | PathsType SuccPaths = paths(Succ, ToBB, Visited, PathDepth + 1); | 
|  | for (PathType &Path : SuccPaths) { | 
|  | Path.push_front(BB); | 
|  | Res.push_back(Path); | 
|  | if (Res.size() >= MaxNumPaths) { | 
|  | return Res; | 
|  | } | 
|  | } | 
|  | } | 
|  | // This block could now be visited again from a different predecessor. Note | 
|  | // that this will result in exponential runtime. Subpaths could possibly be | 
|  | // cached but it takes a lot of memory to store them. | 
|  | Visited.erase(BB); | 
|  | return Res; | 
|  | } | 
|  |  | 
|  | /// Walk the use-def chain and collect all the state-defining blocks and the | 
|  | /// PHI nodes in those blocks that define the state. | 
|  | StateDefMap getStateDefMap() const { | 
|  | StateDefMap Res; | 
|  | PHINode *FirstDef = dyn_cast<PHINode>(Switch->getOperand(0)); | 
|  | assert(FirstDef && "The first definition must be a phi."); | 
|  |  | 
|  | SmallVector<PHINode *, 8> Stack; | 
|  | Stack.push_back(FirstDef); | 
|  | SmallSet<Value *, 16> SeenValues; | 
|  |  | 
|  | while (!Stack.empty()) { | 
|  | PHINode *CurPhi = Stack.pop_back_val(); | 
|  |  | 
|  | Res[CurPhi->getParent()] = CurPhi; | 
|  | SeenValues.insert(CurPhi); | 
|  |  | 
|  | for (BasicBlock *IncomingBB : CurPhi->blocks()) { | 
|  | PHINode *IncomingPhi = | 
|  | dyn_cast<PHINode>(CurPhi->getIncomingValueForBlock(IncomingBB)); | 
|  | if (!IncomingPhi) | 
|  | continue; | 
|  | bool IsOutsideLoops = !SwitchOuterLoop->contains(IncomingBB); | 
|  | if (SeenValues.contains(IncomingPhi) || IsOutsideLoops) | 
|  | continue; | 
|  |  | 
|  | Stack.push_back(IncomingPhi); | 
|  | } | 
|  | } | 
|  |  | 
|  | return Res; | 
|  | } | 
|  |  | 
|  | unsigned NumVisited = 0; | 
|  | SwitchInst *Switch; | 
|  | BasicBlock *SwitchBlock; | 
|  | OptimizationRemarkEmitter *ORE; | 
|  | std::vector<ThreadingPath> TPaths; | 
|  | LoopInfo *LI; | 
|  | Loop *SwitchOuterLoop; | 
|  | }; | 
|  |  | 
|  | struct TransformDFA { | 
|  | TransformDFA(AllSwitchPaths *SwitchPaths, DominatorTree *DT, | 
|  | AssumptionCache *AC, TargetTransformInfo *TTI, | 
|  | OptimizationRemarkEmitter *ORE, | 
|  | SmallPtrSet<const Value *, 32> EphValues) | 
|  | : SwitchPaths(SwitchPaths), DT(DT), AC(AC), TTI(TTI), ORE(ORE), | 
|  | EphValues(EphValues) {} | 
|  |  | 
|  | void run() { | 
|  | if (isLegalAndProfitableToTransform()) { | 
|  | createAllExitPaths(); | 
|  | NumTransforms++; | 
|  | } | 
|  | } | 
|  |  | 
|  | private: | 
|  | /// This function performs both a legality check and profitability check at | 
|  | /// the same time since it is convenient to do so. It iterates through all | 
|  | /// blocks that will be cloned, and keeps track of the duplication cost. It | 
|  | /// also returns false if it is illegal to clone some required block. | 
|  | bool isLegalAndProfitableToTransform() { | 
|  | CodeMetrics Metrics; | 
|  | SwitchInst *Switch = SwitchPaths->getSwitchInst(); | 
|  |  | 
|  | // Don't thread switch without multiple successors. | 
|  | if (Switch->getNumSuccessors() <= 1) | 
|  | return false; | 
|  |  | 
|  | // Note that DuplicateBlockMap is not being used as intended here. It is | 
|  | // just being used to ensure (BB, State) pairs are only counted once. | 
|  | DuplicateBlockMap DuplicateMap; | 
|  |  | 
|  | for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { | 
|  | PathType PathBBs = TPath.getPath(); | 
|  | APInt NextState = TPath.getExitValue(); | 
|  | const BasicBlock *Determinator = TPath.getDeterminatorBB(); | 
|  |  | 
|  | // Update Metrics for the Switch block, this is always cloned | 
|  | BasicBlock *BB = SwitchPaths->getSwitchBlock(); | 
|  | BasicBlock *VisitedBB = getClonedBB(BB, NextState, DuplicateMap); | 
|  | if (!VisitedBB) { | 
|  | Metrics.analyzeBasicBlock(BB, *TTI, EphValues); | 
|  | DuplicateMap[BB].push_back({BB, NextState}); | 
|  | } | 
|  |  | 
|  | // If the Switch block is the Determinator, then we can continue since | 
|  | // this is the only block that is cloned and we already counted for it. | 
|  | if (PathBBs.front() == Determinator) | 
|  | continue; | 
|  |  | 
|  | // Otherwise update Metrics for all blocks that will be cloned. If any | 
|  | // block is already cloned and would be reused, don't double count it. | 
|  | auto DetIt = llvm::find(PathBBs, Determinator); | 
|  | for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) { | 
|  | BB = *BBIt; | 
|  | VisitedBB = getClonedBB(BB, NextState, DuplicateMap); | 
|  | if (VisitedBB) | 
|  | continue; | 
|  | Metrics.analyzeBasicBlock(BB, *TTI, EphValues); | 
|  | DuplicateMap[BB].push_back({BB, NextState}); | 
|  | } | 
|  |  | 
|  | if (Metrics.notDuplicatable) { | 
|  | LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains " | 
|  | << "non-duplicatable instructions.\n"); | 
|  | ORE->emit([&]() { | 
|  | return OptimizationRemarkMissed(DEBUG_TYPE, "NonDuplicatableInst", | 
|  | Switch) | 
|  | << "Contains non-duplicatable instructions."; | 
|  | }); | 
|  | return false; | 
|  | } | 
|  |  | 
|  | // FIXME: Allow jump threading with controlled convergence. | 
|  | if (Metrics.Convergence != ConvergenceKind::None) { | 
|  | LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains " | 
|  | << "convergent instructions.\n"); | 
|  | ORE->emit([&]() { | 
|  | return OptimizationRemarkMissed(DEBUG_TYPE, "ConvergentInst", Switch) | 
|  | << "Contains convergent instructions."; | 
|  | }); | 
|  | return false; | 
|  | } | 
|  |  | 
|  | if (!Metrics.NumInsts.isValid()) { | 
|  | LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains " | 
|  | << "instructions with invalid cost.\n"); | 
|  | ORE->emit([&]() { | 
|  | return OptimizationRemarkMissed(DEBUG_TYPE, "ConvergentInst", Switch) | 
|  | << "Contains instructions with invalid cost."; | 
|  | }); | 
|  | return false; | 
|  | } | 
|  | } | 
|  |  | 
|  | InstructionCost DuplicationCost = 0; | 
|  |  | 
|  | unsigned JumpTableSize = 0; | 
|  | TTI->getEstimatedNumberOfCaseClusters(*Switch, JumpTableSize, nullptr, | 
|  | nullptr); | 
|  | if (JumpTableSize == 0) { | 
|  | // Factor in the number of conditional branches reduced from jump | 
|  | // threading. Assume that lowering the switch block is implemented by | 
|  | // using binary search, hence the LogBase2(). | 
|  | unsigned CondBranches = | 
|  | APInt(32, Switch->getNumSuccessors()).ceilLogBase2(); | 
|  | assert(CondBranches > 0 && | 
|  | "The threaded switch must have multiple branches"); | 
|  | DuplicationCost = Metrics.NumInsts / CondBranches; | 
|  | } else { | 
|  | // Compared with jump tables, the DFA optimizer removes an indirect branch | 
|  | // on each loop iteration, thus making branch prediction more precise. The | 
|  | // more branch targets there are, the more likely it is for the branch | 
|  | // predictor to make a mistake, and the more benefit there is in the DFA | 
|  | // optimizer. Thus, the more branch targets there are, the lower is the | 
|  | // cost of the DFA opt. | 
|  | DuplicationCost = Metrics.NumInsts / JumpTableSize; | 
|  | } | 
|  |  | 
|  | LLVM_DEBUG(dbgs() << "\nDFA Jump Threading: Cost to jump thread block " | 
|  | << SwitchPaths->getSwitchBlock()->getName() | 
|  | << " is: " << DuplicationCost << "\n\n"); | 
|  |  | 
|  | if (DuplicationCost > CostThreshold) { | 
|  | LLVM_DEBUG(dbgs() << "Not jump threading, duplication cost exceeds the " | 
|  | << "cost threshold.\n"); | 
|  | ORE->emit([&]() { | 
|  | return OptimizationRemarkMissed(DEBUG_TYPE, "NotProfitable", Switch) | 
|  | << "Duplication cost exceeds the cost threshold (cost=" | 
|  | << ore::NV("Cost", DuplicationCost) | 
|  | << ", threshold=" << ore::NV("Threshold", CostThreshold) << ")."; | 
|  | }); | 
|  | return false; | 
|  | } | 
|  |  | 
|  | ORE->emit([&]() { | 
|  | return OptimizationRemark(DEBUG_TYPE, "JumpThreaded", Switch) | 
|  | << "Switch statement jump-threaded."; | 
|  | }); | 
|  |  | 
|  | return true; | 
|  | } | 
|  |  | 
|  | /// Transform each threading path to effectively jump thread the DFA. | 
|  | void createAllExitPaths() { | 
|  | DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Eager); | 
|  |  | 
|  | // Move the switch block to the end of the path, since it will be duplicated | 
|  | BasicBlock *SwitchBlock = SwitchPaths->getSwitchBlock(); | 
|  | for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { | 
|  | LLVM_DEBUG(dbgs() << TPath << "\n"); | 
|  | // TODO: Fix exit path creation logic so that we dont need this | 
|  | // placeholder. | 
|  | TPath.push_front(SwitchBlock); | 
|  | } | 
|  |  | 
|  | // Transform the ThreadingPaths and keep track of the cloned values | 
|  | DuplicateBlockMap DuplicateMap; | 
|  | DefMap NewDefs; | 
|  |  | 
|  | SmallSet<BasicBlock *, 16> BlocksToClean; | 
|  | BlocksToClean.insert_range(successors(SwitchBlock)); | 
|  |  | 
|  | for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { | 
|  | createExitPath(NewDefs, TPath, DuplicateMap, BlocksToClean, &DTU); | 
|  | NumPaths++; | 
|  | } | 
|  |  | 
|  | // After all paths are cloned, now update the last successor of the cloned | 
|  | // path so it skips over the switch statement | 
|  | for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) | 
|  | updateLastSuccessor(TPath, DuplicateMap, &DTU); | 
|  |  | 
|  | // For each instruction that was cloned and used outside, update its uses | 
|  | updateSSA(NewDefs); | 
|  |  | 
|  | // Clean PHI Nodes for the newly created blocks | 
|  | for (BasicBlock *BB : BlocksToClean) | 
|  | cleanPhiNodes(BB); | 
|  | } | 
|  |  | 
|  | /// For a specific ThreadingPath \p Path, create an exit path starting from | 
|  | /// the determinator block. | 
|  | /// | 
|  | /// To remember the correct destination, we have to duplicate blocks | 
|  | /// corresponding to each state. Also update the terminating instruction of | 
|  | /// the predecessors, and phis in the successor blocks. | 
|  | void createExitPath(DefMap &NewDefs, ThreadingPath &Path, | 
|  | DuplicateBlockMap &DuplicateMap, | 
|  | SmallSet<BasicBlock *, 16> &BlocksToClean, | 
|  | DomTreeUpdater *DTU) { | 
|  | APInt NextState = Path.getExitValue(); | 
|  | const BasicBlock *Determinator = Path.getDeterminatorBB(); | 
|  | PathType PathBBs = Path.getPath(); | 
|  |  | 
|  | // Don't select the placeholder block in front | 
|  | if (PathBBs.front() == Determinator) | 
|  | PathBBs.pop_front(); | 
|  |  | 
|  | auto DetIt = llvm::find(PathBBs, Determinator); | 
|  | // When there is only one BB in PathBBs, the determinator takes itself as a | 
|  | // direct predecessor. | 
|  | BasicBlock *PrevBB = PathBBs.size() == 1 ? *DetIt : *std::prev(DetIt); | 
|  | for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) { | 
|  | BasicBlock *BB = *BBIt; | 
|  | BlocksToClean.insert(BB); | 
|  |  | 
|  | // We already cloned BB for this NextState, now just update the branch | 
|  | // and continue. | 
|  | BasicBlock *NextBB = getClonedBB(BB, NextState, DuplicateMap); | 
|  | if (NextBB) { | 
|  | updatePredecessor(PrevBB, BB, NextBB, DTU); | 
|  | PrevBB = NextBB; | 
|  | continue; | 
|  | } | 
|  |  | 
|  | // Clone the BB and update the successor of Prev to jump to the new block | 
|  | BasicBlock *NewBB = cloneBlockAndUpdatePredecessor( | 
|  | BB, PrevBB, NextState, DuplicateMap, NewDefs, DTU); | 
|  | DuplicateMap[BB].push_back({NewBB, NextState}); | 
|  | BlocksToClean.insert(NewBB); | 
|  | PrevBB = NewBB; | 
|  | } | 
|  | } | 
|  |  | 
|  | /// Restore SSA form after cloning blocks. | 
|  | /// | 
|  | /// Each cloned block creates new defs for a variable, and the uses need to be | 
|  | /// updated to reflect this. The uses may be replaced with a cloned value, or | 
|  | /// some derived phi instruction. Note that all uses of a value defined in the | 
|  | /// same block were already remapped when cloning the block. | 
|  | void updateSSA(DefMap &NewDefs) { | 
|  | SSAUpdaterBulk SSAUpdate; | 
|  | SmallVector<Use *, 16> UsesToRename; | 
|  |  | 
|  | for (const auto &KV : NewDefs) { | 
|  | Instruction *I = KV.first; | 
|  | BasicBlock *BB = I->getParent(); | 
|  | std::vector<Instruction *> Cloned = KV.second; | 
|  |  | 
|  | // Scan all uses of this instruction to see if it is used outside of its | 
|  | // block, and if so, record them in UsesToRename. | 
|  | for (Use &U : I->uses()) { | 
|  | Instruction *User = cast<Instruction>(U.getUser()); | 
|  | if (PHINode *UserPN = dyn_cast<PHINode>(User)) { | 
|  | if (UserPN->getIncomingBlock(U) == BB) | 
|  | continue; | 
|  | } else if (User->getParent() == BB) { | 
|  | continue; | 
|  | } | 
|  |  | 
|  | UsesToRename.push_back(&U); | 
|  | } | 
|  |  | 
|  | // If there are no uses outside the block, we're done with this | 
|  | // instruction. | 
|  | if (UsesToRename.empty()) | 
|  | continue; | 
|  | LLVM_DEBUG(dbgs() << "DFA-JT: Renaming non-local uses of: " << *I | 
|  | << "\n"); | 
|  |  | 
|  | // We found a use of I outside of BB.  Rename all uses of I that are | 
|  | // outside its block to be uses of the appropriate PHI node etc.  See | 
|  | // ValuesInBlocks with the values we know. | 
|  | unsigned VarNum = SSAUpdate.AddVariable(I->getName(), I->getType()); | 
|  | SSAUpdate.AddAvailableValue(VarNum, BB, I); | 
|  | for (Instruction *New : Cloned) | 
|  | SSAUpdate.AddAvailableValue(VarNum, New->getParent(), New); | 
|  |  | 
|  | while (!UsesToRename.empty()) | 
|  | SSAUpdate.AddUse(VarNum, UsesToRename.pop_back_val()); | 
|  |  | 
|  | LLVM_DEBUG(dbgs() << "\n"); | 
|  | } | 
|  | // SSAUpdater handles phi placement and renaming uses with the appropriate | 
|  | // value. | 
|  | SSAUpdate.RewriteAllUses(DT); | 
|  | } | 
|  |  | 
|  | /// Clones a basic block, and adds it to the CFG. | 
|  | /// | 
|  | /// This function also includes updating phi nodes in the successors of the | 
|  | /// BB, and remapping uses that were defined locally in the cloned BB. | 
|  | BasicBlock *cloneBlockAndUpdatePredecessor(BasicBlock *BB, BasicBlock *PrevBB, | 
|  | const APInt &NextState, | 
|  | DuplicateBlockMap &DuplicateMap, | 
|  | DefMap &NewDefs, | 
|  | DomTreeUpdater *DTU) { | 
|  | ValueToValueMapTy VMap; | 
|  | BasicBlock *NewBB = CloneBasicBlock( | 
|  | BB, VMap, ".jt" + std::to_string(NextState.getLimitedValue()), | 
|  | BB->getParent()); | 
|  | NewBB->moveAfter(BB); | 
|  | NumCloned++; | 
|  |  | 
|  | for (Instruction &I : *NewBB) { | 
|  | // Do not remap operands of PHINode in case a definition in BB is an | 
|  | // incoming value to a phi in the same block. This incoming value will | 
|  | // be renamed later while restoring SSA. | 
|  | if (isa<PHINode>(&I)) | 
|  | continue; | 
|  | RemapInstruction(&I, VMap, | 
|  | RF_IgnoreMissingLocals | RF_NoModuleLevelChanges); | 
|  | if (AssumeInst *II = dyn_cast<AssumeInst>(&I)) | 
|  | AC->registerAssumption(II); | 
|  | } | 
|  |  | 
|  | updateSuccessorPhis(BB, NewBB, NextState, VMap, DuplicateMap); | 
|  | updatePredecessor(PrevBB, BB, NewBB, DTU); | 
|  | updateDefMap(NewDefs, VMap); | 
|  |  | 
|  | // Add all successors to the DominatorTree | 
|  | SmallPtrSet<BasicBlock *, 4> SuccSet; | 
|  | for (auto *SuccBB : successors(NewBB)) { | 
|  | if (SuccSet.insert(SuccBB).second) | 
|  | DTU->applyUpdates({{DominatorTree::Insert, NewBB, SuccBB}}); | 
|  | } | 
|  | SuccSet.clear(); | 
|  | return NewBB; | 
|  | } | 
|  |  | 
|  | /// Update the phi nodes in BB's successors. | 
|  | /// | 
|  | /// This means creating a new incoming value from NewBB with the new | 
|  | /// instruction wherever there is an incoming value from BB. | 
|  | void updateSuccessorPhis(BasicBlock *BB, BasicBlock *ClonedBB, | 
|  | const APInt &NextState, ValueToValueMapTy &VMap, | 
|  | DuplicateBlockMap &DuplicateMap) { | 
|  | std::vector<BasicBlock *> BlocksToUpdate; | 
|  |  | 
|  | // If BB is the last block in the path, we can simply update the one case | 
|  | // successor that will be reached. | 
|  | if (BB == SwitchPaths->getSwitchBlock()) { | 
|  | SwitchInst *Switch = SwitchPaths->getSwitchInst(); | 
|  | BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState); | 
|  | BlocksToUpdate.push_back(NextCase); | 
|  | BasicBlock *ClonedSucc = getClonedBB(NextCase, NextState, DuplicateMap); | 
|  | if (ClonedSucc) | 
|  | BlocksToUpdate.push_back(ClonedSucc); | 
|  | } | 
|  | // Otherwise update phis in all successors. | 
|  | else { | 
|  | for (BasicBlock *Succ : successors(BB)) { | 
|  | BlocksToUpdate.push_back(Succ); | 
|  |  | 
|  | // Check if a successor has already been cloned for the particular exit | 
|  | // value. In this case if a successor was already cloned, the phi nodes | 
|  | // in the cloned block should be updated directly. | 
|  | BasicBlock *ClonedSucc = getClonedBB(Succ, NextState, DuplicateMap); | 
|  | if (ClonedSucc) | 
|  | BlocksToUpdate.push_back(ClonedSucc); | 
|  | } | 
|  | } | 
|  |  | 
|  | // If there is a phi with an incoming value from BB, create a new incoming | 
|  | // value for the new predecessor ClonedBB. The value will either be the same | 
|  | // value from BB or a cloned value. | 
|  | for (BasicBlock *Succ : BlocksToUpdate) { | 
|  | for (auto II = Succ->begin(); PHINode *Phi = dyn_cast<PHINode>(II); | 
|  | ++II) { | 
|  | Value *Incoming = Phi->getIncomingValueForBlock(BB); | 
|  | if (Incoming) { | 
|  | if (isa<Constant>(Incoming)) { | 
|  | Phi->addIncoming(Incoming, ClonedBB); | 
|  | continue; | 
|  | } | 
|  | Value *ClonedVal = VMap[Incoming]; | 
|  | if (ClonedVal) | 
|  | Phi->addIncoming(ClonedVal, ClonedBB); | 
|  | else | 
|  | Phi->addIncoming(Incoming, ClonedBB); | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | /// Sets the successor of PrevBB to be NewBB instead of OldBB. Note that all | 
|  | /// other successors are kept as well. | 
|  | void updatePredecessor(BasicBlock *PrevBB, BasicBlock *OldBB, | 
|  | BasicBlock *NewBB, DomTreeUpdater *DTU) { | 
|  | // When a path is reused, there is a chance that predecessors were already | 
|  | // updated before. Check if the predecessor needs to be updated first. | 
|  | if (!isPredecessor(OldBB, PrevBB)) | 
|  | return; | 
|  |  | 
|  | Instruction *PrevTerm = PrevBB->getTerminator(); | 
|  | for (unsigned Idx = 0; Idx < PrevTerm->getNumSuccessors(); Idx++) { | 
|  | if (PrevTerm->getSuccessor(Idx) == OldBB) { | 
|  | OldBB->removePredecessor(PrevBB, /* KeepOneInputPHIs = */ true); | 
|  | PrevTerm->setSuccessor(Idx, NewBB); | 
|  | } | 
|  | } | 
|  | DTU->applyUpdates({{DominatorTree::Delete, PrevBB, OldBB}, | 
|  | {DominatorTree::Insert, PrevBB, NewBB}}); | 
|  | } | 
|  |  | 
|  | /// Add new value mappings to the DefMap to keep track of all new definitions | 
|  | /// for a particular instruction. These will be used while updating SSA form. | 
|  | void updateDefMap(DefMap &NewDefs, ValueToValueMapTy &VMap) { | 
|  | SmallVector<std::pair<Instruction *, Instruction *>> NewDefsVector; | 
|  | NewDefsVector.reserve(VMap.size()); | 
|  |  | 
|  | for (auto Entry : VMap) { | 
|  | Instruction *Inst = | 
|  | dyn_cast<Instruction>(const_cast<Value *>(Entry.first)); | 
|  | if (!Inst || !Entry.second || isa<BranchInst>(Inst) || | 
|  | isa<SwitchInst>(Inst)) { | 
|  | continue; | 
|  | } | 
|  |  | 
|  | Instruction *Cloned = dyn_cast<Instruction>(Entry.second); | 
|  | if (!Cloned) | 
|  | continue; | 
|  |  | 
|  | NewDefsVector.push_back({Inst, Cloned}); | 
|  | } | 
|  |  | 
|  | // Sort the defs to get deterministic insertion order into NewDefs. | 
|  | sort(NewDefsVector, [](const auto &LHS, const auto &RHS) { | 
|  | if (LHS.first == RHS.first) | 
|  | return LHS.second->comesBefore(RHS.second); | 
|  | return LHS.first->comesBefore(RHS.first); | 
|  | }); | 
|  |  | 
|  | for (const auto &KV : NewDefsVector) | 
|  | NewDefs[KV.first].push_back(KV.second); | 
|  | } | 
|  |  | 
|  | /// Update the last branch of a particular cloned path to point to the correct | 
|  | /// case successor. | 
|  | /// | 
|  | /// Note that this is an optional step and would have been done in later | 
|  | /// optimizations, but it makes the CFG significantly easier to work with. | 
|  | void updateLastSuccessor(ThreadingPath &TPath, | 
|  | DuplicateBlockMap &DuplicateMap, | 
|  | DomTreeUpdater *DTU) { | 
|  | APInt NextState = TPath.getExitValue(); | 
|  | BasicBlock *BB = TPath.getPath().back(); | 
|  | BasicBlock *LastBlock = getClonedBB(BB, NextState, DuplicateMap); | 
|  |  | 
|  | // Note multiple paths can end at the same block so check that it is not | 
|  | // updated yet | 
|  | if (!isa<SwitchInst>(LastBlock->getTerminator())) | 
|  | return; | 
|  | SwitchInst *Switch = cast<SwitchInst>(LastBlock->getTerminator()); | 
|  | BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState); | 
|  |  | 
|  | std::vector<DominatorTree::UpdateType> DTUpdates; | 
|  | SmallPtrSet<BasicBlock *, 4> SuccSet; | 
|  | for (BasicBlock *Succ : successors(LastBlock)) { | 
|  | if (Succ != NextCase && SuccSet.insert(Succ).second) | 
|  | DTUpdates.push_back({DominatorTree::Delete, LastBlock, Succ}); | 
|  | } | 
|  |  | 
|  | Switch->eraseFromParent(); | 
|  | BranchInst::Create(NextCase, LastBlock); | 
|  |  | 
|  | DTU->applyUpdates(DTUpdates); | 
|  | } | 
|  |  | 
|  | /// After cloning blocks, some of the phi nodes have extra incoming values | 
|  | /// that are no longer used. This function removes them. | 
|  | void cleanPhiNodes(BasicBlock *BB) { | 
|  | // If BB is no longer reachable, remove any remaining phi nodes | 
|  | if (pred_empty(BB)) { | 
|  | std::vector<PHINode *> PhiToRemove; | 
|  | for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(II); ++II) { | 
|  | PhiToRemove.push_back(Phi); | 
|  | } | 
|  | for (PHINode *PN : PhiToRemove) { | 
|  | PN->replaceAllUsesWith(PoisonValue::get(PN->getType())); | 
|  | PN->eraseFromParent(); | 
|  | } | 
|  | return; | 
|  | } | 
|  |  | 
|  | // Remove any incoming values that come from an invalid predecessor | 
|  | for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(II); ++II) { | 
|  | std::vector<BasicBlock *> BlocksToRemove; | 
|  | for (BasicBlock *IncomingBB : Phi->blocks()) { | 
|  | if (!isPredecessor(BB, IncomingBB)) | 
|  | BlocksToRemove.push_back(IncomingBB); | 
|  | } | 
|  | for (BasicBlock *BB : BlocksToRemove) | 
|  | Phi->removeIncomingValue(BB); | 
|  | } | 
|  | } | 
|  |  | 
|  | /// Checks if BB was already cloned for a particular next state value. If it | 
|  | /// was then it returns this cloned block, and otherwise null. | 
|  | BasicBlock *getClonedBB(BasicBlock *BB, const APInt &NextState, | 
|  | DuplicateBlockMap &DuplicateMap) { | 
|  | CloneList ClonedBBs = DuplicateMap[BB]; | 
|  |  | 
|  | // Find an entry in the CloneList with this NextState. If it exists then | 
|  | // return the corresponding BB | 
|  | auto It = llvm::find_if(ClonedBBs, [NextState](const ClonedBlock &C) { | 
|  | return C.State == NextState; | 
|  | }); | 
|  | return It != ClonedBBs.end() ? (*It).BB : nullptr; | 
|  | } | 
|  |  | 
|  | /// Helper to get the successor corresponding to a particular case value for | 
|  | /// a switch statement. | 
|  | BasicBlock *getNextCaseSuccessor(SwitchInst *Switch, const APInt &NextState) { | 
|  | BasicBlock *NextCase = nullptr; | 
|  | for (auto Case : Switch->cases()) { | 
|  | if (Case.getCaseValue()->getValue() == NextState) { | 
|  | NextCase = Case.getCaseSuccessor(); | 
|  | break; | 
|  | } | 
|  | } | 
|  | if (!NextCase) | 
|  | NextCase = Switch->getDefaultDest(); | 
|  | return NextCase; | 
|  | } | 
|  |  | 
|  | /// Returns true if IncomingBB is a predecessor of BB. | 
|  | bool isPredecessor(BasicBlock *BB, BasicBlock *IncomingBB) { | 
|  | return llvm::is_contained(predecessors(BB), IncomingBB); | 
|  | } | 
|  |  | 
|  | AllSwitchPaths *SwitchPaths; | 
|  | DominatorTree *DT; | 
|  | AssumptionCache *AC; | 
|  | TargetTransformInfo *TTI; | 
|  | OptimizationRemarkEmitter *ORE; | 
|  | SmallPtrSet<const Value *, 32> EphValues; | 
|  | std::vector<ThreadingPath> TPaths; | 
|  | }; | 
|  |  | 
|  | bool DFAJumpThreading::run(Function &F) { | 
|  | LLVM_DEBUG(dbgs() << "\nDFA Jump threading: " << F.getName() << "\n"); | 
|  |  | 
|  | if (F.hasOptSize()) { | 
|  | LLVM_DEBUG(dbgs() << "Skipping due to the 'minsize' attribute\n"); | 
|  | return false; | 
|  | } | 
|  |  | 
|  | if (ClViewCfgBefore) | 
|  | F.viewCFG(); | 
|  |  | 
|  | SmallVector<AllSwitchPaths, 2> ThreadableLoops; | 
|  | bool MadeChanges = false; | 
|  | LoopInfoBroken = false; | 
|  |  | 
|  | for (BasicBlock &BB : F) { | 
|  | auto *SI = dyn_cast<SwitchInst>(BB.getTerminator()); | 
|  | if (!SI) | 
|  | continue; | 
|  |  | 
|  | LLVM_DEBUG(dbgs() << "\nCheck if SwitchInst in BB " << BB.getName() | 
|  | << " is a candidate\n"); | 
|  | MainSwitch Switch(SI, LI, ORE); | 
|  |  | 
|  | if (!Switch.getInstr()) { | 
|  | LLVM_DEBUG(dbgs() << "\nSwitchInst in BB " << BB.getName() << " is not a " | 
|  | << "candidate for jump threading\n"); | 
|  | continue; | 
|  | } | 
|  |  | 
|  | LLVM_DEBUG(dbgs() << "\nSwitchInst in BB " << BB.getName() << " is a " | 
|  | << "candidate for jump threading\n"); | 
|  | LLVM_DEBUG(SI->dump()); | 
|  |  | 
|  | unfoldSelectInstrs(DT, Switch.getSelectInsts()); | 
|  | if (!Switch.getSelectInsts().empty()) | 
|  | MadeChanges = true; | 
|  |  | 
|  | AllSwitchPaths SwitchPaths(&Switch, ORE, LI, | 
|  | LI->getLoopFor(&BB)->getOutermostLoop()); | 
|  | SwitchPaths.run(); | 
|  |  | 
|  | if (SwitchPaths.getNumThreadingPaths() > 0) { | 
|  | ThreadableLoops.push_back(SwitchPaths); | 
|  |  | 
|  | // For the time being limit this optimization to occurring once in a | 
|  | // function since it can change the CFG significantly. This is not a | 
|  | // strict requirement but it can cause buggy behavior if there is an | 
|  | // overlap of blocks in different opportunities. There is a lot of room to | 
|  | // experiment with catching more opportunities here. | 
|  | // NOTE: To release this contraint, we must handle LoopInfo invalidation | 
|  | break; | 
|  | } | 
|  | } | 
|  |  | 
|  | #ifdef NDEBUG | 
|  | LI->verify(*DT); | 
|  | #endif | 
|  |  | 
|  | SmallPtrSet<const Value *, 32> EphValues; | 
|  | if (ThreadableLoops.size() > 0) | 
|  | CodeMetrics::collectEphemeralValues(&F, AC, EphValues); | 
|  |  | 
|  | for (AllSwitchPaths SwitchPaths : ThreadableLoops) { | 
|  | TransformDFA Transform(&SwitchPaths, DT, AC, TTI, ORE, EphValues); | 
|  | Transform.run(); | 
|  | MadeChanges = true; | 
|  | LoopInfoBroken = true; | 
|  | } | 
|  |  | 
|  | #ifdef EXPENSIVE_CHECKS | 
|  | assert(DT->verify(DominatorTree::VerificationLevel::Full)); | 
|  | verifyFunction(F, &dbgs()); | 
|  | #endif | 
|  |  | 
|  | return MadeChanges; | 
|  | } | 
|  |  | 
|  | } // end anonymous namespace | 
|  |  | 
|  | /// Integrate with the new Pass Manager | 
|  | PreservedAnalyses DFAJumpThreadingPass::run(Function &F, | 
|  | FunctionAnalysisManager &AM) { | 
|  | AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F); | 
|  | DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F); | 
|  | LoopInfo &LI = AM.getResult<LoopAnalysis>(F); | 
|  | TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F); | 
|  | OptimizationRemarkEmitter ORE(&F); | 
|  | DFAJumpThreading ThreadImpl(&AC, &DT, &LI, &TTI, &ORE); | 
|  | if (!ThreadImpl.run(F)) | 
|  | return PreservedAnalyses::all(); | 
|  |  | 
|  | PreservedAnalyses PA; | 
|  | PA.preserve<DominatorTreeAnalysis>(); | 
|  | if (!ThreadImpl.LoopInfoBroken) | 
|  | PA.preserve<LoopAnalysis>(); | 
|  | return PA; | 
|  | } |