| //===- VPlan.cpp - Vectorizer Plan ----------------------------------------===// |
| // |
| // The LLVM Compiler Infrastructure |
| // |
| // This file is distributed under the University of Illinois Open Source |
| // License. See LICENSE.TXT for details. |
| // |
| //===----------------------------------------------------------------------===// |
| /// |
| /// \file |
| /// This is the LLVM vectorization plan. It represents a candidate for |
| /// vectorization, allowing to plan and optimize how to vectorize a given loop |
| /// before generating LLVM-IR. |
| /// The vectorizer uses vectorization plans to estimate the costs of potential |
| /// candidates and if profitable to execute the desired plan, generating vector |
| /// LLVM-IR code. |
| /// |
| //===----------------------------------------------------------------------===// |
| |
| #include "VPlan.h" |
| #include "llvm/ADT/DepthFirstIterator.h" |
| #include "llvm/ADT/PostOrderIterator.h" |
| #include "llvm/ADT/SmallVector.h" |
| #include "llvm/ADT/Twine.h" |
| #include "llvm/Analysis/LoopInfo.h" |
| #include "llvm/IR/BasicBlock.h" |
| #include "llvm/IR/CFG.h" |
| #include "llvm/IR/Dominators.h" |
| #include "llvm/IR/InstrTypes.h" |
| #include "llvm/IR/Instruction.h" |
| #include "llvm/IR/Instructions.h" |
| #include "llvm/IR/Type.h" |
| #include "llvm/IR/Value.h" |
| #include "llvm/Support/Casting.h" |
| #include "llvm/Support/Debug.h" |
| #include "llvm/Support/ErrorHandling.h" |
| #include "llvm/Support/GraphWriter.h" |
| #include "llvm/Support/raw_ostream.h" |
| #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
| #include <cassert> |
| #include <iterator> |
| #include <string> |
| #include <vector> |
| |
| using namespace llvm; |
| |
| #define DEBUG_TYPE "vplan" |
| |
| /// \return the VPBasicBlock that is the entry of Block, possibly indirectly. |
| const VPBasicBlock *VPBlockBase::getEntryBasicBlock() const { |
| const VPBlockBase *Block = this; |
| while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) |
| Block = Region->getEntry(); |
| return cast<VPBasicBlock>(Block); |
| } |
| |
| VPBasicBlock *VPBlockBase::getEntryBasicBlock() { |
| VPBlockBase *Block = this; |
| while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) |
| Block = Region->getEntry(); |
| return cast<VPBasicBlock>(Block); |
| } |
| |
| /// \return the VPBasicBlock that is the exit of Block, possibly indirectly. |
| const VPBasicBlock *VPBlockBase::getExitBasicBlock() const { |
| const VPBlockBase *Block = this; |
| while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) |
| Block = Region->getExit(); |
| return cast<VPBasicBlock>(Block); |
| } |
| |
| VPBasicBlock *VPBlockBase::getExitBasicBlock() { |
| VPBlockBase *Block = this; |
| while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) |
| Block = Region->getExit(); |
| return cast<VPBasicBlock>(Block); |
| } |
| |
| VPBlockBase *VPBlockBase::getEnclosingBlockWithSuccessors() { |
| if (!Successors.empty() || !Parent) |
| return this; |
| assert(Parent->getExit() == this && |
| "Block w/o successors not the exit of its parent."); |
| return Parent->getEnclosingBlockWithSuccessors(); |
| } |
| |
| VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() { |
| if (!Predecessors.empty() || !Parent) |
| return this; |
| assert(Parent->getEntry() == this && |
| "Block w/o predecessors not the entry of its parent."); |
| return Parent->getEnclosingBlockWithPredecessors(); |
| } |
| |
| void VPBlockBase::deleteCFG(VPBlockBase *Entry) { |
| SmallVector<VPBlockBase *, 8> Blocks; |
| for (VPBlockBase *Block : depth_first(Entry)) |
| Blocks.push_back(Block); |
| |
| for (VPBlockBase *Block : Blocks) |
| delete Block; |
| } |
| |
| BasicBlock * |
| VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) { |
| // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks. |
| // Pred stands for Predessor. Prev stands for Previous - last visited/created. |
| BasicBlock *PrevBB = CFG.PrevBB; |
| BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(), |
| PrevBB->getParent(), CFG.LastBB); |
| DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n'); |
| |
| // Hook up the new basic block to its predecessors. |
| for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) { |
| VPBasicBlock *PredVPBB = PredVPBlock->getExitBasicBlock(); |
| auto &PredVPSuccessors = PredVPBB->getSuccessors(); |
| BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB]; |
| assert(PredBB && "Predecessor basic-block not found building successor."); |
| auto *PredBBTerminator = PredBB->getTerminator(); |
| DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n'); |
| if (isa<UnreachableInst>(PredBBTerminator)) { |
| assert(PredVPSuccessors.size() == 1 && |
| "Predecessor ending w/o branch must have single successor."); |
| PredBBTerminator->eraseFromParent(); |
| BranchInst::Create(NewBB, PredBB); |
| } else { |
| assert(PredVPSuccessors.size() == 2 && |
| "Predecessor ending with branch must have two successors."); |
| unsigned idx = PredVPSuccessors.front() == this ? 0 : 1; |
| assert(!PredBBTerminator->getSuccessor(idx) && |
| "Trying to reset an existing successor block."); |
| PredBBTerminator->setSuccessor(idx, NewBB); |
| } |
| } |
| return NewBB; |
| } |
| |
| void VPBasicBlock::execute(VPTransformState *State) { |
| bool Replica = State->Instance && |
| !(State->Instance->Part == 0 && State->Instance->Lane == 0); |
| VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB; |
| VPBlockBase *SingleHPred = nullptr; |
| BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible. |
| |
| // 1. Create an IR basic block, or reuse the last one if possible. |
| // The last IR basic block is reused, as an optimization, in three cases: |
| // A. the first VPBB reuses the loop header BB - when PrevVPBB is null; |
| // B. when the current VPBB has a single (hierarchical) predecessor which |
| // is PrevVPBB and the latter has a single (hierarchical) successor; and |
| // C. when the current VPBB is an entry of a region replica - where PrevVPBB |
| // is the exit of this region from a previous instance, or the predecessor |
| // of this region. |
| if (PrevVPBB && /* A */ |
| !((SingleHPred = getSingleHierarchicalPredecessor()) && |
| SingleHPred->getExitBasicBlock() == PrevVPBB && |
| PrevVPBB->getSingleHierarchicalSuccessor()) && /* B */ |
| !(Replica && getPredecessors().empty())) { /* C */ |
| NewBB = createEmptyBasicBlock(State->CFG); |
| State->Builder.SetInsertPoint(NewBB); |
| // Temporarily terminate with unreachable until CFG is rewired. |
| UnreachableInst *Terminator = State->Builder.CreateUnreachable(); |
| State->Builder.SetInsertPoint(Terminator); |
| // Register NewBB in its loop. In innermost loops its the same for all BB's. |
| Loop *L = State->LI->getLoopFor(State->CFG.LastBB); |
| L->addBasicBlockToLoop(NewBB, *State->LI); |
| State->CFG.PrevBB = NewBB; |
| } |
| |
| // 2. Fill the IR basic block with IR instructions. |
| DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName() |
| << " in BB:" << NewBB->getName() << '\n'); |
| |
| State->CFG.VPBB2IRBB[this] = NewBB; |
| State->CFG.PrevVPBB = this; |
| |
| for (VPRecipeBase &Recipe : Recipes) |
| Recipe.execute(*State); |
| |
| DEBUG(dbgs() << "LV: filled BB:" << *NewBB); |
| } |
| |
| void VPRegionBlock::execute(VPTransformState *State) { |
| ReversePostOrderTraversal<VPBlockBase *> RPOT(Entry); |
| |
| if (!isReplicator()) { |
| // Visit the VPBlocks connected to "this", starting from it. |
| for (VPBlockBase *Block : RPOT) { |
| DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n'); |
| Block->execute(State); |
| } |
| return; |
| } |
| |
| assert(!State->Instance && "Replicating a Region with non-null instance."); |
| |
| // Enter replicating mode. |
| State->Instance = {0, 0}; |
| |
| for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part) { |
| State->Instance->Part = Part; |
| for (unsigned Lane = 0, VF = State->VF; Lane < VF; ++Lane) { |
| State->Instance->Lane = Lane; |
| // Visit the VPBlocks connected to \p this, starting from it. |
| for (VPBlockBase *Block : RPOT) { |
| DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n'); |
| Block->execute(State); |
| } |
| } |
| } |
| |
| // Exit replicating mode. |
| State->Instance.reset(); |
| } |
| |
| /// Generate the code inside the body of the vectorized loop. Assumes a single |
| /// LoopVectorBody basic-block was created for this. Introduce additional |
| /// basic-blocks as needed, and fill them all. |
| void VPlan::execute(VPTransformState *State) { |
| BasicBlock *VectorPreHeaderBB = State->CFG.PrevBB; |
| BasicBlock *VectorHeaderBB = VectorPreHeaderBB->getSingleSuccessor(); |
| assert(VectorHeaderBB && "Loop preheader does not have a single successor."); |
| BasicBlock *VectorLatchBB = VectorHeaderBB; |
| |
| // 1. Make room to generate basic-blocks inside loop body if needed. |
| VectorLatchBB = VectorHeaderBB->splitBasicBlock( |
| VectorHeaderBB->getFirstInsertionPt(), "vector.body.latch"); |
| Loop *L = State->LI->getLoopFor(VectorHeaderBB); |
| L->addBasicBlockToLoop(VectorLatchBB, *State->LI); |
| // Remove the edge between Header and Latch to allow other connections. |
| // Temporarily terminate with unreachable until CFG is rewired. |
| // Note: this asserts the generated code's assumption that |
| // getFirstInsertionPt() can be dereferenced into an Instruction. |
| VectorHeaderBB->getTerminator()->eraseFromParent(); |
| State->Builder.SetInsertPoint(VectorHeaderBB); |
| UnreachableInst *Terminator = State->Builder.CreateUnreachable(); |
| State->Builder.SetInsertPoint(Terminator); |
| |
| // 2. Generate code in loop body. |
| State->CFG.PrevVPBB = nullptr; |
| State->CFG.PrevBB = VectorHeaderBB; |
| State->CFG.LastBB = VectorLatchBB; |
| |
| for (VPBlockBase *Block : depth_first(Entry)) |
| Block->execute(State); |
| |
| // 3. Merge the temporary latch created with the last basic-block filled. |
| BasicBlock *LastBB = State->CFG.PrevBB; |
| // Connect LastBB to VectorLatchBB to facilitate their merge. |
| assert(isa<UnreachableInst>(LastBB->getTerminator()) && |
| "Expected VPlan CFG to terminate with unreachable"); |
| LastBB->getTerminator()->eraseFromParent(); |
| BranchInst::Create(VectorLatchBB, LastBB); |
| |
| // Merge LastBB with Latch. |
| bool Merged = MergeBlockIntoPredecessor(VectorLatchBB, nullptr, State->LI); |
| (void)Merged; |
| assert(Merged && "Could not merge last basic block with latch."); |
| VectorLatchBB = LastBB; |
| |
| updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB); |
| } |
| |
| void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB, |
| BasicBlock *LoopLatchBB) { |
| BasicBlock *LoopHeaderBB = LoopPreHeaderBB->getSingleSuccessor(); |
| assert(LoopHeaderBB && "Loop preheader does not have a single successor."); |
| DT->addNewBlock(LoopHeaderBB, LoopPreHeaderBB); |
| // The vector body may be more than a single basic-block by this point. |
| // Update the dominator tree information inside the vector body by propagating |
| // it from header to latch, expecting only triangular control-flow, if any. |
| BasicBlock *PostDomSucc = nullptr; |
| for (auto *BB = LoopHeaderBB; BB != LoopLatchBB; BB = PostDomSucc) { |
| // Get the list of successors of this block. |
| std::vector<BasicBlock *> Succs(succ_begin(BB), succ_end(BB)); |
| assert(Succs.size() <= 2 && |
| "Basic block in vector loop has more than 2 successors."); |
| PostDomSucc = Succs[0]; |
| if (Succs.size() == 1) { |
| assert(PostDomSucc->getSinglePredecessor() && |
| "PostDom successor has more than one predecessor."); |
| DT->addNewBlock(PostDomSucc, BB); |
| continue; |
| } |
| BasicBlock *InterimSucc = Succs[1]; |
| if (PostDomSucc->getSingleSuccessor() == InterimSucc) { |
| PostDomSucc = Succs[1]; |
| InterimSucc = Succs[0]; |
| } |
| assert(InterimSucc->getSingleSuccessor() == PostDomSucc && |
| "One successor of a basic block does not lead to the other."); |
| assert(InterimSucc->getSinglePredecessor() && |
| "Interim successor has more than one predecessor."); |
| assert(std::distance(pred_begin(PostDomSucc), pred_end(PostDomSucc)) == 2 && |
| "PostDom successor has more than two predecessors."); |
| DT->addNewBlock(InterimSucc, BB); |
| DT->addNewBlock(PostDomSucc, BB); |
| } |
| } |
| |
| const Twine VPlanPrinter::getUID(const VPBlockBase *Block) { |
| return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") + |
| Twine(getOrCreateBID(Block)); |
| } |
| |
| const Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) { |
| const std::string &Name = Block->getName(); |
| if (!Name.empty()) |
| return Name; |
| return "VPB" + Twine(getOrCreateBID(Block)); |
| } |
| |
| void VPlanPrinter::dump() { |
| Depth = 1; |
| bumpIndent(0); |
| OS << "digraph VPlan {\n"; |
| OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan"; |
| if (!Plan.getName().empty()) |
| OS << "\\n" << DOT::EscapeString(Plan.getName()); |
| OS << "\"]\n"; |
| OS << "node [shape=rect, fontname=Courier, fontsize=30]\n"; |
| OS << "edge [fontname=Courier, fontsize=30]\n"; |
| OS << "compound=true\n"; |
| |
| for (VPBlockBase *Block : depth_first(Plan.getEntry())) |
| dumpBlock(Block); |
| |
| OS << "}\n"; |
| } |
| |
| void VPlanPrinter::dumpBlock(const VPBlockBase *Block) { |
| if (const VPBasicBlock *BasicBlock = dyn_cast<VPBasicBlock>(Block)) |
| dumpBasicBlock(BasicBlock); |
| else if (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block)) |
| dumpRegion(Region); |
| else |
| llvm_unreachable("Unsupported kind of VPBlock."); |
| } |
| |
| void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To, |
| bool Hidden, const Twine &Label) { |
| // Due to "dot" we print an edge between two regions as an edge between the |
| // exit basic block and the entry basic of the respective regions. |
| const VPBlockBase *Tail = From->getExitBasicBlock(); |
| const VPBlockBase *Head = To->getEntryBasicBlock(); |
| OS << Indent << getUID(Tail) << " -> " << getUID(Head); |
| OS << " [ label=\"" << Label << '\"'; |
| if (Tail != From) |
| OS << " ltail=" << getUID(From); |
| if (Head != To) |
| OS << " lhead=" << getUID(To); |
| if (Hidden) |
| OS << "; splines=none"; |
| OS << "]\n"; |
| } |
| |
| void VPlanPrinter::dumpEdges(const VPBlockBase *Block) { |
| auto &Successors = Block->getSuccessors(); |
| if (Successors.size() == 1) |
| drawEdge(Block, Successors.front(), false, ""); |
| else if (Successors.size() == 2) { |
| drawEdge(Block, Successors.front(), false, "T"); |
| drawEdge(Block, Successors.back(), false, "F"); |
| } else { |
| unsigned SuccessorNumber = 0; |
| for (auto *Successor : Successors) |
| drawEdge(Block, Successor, false, Twine(SuccessorNumber++)); |
| } |
| } |
| |
| void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) { |
| OS << Indent << getUID(BasicBlock) << " [label =\n"; |
| bumpIndent(1); |
| OS << Indent << "\"" << DOT::EscapeString(BasicBlock->getName()) << ":\\n\""; |
| bumpIndent(1); |
| for (const VPRecipeBase &Recipe : *BasicBlock) |
| Recipe.print(OS, Indent); |
| bumpIndent(-2); |
| OS << "\n" << Indent << "]\n"; |
| dumpEdges(BasicBlock); |
| } |
| |
| void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) { |
| OS << Indent << "subgraph " << getUID(Region) << " {\n"; |
| bumpIndent(1); |
| OS << Indent << "fontname=Courier\n" |
| << Indent << "label=\"" |
| << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ") |
| << DOT::EscapeString(Region->getName()) << "\"\n"; |
| // Dump the blocks of the region. |
| assert(Region->getEntry() && "Region contains no inner blocks."); |
| for (const VPBlockBase *Block : depth_first(Region->getEntry())) |
| dumpBlock(Block); |
| bumpIndent(-1); |
| OS << Indent << "}\n"; |
| dumpEdges(Region); |
| } |
| |
| void VPlanPrinter::printAsIngredient(raw_ostream &O, Value *V) { |
| std::string IngredientString; |
| raw_string_ostream RSO(IngredientString); |
| if (auto *Inst = dyn_cast<Instruction>(V)) { |
| if (!Inst->getType()->isVoidTy()) { |
| Inst->printAsOperand(RSO, false); |
| RSO << " = "; |
| } |
| RSO << Inst->getOpcodeName() << " "; |
| unsigned E = Inst->getNumOperands(); |
| if (E > 0) { |
| Inst->getOperand(0)->printAsOperand(RSO, false); |
| for (unsigned I = 1; I < E; ++I) |
| Inst->getOperand(I)->printAsOperand(RSO << ", ", false); |
| } |
| } else // !Inst |
| V->printAsOperand(RSO, false); |
| RSO.flush(); |
| O << DOT::EscapeString(IngredientString); |
| } |