blob: 2357d81916a4577840b5370c6f0da40643986dae [file] [log] [blame]
//===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file promotes memory references to be register references. It promotes
// alloca instructions which only have loads and stores as uses. An alloca is
// transformed by using iterated dominator frontiers to place PHI nodes, then
// traversing the function in depth-first order to rewrite loads and stores as
// appropriate.
//
// The algorithm used here is based on:
//
// Sreedhar and Gao. A linear time algorithm for placing phi-nodes.
// In Proceedings of the 22nd ACM SIGPLAN-SIGACT Symposium on Principles of
// Programming Languages
// POPL '95. ACM, New York, NY, 62-73.
//
// It has been modified to not explicitly use the DJ graph data structure and to
// directly compute pruned SSA using per-variable liveness information.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mem2reg"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Metadata.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Analysis/DIBuilder.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/CFG.h"
#include <algorithm>
#include <queue>
using namespace llvm;
STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block");
STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store");
STATISTIC(NumDeadAlloca, "Number of dead alloca's removed");
STATISTIC(NumPHIInsert, "Number of PHI nodes inserted");
namespace llvm {
template<>
struct DenseMapInfo<std::pair<BasicBlock*, unsigned> > {
typedef std::pair<BasicBlock*, unsigned> EltTy;
static inline EltTy getEmptyKey() {
return EltTy(reinterpret_cast<BasicBlock*>(-1), ~0U);
}
static inline EltTy getTombstoneKey() {
return EltTy(reinterpret_cast<BasicBlock*>(-2), 0U);
}
static unsigned getHashValue(const std::pair<BasicBlock*, unsigned> &Val) {
using llvm::hash_value;
return static_cast<unsigned>(hash_value(Val));
}
static bool isEqual(const EltTy &LHS, const EltTy &RHS) {
return LHS == RHS;
}
};
}
/// isAllocaPromotable - Return true if this alloca is legal for promotion.
/// This is true if there are only loads and stores to the alloca.
///
bool llvm::isAllocaPromotable(const AllocaInst *AI) {
// FIXME: If the memory unit is of pointer or integer type, we can permit
// assignments to subsections of the memory unit.
// Only allow direct and non-volatile loads and stores...
for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end();
UI != UE; ++UI) { // Loop over all of the uses of the alloca
const User *U = *UI;
if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
// Note that atomic loads can be transformed; atomic semantics do
// not have any meaning for a local alloca.
if (LI->isVolatile())
return false;
} else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
if (SI->getOperand(0) == AI)
return false; // Don't allow a store OF the AI, only INTO the AI.
// Note that atomic stores can be transformed; atomic semantics do
// not have any meaning for a local alloca.
if (SI->isVolatile())
return false;
} else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
II->getIntrinsicID() != Intrinsic::lifetime_end)
return false;
} else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
if (BCI->getType() != Type::getInt8PtrTy(U->getContext()))
return false;
if (!onlyUsedByLifetimeMarkers(BCI))
return false;
} else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
if (GEPI->getType() != Type::getInt8PtrTy(U->getContext()))
return false;
if (!GEPI->hasAllZeroIndices())
return false;
if (!onlyUsedByLifetimeMarkers(GEPI))
return false;
} else {
return false;
}
}
return true;
}
namespace {
struct AllocaInfo;
// Data package used by RenamePass()
class RenamePassData {
public:
typedef std::vector<Value *> ValVector;
RenamePassData() : BB(NULL), Pred(NULL), Values() {}
RenamePassData(BasicBlock *B, BasicBlock *P,
const ValVector &V) : BB(B), Pred(P), Values(V) {}
BasicBlock *BB;
BasicBlock *Pred;
ValVector Values;
void swap(RenamePassData &RHS) {
std::swap(BB, RHS.BB);
std::swap(Pred, RHS.Pred);
Values.swap(RHS.Values);
}
};
/// LargeBlockInfo - This assigns and keeps a per-bb relative ordering of
/// load/store instructions in the block that directly load or store an alloca.
///
/// This functionality is important because it avoids scanning large basic
/// blocks multiple times when promoting many allocas in the same block.
class LargeBlockInfo {
/// InstNumbers - For each instruction that we track, keep the index of the
/// instruction. The index starts out as the number of the instruction from
/// the start of the block.
DenseMap<const Instruction *, unsigned> InstNumbers;
public:
/// isInterestingInstruction - This code only looks at accesses to allocas.
static bool isInterestingInstruction(const Instruction *I) {
return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) ||
(isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1)));
}
/// getInstructionIndex - Get or calculate the index of the specified
/// instruction.
unsigned getInstructionIndex(const Instruction *I) {
assert(isInterestingInstruction(I) &&
"Not a load/store to/from an alloca?");
// If we already have this instruction number, return it.
DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I);
if (It != InstNumbers.end()) return It->second;
// Scan the whole block to get the instruction. This accumulates
// information for every interesting instruction in the block, in order to
// avoid gratuitus rescans.
const BasicBlock *BB = I->getParent();
unsigned InstNo = 0;
for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end();
BBI != E; ++BBI)
if (isInterestingInstruction(BBI))
InstNumbers[BBI] = InstNo++;
It = InstNumbers.find(I);
assert(It != InstNumbers.end() && "Didn't insert instruction?");
return It->second;
}
void deleteValue(const Instruction *I) {
InstNumbers.erase(I);
}
void clear() {
InstNumbers.clear();
}
};
struct PromoteMem2Reg {
/// Allocas - The alloca instructions being promoted.
///
std::vector<AllocaInst*> Allocas;
DominatorTree &DT;
DIBuilder *DIB;
/// AST - An AliasSetTracker object to update. If null, don't update it.
///
AliasSetTracker *AST;
/// AllocaLookup - Reverse mapping of Allocas.
///
DenseMap<AllocaInst*, unsigned> AllocaLookup;
/// NewPhiNodes - The PhiNodes we're adding.
///
DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*> NewPhiNodes;
/// PhiToAllocaMap - For each PHI node, keep track of which entry in Allocas
/// it corresponds to.
DenseMap<PHINode*, unsigned> PhiToAllocaMap;
/// PointerAllocaValues - If we are updating an AliasSetTracker, then for
/// each alloca that is of pointer type, we keep track of what to copyValue
/// to the inserted PHI nodes here.
///
std::vector<Value*> PointerAllocaValues;
/// AllocaDbgDeclares - For each alloca, we keep track of the dbg.declare
/// intrinsic that describes it, if any, so that we can convert it to a
/// dbg.value intrinsic if the alloca gets promoted.
SmallVector<DbgDeclareInst*, 8> AllocaDbgDeclares;
/// Visited - The set of basic blocks the renamer has already visited.
///
SmallPtrSet<BasicBlock*, 16> Visited;
/// BBNumbers - Contains a stable numbering of basic blocks to avoid
/// non-determinstic behavior.
DenseMap<BasicBlock*, unsigned> BBNumbers;
/// DomLevels - Maps DomTreeNodes to their level in the dominator tree.
DenseMap<DomTreeNode*, unsigned> DomLevels;
/// BBNumPreds - Lazily compute the number of predecessors a block has.
DenseMap<const BasicBlock*, unsigned> BBNumPreds;
public:
PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt,
AliasSetTracker *ast)
: Allocas(A), DT(dt), DIB(0), AST(ast) {}
~PromoteMem2Reg() {
delete DIB;
}
void run();
/// dominates - Return true if BB1 dominates BB2 using the DominatorTree.
///
bool dominates(BasicBlock *BB1, BasicBlock *BB2) const {
return DT.dominates(BB1, BB2);
}
private:
void RemoveFromAllocasList(unsigned &AllocaIdx) {
Allocas[AllocaIdx] = Allocas.back();
Allocas.pop_back();
--AllocaIdx;
}
unsigned getNumPreds(const BasicBlock *BB) {
unsigned &NP = BBNumPreds[BB];
if (NP == 0)
NP = std::distance(pred_begin(BB), pred_end(BB))+1;
return NP-1;
}
void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
AllocaInfo &Info);
void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
const SmallPtrSet<BasicBlock*, 32> &DefBlocks,
SmallPtrSet<BasicBlock*, 32> &LiveInBlocks);
void RewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
LargeBlockInfo &LBI);
void PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
LargeBlockInfo &LBI);
void RenamePass(BasicBlock *BB, BasicBlock *Pred,
RenamePassData::ValVector &IncVals,
std::vector<RenamePassData> &Worklist);
bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version);
};
struct AllocaInfo {
SmallVector<BasicBlock*, 32> DefiningBlocks;
SmallVector<BasicBlock*, 32> UsingBlocks;
StoreInst *OnlyStore;
BasicBlock *OnlyBlock;
bool OnlyUsedInOneBlock;
Value *AllocaPointerVal;
DbgDeclareInst *DbgDeclare;
void clear() {
DefiningBlocks.clear();
UsingBlocks.clear();
OnlyStore = 0;
OnlyBlock = 0;
OnlyUsedInOneBlock = true;
AllocaPointerVal = 0;
DbgDeclare = 0;
}
/// AnalyzeAlloca - Scan the uses of the specified alloca, filling in our
/// ivars.
void AnalyzeAlloca(AllocaInst *AI) {
clear();
// As we scan the uses of the alloca instruction, keep track of stores,
// and decide whether all of the loads and stores to the alloca are within
// the same basic block.
for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
UI != E;) {
Instruction *User = cast<Instruction>(*UI++);
if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
// Remember the basic blocks which define new values for the alloca
DefiningBlocks.push_back(SI->getParent());
AllocaPointerVal = SI->getOperand(0);
OnlyStore = SI;
} else {
LoadInst *LI = cast<LoadInst>(User);
// Otherwise it must be a load instruction, keep track of variable
// reads.
UsingBlocks.push_back(LI->getParent());
AllocaPointerVal = LI;
}
if (OnlyUsedInOneBlock) {
if (OnlyBlock == 0)
OnlyBlock = User->getParent();
else if (OnlyBlock != User->getParent())
OnlyUsedInOneBlock = false;
}
}
DbgDeclare = FindAllocaDbgDeclare(AI);
}
};
typedef std::pair<DomTreeNode*, unsigned> DomTreeNodePair;
struct DomTreeNodeCompare {
bool operator()(const DomTreeNodePair &LHS, const DomTreeNodePair &RHS) {
return LHS.second < RHS.second;
}
};
} // end of anonymous namespace
static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
// Knowing that this alloca is promotable, we know that it's safe to kill all
// instructions except for load and store.
for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
UI != UE;) {
Instruction *I = cast<Instruction>(*UI);
++UI;
if (isa<LoadInst>(I) || isa<StoreInst>(I))
continue;
if (!I->getType()->isVoidTy()) {
// The only users of this bitcast/GEP instruction are lifetime intrinsics.
// Follow the use/def chain to erase them now instead of leaving it for
// dead code elimination later.
for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
UI != UE;) {
Instruction *Inst = cast<Instruction>(*UI);
++UI;
Inst->eraseFromParent();
}
}
I->eraseFromParent();
}
}
void PromoteMem2Reg::run() {
Function &F = *DT.getRoot()->getParent();
if (AST) PointerAllocaValues.resize(Allocas.size());
AllocaDbgDeclares.resize(Allocas.size());
AllocaInfo Info;
LargeBlockInfo LBI;
for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
AllocaInst *AI = Allocas[AllocaNum];
assert(isAllocaPromotable(AI) &&
"Cannot promote non-promotable alloca!");
assert(AI->getParent()->getParent() == &F &&
"All allocas should be in the same function, which is same as DF!");
removeLifetimeIntrinsicUsers(AI);
if (AI->use_empty()) {
// If there are no uses of the alloca, just delete it now.
if (AST) AST->deleteValue(AI);
AI->eraseFromParent();
// Remove the alloca from the Allocas list, since it has been processed
RemoveFromAllocasList(AllocaNum);
++NumDeadAlloca;
continue;
}
// Calculate the set of read and write-locations for each alloca. This is
// analogous to finding the 'uses' and 'definitions' of each variable.
Info.AnalyzeAlloca(AI);
// If there is only a single store to this value, replace any loads of
// it that are directly dominated by the definition with the value stored.
if (Info.DefiningBlocks.size() == 1) {
RewriteSingleStoreAlloca(AI, Info, LBI);
// Finally, after the scan, check to see if the store is all that is left.
if (Info.UsingBlocks.empty()) {
// Record debuginfo for the store and remove the declaration's
// debuginfo.
if (DbgDeclareInst *DDI = Info.DbgDeclare) {
if (!DIB)
DIB = new DIBuilder(*DDI->getParent()->getParent()->getParent());
ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, *DIB);
DDI->eraseFromParent();
}
// Remove the (now dead) store and alloca.
Info.OnlyStore->eraseFromParent();
LBI.deleteValue(Info.OnlyStore);
if (AST) AST->deleteValue(AI);
AI->eraseFromParent();
LBI.deleteValue(AI);
// The alloca has been processed, move on.
RemoveFromAllocasList(AllocaNum);
++NumSingleStore;
continue;
}
}
// If the alloca is only read and written in one basic block, just perform a
// linear sweep over the block to eliminate it.
if (Info.OnlyUsedInOneBlock) {
PromoteSingleBlockAlloca(AI, Info, LBI);
// Finally, after the scan, check to see if the stores are all that is
// left.
if (Info.UsingBlocks.empty()) {
// Remove the (now dead) stores and alloca.
while (!AI->use_empty()) {
StoreInst *SI = cast<StoreInst>(AI->use_back());
// Record debuginfo for the store before removing it.
if (DbgDeclareInst *DDI = Info.DbgDeclare) {
if (!DIB)
DIB = new DIBuilder(*SI->getParent()->getParent()->getParent());
ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
}
SI->eraseFromParent();
LBI.deleteValue(SI);
}
if (AST) AST->deleteValue(AI);
AI->eraseFromParent();
LBI.deleteValue(AI);
// The alloca has been processed, move on.
RemoveFromAllocasList(AllocaNum);
// The alloca's debuginfo can be removed as well.
if (DbgDeclareInst *DDI = Info.DbgDeclare)
DDI->eraseFromParent();
++NumLocalPromoted;
continue;
}
}
// If we haven't computed dominator tree levels, do so now.
if (DomLevels.empty()) {
SmallVector<DomTreeNode*, 32> Worklist;
DomTreeNode *Root = DT.getRootNode();
DomLevels[Root] = 0;
Worklist.push_back(Root);
while (!Worklist.empty()) {
DomTreeNode *Node = Worklist.pop_back_val();
unsigned ChildLevel = DomLevels[Node] + 1;
for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end();
CI != CE; ++CI) {
DomLevels[*CI] = ChildLevel;
Worklist.push_back(*CI);
}
}
}
// If we haven't computed a numbering for the BB's in the function, do so
// now.
if (BBNumbers.empty()) {
unsigned ID = 0;
for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
BBNumbers[I] = ID++;
}
// If we have an AST to keep updated, remember some pointer value that is
// stored into the alloca.
if (AST)
PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal;
// Remember the dbg.declare intrinsic describing this alloca, if any.
if (Info.DbgDeclare) AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare;
// Keep the reverse mapping of the 'Allocas' array for the rename pass.
AllocaLookup[Allocas[AllocaNum]] = AllocaNum;
// At this point, we're committed to promoting the alloca using IDF's, and
// the standard SSA construction algorithm. Determine which blocks need PHI
// nodes and see if we can optimize out some work by avoiding insertion of
// dead phi nodes.
DetermineInsertionPoint(AI, AllocaNum, Info);
}
if (Allocas.empty())
return; // All of the allocas must have been trivial!
LBI.clear();
// Set the incoming values for the basic block to be null values for all of
// the alloca's. We do this in case there is a load of a value that has not
// been stored yet. In this case, it will get this null value.
//
RenamePassData::ValVector Values(Allocas.size());
for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
Values[i] = UndefValue::get(Allocas[i]->getAllocatedType());
// Walks all basic blocks in the function performing the SSA rename algorithm
// and inserting the phi nodes we marked as necessary
//
std::vector<RenamePassData> RenamePassWorkList;
RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values));
do {
RenamePassData RPD;
RPD.swap(RenamePassWorkList.back());
RenamePassWorkList.pop_back();
// RenamePass may add new worklist entries.
RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList);
} while (!RenamePassWorkList.empty());
// The renamer uses the Visited set to avoid infinite loops. Clear it now.
Visited.clear();
// Remove the allocas themselves from the function.
for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
Instruction *A = Allocas[i];
// If there are any uses of the alloca instructions left, they must be in
// unreachable basic blocks that were not processed by walking the dominator
// tree. Just delete the users now.
if (!A->use_empty())
A->replaceAllUsesWith(UndefValue::get(A->getType()));
if (AST) AST->deleteValue(A);
A->eraseFromParent();
}
// Remove alloca's dbg.declare instrinsics from the function.
for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i)
if (DbgDeclareInst *DDI = AllocaDbgDeclares[i])
DDI->eraseFromParent();
// Loop over all of the PHI nodes and see if there are any that we can get
// rid of because they merge all of the same incoming values. This can
// happen due to undef values coming into the PHI nodes. This process is
// iterative, because eliminating one PHI node can cause others to be removed.
bool EliminatedAPHI = true;
while (EliminatedAPHI) {
EliminatedAPHI = false;
for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) {
PHINode *PN = I->second;
// If this PHI node merges one value and/or undefs, get the value.
if (Value *V = SimplifyInstruction(PN, 0, 0, &DT)) {
if (AST && PN->getType()->isPointerTy())
AST->deleteValue(PN);
PN->replaceAllUsesWith(V);
PN->eraseFromParent();
NewPhiNodes.erase(I++);
EliminatedAPHI = true;
continue;
}
++I;
}
}
// At this point, the renamer has added entries to PHI nodes for all reachable
// code. Unfortunately, there may be unreachable blocks which the renamer
// hasn't traversed. If this is the case, the PHI nodes may not
// have incoming values for all predecessors. Loop over all PHI nodes we have
// created, inserting undef values if they are missing any incoming values.
//
for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) {
// We want to do this once per basic block. As such, only process a block
// when we find the PHI that is the first entry in the block.
PHINode *SomePHI = I->second;
BasicBlock *BB = SomePHI->getParent();
if (&BB->front() != SomePHI)
continue;
// Only do work here if there the PHI nodes are missing incoming values. We
// know that all PHI nodes that were inserted in a block will have the same
// number of incoming values, so we can just check any of them.
if (SomePHI->getNumIncomingValues() == getNumPreds(BB))
continue;
// Get the preds for BB.
SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB));
// Ok, now we know that all of the PHI nodes are missing entries for some
// basic blocks. Start by sorting the incoming predecessors for efficient
// access.
std::sort(Preds.begin(), Preds.end());
// Now we loop through all BB's which have entries in SomePHI and remove
// them from the Preds list.
for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
// Do a log(n) search of the Preds list for the entry we want.
SmallVector<BasicBlock*, 16>::iterator EntIt =
std::lower_bound(Preds.begin(), Preds.end(),
SomePHI->getIncomingBlock(i));
assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i)&&
"PHI node has entry for a block which is not a predecessor!");
// Remove the entry
Preds.erase(EntIt);
}
// At this point, the blocks left in the preds list must have dummy
// entries inserted into every PHI nodes for the block. Update all the phi
// nodes in this block that we are inserting (there could be phis before
// mem2reg runs).
unsigned NumBadPreds = SomePHI->getNumIncomingValues();
BasicBlock::iterator BBI = BB->begin();
while ((SomePHI = dyn_cast<PHINode>(BBI++)) &&
SomePHI->getNumIncomingValues() == NumBadPreds) {
Value *UndefVal = UndefValue::get(SomePHI->getType());
for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred)
SomePHI->addIncoming(UndefVal, Preds[pred]);
}
}
NewPhiNodes.clear();
}
/// ComputeLiveInBlocks - Determine which blocks the value is live in. These
/// are blocks which lead to uses. Knowing this allows us to avoid inserting
/// PHI nodes into blocks which don't lead to uses (thus, the inserted phi nodes
/// would be dead).
void PromoteMem2Reg::
ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
const SmallPtrSet<BasicBlock*, 32> &DefBlocks,
SmallPtrSet<BasicBlock*, 32> &LiveInBlocks) {
// To determine liveness, we must iterate through the predecessors of blocks
// where the def is live. Blocks are added to the worklist if we need to
// check their predecessors. Start with all the using blocks.
SmallVector<BasicBlock*, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(),
Info.UsingBlocks.end());
// If any of the using blocks is also a definition block, check to see if the
// definition occurs before or after the use. If it happens before the use,
// the value isn't really live-in.
for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) {
BasicBlock *BB = LiveInBlockWorklist[i];
if (!DefBlocks.count(BB)) continue;
// Okay, this is a block that both uses and defines the value. If the first
// reference to the alloca is a def (store), then we know it isn't live-in.
for (BasicBlock::iterator I = BB->begin(); ; ++I) {
if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
if (SI->getOperand(1) != AI) continue;
// We found a store to the alloca before a load. The alloca is not
// actually live-in here.
LiveInBlockWorklist[i] = LiveInBlockWorklist.back();
LiveInBlockWorklist.pop_back();
--i, --e;
break;
}
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
if (LI->getOperand(0) != AI) continue;
// Okay, we found a load before a store to the alloca. It is actually
// live into this block.
break;
}
}
}
// Now that we have a set of blocks where the phi is live-in, recursively add
// their predecessors until we find the full region the value is live.
while (!LiveInBlockWorklist.empty()) {
BasicBlock *BB = LiveInBlockWorklist.pop_back_val();
// The block really is live in here, insert it into the set. If already in
// the set, then it has already been processed.
if (!LiveInBlocks.insert(BB))
continue;
// Since the value is live into BB, it is either defined in a predecessor or
// live into it to. Add the preds to the worklist unless they are a
// defining block.
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
BasicBlock *P = *PI;
// The value is not live into a predecessor if it defines the value.
if (DefBlocks.count(P))
continue;
// Otherwise it is, add to the worklist.
LiveInBlockWorklist.push_back(P);
}
}
}
/// DetermineInsertionPoint - At this point, we're committed to promoting the
/// alloca using IDF's, and the standard SSA construction algorithm. Determine
/// which blocks need phi nodes and see if we can optimize out some work by
/// avoiding insertion of dead phi nodes.
void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
AllocaInfo &Info) {
// Unique the set of defining blocks for efficient lookup.
SmallPtrSet<BasicBlock*, 32> DefBlocks;
DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());
// Determine which blocks the value is live in. These are blocks which lead
// to uses.
SmallPtrSet<BasicBlock*, 32> LiveInBlocks;
ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);
// Use a priority queue keyed on dominator tree level so that inserted nodes
// are handled from the bottom of the dominator tree upwards.
typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>,
DomTreeNodeCompare> IDFPriorityQueue;
IDFPriorityQueue PQ;
for (SmallPtrSet<BasicBlock*, 32>::const_iterator I = DefBlocks.begin(),
E = DefBlocks.end(); I != E; ++I) {
if (DomTreeNode *Node = DT.getNode(*I))
PQ.push(std::make_pair(Node, DomLevels[Node]));
}
SmallVector<std::pair<unsigned, BasicBlock*>, 32> DFBlocks;
SmallPtrSet<DomTreeNode*, 32> Visited;
SmallVector<DomTreeNode*, 32> Worklist;
while (!PQ.empty()) {
DomTreeNodePair RootPair = PQ.top();
PQ.pop();
DomTreeNode *Root = RootPair.first;
unsigned RootLevel = RootPair.second;
// Walk all dominator tree children of Root, inspecting their CFG edges with
// targets elsewhere on the dominator tree. Only targets whose level is at
// most Root's level are added to the iterated dominance frontier of the
// definition set.
Worklist.clear();
Worklist.push_back(Root);
while (!Worklist.empty()) {
DomTreeNode *Node = Worklist.pop_back_val();
BasicBlock *BB = Node->getBlock();
for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE;
++SI) {
DomTreeNode *SuccNode = DT.getNode(*SI);
// Quickly skip all CFG edges that are also dominator tree edges instead
// of catching them below.
if (SuccNode->getIDom() == Node)
continue;
unsigned SuccLevel = DomLevels[SuccNode];
if (SuccLevel > RootLevel)
continue;
if (!Visited.insert(SuccNode))
continue;
BasicBlock *SuccBB = SuccNode->getBlock();
if (!LiveInBlocks.count(SuccBB))
continue;
DFBlocks.push_back(std::make_pair(BBNumbers[SuccBB], SuccBB));
if (!DefBlocks.count(SuccBB))
PQ.push(std::make_pair(SuccNode, SuccLevel));
}
for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE;
++CI) {
if (!Visited.count(*CI))
Worklist.push_back(*CI);
}
}
}
if (DFBlocks.size() > 1)
std::sort(DFBlocks.begin(), DFBlocks.end());
unsigned CurrentVersion = 0;
for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i)
QueuePhiNode(DFBlocks[i].second, AllocaNum, CurrentVersion);
}
/// RewriteSingleStoreAlloca - If there is only a single store to this value,
/// replace any loads of it that are directly dominated by the definition with
/// the value stored.
void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI,
AllocaInfo &Info,
LargeBlockInfo &LBI) {
StoreInst *OnlyStore = Info.OnlyStore;
bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
BasicBlock *StoreBB = OnlyStore->getParent();
int StoreIndex = -1;
// Clear out UsingBlocks. We will reconstruct it here if needed.
Info.UsingBlocks.clear();
for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ) {
Instruction *UserInst = cast<Instruction>(*UI++);
if (!isa<LoadInst>(UserInst)) {
assert(UserInst == OnlyStore && "Should only have load/stores");
continue;
}
LoadInst *LI = cast<LoadInst>(UserInst);
// Okay, if we have a load from the alloca, we want to replace it with the
// only value stored to the alloca. We can do this if the value is
// dominated by the store. If not, we use the rest of the mem2reg machinery
// to insert the phi nodes as needed.
if (!StoringGlobalVal) { // Non-instructions are always dominated.
if (LI->getParent() == StoreBB) {
// If we have a use that is in the same block as the store, compare the
// indices of the two instructions to see which one came first. If the
// load came before the store, we can't handle it.
if (StoreIndex == -1)
StoreIndex = LBI.getInstructionIndex(OnlyStore);
if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) {
// Can't handle this load, bail out.
Info.UsingBlocks.push_back(StoreBB);
continue;
}
} else if (LI->getParent() != StoreBB &&
!dominates(StoreBB, LI->getParent())) {
// If the load and store are in different blocks, use BB dominance to
// check their relationships. If the store doesn't dom the use, bail
// out.
Info.UsingBlocks.push_back(LI->getParent());
continue;
}
}
// Otherwise, we *can* safely rewrite this load.
Value *ReplVal = OnlyStore->getOperand(0);
// If the replacement value is the load, this must occur in unreachable
// code.
if (ReplVal == LI)
ReplVal = UndefValue::get(LI->getType());
LI->replaceAllUsesWith(ReplVal);
if (AST && LI->getType()->isPointerTy())
AST->deleteValue(LI);
LI->eraseFromParent();
LBI.deleteValue(LI);
}
}
namespace {
/// StoreIndexSearchPredicate - This is a helper predicate used to search by the
/// first element of a pair.
struct StoreIndexSearchPredicate {
bool operator()(const std::pair<unsigned, StoreInst*> &LHS,
const std::pair<unsigned, StoreInst*> &RHS) {
return LHS.first < RHS.first;
}
};
}
/// PromoteSingleBlockAlloca - Many allocas are only used within a single basic
/// block. If this is the case, avoid traversing the CFG and inserting a lot of
/// potentially useless PHI nodes by just performing a single linear pass over
/// the basic block using the Alloca.
///
/// If we cannot promote this alloca (because it is read before it is written),
/// return true. This is necessary in cases where, due to control flow, the
/// alloca is potentially undefined on some control flow paths. e.g. code like
/// this is potentially correct:
///
/// for (...) { if (c) { A = undef; undef = B; } }
///
/// ... so long as A is not used before undef is set.
///
void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
LargeBlockInfo &LBI) {
// The trickiest case to handle is when we have large blocks. Because of this,
// this code is optimized assuming that large blocks happen. This does not
// significantly pessimize the small block case. This uses LargeBlockInfo to
// make it efficient to get the index of various operations in the block.
// Clear out UsingBlocks. We will reconstruct it here if needed.
Info.UsingBlocks.clear();
// Walk the use-def list of the alloca, getting the locations of all stores.
typedef SmallVector<std::pair<unsigned, StoreInst*>, 64> StoresByIndexTy;
StoresByIndexTy StoresByIndex;
for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
UI != E; ++UI)
if (StoreInst *SI = dyn_cast<StoreInst>(*UI))
StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI));
// If there are no stores to the alloca, just replace any loads with undef.
if (StoresByIndex.empty()) {
for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;)
if (LoadInst *LI = dyn_cast<LoadInst>(*UI++)) {
LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
if (AST && LI->getType()->isPointerTy())
AST->deleteValue(LI);
LBI.deleteValue(LI);
LI->eraseFromParent();
}
return;
}
// Sort the stores by their index, making it efficient to do a lookup with a
// binary search.
std::sort(StoresByIndex.begin(), StoresByIndex.end());
// Walk all of the loads from this alloca, replacing them with the nearest
// store above them, if any.
for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) {
LoadInst *LI = dyn_cast<LoadInst>(*UI++);
if (!LI) continue;
unsigned LoadIdx = LBI.getInstructionIndex(LI);
// Find the nearest store that has a lower than this load.
StoresByIndexTy::iterator I =
std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(),
std::pair<unsigned, StoreInst*>(LoadIdx, static_cast<StoreInst*>(0)),
StoreIndexSearchPredicate());
// If there is no store before this load, then we can't promote this load.
if (I == StoresByIndex.begin()) {
// Can't handle this load, bail out.
Info.UsingBlocks.push_back(LI->getParent());
continue;
}
// Otherwise, there was a store before this load, the load takes its value.
--I;
LI->replaceAllUsesWith(I->second->getOperand(0));
if (AST && LI->getType()->isPointerTy())
AST->deleteValue(LI);
LI->eraseFromParent();
LBI.deleteValue(LI);
}
}
// QueuePhiNode - queues a phi-node to be added to a basic-block for a specific
// Alloca returns true if there wasn't already a phi-node for that variable
//
bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
unsigned &Version) {
// Look up the basic-block in question.
PHINode *&PN = NewPhiNodes[std::make_pair(BB, AllocaNo)];
// If the BB already has a phi node added for the i'th alloca then we're done!
if (PN) return false;
// Create a PhiNode using the dereferenced type... and add the phi-node to the
// BasicBlock.
PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB),
Allocas[AllocaNo]->getName() + "." + Twine(Version++),
BB->begin());
++NumPHIInsert;
PhiToAllocaMap[PN] = AllocaNo;
if (AST && PN->getType()->isPointerTy())
AST->copyValue(PointerAllocaValues[AllocaNo], PN);
return true;
}
// RenamePass - Recursively traverse the CFG of the function, renaming loads and
// stores to the allocas which we are promoting. IncomingVals indicates what
// value each Alloca contains on exit from the predecessor block Pred.
//
void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
RenamePassData::ValVector &IncomingVals,
std::vector<RenamePassData> &Worklist) {
NextIteration:
// If we are inserting any phi nodes into this BB, they will already be in the
// block.
if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) {
// If we have PHI nodes to update, compute the number of edges from Pred to
// BB.
if (PhiToAllocaMap.count(APN)) {
// We want to be able to distinguish between PHI nodes being inserted by
// this invocation of mem2reg from those phi nodes that already existed in
// the IR before mem2reg was run. We determine that APN is being inserted
// because it is missing incoming edges. All other PHI nodes being
// inserted by this pass of mem2reg will have the same number of incoming
// operands so far. Remember this count.
unsigned NewPHINumOperands = APN->getNumOperands();
unsigned NumEdges = 0;
for (succ_iterator I = succ_begin(Pred), E = succ_end(Pred); I != E; ++I)
if (*I == BB)
++NumEdges;
assert(NumEdges && "Must be at least one edge from Pred to BB!");
// Add entries for all the phis.
BasicBlock::iterator PNI = BB->begin();
do {
unsigned AllocaNo = PhiToAllocaMap[APN];
// Add N incoming values to the PHI node.
for (unsigned i = 0; i != NumEdges; ++i)
APN->addIncoming(IncomingVals[AllocaNo], Pred);
// The currently active variable for this block is now the PHI.
IncomingVals[AllocaNo] = APN;
// Get the next phi node.
++PNI;
APN = dyn_cast<PHINode>(PNI);
if (APN == 0) break;
// Verify that it is missing entries. If not, it is not being inserted
// by this mem2reg invocation so we want to ignore it.
} while (APN->getNumOperands() == NewPHINumOperands);
}
}
// Don't revisit blocks.
if (!Visited.insert(BB)) return;
for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II); ) {
Instruction *I = II++; // get the instruction, increment iterator
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand());
if (!Src) continue;
DenseMap<AllocaInst*, unsigned>::iterator AI = AllocaLookup.find(Src);
if (AI == AllocaLookup.end()) continue;
Value *V = IncomingVals[AI->second];
// Anything using the load now uses the current value.
LI->replaceAllUsesWith(V);
if (AST && LI->getType()->isPointerTy())
AST->deleteValue(LI);
BB->getInstList().erase(LI);
} else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
// Delete this instruction and mark the name as the current holder of the
// value
AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand());
if (!Dest) continue;
DenseMap<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest);
if (ai == AllocaLookup.end())
continue;
// what value were we writing?
IncomingVals[ai->second] = SI->getOperand(0);
// Record debuginfo for the store before removing it.
if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second]) {
if (!DIB)
DIB = new DIBuilder(*SI->getParent()->getParent()->getParent());
ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
}
BB->getInstList().erase(SI);
}
}
// 'Recurse' to our successors.
succ_iterator I = succ_begin(BB), E = succ_end(BB);
if (I == E) return;
// Keep track of the successors so we don't visit the same successor twice
SmallPtrSet<BasicBlock*, 8> VisitedSuccs;
// Handle the first successor without using the worklist.
VisitedSuccs.insert(*I);
Pred = BB;
BB = *I;
++I;
for (; I != E; ++I)
if (VisitedSuccs.insert(*I))
Worklist.push_back(RenamePassData(*I, Pred, IncomingVals));
goto NextIteration;
}
/// PromoteMemToReg - Promote the specified list of alloca instructions into
/// scalar registers, inserting PHI nodes as appropriate. This function does
/// not modify the CFG of the function at all. All allocas must be from the
/// same function.
///
/// If AST is specified, the specified tracker is updated to reflect changes
/// made to the IR.
///
void llvm::PromoteMemToReg(const std::vector<AllocaInst*> &Allocas,
DominatorTree &DT, AliasSetTracker *AST) {
// If there is nothing to do, bail out...
if (Allocas.empty()) return;
PromoteMem2Reg(Allocas, DT, AST).run();
}