blob: 03c4d182058a50bb20b6b8c178411a19e0f8e962 [file] [log] [blame]
//===-- PoolAllocate.cpp - Pool Allocation Pass ---------------------------===//
//
// This transform changes programs so that disjoint data structures are
// allocated out of different pools of memory, increasing locality.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "PoolAllocator"
#include "poolalloc/PoolAllocate.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/Module.h"
#include "llvm/Analysis/DataStructure.h"
#include "llvm/Analysis/DSGraph.h"
#include "llvm/Analysis/DSGraphTraits.h"
#include "llvm/Support/CFG.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "Support/CommandLine.h"
#include "Support/Debug.h"
#include "Support/DepthFirstIterator.h"
#include "Support/Statistic.h"
using namespace llvm;
using namespace PA;
// PASS_ALL_ARGUMENTS - If this is set to true, pass in pool descriptors for all
// DSNodes in a function, even if there are no allocations or frees in it. This
// is useful for SafeCode.
#define PASS_ALL_ARGUMENTS 0
const Type *PoolAllocate::PoolDescPtrTy = 0;
namespace {
RegisterOpt<PoolAllocate>
X("poolalloc", "Pool allocate disjoint data structures");
Statistic<> NumArgsAdded("poolalloc", "Number of function arguments added");
Statistic<> NumCloned ("poolalloc", "Number of functions cloned");
Statistic<> NumPools ("poolalloc", "Number of pools allocated");
Statistic<> NumTSPools ("poolalloc", "Number of typesafe pools");
Statistic<> NumPoolFree ("poolalloc", "Number of poolfree's elided");
Statistic<> NumNonprofit("poolalloc", "Number of DSNodes not profitable");
Statistic<> NumColocated("poolalloc", "Number of DSNodes colocated");
const Type *VoidPtrTy;
// The type to allocate for a pool descriptor: { sbyte*, uint, uint }
// void *Data (the data)
// unsigned NodeSize (size of an allocated node)
// unsigned FreeablePool (are slabs in the pool freeable upon calls to
// poolfree?)
const Type *PoolDescType;
enum PoolAllocHeuristic {
AllNodes,
NoNodes,
CyclicNodes,
SmartCoallesceNodes,
};
cl::opt<PoolAllocHeuristic>
Heuristic("poolalloc-heuristic",
cl::desc("Heuristic to choose which nodes to pool allocate"),
cl::values(clEnumVal(AllNodes, " Pool allocate all nodes"),
clEnumVal(CyclicNodes, " Pool allocate nodes with cycles"),
clEnumVal(SmartCoallesceNodes, " Use the smart node merging heuristic"),
clEnumVal(NoNodes, " Do not pool allocate anything"),
0),
cl::init(SmartCoallesceNodes));
cl::opt<bool>
DisableInitDestroyOpt("poolalloc-force-simple-pool-init",
cl::desc("Always insert poolinit/pooldestroy calls at start and exit of functions"), cl::init(true));
}
void PoolAllocate::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<CompleteBUDataStructures>();
AU.addRequired<TargetData>();
}
bool PoolAllocate::run(Module &M) {
if (M.begin() == M.end()) return false;
CurModule = &M;
BU = &getAnalysis<CompleteBUDataStructures>();
// Add the pool* prototypes to the module
AddPoolPrototypes();
// Figure out what the equivalence classes are for indirectly called functions
BuildIndirectFunctionSets(M);
// Create the pools for memory objects reachable by global variables.
if (SetupGlobalPools(M))
return true;
// Loop over the functions in the original program finding the pool desc.
// arguments necessary for each function that is indirectly callable.
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
if (!I->isExternal())
FindFunctionPoolArgs(*I);
std::map<Function*, Function*> FuncMap;
// Now clone a function using the pool arg list obtained in the previous pass
// over the modules. Loop over only the function initially in the program,
// don't traverse newly added ones. If the function needs new arguments, make
// its clone.
Module::iterator LastOrigFunction = --M.end();
for (Module::iterator I = M.begin(); ; ++I) {
if (!I->isExternal())
if (Function *R = MakeFunctionClone(*I))
FuncMap[I] = R;
if (I == LastOrigFunction) break;
}
++LastOrigFunction;
// Now that all call targets are available, rewrite the function bodies of the
// clones.
for (Module::iterator I = M.begin(); I != LastOrigFunction; ++I)
if (!I->isExternal()) {
std::map<Function*, Function*>::iterator FI = FuncMap.find(I);
ProcessFunctionBody(*I, FI != FuncMap.end() ? *FI->second : *I);
}
return true;
}
static void GetNodesReachableFromGlobals(DSGraph &G,
hash_set<DSNode*> &NodesFromGlobals) {
for (DSScalarMap::global_iterator I = G.getScalarMap().global_begin(),
E = G.getScalarMap().global_end(); I != E; ++I)
G.getNodeForValue(*I).getNode()->markReachableNodes(NodesFromGlobals);
}
// AddPoolPrototypes - Add prototypes for the pool functions to the specified
// module and update the Pool* instance variables to point to them.
//
void PoolAllocate::AddPoolPrototypes() {
if (VoidPtrTy == 0) {
VoidPtrTy = PointerType::get(Type::SByteTy);
PoolDescType = ArrayType::get(VoidPtrTy, 16);
PoolDescPtrTy = PointerType::get(PoolDescType);
}
CurModule->addTypeName("PoolDescriptor", PoolDescType);
// Get poolinit function...
PoolInit = CurModule->getOrInsertFunction("poolinit", Type::VoidTy,
PoolDescPtrTy, Type::UIntTy, 0);
// Get pooldestroy function...
PoolDestroy = CurModule->getOrInsertFunction("pooldestroy", Type::VoidTy,
PoolDescPtrTy, 0);
// The poolalloc function
PoolAlloc = CurModule->getOrInsertFunction("poolalloc",
VoidPtrTy, PoolDescPtrTy,
Type::UIntTy, 0);
// Get the poolfree function...
PoolFree = CurModule->getOrInsertFunction("poolfree", Type::VoidTy,
PoolDescPtrTy, VoidPtrTy, 0);
}
static void printNTOMap(std::map<Value*, const Value*> &NTOM) {
std::cerr << "NTOM MAP\n";
for (std::map<Value*, const Value *>::iterator I = NTOM.begin(),
E = NTOM.end(); I != E; ++I) {
if (!isa<Function>(I->first) && !isa<BasicBlock>(I->first))
std::cerr << *I->first << " to " << *I->second << "\n";
}
}
static void MarkNodesWhichMustBePassedIn(hash_set<DSNode*> &MarkedNodes,
Function &F, DSGraph &G) {
// Mark globals and incomplete nodes as live... (this handles arguments)
if (F.getName() != "main") {
// All DSNodes reachable from arguments must be passed in.
for (Function::aiterator I = F.abegin(), E = F.aend(); I != E; ++I) {
DSGraph::ScalarMapTy::iterator AI = G.getScalarMap().find(I);
if (AI != G.getScalarMap().end())
if (DSNode *N = AI->second.getNode())
N->markReachableNodes(MarkedNodes);
}
}
// Marked the returned node as alive...
if (DSNode *RetNode = G.getReturnNodeFor(F).getNode())
RetNode->markReachableNodes(MarkedNodes);
// Calculate which DSNodes are reachable from globals. If a node is reachable
// from a global, we will create a global pool for it, so no argument passage
// is required.
hash_set<DSNode*> NodesFromGlobals;
GetNodesReachableFromGlobals(G, NodesFromGlobals);
// Remove any nodes reachable from a global. These nodes will be put into
// global pools, which do not require arguments to be passed in. Also, erase
// any marked node that is not a heap node. Since no allocations or frees
// will be done with it, it needs no argument.
for (hash_set<DSNode*>::iterator I = MarkedNodes.begin(),
E = MarkedNodes.end(); I != E; ) {
DSNode *N = *I++;
if ((!N->isHeapNode() && !PASS_ALL_ARGUMENTS) || NodesFromGlobals.count(N))
MarkedNodes.erase(N);
}
}
const PA::EquivClassInfo &PoolAllocate::getECIForIndirectCallSite(CallSite CS) {
Instruction *I = CS.getInstruction();
assert(I && "Not a call site?");
if (!OneCalledFunction.count(I))
return ECInfoForLeadersMap[0]; // Special null function for empty graphs
Function *Called = OneCalledFunction[I];
Function *Leader = FuncECs.findClass(Called);
assert(Leader && "Leader not found for indirect call target!");
assert(ECInfoForLeadersMap.count(Leader) && "No ECI for indirect call site!");
return ECInfoForLeadersMap[Leader];
}
// BuildIndirectFunctionSets - Iterate over the module looking for indirect
// calls to functions. If a call site can invoke any functions [F1, F2... FN],
// unify the N functions together in the FuncECs set.
//
void PoolAllocate::BuildIndirectFunctionSets(Module &M) {
const CompleteBUDataStructures::ActualCalleesTy AC = BU->getActualCallees();
// Loop over all of the indirect calls in the program. If a call site can
// call multiple different functions, we need to unify all of the callees into
// the same equivalence class.
Instruction *LastInst = 0;
Function *FirstFunc = 0;
for (CompleteBUDataStructures::ActualCalleesTy::const_iterator I = AC.begin(),
E = AC.end(); I != E; ++I) {
CallSite CS = CallSite::get(I->first);
if (!CS.getCalledFunction() && // Ignore direct calls
!I->second->isExternal()) { // Ignore functions we cannot modify
//std::cerr << "CALLEE: " << I->second->getName() << " from : " <<
//I->first;
if (I->first != LastInst) {
// This is the first callee from this call site.
LastInst = I->first;
FirstFunc = I->second;
OneCalledFunction[LastInst] = FirstFunc;
FuncECs.addElement(I->second);
} else {
// This is not the first possible callee from a particular call site.
// Union the callee in with the other functions.
FuncECs.unionSetsWith(FirstFunc, I->second);
}
}
}
// Now that all of the equivalences have been built, turn the union-find data
// structure into a simple map from each function in the equiv class to the
// DSGraph used to represent the union of graphs.
//
std::map<Function*, Function*> &leaderMap = FuncECs.getLeaderMap();
DEBUG(std::cerr << "Indirect Function Equivalence Map:\n");
for (std::map<Function*, Function*>::iterator LI = leaderMap.begin(),
LE = leaderMap.end(); LI != LE; ++LI) {
DEBUG(std::cerr << " " << LI->first->getName() << ": leader is "
<< LI->second->getName() << "\n");
// Now the we have the equiv class info for this object, merge the DSGraph
// for this function into the composite DSGraph.
EquivClassInfo &ECI = ECInfoForLeadersMap[LI->second];
// If this is the first function in this equiv class, create the graph now.
if (ECI.G == 0)
ECI.G = new DSGraph(BU->getGlobalsGraph().getTargetData());
// Clone this member of the equivalence class into ECI.
DSGraph::NodeMapTy NodeMap;
ECI.G->cloneInto(BU->getDSGraph(*LI->first), ECI.G->getScalarMap(),
ECI.G->getReturnNodes(), NodeMap, 0);
// This is N^2 with the number of functions in this equiv class, but I don't
// really care right now. FIXME!
for (unsigned i = 0, e = ECI.FuncsInClass.size(); i != e; ++i) {
Function *F = ECI.FuncsInClass[i];
// Merge the return nodes together.
ECI.G->getReturnNodes()[F].mergeWith(ECI.G->getReturnNodes()[LI->first]);
// Merge the arguments of the functions together.
Function::aiterator AI1 = F->abegin();
Function::aiterator AI2 = LI->first->abegin();
for (; AI1 != F->aend() && AI2 != LI->first->aend(); ++AI1,++AI2)
ECI.G->getNodeForValue(AI1).mergeWith(ECI.G->getNodeForValue(AI2));
}
ECI.FuncsInClass.push_back(LI->first);
}
DEBUG(std::cerr << "\n");
// Now that we have created the graphs for each of the equivalence sets, we
// have to figure out which pool descriptors to pass into the functions. We
// must pass arguments for any pool descriptor that is needed by any function
// in the equiv. class.
for (ECInfoForLeadersMapTy::iterator ECII = ECInfoForLeadersMap.begin(),
E = ECInfoForLeadersMap.end(); ECII != E; ++ECII) {
EquivClassInfo &ECI = ECII->second;
// Traverse part of the graph to provoke most of the node forwardings to
// occur.
DSGraph::ScalarMapTy &SM = ECI.G->getScalarMap();
for (DSGraph::ScalarMapTy::iterator I = SM.begin(), E = SM.end(); I!=E; ++I)
I->second.getNode(); // Collapse forward references...
// Remove breadcrumbs from merging nodes.
ECI.G->removeTriviallyDeadNodes();
// Loop over all of the functions in this equiv class, figuring out which
// pools must be passed in for each function.
for (unsigned i = 0, e = ECI.FuncsInClass.size(); i != e; ++i) {
Function *F = ECI.FuncsInClass[i];
DSGraph &FG = BU->getDSGraph(*F);
// Figure out which nodes need to be passed in for this function (if any)
hash_set<DSNode*> &MarkedNodes = FunctionInfo[F].MarkedNodes;
MarkNodesWhichMustBePassedIn(MarkedNodes, *F, FG);
if (!MarkedNodes.empty()) {
// If any nodes need to be passed in, figure out which nodes these are
// in the unified graph for this equivalence class.
DSGraph::NodeMapTy NodeMapping;
for (Function::aiterator I = F->abegin(), E = F->aend(); I != E; ++I)
DSGraph::computeNodeMapping(FG.getNodeForValue(I),
ECI.G->getNodeForValue(I), NodeMapping);
DSGraph::computeNodeMapping(FG.getReturnNodeFor(*F),
ECI.G->getReturnNodeFor(*F), NodeMapping);
// Loop through all of the nodes which must be passed through for this
// callee, and add them to the arguments list.
for (hash_set<DSNode*>::iterator I = MarkedNodes.begin(),
E = MarkedNodes.end(); I != E; ++I) {
DSNode *LocGraphNode = *I, *ECIGraphNode = NodeMapping[*I].getNode();
// Remember the mapping of this node
ECI.ECGraphToPrivateMap[std::make_pair(F,ECIGraphNode)] =LocGraphNode;
// Add this argument to be passed in. Don't worry about duplicates,
// they will be eliminated soon.
ECI.ArgNodes.push_back(ECIGraphNode);
}
}
}
// Okay, all of the functions have had their required nodes added to the
// ECI.ArgNodes list, but there might be duplicates. Eliminate the dups
// now.
std::sort(ECI.ArgNodes.begin(), ECI.ArgNodes.end());
ECI.ArgNodes.erase(std::unique(ECI.ArgNodes.begin(), ECI.ArgNodes.end()),
ECI.ArgNodes.end());
// Uncomment this if you want to see the aggregate graph result
//ECI.G->viewGraph();
}
}
void PoolAllocate::FindFunctionPoolArgs(Function &F) {
DSGraph &G = BU->getDSGraph(F);
FuncInfo &FI = FunctionInfo[&F]; // Create a new entry for F
hash_set<DSNode*> &MarkedNodes = FI.MarkedNodes;
// If this function is part of an indirectly called function equivalence
// class, we have to handle it specially.
if (Function *Leader = FuncECs.findClass(&F)) {
EquivClassInfo &ECI = ECInfoForLeadersMap[Leader];
// The arguments passed in will be the ones specified by the ArgNodes list.
for (unsigned i = 0, e = ECI.ArgNodes.size(); i != e; ++i) {
DSNode *ArgNode =
ECI.ECGraphToPrivateMap[std::make_pair(&F, ECI.ArgNodes[i])];
FI.ArgNodes.push_back(ArgNode);
MarkedNodes.insert(ArgNode);
}
return;
}
if (G.node_begin() == G.node_end())
return; // No memory activity, nothing is required
// Find DataStructure nodes which are allocated in pools non-local to the
// current function. This set will contain all of the DSNodes which require
// pools to be passed in from outside of the function.
MarkNodesWhichMustBePassedIn(MarkedNodes, F, G);
FI.ArgNodes.insert(FI.ArgNodes.end(), MarkedNodes.begin(), MarkedNodes.end());
}
// MakeFunctionClone - If the specified function needs to be modified for pool
// allocation support, make a clone of it, adding additional arguments as
// necessary, and return it. If not, just return null.
//
Function *PoolAllocate::MakeFunctionClone(Function &F) {
DSGraph &G = BU->getDSGraph(F);
if (G.node_begin() == G.node_end()) return 0;
FuncInfo &FI = FunctionInfo[&F];
if (FI.ArgNodes.empty())
return 0; // No need to clone if no pools need to be passed in!
// Update statistics..
NumArgsAdded += FI.ArgNodes.size();
++NumCloned;
// Figure out what the arguments are to be for the new version of the function
const FunctionType *OldFuncTy = F.getFunctionType();
std::vector<const Type*> ArgTys(FI.ArgNodes.size(), PoolDescPtrTy);
ArgTys.reserve(OldFuncTy->getNumParams() + FI.ArgNodes.size());
ArgTys.insert(ArgTys.end(), OldFuncTy->param_begin(), OldFuncTy->param_end());
// Create the new function prototype
FunctionType *FuncTy = FunctionType::get(OldFuncTy->getReturnType(), ArgTys,
OldFuncTy->isVarArg());
// Create the new function...
Function *New = new Function(FuncTy, GlobalValue::InternalLinkage,
F.getName(), F.getParent());
// Set the rest of the new arguments names to be PDa<n> and add entries to the
// pool descriptors map
std::map<DSNode*, Value*> &PoolDescriptors = FI.PoolDescriptors;
Function::aiterator NI = New->abegin();
for (unsigned i = 0, e = FI.ArgNodes.size(); i != e; ++i, ++NI) {
NI->setName("PDa");
PoolDescriptors[FI.ArgNodes[i]] = NI;
}
// Map the existing arguments of the old function to the corresponding
// arguments of the new function, and copy over the names.
std::map<const Value*, Value*> ValueMap;
for (Function::aiterator I = F.abegin(); NI != New->aend(); ++I, ++NI) {
ValueMap[I] = NI;
NI->setName(I->getName());
}
// Populate the value map with all of the globals in the program.
// FIXME: This should be unnecessary!
Module &M = *F.getParent();
for (Module::iterator I = M.begin(), E=M.end(); I!=E; ++I) ValueMap[I] = I;
for (Module::giterator I = M.gbegin(), E=M.gend(); I!=E; ++I) ValueMap[I] = I;
// Perform the cloning.
std::vector<ReturnInst*> Returns;
CloneFunctionInto(New, &F, ValueMap, Returns);
// Invert the ValueMap into the NewToOldValueMap
std::map<Value*, const Value*> &NewToOldValueMap = FI.NewToOldValueMap;
for (std::map<const Value*, Value*>::iterator I = ValueMap.begin(),
E = ValueMap.end(); I != E; ++I)
NewToOldValueMap.insert(std::make_pair(I->second, I->first));
return FI.Clone = New;
}
static bool NodeExistsInCycle(DSNode *N) {
for (DSNode::iterator I = N->begin(), E = N->end(); I != E; ++I)
if (*I && std::find(df_begin(*I), df_end(*I), N) != df_end(*I))
return true;
return false;
}
/// NodeIsSelfRecursive - Return true if this node contains a pointer to itself.
static bool NodeIsSelfRecursive(DSNode *N) {
for (DSNode::iterator I = N->begin(), E = N->end(); I != E; ++I)
if (*I == N) return true;
return false;
}
static bool ShouldPoolAllocate(DSNode *N) {
// Now that we have a list of all of the nodes that CAN be pool allocated, use
// a heuristic to filter out nodes which are not profitable to pool allocate.
switch (Heuristic) {
default:
case AllNodes: return true;
case NoNodes: return false;
case CyclicNodes: // Pool allocate nodes that are cyclic and not arrays
return NodeExistsInCycle(N);
}
}
/// POVisit - This implements functionality found in Support/PostOrderIterator.h
/// but in a way that allows multiple roots to be used. If PostOrderIterator
/// supported an external set like DepthFirstIterator did I could eliminate this
/// cruft.
///
static void POVisit(DSNode *N, std::set<DSNode*> &Visited,
std::vector<DSNode*> &Order) {
if (!Visited.insert(N).second) return; // already visited
// Visit all children before visiting this node.
for (DSNode::iterator I = N->begin(), E = N->end(); I != E; ++I)
if (DSNode *C = const_cast<DSNode*>(*I))
POVisit(C, Visited, Order);
// Now that we visited all of our children, add ourself to the order.
Order.push_back(N);
}
// SetupGlobalPools - Create global pools for all DSNodes in the globals graph
// which contain heap objects. If a global variable points to a piece of memory
// allocated from the heap, this pool gets a global lifetime. This is
// implemented by making the pool descriptor be a global variable of it's own,
// and initializing the pool on entrance to main. Note that we never destroy
// the pool, because it has global lifetime.
//
// This method returns true if correct pool allocation of the module cannot be
// performed because there is no main function for the module and there are
// global pools.
//
bool PoolAllocate::SetupGlobalPools(Module &M) {
// Get the globals graph for the program.
DSGraph &GG = BU->getGlobalsGraph();
// Get all of the nodes reachable from globals.
hash_set<DSNode*> GlobalHeapNodes;
GetNodesReachableFromGlobals(GG, GlobalHeapNodes);
// Filter out all nodes which have no heap allocations merged into them.
for (hash_set<DSNode*>::iterator I = GlobalHeapNodes.begin(),
E = GlobalHeapNodes.end(); I != E; ) {
hash_set<DSNode*>::iterator Last = I++;
if (!(*Last)->isHeapNode())
GlobalHeapNodes.erase(Last);
}
// If we don't need to create any global pools, exit now.
if (GlobalHeapNodes.empty()) return false;
// Otherwise get the main function to insert the poolinit calls.
Function *MainFunc = M.getMainFunction();
if (MainFunc == 0 || MainFunc->isExternal()) {
std::cerr << "Cannot pool allocate this program: it has global "
<< "pools but no 'main' function yet!\n";
return true;
}
BasicBlock::iterator InsertPt = MainFunc->getEntryBlock().begin();
while (isa<AllocaInst>(InsertPt)) ++InsertPt;
TargetData &TD = getAnalysis<TargetData>();
std::cerr << "Pool allocating " << GlobalHeapNodes.size()
<< " global nodes!\n";
// Loop over all of the pools, creating a new global pool descriptor,
// inserting a new entry in GlobalNodes, and inserting a call to poolinit in
// main.
for (hash_set<DSNode*>::iterator I = GlobalHeapNodes.begin(),
E = GlobalHeapNodes.end(); I != E; ++I) {
bool ShouldPoolAlloc = true;
switch (Heuristic) {
case AllNodes: break;
case NoNodes: ShouldPoolAlloc = false; break;
case SmartCoallesceNodes:
#if 0
if ((*I)->isArray() && !(*I)->isNodeCompletelyFolded())
ShouldPoolAlloc = false;
#endif
// fall through
case CyclicNodes:
if (!NodeExistsInCycle(*I))
ShouldPoolAlloc = false;
break;
}
if (ShouldPoolAlloc) {
GlobalVariable *GV =
new GlobalVariable(PoolDescType, false, GlobalValue::InternalLinkage,
Constant::getNullValue(PoolDescType), "GlobalPool",&M);
GlobalNodes[*I] = GV;
Value *ElSize =
ConstantUInt::get(Type::UIntTy, (*I)->getType()->isSized() ?
TD.getTypeSize((*I)->getType()) : 0);
new CallInst(PoolInit, make_vector((Value*)GV, ElSize, 0), "", InsertPt);
++NumPools;
if (!(*I)->isNodeCompletelyFolded())
++NumTSPools;
} else {
GlobalNodes[*I] =
Constant::getNullValue(PointerType::get(PoolDescType));
++NumNonprofit;
}
}
return false;
}
// CreatePools - This creates the pool initialization and destruction code for
// the DSNodes specified by the NodesToPA list. This adds an entry to the
// PoolDescriptors map for each DSNode.
//
void PoolAllocate::CreatePools(Function &F,
const std::vector<DSNode*> &NodesToPA,
std::map<DSNode*, Value*> &PoolDescriptors) {
if (NodesToPA.empty()) return;
// Loop over all of the pools, inserting code into the entry block of the
// function for the initialization and code in the exit blocks for
// destruction.
//
Instruction *InsertPoint = F.front().begin();
switch (Heuristic) {
case AllNodes:
case NoNodes:
case CyclicNodes:
for (unsigned i = 0, e = NodesToPA.size(); i != e; ++i) {
DSNode *Node = NodesToPA[i];
if (ShouldPoolAllocate(Node)) {
// Create a new alloca instruction for the pool...
Value *AI = new AllocaInst(PoolDescType, 0, "PD", InsertPoint);
// Void types in DS graph are never used
if (Node->getType() == Type::VoidTy)
std::cerr << "Node collapsing in '" << F.getName() << "'\n";
++NumPools;
if (!Node->isNodeCompletelyFolded())
++NumTSPools;
// Update the PoolDescriptors map
PoolDescriptors.insert(std::make_pair(Node, AI));
} else {
PoolDescriptors[Node] =
Constant::getNullValue(PointerType::get(PoolDescType));
++NumNonprofit;
}
}
break;
case SmartCoallesceNodes: {
std::set<DSNode*> NodesToPASet(NodesToPA.begin(), NodesToPA.end());
// DSGraphs only have unidirectional edges, to traverse or inspect the
// predecessors of nodes, we must build a mapping of the inverse graph.
std::map<DSNode*, std::vector<DSNode*> > InverseGraph;
for (unsigned i = 0, e = NodesToPA.size(); i != e; ++i) {
DSNode *Node = NodesToPA[i];
for (DSNode::iterator CI = Node->begin(), E = Node->end(); CI != E; ++CI)
if (DSNode *Child = const_cast<DSNode*>(*CI))
if (NodesToPASet.count(Child))
InverseGraph[Child].push_back(Node);
}
// Traverse the heap nodes in reverse-post-order so that we are guaranteed
// to visit all nodes pointing to another node before we visit that node
// itself (except with cycles).
// FIXME: This really should be using the PostOrderIterator.h file stuff,
// but the routines there do not support external storage!
std::set<DSNode*> Visited;
std::vector<DSNode*> Order;
for (unsigned i = 0, e = NodesToPA.size(); i != e; ++i)
POVisit(NodesToPA[i], Visited, Order);
// We want RPO, not PO, so reverse the order.
std::reverse(Order.begin(), Order.end());
// Okay, we have an ordering of the nodes in reverse post order. Traverse
// each node in this ordering, noting that there may be nodes in the order
// that are not in our NodesToPA list.
for (unsigned i = 0, e = Order.size(); i != e; ++i)
if (NodesToPASet.count(Order[i])) { // Only process pa nodes.
DSNode *N = Order[i];
// If this node has a backedge to itself, pool allocate it in a new
// pool.
if (NodeIsSelfRecursive(N)) {
// Create a new alloca instruction for the pool...
Value *AI = new AllocaInst(PoolDescType, 0, "PD", InsertPoint);
// Void types in DS graph are never used
if (N->isNodeCompletelyFolded())
std::cerr << "Node collapsing in '" << F.getName() << "'\n";
else
++NumTSPools;
++NumPools;
// Update the PoolDescriptors map
PoolDescriptors.insert(std::make_pair(N, AI));
} else {
// Otherwise the node is not self recursive. If the node is not an
// array, we can co-locate it with the pool of a predecessor node if
// any has been pool allocated, and start a new pool if a predecessor
// is an array. If there is a predecessor of this node that has not
// been visited yet in this RPO traversal, that means there is a
// cycle, so we choose to pool allocate this node right away.
//
// If there multiple predecessors in multiple different pools, we
// don't pool allocate this at all.
// Check out each of the predecessors of this node.
std::vector<DSNode*> &Preds = InverseGraph[N];
Value *PredPool = 0;
bool HasUnvisitedPred = false;
bool HasArrayPred = false;
bool HasMultiplePredPools = false;
for (unsigned p = 0, e = Preds.size(); p != e; ++p) {
DSNode *Pred = Preds[p];
if (!PoolDescriptors.count(Pred))
HasUnvisitedPred = true; // no pool assigned to predecessor?
else if (Pred->isArray() && !Pred->isNodeCompletelyFolded())
HasArrayPred = true;
else if (PredPool && PoolDescriptors[Pred] != PredPool)
HasMultiplePredPools = true;
else if (!PredPool &&
!isa<ConstantPointerNull>(PoolDescriptors[Pred]))
PredPool = PoolDescriptors[Pred];
// Otherwise, this predecessor has the same pool as a previous one.
}
if (HasMultiplePredPools) {
// If this node has predecessors that are in different pools, don't
// pool allocate this node.
PoolDescriptors[N] =
Constant::getNullValue(PointerType::get(PoolDescType));
++NumNonprofit;
} else if (PredPool) {
// If all of the predecessors of this node are already in a pool,
// colocate.
PoolDescriptors[N] = PredPool;
++NumColocated;
} else if (HasArrayPred || HasUnvisitedPred) {
// If this node has an array predecessor, or if there is a
// predecessor that has not been visited yet, allocate a new pool
// for it.
Value *AI = new AllocaInst(PoolDescType, 0, "PD", InsertPoint);
if (N->isNodeCompletelyFolded())
std::cerr << "Node collapsing in '" << F.getName() << "'\n";
else
++NumTSPools;
++NumPools;
PoolDescriptors[N] = AI;
} else {
// If this node has no pool allocated predecessors, and there is no
// reason to pool allocate it, don't.
assert(PredPool == 0);
PoolDescriptors[N] =
Constant::getNullValue(PointerType::get(PoolDescType));
++NumNonprofit;
}
}
}
} // End switch case
} // end switch
}
// processFunction - Pool allocate any data structures which are contained in
// the specified function...
//
void PoolAllocate::ProcessFunctionBody(Function &F, Function &NewF) {
DSGraph &G = BU->getDSGraph(F);
if (G.node_begin() == G.node_end()) return; // Quick exit if nothing to do...
FuncInfo &FI = FunctionInfo[&F]; // Get FuncInfo for F
hash_set<DSNode*> &MarkedNodes = FI.MarkedNodes;
// Calculate which DSNodes are reachable from globals. If a node is reachable
// from a global, we will create a global pool for it, so no argument passage
// is required.
DSGraph &GG = BU->getGlobalsGraph();
DSGraph::NodeMapTy GlobalsGraphNodeMapping;
for (DSScalarMap::global_iterator I = G.getScalarMap().global_begin(),
E = G.getScalarMap().global_end(); I != E; ++I) {
// Map all node reachable from this global to the corresponding nodes in
// the globals graph.
DSGraph::computeNodeMapping(G.getNodeForValue(*I), GG.getNodeForValue(*I),
GlobalsGraphNodeMapping);
}
// Loop over all of the nodes which are non-escaping, adding pool-allocatable
// ones to the NodesToPA vector.
std::vector<DSNode*> NodesToPA;
for (DSGraph::node_iterator I = G.node_begin(), E = G.node_end(); I != E;++I){
// We only need to make a pool if there is a heap object in it...
DSNode *N = *I;
if (N->isHeapNode())
if (GlobalsGraphNodeMapping.count(N)) {
// If it is a global pool, set up the pool descriptor appropriately.
DSNode *GGN = GlobalsGraphNodeMapping[N].getNode();
assert(GGN && GlobalNodes[GGN] && "No global node found??");
FI.PoolDescriptors[N] = GlobalNodes[GGN];
} else if (!MarkedNodes.count(N)) {
// Otherwise, if it was not passed in from outside the function, it must
// be a local pool!
assert(!N->isGlobalNode() && "Should be in global mapping!");
NodesToPA.push_back(N);
}
}
std::cerr << "[" << F.getName() << "] " << NodesToPA.size()
<< " nodes pool allocatable\n";
CreatePools(NewF, NodesToPA, FI.PoolDescriptors);
// Transform the body of the function now... collecting information about uses
// of the pools.
std::set<std::pair<AllocaInst*, Instruction*> > PoolUses;
std::set<std::pair<AllocaInst*, CallInst*> > PoolFrees;
TransformBody(G, FI, PoolUses, PoolFrees, NewF);
// Create pool construction/destruction code
if (!NodesToPA.empty())
InitializeAndDestroyPools(NewF, NodesToPA, FI.PoolDescriptors,
PoolUses, PoolFrees);
}
template<class IteratorTy>
static void AllOrNoneInSet(IteratorTy S, IteratorTy E,
std::set<BasicBlock*> &Blocks, bool &AllIn,
bool &NoneIn) {
AllIn = true;
NoneIn = true;
for (; S != E; ++S)
if (Blocks.count(*S))
NoneIn = false;
else
AllIn = false;
}
static void DeleteIfIsPoolFree(Instruction *I, AllocaInst *PD,
std::set<std::pair<AllocaInst*, CallInst*> > &PoolFrees) {
if (CallInst *CI = dyn_cast<CallInst>(I))
if (PoolFrees.count(std::make_pair(PD, CI))) {
PoolFrees.erase(std::make_pair(PD, CI));
I->getParent()->getInstList().erase(I);
++NumPoolFree;
}
}
void PoolAllocate::CalculateLivePoolFreeBlocks(std::set<BasicBlock*>&LiveBlocks,
Value *PD) {
for (Value::use_iterator I = PD->use_begin(), E = PD->use_end(); I != E; ++I){
// The only users of the pool should be call & invoke instructions.
CallSite U = CallSite::get(*I);
if (U.getCalledValue() != PoolFree && U.getCalledValue() != PoolDestroy) {
// This block and every block that can reach this block must keep pool
// frees.
for (idf_ext_iterator<BasicBlock*, std::set<BasicBlock*> >
DI = idf_ext_begin(U.getInstruction()->getParent(), LiveBlocks),
DE = idf_ext_end(U.getInstruction()->getParent(), LiveBlocks);
DI != DE; ++DI)
/* empty */;
}
}
}
/// InitializeAndDestroyPools - This inserts calls to poolinit and pooldestroy
/// into the function to initialize and destroy the pools in the NodesToPA list.
///
void PoolAllocate::InitializeAndDestroyPools(Function &F,
const std::vector<DSNode*> &NodesToPA,
std::map<DSNode*, Value*> &PoolDescriptors,
std::set<std::pair<AllocaInst*, Instruction*> > &PoolUses,
std::set<std::pair<AllocaInst*, CallInst*> > &PoolFrees) {
TargetData &TD = getAnalysis<TargetData>();
std::vector<Instruction*> PoolInitPoints;
std::vector<Instruction*> PoolDestroyPoints;
if (DisableInitDestroyOpt) {
// Insert poolinit calls after all of the allocas...
Instruction *InsertPoint;
for (BasicBlock::iterator I = F.front().begin();
isa<AllocaInst>(InsertPoint = I); ++I)
/*empty*/;
PoolInitPoints.push_back(InsertPoint);
if (F.getName() != "main")
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
if (isa<ReturnInst>(BB->getTerminator()) ||
isa<UnwindInst>(BB->getTerminator()))
PoolDestroyPoints.push_back(BB->getTerminator());
}
std::set<AllocaInst*> AllocasHandled;
// Insert all of the poolalloc calls in the start of the function.
for (unsigned i = 0, e = NodesToPA.size(); i != e; ++i) {
DSNode *Node = NodesToPA[i];
if (AllocaInst *PD = dyn_cast<AllocaInst>(PoolDescriptors[Node]))
if (AllocasHandled.insert(PD).second) {
Value *ElSize =
ConstantUInt::get(Type::UIntTy, Node->getType()->isSized() ?
TD.getTypeSize(Node->getType()) : 0);
// Convert the PoolUses/PoolFrees sets into something specific to this
// pool.
std::set<BasicBlock*> UsingBlocks;
std::set<std::pair<AllocaInst*, Instruction*> >::iterator PUI =
PoolUses.lower_bound(std::make_pair(PD, (Instruction*)0));
if (PUI != PoolUses.end() && PUI->first < PD) ++PUI;
for (; PUI != PoolUses.end() && PUI->first == PD; ++PUI)
UsingBlocks.insert(PUI->second->getParent());
// To calculate all of the basic blocks which require the pool to be
// initialized before, do a depth first search on the CFG from the using
// blocks.
std::set<BasicBlock*> InitializedBefore;
std::set<BasicBlock*> DestroyedAfter;
for (std::set<BasicBlock*>::iterator I = UsingBlocks.begin(),
E = UsingBlocks.end(); I != E; ++I) {
for (df_ext_iterator<BasicBlock*, std::set<BasicBlock*> >
DI = df_ext_begin(*I, InitializedBefore),
DE = df_ext_end(*I, InitializedBefore); DI != DE; ++DI)
/* empty */;
for (idf_ext_iterator<BasicBlock*, std::set<BasicBlock*> >
DI = idf_ext_begin(*I, DestroyedAfter),
DE = idf_ext_end(*I, DestroyedAfter); DI != DE; ++DI)
/* empty */;
}
// Now that we have created the sets, intersect them.
std::set<BasicBlock*> LiveBlocks;
std::set_intersection(InitializedBefore.begin(),InitializedBefore.end(),
DestroyedAfter.begin(), DestroyedAfter.end(),
std::inserter(LiveBlocks, LiveBlocks.end()));
InitializedBefore.clear();
DestroyedAfter.clear();
// Keep track of the blocks we have inserted poolinit/destroy in
std::set<BasicBlock*> PoolInitInsertedBlocks, PoolDestroyInsertedBlocks;
DEBUG(std::cerr << "POOL: " << PD->getName() << " information:\n");
DEBUG(std::cerr << " Live in blocks: ");
for (std::set<BasicBlock*>::iterator I = LiveBlocks.begin(),
E = LiveBlocks.end(); I != E; ++I) {
BasicBlock *BB = *I;
TerminatorInst *Term = BB->getTerminator();
DEBUG(std::cerr << BB->getName() << " ");
// Check the predecessors of this block. If any preds are not in the
// set, or if there are no preds, insert a pool init.
bool AllIn, NoneIn;
AllOrNoneInSet(pred_begin(BB), pred_end(BB), LiveBlocks, AllIn,
NoneIn);
if (NoneIn) {
if (!PoolInitInsertedBlocks.count(BB)) {
BasicBlock::iterator It = BB->begin();
// Move through all of the instructions not in the pool
while (!PoolUses.count(std::make_pair(PD, It)))
// Advance past non-users deleting any pool frees that we run
// across.
DeleteIfIsPoolFree(It++, PD, PoolFrees);
if (!DisableInitDestroyOpt)
PoolInitPoints.push_back(It);
PoolInitInsertedBlocks.insert(BB);
}
} else if (!AllIn) {
TryAgainPred:
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E;
++PI)
if (!LiveBlocks.count(*PI) && !PoolInitInsertedBlocks.count(*PI)){
if (SplitCriticalEdge(BB, PI))
// If the critical edge was split, *PI was invalidated
goto TryAgainPred;
// Insert at the end of the predecessor, before the terminator.
if (!DisableInitDestroyOpt)
PoolInitPoints.push_back((*PI)->getTerminator());
PoolInitInsertedBlocks.insert(*PI);
}
}
// Check the successors of this block. If some succs are not in the
// set, insert destroys on those successor edges. If all succs are
// not in the set, insert a destroy in this block.
AllOrNoneInSet(succ_begin(BB), succ_end(BB), LiveBlocks,
AllIn, NoneIn);
if (NoneIn) {
// Insert before the terminator.
if (!PoolDestroyInsertedBlocks.count(BB)) {
BasicBlock::iterator It = Term;
// Rewind to the first using insruction
while (!PoolUses.count(std::make_pair(PD, It)))
DeleteIfIsPoolFree(It--, PD, PoolFrees);
// Insert after the first using instruction
if (!DisableInitDestroyOpt)
PoolDestroyPoints.push_back(++It);
PoolDestroyInsertedBlocks.insert(BB);
}
} else if (!AllIn) {
for (succ_iterator SI = succ_begin(BB), E = succ_end(BB);
SI != E; ++SI)
if (!LiveBlocks.count(*SI) &&
!PoolDestroyInsertedBlocks.count(*SI)) {
// If this edge is critical, split it.
SplitCriticalEdge(BB, SI);
// Insert at entry to the successor, but after any PHI nodes.
BasicBlock::iterator It = (*SI)->begin();
while (isa<PHINode>(It)) ++It;
if (!DisableInitDestroyOpt)
PoolDestroyPoints.push_back(It);
PoolDestroyInsertedBlocks.insert(*SI);
}
}
}
DEBUG(std::cerr << "\n Init in blocks: ");
// Insert the calls to initialize the pool...
for (unsigned i = 0, e = PoolInitPoints.size(); i != e; ++i) {
new CallInst(PoolInit, make_vector((Value*)PD, ElSize, 0), "",
PoolInitPoints[i]);
DEBUG(std::cerr << PoolInitPoints[i]->getParent()->getName() << " ");
}
if (!DisableInitDestroyOpt)
PoolInitPoints.clear();
DEBUG(std::cerr << "\n Destroy in blocks: ");
// Loop over all of the places to insert pooldestroy's...
for (unsigned i = 0, e = PoolDestroyPoints.size(); i != e; ++i) {
// Insert the pooldestroy call for this pool.
new CallInst(PoolDestroy, make_vector((Value*)PD, 0), "",
PoolDestroyPoints[i]);
DEBUG(std::cerr << PoolDestroyPoints[i]->getParent()->getName()<<" ");
}
DEBUG(std::cerr << "\n\n");
// We are allowed to delete any poolfree's which occur between the last
// call to poolalloc, and the call to pooldestroy. Figure out which
// basic blocks have this property for this pool.
std::set<BasicBlock*> PoolFreeLiveBlocks;
if (!DisableInitDestroyOpt)
CalculateLivePoolFreeBlocks(PoolFreeLiveBlocks, PD);
else
PoolFreeLiveBlocks = LiveBlocks;
if (!DisableInitDestroyOpt)
PoolDestroyPoints.clear();
// Delete any pool frees which are not in live blocks, for correctness.
std::set<std::pair<AllocaInst*, CallInst*> >::iterator PFI =
PoolFrees.lower_bound(std::make_pair(PD, (CallInst*)0));
if (PFI != PoolFrees.end() && PFI->first < PD) ++PFI;
for (; PFI != PoolFrees.end() && PFI->first == PD; ) {
CallInst *PoolFree = (PFI++)->second;
if (!LiveBlocks.count(PoolFree->getParent()) ||
!PoolFreeLiveBlocks.count(PoolFree->getParent()))
DeleteIfIsPoolFree(PoolFree, PD, PoolFrees);
}
}
}
}