//===-- Heuristic.cpp - Interface to PA heuristics ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file was developed by the LLVM research group and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements the various pool allocation heuristics.
//
// FIXME: It seems that the old alignment heuristics contained in this file
// were designed for 32-bit x86 processors (which makes sense as
// poolalloc was developed before x86-64). We need to revisit this
// code and decide what the heuristics should do today.
//
//===----------------------------------------------------------------------===//
#include "dsa/DSGraphTraits.h"
#include "poolalloc/Heuristic.h"
#include "poolalloc/PoolAllocate.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/IR/DataLayout.h"
#include <iostream>
using namespace llvm;
using namespace PA;
namespace {
cl::opt<bool>
DisableAlignOpt("poolalloc-disable-alignopt",
cl::desc("Force all pool alignment to 8 bytes"));
}
//
// Function: getRecommendedSize()
//
// Description:
// Determine the recommended allocation size for objects represented by this
// DSNode. This is used to determine the size of objects allocated by
// poolalloc().
//
// Inputs:
// N - The DSNode representing the objects whose recommended size the caller
// wants.
//
// Return value:
// The size, in bytes, that should be used in allocating most objects
// represented by this DSNode.
//
unsigned Heuristic::getRecommendedSize(const DSNode *N) {
unsigned PoolSize = 0;
//
// If the DSNode only represents singleton objects (i.e., not arrays), then
// find the size of the singleton object.
//
if (!N->isArrayNode()) {
PoolSize = N->getSize();
}
//
// If the object size is 1, just say that it's zero. The run-time will
// figure out how to deal with it.
//
if (PoolSize == 1) PoolSize = 0;
return PoolSize;
}
//
// Function: Wants8ByteAlignment ()
//
// Description:
// Determine if an object of the specified type should be allocated on an
// 8-byte boundary. This may either be required by the target platform or may
// merely improve performance by aligning data the way the processor wants
// it.
//
// Inputs:
// Ty - The type of the object for which alignment should be tested.
// Offs - The offset of the type within a derived type (e.g., a structure).
// We will try to align a structure on an 8-byte boundary if one of its
// elements can/needs to be.
// TD - A reference to the DataLayout pass.
//
// Return value:
// true - This type should be allocated on an 8-byte boundary.
// false - This type does not need to be allocated on an 8-byte boundary.
//
// Notes:
// FIXME: This is a complete hack for X86 right now.
// FIXME: This code needs to be updated for x86-64.
// FIXME: This code needs to handle LLVM first-class structures and vectors.
//
static bool
Wants8ByteAlignment(Type *Ty, unsigned Offs, const DataLayout &TD) {
//
// If the user has requested this optimization to be turned off, don't bother
// doing it.
//
if (DisableAlignOpt) return true;
//
// If this type is at an alignable offset within its larger data structure,
// see if we should 8-byte align it.
//
if ((Offs & 7) == 0) {
//
// Doubles always want to be 8-byte aligned regardless of what DataLayout
// claims.
//
if (Ty->isDoubleTy()) return true;
//
// If we are on a 64-bit system, we want to align 8-byte integers and
// pointers.
//
if (TD.getPrefTypeAlignment(Ty) == 8)
return true;
}
//
// If this is a first-class data type, but it is located at an offset within
// a structure that cannot be 8-byte aligned, then we cannot ever guarantee
// to 8-byte align it. Therefore, do not try to force it to 8-byte
// alignment.
if (Ty->isFirstClassType())
return false;
//
// If this is a structure or array type, check if any of its elements at
// 8-byte alignment desire to have 8-byte alignment. If so, then the entire
// object wants 8-byte alignment.
//
if (StructType *STy = dyn_cast<StructType>(Ty)) {
const StructLayout *SL = TD.getStructLayout(STy);
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
if (Wants8ByteAlignment(STy->getElementType(i),
Offs+SL->getElementOffset(i), TD))
return true;
}
} else if (SequentialType *STy = dyn_cast<SequentialType>(Ty)) {
return Wants8ByteAlignment(STy->getElementType(), Offs, TD);
} else {
errs() << *Ty << "\n";
assert(0 && "Unknown type!");
}
return false;
}
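//
// Method: getRecommendedAlignment()
//
// Description:
// Determine the recommended alignment, in bytes, for an object of the
// specified type. A null or void type (e.g., from a collapsed DSNode) has no
// known alignment; zero is returned so that the run-time can decide.
//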
unsigned Heuristic::getRecommendedAlignment(Type *Ty,
const DataLayout &TD) {
if (!Ty || Ty->isVoidTy()) // Is this void or collapsed?
return 0; // No known alignment, let runtime decide.
return Wants8ByteAlignment(Ty, 0, TD) ? 8 : 4;
}
///
/// getRecommendedAlignment - Return the recommended object alignment for this
/// DSNode.
///
/// @Node - The DSNode for which allocation alignment information is requested.
///
/// FIXME: This method assumes an object wants 4-byte or 8-byte alignment. The
/// new types in LLVM may want larger alignments such as 16-byte
/// alignment. Need to update this code to handle that.
///
unsigned
Heuristic::getRecommendedAlignment(const DSNode *Node) {
//
// Get the DataLayout information from the DSNode's DSGraph.
//
const DataLayout &TD = Node->getParentGraph()->getDataLayout();
//
// Iterate through all the types that the DSA type-inference algorithm
// found and determine if any of them should be 8-byte aligned. If so, then
// we'll 8-byte align the entire structure.
//
DSNode::const_type_iterator tyi;
for (tyi = Node->type_begin(); tyi != Node->type_end(); ++tyi) {
for (svset<Type*>::const_iterator tyii = tyi->second->begin(),
tyee = tyi->second->end(); tyii != tyee; ++tyii) {
//
// Get the type of object allocated. If there is no type, then it is
// implicitly of void type.
//
Type * TypeCreated = *tyii;
if (TypeCreated) {
//
// If this type wants 8-byte alignment, then 8-byte align the entire node.
//
if (Wants8ByteAlignment(TypeCreated, tyi->first, TD)) {
return 8;
}
}
}
}
return 4;
}
//
// Function: GetNodesReachableFromGlobals()
//
// Description:
// This function finds all DSNodes which are reachable from globals. It finds
// DSNodes both within the local DSGraph as well as in the Globals graph that
// are reachable from globals. It does, however, filter out those DSNodes
// which are of no interest to automatic pool allocation.
//
// Inputs:
// G - The DSGraph for which to find DSNodes which are reachable by globals.
// This DSGraph can either be a DSGraph associated with a function *or*
// it can be the globals graph itself.
//
// Outputs:
// NodesFromGlobals - A reference to a container object in which to record
// DSNodes reachable from globals. DSNodes are *added* to
// this container; it is not cleared by this function.
// DSNodes from both the local and globals graph are added.
static void
GetNodesReachableFromGlobals (DSGraph* G,
DenseSet<const DSNode*> &NodesFromGlobals) {
//
// Get the globals graph associated with this DSGraph. If the globals graph
// is NULL, then the graph that was passed in *is* the globals graph.
//
DSGraph * GlobalsGraph = G->getGlobalsGraph();
if (!GlobalsGraph)
GlobalsGraph = G;
//
// Find all DSNodes which are reachable in the globals graph.
//
for (DSGraph::node_iterator NI = GlobalsGraph->node_begin();
NI != GlobalsGraph->node_end();
++NI) {
NI->markReachableNodes(NodesFromGlobals);
}
//
// Remove those global nodes which we know will never be pool allocated.
//
std::vector<const DSNode *> toRemove;
for (DenseSet<const DSNode*>::iterator I = NodesFromGlobals.begin(),
E = NodesFromGlobals.end(); I != E; ) {
DenseSet<const DSNode*>::iterator Last = I; ++I;
//
// Nodes that escape to external code could be reachable from globals.
// Nodes that are incomplete could be heap nodes.
// Unknown nodes could be anything.
//
const DSNode *tmp = *Last;
if (!(tmp->isHeapNode()))
toRemove.push_back (tmp);
}
//
// Remove all globally reachable DSNodes which do not require pools.
//
for (unsigned index = 0; index < toRemove.size(); ++index) {
NodesFromGlobals.erase(toRemove[index]);
}
//
// Now the fun part. Find DSNodes in the local graph that correspond to
// those nodes reachable in the globals graph. Add them to the set of
// reachable nodes, too.
//
if (G->getGlobalsGraph()) {
//
// Compute a mapping between local DSNodes and DSNodes in the globals
// graph.
//
DSGraph::NodeMapTy NodeMap;
G->computeGToGGMapping (NodeMap);
//
// Scan through all DSNodes in the local graph. If a local DSNode has a
// corresponding DSNode in the globals graph that is reachable from a
// global, then add the local DSNode to the set of DSNodes reachable from a
// global.
//
// FIXME: A node's existence within the global DSGraph is probably
// sufficient evidence that it is reachable from a global.
//
DSGraph::node_iterator ni = G->node_begin();
for (; ni != G->node_end(); ++ni) {
DSNode * N = ni;
if (NodesFromGlobals.count (NodeMap[N].getNode()))
NodesFromGlobals.insert (N);
}
}
}
//
// Method: findGlobalPoolNodes()
//
// Description:
// This method finds DSNodes that are reachable from globals and that need a
// pool. The Automatic Pool Allocation transform will use the returned
// information to build global pools for the DSNodes in question.
//
// Note that this method does not assign DSNodes to pools; it merely decides
// which DSNodes are reachable from globals and will need a pool of global
// scope.
//
// Outputs:
// Nodes - The DSNodes that are both reachable from globals and which should
// have global pools will be *added* to this container.
//
void
Heuristic::findGlobalPoolNodes (DSNodeSet_t & Nodes) {
// Get the globals graph for the program.
DSGraph* GG = Graphs->getGlobalsGraph();
// Get all of the nodes reachable from globals.
DenseSet<const DSNode*> GlobalHeapNodes;
GetNodesReachableFromGlobals (GG, GlobalHeapNodes);
//
// Now find all DSNodes belonging to function-local DSGraphs which are
// mirrored in the globals graph. These DSNodes require a global pool, too.
//
for (Module::iterator F = M->begin(); F != M->end(); ++F) {
if (Graphs->hasDSGraph(*F)) {
DSGraph* G = Graphs->getDSGraph(*F);
GetNodesReachableFromGlobals (G, GlobalHeapNodes);
}
}
//
// Copy the values into the output container. Note that DenseSet has no
// iterator traits (or whatever allows us to treat DenseSet as a generic
// container), so we have to use a loop to copy values from the DenseSet into
// the output container.
//
for (DenseSet<const DSNode*>::iterator I = GlobalHeapNodes.begin(),
E = GlobalHeapNodes.end(); I != E; ++I) {
Nodes.insert (*I);
}
return;
}
//
// Method: getGlobalPoolNodes()
//
// Description:
// This method returns DSNodes that are reachable from globals and that need a
// pool. The Automatic Pool Allocation transform will use the returned
// information to build global pools for the DSNodes in question.
//
// Note that this method does not assign DSNodes to pools; it merely decides
// which DSNodes are reachable from globals and will need a pool of global
// scope.
//
// Outputs:
// Nodes - The DSNodes that are both reachable from globals and which should
// have global pools will be *added* to this container.
//
// Preconditions:
// This method assumes that findGlobalPoolNodes() has been called to find
// all of the global DSNodes needing a pool.
//
void
Heuristic::getGlobalPoolNodes (std::vector<const DSNode *> & Nodes) {
//
// Copy the DSNodes which are globally reachable and need a global pool into
// the set of DSNodes given to us by the caller.
//
Nodes.insert (Nodes.end(), GlobalPoolNodes.begin(), GlobalPoolNodes.end());
return;
}
//
// Method: getLocalPoolNodes()
//
// Description:
// For a given function, determine which DSNodes for that function should have
// local pools created for them.
//
void
Heuristic::getLocalPoolNodes (const Function & F, DSNodeList_t & Nodes) {
//
// Get the DSGraph of the specified function. If the DSGraph has no nodes,
// then there is nothing we need to do.
//
DSGraph* G = Graphs->getDSGraph(F);
if (G->node_begin() == G->node_end()) return;
//
// Calculate which DSNodes are reachable from globals. If a node is reachable
// from a global, we will create a global pool for it, so no pool argument
// needs to be passed into the function.
//
Graphs->getGlobalsGraph();
//
// Map all nodes reachable from this graph to the corresponding nodes in
// the globals graph.
//
DSGraph::NodeMapTy GlobalsGraphNodeMapping;
G->computeGToGGMapping(GlobalsGraphNodeMapping);
//
// Loop over all of the nodes which are non-escaping, adding pool-allocatable
// ones to the NodesToPA vector. In other words, scan over the DSGraph and
// find nodes for which a new pool must be created within this function.
//
for (DSGraph::node_iterator I = G->node_begin(), E = G->node_end();
I != E;
++I){
// Get the DSNode and, if applicable, its mirror in the globals graph
DSNode * N = I;
DSNode * GGN = GlobalsGraphNodeMapping[N].getNode();
//
// Only the following nodes are pool allocated:
// 1) Local Heap nodes
// 2) Nodes which are mirrored in the globals graph and, in the globals
// graph, are heap nodes.
//
if ((N->isHeapNode()) || (GGN && GGN->isHeapNode())) {
if (!(GlobalPoolNodes.count (N) || GlobalPoolNodes.count (GGN))) {
// Otherwise, if it was not passed in from outside the function, it must
// be a local pool!
assert((!N->isGlobalNode() || N->isPtrToIntNode()) && "Should be in global mapping!");
if(!N->isPtrToIntNode()) {
Nodes.push_back (N);
}
}
}
}
return;
}
//===-- AllButUnreachableFromMemoryHeuristic Heuristic --------------------===//
//
// This heuristic pool allocates everything possible into separate pools,
// except for objects that are not reachable from other memory objects. This
// filters out objects that are not cyclic and are only pointed to by scalars:
// these tend to be single memory allocations that are not worth creating a
// whole pool for.
//
bool
AllButUnreachableFromMemoryHeuristic::runOnModule (Module & Module) {
//
// Remember which module we are analyzing.
//
M = &Module;
//
// Get the reference to the DSA Graph.
//
Graphs = &getAnalysis<EQTDDataStructures>();
//
// Find DSNodes which are reachable from globals and should be pool
// allocated.
//
findGlobalPoolNodes (GlobalPoolNodes);
// We never modify anything in this pass
return false;
}
void
AllButUnreachableFromMemoryHeuristic::AssignToPools (
const DSNodeList_t &NodesToPA,
Function *F, DSGraph* G,
std::vector<OnePool> &ResultPools) {
// Build a set of all nodes that are reachable from another node in the
// graph. Here we ignore scalar nodes that are only globals as they are
// often global pointers to big arrays.
std::set<const DSNode*> ReachableFromMemory;
for (DSGraph::node_iterator I = G->node_begin(), E = G->node_end();
I != E; ++I) {
DSNode *N = I;
#if 0
//
// Ignore nodes that are just globals and not arrays.
//
if (N->isArray() || N->isHeapNode() || N->isAllocaNode() ||
N->isUnknownNode())
#endif
// If a node is marked, all children are too.
if (!ReachableFromMemory.count(N)) {
for (DSNode::iterator NI = N->begin(), E = N->end(); NI != E; ++NI) {
//
// Sometimes this results in a NULL DSNode. Skip it if that is the
// case.
//
if (!(*NI)) continue;
//
// Do a depth-first iteration over the DSGraph starting with this
// child node.
//
for (df_ext_iterator<const DSNode*>
DI = df_ext_begin(*NI, ReachableFromMemory),
E = df_ext_end(*NI, ReachableFromMemory); DI != E; ++DI)
/*empty*/;
}
}
}
// Only pool allocate a node if it is reachable from a memory object (itself
// included).
for (unsigned i = 0, e = NodesToPA.size(); i != e; ++i)
if (ReachableFromMemory.count(NodesToPA[i]))
ResultPools.push_back(OnePool(NodesToPA[i]));
}
//===-- CyclicNodes Heuristic ---------------------------------------------===//
//
// This heuristic only pool allocates nodes in an SCC in the DSGraph.
//
bool
CyclicNodesHeuristic::runOnModule (Module & Module) {
//
// Remember which module we are analyzing.
//
M = &Module;
//
// Get the reference to the DSA Graph.
//
Graphs = &getAnalysis<EQTDDataStructures>();
//
// Find DSNodes which are reachable from globals and should be pool
// allocated.
//
findGlobalPoolNodes (GlobalPoolNodes);
// We never modify anything in this pass
return false;
}
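//
// Function: NodeExistsInCycle()
//
// Description:
// Determine whether the specified DSNode participates in a cycle, i.e.,
// whether the node is reachable again by following edges out of one of its
// successors.
//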
static bool NodeExistsInCycle(const DSNode *N) {
for (DSNode::const_iterator I = N->begin(), E = N->end(); I != E; ++I)
if (*I && std::find(df_begin(*I), df_end(*I), N) != df_end(*I))
return true;
return false;
}
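//
// Method: AssignToPools()
//
// Description:
// Assign a separate pool to each candidate DSNode that participates in a
// cycle; acyclic candidates are not pool allocated by this heuristic.
//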
void
CyclicNodesHeuristic::AssignToPools(const DSNodeList_t &NodesToPA,
Function *F, DSGraph* G,
std::vector<OnePool> &ResultPools) {
for (unsigned i = 0, e = NodesToPA.size(); i != e; ++i)
if (NodeExistsInCycle(NodesToPA[i]))
ResultPools.push_back(OnePool(NodesToPA[i]));
}
//===-- SmartCoallesceNodes Heuristic -------------------------------------===//
//
// This heuristic attempts to be smart and coalesce nodes at times. In
// practice, it doesn't work very well.
//
bool
SmartCoallesceNodesHeuristic::runOnModule (Module & Module) {
//
// Remember which module we are analyzing.
//
M = &Module;
//
// Get the reference to the DSA Graph.
//
Graphs = &getAnalysis<EQTDDataStructures>();
//
// Find DSNodes which are reachable from globals and should be pool
// allocated.
//
findGlobalPoolNodes (GlobalPoolNodes);
// We never modify anything in this pass
return false;
}
void
SmartCoallesceNodesHeuristic::AssignToPools(const DSNodeList_t &NodesToPA,
Function *F, DSGraph* G,
std::vector<OnePool> &ResultPools) {
// For globals, do not pool allocate unless the node is cyclic and not an
// array (unless it's collapsed).
if (F == 0) {
for (unsigned i = 0, e = NodesToPA.size(); i != e; ++i) {
const DSNode *Node = NodesToPA[i];
if ((Node->isNodeCompletelyFolded() || !Node->isArrayNode()) &&
NodeExistsInCycle(Node))
ResultPools.push_back(OnePool(Node));
}
} else {
// TODO
}
}
#if 0
/// NodeIsSelfRecursive - Return true if this node contains a pointer to itself.
static bool NodeIsSelfRecursive(DSNode *N) {
for (DSNode::iterator I = N->begin(), E = N->end(); I != E; ++I)
if (*I == N) return true;
return false;
}
/// POVisit - This implements functionality found in Support/PostOrderIterator.h
/// but in a way that allows multiple roots to be used. If PostOrderIterator
// supported an external set like DepthFirstIterator did, I could eliminate this
/// cruft.
///
static void POVisit(DSNode *N, std::set<DSNode*> &Visited,
std::vector<DSNode*> &Order) {
if (!Visited.insert(N).second) return; // already visited
// Visit all children before visiting this node.
for (DSNode::iterator I = N->begin(), E = N->end(); I != E; ++I)
if (DSNode *C = const_cast<DSNode*>(*I))
POVisit(C, Visited, Order);
// Now that we have visited all of our children, add this node to the order.
Order.push_back(N);
}
// Heuristic for building per-function pools
switch (Heuristic) {
case SmartCoallesceNodes: {
std::set<DSNode*> NodesToPASet(NodesToPA.begin(), NodesToPA.end());
// DSGraphs only have unidirectional edges, to traverse or inspect the
// predecessors of nodes, we must build a mapping of the inverse graph.
std::map<DSNode*, std::vector<DSNode*> > InverseGraph;
for (unsigned i = 0, e = NodesToPA.size(); i != e; ++i) {
DSNode *Node = NodesToPA[i];
for (DSNode::iterator CI = Node->begin(), E = Node->end(); CI != E; ++CI)
if (DSNode *Child = const_cast<DSNode*>(*CI))
if (NodesToPASet.count(Child))
InverseGraph[Child].push_back(Node);
}
// Traverse the heap nodes in reverse-post-order so that we are guaranteed
// to visit all nodes pointing to another node before we visit that node
// itself (except with cycles).
// FIXME: This really should be using the PostOrderIterator.h file stuff,
// but the routines there do not support external storage!
std::set<DSNode*> Visited;
std::vector<DSNode*> Order;
for (unsigned i = 0, e = NodesToPA.size(); i != e; ++i)
POVisit(NodesToPA[i], Visited, Order);
// We want RPO, not PO, so reverse the order.
std::reverse(Order.begin(), Order.end());
// Okay, we have an ordering of the nodes in reverse post order. Traverse
// each node in this ordering, noting that there may be nodes in the order
// that are not in our NodesToPA list.
for (unsigned i = 0, e = Order.size(); i != e; ++i)
if (NodesToPASet.count(Order[i])) { // Only process pa nodes.
DSNode *N = Order[i];
// If this node has a backedge to itself, pool allocate it in a new
// pool.
if (NodeIsSelfRecursive(N)) {
// Create a new alloca instruction for the pool...
Value *AI = new AllocaInst(PoolDescType, 0, "PD", InsertPoint);
// Void types in DS graph are never used
if (N->isNodeCompletelyFolded())
std::cerr << "Node collapsing in '" << F.getName() << "'\n";
// Update the PoolDescriptors map
PoolDescriptors.insert(std::make_pair(N, AI));
#if 1
} else if (N->isArray() && !N->isNodeCompletelyFolded()) {
// We never pool allocate array nodes.
PoolDescriptors[N] =
Constant::getNullValue(PointerType::getUnqual(PoolDescType));
++NumNonprofit;
#endif
} else {
// Otherwise the node is not self recursive. If the node is not an
// array, we can co-locate it with the pool of a predecessor node if
// any has been pool allocated, and start a new pool if a predecessor
// is an array. If there is a predecessor of this node that has not
// been visited yet in this RPO traversal, that means there is a
// cycle, so we choose to pool allocate this node right away.
//
// If there are multiple predecessors in multiple different pools, we
// don't pool allocate this at all.
// Check out each of the predecessors of this node.
std::vector<DSNode*> &Preds = InverseGraph[N];
Value *PredPool = 0;
bool HasUnvisitedPred = false;
bool HasArrayPred = false;
bool HasMultiplePredPools = false;
for (unsigned p = 0, e = Preds.size(); p != e; ++p) {
DSNode *Pred = Preds[p];
if (!PoolDescriptors.count(Pred))
HasUnvisitedPred = true; // no pool assigned to predecessor?
else if (Pred->isArray() && !Pred->isNodeCompletelyFolded())
HasArrayPred = true;
else if (PredPool && PoolDescriptors[Pred] != PredPool)
HasMultiplePredPools = true;
else if (!PredPool &&
!isa<ConstantPointerNull>(PoolDescriptors[Pred]))
PredPool = PoolDescriptors[Pred];
// Otherwise, this predecessor has the same pool as a previous one.
}
if (HasMultiplePredPools) {
// If this node has predecessors that are in different pools, don't
// pool allocate this node.
PoolDescriptors[N] =
Constant::getNullValue(PointerType::getUnqual(PoolDescType));
++NumNonprofit;
} else if (PredPool) {
// If all of the predecessors of this node are already in a pool,
// colocate.
PoolDescriptors[N] = PredPool;
++NumColocated;
} else if (HasArrayPred || HasUnvisitedPred) {
// If this node has an array predecessor, or if there is a
// predecessor that has not been visited yet, allocate a new pool
// for it.
Value *AI = new AllocaInst(PoolDescType, 0, "PD", InsertPoint);
if (N->isNodeCompletelyFolded())
std::cerr << "Node collapsing in '" << F.getName() << "'\n";
PoolDescriptors[N] = AI;
} else {
// If this node has no pool allocated predecessors, and there is no
// reason to pool allocate it, don't.
assert(PredPool == 0);
PoolDescriptors[N] =
Constant::getNullValue(PointerType::getUnqual(PoolDescType));
++NumNonprofit;
}
}
}
} // End switch case
} // end switch
#endif
//===-- AllInOneGlobalPool Heuristic --------------------------------------===//
//
// This heuristic puts all memory in the whole program into a single global
// pool. This is not safe, and is not good for performance, but can be used to
// evaluate how well the pool allocator runtime works as a "malloc replacement".
//
bool
AllInOneGlobalPoolHeuristic::runOnModule (Module & Module) {
//
// Remember which module we are analyzing.
//
M = &Module;
//
// Get the reference to the DSA Graph.
//
Graphs = &getAnalysis<EQTDDataStructures>();
//
// Find DSNodes which are reachable from globals and should be pool
// allocated.
//
findGlobalPoolNodes (GlobalPoolNodes);
// We never modify anything in this pass
return false;
}
void
AllInOneGlobalPoolHeuristic::AssignToPools(const DSNodeList_t &NodesToPA,
Function *F, DSGraph* G,
std::vector<OnePool> &ResultPools) {
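// Lazily create the single global pool descriptor the first time any node is
// assigned to it.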
if (TheGlobalPD == 0)
TheGlobalPD = PA->CreateGlobalPool(0, 0);
// All nodes allocate from the same global pool.
OnePool Pool;
Pool.NodesInPool = NodesToPA;
Pool.PoolDesc = TheGlobalPD;
ResultPools.push_back(Pool);
}
//===-- OnlyOverhead Heuristic --------------------------------------------===//
//
// This heuristic is a hack to evaluate how much overhead pool allocation adds
// to a program. It adds all of the pool arguments, pool init calls, and pool
// destroy calls to the program, but dynamically only passes null into the
// pool alloc/free functions, causing them to allocate from the heap.
//
bool
OnlyOverheadHeuristic::runOnModule (Module & Module) {
//
// Remember which module we are analyzing.
//
M = &Module;
//
// Get the reference to the DSA Graph.
//
Graphs = &getAnalysis<EQTDDataStructures>();
//
// Find DSNodes which are reachable from globals and should be pool
// allocated.
//
findGlobalPoolNodes (GlobalPoolNodes);
// We never modify anything in this pass
return false;
}
void
OnlyOverheadHeuristic::AssignToPools(const DSNodeList_t &NodesToPA,
Function *F, DSGraph* G,
std::vector<OnePool> &ResultPools) {
// For this heuristic, we assign everything possible to its own pool.
for (unsigned i = 0, e = NodesToPA.size(); i != e; ++i)
ResultPools.push_back(OnePool(NodesToPA[i]));
}
/// getDynamicallyNullPool - Return a PoolDescriptor* that is always dynamically
/// null. Insert the code necessary to produce it before the specified
/// instruction.
static Value *
getDynamicallyNullPool(BasicBlock::iterator I) {
// Arrange to dynamically pass null into all of the pool functions if we are
// only checking for overhead.
static Value *NullGlobal = 0;
if (!NullGlobal) {
Module *M = I->getParent()->getParent()->getParent();
Type * PoolTy = PoolAllocate::PoolDescPtrTy;
Constant * Init = ConstantPointerNull::get(cast<PointerType>(PoolTy));
NullGlobal = new GlobalVariable(*M,
PoolAllocate::PoolDescPtrTy, false,
GlobalValue::ExternalLinkage,
Init,
"llvm-poolalloc-null-init");
}
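// Skip past any allocas at the start of the block so that the load of the
// null pool descriptor is inserted after them.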
while (isa<AllocaInst>(I)) ++I;
return new LoadInst(NullGlobal, "nullpd", I);
}
// HackFunctionBody - This method is called on every transformed function body.
// Basically it replaces all uses of real pool descriptors with dynamically null
// values. However, it leaves pool init/destroy alone.
void
OnlyOverheadHeuristic::HackFunctionBody(Function &F,
std::map<const DSNode*, Value*> &PDs) {
Constant *PoolInit = PA->PoolInit;
Constant *PoolDestroy = PA->PoolDestroy;
Value *NullPD = getDynamicallyNullPool(F.front().begin());
for (std::map<const DSNode*, Value*>::iterator PDI = PDs.begin(),
E = PDs.end(); PDI != E; ++PDI) {
Value *OldPD = PDI->second;
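// Copy the users into a vector first; replaceUsesOfWith() below mutates the
// use list while we would otherwise be iterating over it.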
std::vector<User*> OldPDUsers(OldPD->user_begin(), OldPD->user_end());
for (unsigned i = 0, e = OldPDUsers.size(); i != e; ++i) {
CallSite PDUser(cast<Instruction>(OldPDUsers[i]));
if (PDUser.getCalledValue() != PoolInit &&
PDUser.getCalledValue() != PoolDestroy) {
assert(PDUser.getInstruction()->getParent()->getParent() == &F &&
"Not in cur fn??");
PDUser.getInstruction()->replaceUsesOfWith(OldPD, NullPD);
}
}
}
}
//===-- NoNodes Heuristic -------------------------------------------------===//
//
// This dummy heuristic chooses to not pool allocate anything.
//
bool
NoNodesHeuristic::runOnModule (Module & Module) {
//
// Remember which module we are analyzing.
//
M = &Module;
// We never modify anything in this pass
return false;
}
//
// Register all of the heuristic passes.
//
static RegisterPass<AllButUnreachableFromMemoryHeuristic>
B ("paheur-AllButUnreachableFromMemory", "Pool allocate all reachable from memory objects");
static RegisterPass<CyclicNodesHeuristic>
C ("paheur-CyclicNodes", "Pool allocate nodes with cycles");
static RegisterPass<SmartCoallesceNodesHeuristic>
D ("paheur-SmartCoallesceNodes", "Pool allocate using the smart node merging heuristic ");
static RegisterPass<AllInOneGlobalPoolHeuristic>
E ("paheur-AllInOneGlobalPool", "Pool allocate using the pool library to replace malloc/free");
static RegisterPass<OnlyOverheadHeuristic>
F ("paheur-OnlyOverhead", "Do not pool allocate anything, but induce all overhead from it");
static RegisterPass<NoNodesHeuristic>
G ("paheur-NoNodes", "Pool allocate nothing heuristic");
//
// Create the heuristic analysis group.
//
static RegisterAnalysisGroup<Heuristic>
HeuristicGroup ("Pool Allocation Heuristic");
RegisterAnalysisGroup<Heuristic> Heuristic2(B);
RegisterAnalysisGroup<Heuristic> Heuristic3(C);
RegisterAnalysisGroup<Heuristic> Heuristic4(D);
RegisterAnalysisGroup<Heuristic> Heuristic5(E);
RegisterAnalysisGroup<Heuristic> Heuristic6(F);
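// NoNodesHeuristic is registered as the default implementation of the
// Heuristic analysis group.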
RegisterAnalysisGroup<Heuristic, true> Heuristic7(G);
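//
// Pass identifiers used by the LLVM pass infrastructure.
//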
char Heuristic::ID = 0;
char AllButUnreachableFromMemoryHeuristic::ID = 0;
char CyclicNodesHeuristic::ID = 0;
char SmartCoallesceNodesHeuristic::ID = 0;
char AllInOneGlobalPoolHeuristic::ID = 0;
char OnlyOverheadHeuristic::ID = 0;
char NoNodesHeuristic::ID = 0;