//===-- Heuristic.cpp - Interface to PA heuristics ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file was developed by the LLVM research group and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements the various pool allocation heuristics.
//
//===----------------------------------------------------------------------===//
#include "Heuristic.h"
#include "dsa/DSGraphTraits.h"
#include "poolalloc/PoolAllocate.h"
#include "llvm/Instructions.h"
#include "llvm/Module.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetData.h"
#include <iostream>
using namespace llvm;
using namespace PA;
namespace {
  enum PoolAllocHeuristic {
    NoNodes,
    OnlyOverhead,
    AllInOneGlobalPool,
    SmartCoallesceNodes,
    CyclicNodes,
    AllButUnreachableFromMemory,
    AllNodes
  };

  cl::opt<PoolAllocHeuristic>
  TheHeuristic("poolalloc-heuristic",
    cl::desc("Heuristic to choose which nodes to pool allocate"),
    cl::values(clEnumVal(AllNodes, " Pool allocate all nodes"),
               clEnumVal(AllButUnreachableFromMemory, " Pool allocate all nodes reachable from memory objects"),
               clEnumVal(CyclicNodes, " Pool allocate nodes with cycles"),
               clEnumVal(SmartCoallesceNodes, " Use the smart node merging heuristic"),
               clEnumVal(AllInOneGlobalPool, " Use pool library as replacement for malloc/free"),
               clEnumVal(OnlyOverhead, " Do not pool allocate anything, but induce all overhead from it"),
               clEnumVal(NoNodes, " Do not pool allocate anything"),
               clEnumValEnd),
    cl::init(AllButUnreachableFromMemory));

  cl::opt<bool>
  DisableAlignOpt("poolalloc-disable-alignopt",
                  cl::desc("Force all pool alignment to 8 bytes"));
}

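// The heuristic is selected with the -poolalloc-heuristic flag defined above.
// For example, a hypothetical invocation selecting the cyclic-nodes heuristic
// (the exact driver and pass-loading flags depend on how poolalloc is built):
//
//   opt -load libpoolalloc.so -poolalloc -poolalloc-heuristic=CyclicNodes \
//       input.bc -o output.bc
//
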
Heuristic::~Heuristic() {}
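
/// getRecommendedSize - Return the recommended pool object size for this
/// DSNode, or zero if no fixed object size is known.
///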
unsigned Heuristic::getRecommendedSize(const DSNode *N) {
  unsigned PoolSize = 0;
  // FIXME: types
  // if (!N->isArray() && N->getType()->isSized()) {
  //   PoolSize = N->getParentGraph()->getTargetData().getTypeAllocSize(N->getType());
  // }
  if (PoolSize == 1) PoolSize = 0;
  return PoolSize;
}

/// Wants8ByteAlignment - Return true if objects of this type should be 8-byte
/// aligned. FIXME: this is a complete hack for X86 right now.
static bool Wants8ByteAlignment(const Type *Ty, unsigned Offs,
                                const TargetData &TD) {
  if (DisableAlignOpt) return true;

  if ((Offs & 7) == 0) {
    //
    // Note:
    // The LLVM API has changed, and I do not know how to tell if a type is
    // a double. Furthermore, the 8-byte alignment of doubles appears to be
    // a hack, and it is not clear why it is done. Therefore, we simply
    // align all floating point types on an 8-byte boundary.
    //
#if 0
    // Doubles always want to be 8-byte aligned.
    if (Ty == Type::DoubleTy) return true;
#else
    if (Ty->isFloatingPointTy()) return true;
#endif

    // If we are on a 64-bit system, we want to align 8-byte integers and
    // pointers.
    if (TD.getPrefTypeAlignment(Ty) == 8)
      return true;
  }

  if (Ty->isFirstClassType())
    return false;

  if (const StructType *STy = dyn_cast<StructType>(Ty)) {
    const StructLayout *SL = TD.getStructLayout(STy);
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
      if (Wants8ByteAlignment(STy->getElementType(i),
                              Offs+SL->getElementOffset(i), TD))
        return true;
    }
  } else if (const SequentialType *STy = dyn_cast<SequentialType>(Ty)) {
    return Wants8ByteAlignment(STy->getElementType(), Offs, TD);
  } else {
    errs() << *Ty << "\n";
    assert(0 && "Unknown type!");
  }
  return false;
}
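
/// getRecommendedAlignment - Return the recommended alignment (in bytes) for
/// objects of this type, or 0 if no alignment is known and the runtime should
/// decide.
///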
unsigned Heuristic::getRecommendedAlignment(const Type *Ty,
                                            const TargetData &TD) {
  if (!Ty || Ty->isVoidTy())   // Is this void or collapsed?
    return 0;                  // No known alignment, let runtime decide.
  return Wants8ByteAlignment(Ty, 0, TD) ? 8 : 4;
}

/// getRecommendedAlignment - Return the recommended object alignment for this
/// DSNode.
///
unsigned Heuristic::getRecommendedAlignment(const DSNode *N) {
  // FIXME: Type
  //if (!N->getType() || N->getType()->isVoidTy()) // Is this void or collapsed?
  return 0; // No known alignment, let runtime decide.
  //const TargetData &TD = N->getParentGraph()->getTargetData();

  // If there are no doubles on an 8-byte boundary in this structure, there is
  // no reason to 8-byte align objects in the pool.
  //return Wants8ByteAlignment(N->getType(), 0, TD) ? 8 : 4;
}

//===-- AllNodes Heuristic ------------------------------------------------===//
//
// This heuristic pool allocates everything possible into separate pools.
//
struct AllNodesHeuristic : public Heuristic {
  void AssignToPools(const std::vector<const DSNode*> &NodesToPA,
                     Function *F, DSGraph* G,
                     std::vector<OnePool> &ResultPools) {
    for (unsigned i = 0, e = NodesToPA.size(); i != e; ++i)
      ResultPools.push_back(OnePool(NodesToPA[i]));
  }
};

//===-- AllButUnreachableFromMemory Heuristic -----------------------------===//
//
// This heuristic pool allocates everything possible into separate pools, except
// for nodes that are not reachable from other memory objects. This filters out
// objects that are not cyclic and are only pointed to by scalars: these tend to
// be singular memory allocations that are not worth creating a whole pool for.
//
struct AllButUnreachableFromMemoryHeuristic : public Heuristic {
  void AssignToPools(const std::vector<const DSNode*> &NodesToPA,
                     Function *F, DSGraph* G,
                     std::vector<OnePool> &ResultPools) {
    // Build a set of all nodes that are reachable from another node in the
    // graph. Here we ignore scalar nodes that are only globals, as they are
    // often global pointers to big arrays.
    std::set<const DSNode*> ReachableFromMemory;
    for (DSGraph::node_iterator I = G->node_begin(), E = G->node_end();
         I != E; ++I) {
      DSNode *N = I;
      // Ignore nodes that are just globals and not arrays.
      /*
      if (N->isArray() || N->isHeapNode() || N->isAllocaNode() ||
          N->isUnknownNode())
      */
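      // Walking with df_ext_begin/df_ext_end uses ReachableFromMemory as the
      // external visited set, inserting every node the traversal touches, so
      // the loop body below is intentionally empty: the iteration itself does
      // the marking.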
      // If a node is marked, all children are too.
      if (!ReachableFromMemory.count(N))
        for (DSNode::iterator NI = N->begin(), E = N->end(); NI != E; ++NI)
          for (df_ext_iterator<const DSNode*>
                 DI = df_ext_begin(*NI, ReachableFromMemory),
                 E = df_ext_end(*NI, ReachableFromMemory); DI != E; ++DI)
            /*empty*/;
    }

    // Only pool allocate a node if it is reachable from a memory object
    // (itself included).
    for (unsigned i = 0, e = NodesToPA.size(); i != e; ++i)
      if (ReachableFromMemory.count(NodesToPA[i]))
        ResultPools.push_back(OnePool(NodesToPA[i]));
  }
};

//===-- CyclicNodes Heuristic ---------------------------------------------===//
//
// This heuristic only pool allocates nodes in an SCC in the DSGraph.
//
struct CyclicNodesHeuristic : public Heuristic {
  void AssignToPools(const std::vector<const DSNode*> &NodesToPA,
                     Function *F, DSGraph* G,
                     std::vector<OnePool> &ResultPools);
};
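
/// NodeExistsInCycle - Return true if this node can reach itself by following
/// outgoing edges, i.e. it participates in a cycle in the DSGraph.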
static bool NodeExistsInCycle(const DSNode *N) {
  for (DSNode::const_iterator I = N->begin(), E = N->end(); I != E; ++I)
    if (*I && std::find(df_begin(*I), df_end(*I), N) != df_end(*I))
      return true;
  return false;
}

void CyclicNodesHeuristic::AssignToPools(const std::vector<const DSNode*> &NodesToPA,
                                         Function *F, DSGraph* G,
                                         std::vector<OnePool> &ResultPools) {
  for (unsigned i = 0, e = NodesToPA.size(); i != e; ++i)
    if (NodeExistsInCycle(NodesToPA[i]))
      ResultPools.push_back(OnePool(NodesToPA[i]));
}

//===-- SmartCoallesceNodes Heuristic -------------------------------------===//
//
// This heuristic attempts to be smart and coalesce nodes at times. In
// practice, it doesn't work very well.
//
struct SmartCoallesceNodesHeuristic : public Heuristic {
  void AssignToPools(const std::vector<const DSNode*> &NodesToPA,
                     Function *F, DSGraph* G,
                     std::vector<OnePool> &ResultPools) {
    // For globals, do not pool allocate unless the node is cyclic and not an
    // array (unless it's collapsed).
    if (F == 0) {
      for (unsigned i = 0, e = NodesToPA.size(); i != e; ++i) {
        const DSNode *Node = NodesToPA[i];
        if ((Node->isNodeCompletelyFolded() || !Node->isArrayNode()) &&
            NodeExistsInCycle(Node))
          ResultPools.push_back(OnePool(Node));
      }
    } else {
      // TODO
    }
  }
};
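
// NOTE: The disabled block below is an earlier sketch of the per-function
// version of this heuristic. It references state from its original context
// (e.g. PoolDescriptors, PoolDescType, InsertPoint) and is preserved for
// reference only.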
#if 0
/// NodeIsSelfRecursive - Return true if this node contains a pointer to itself.
static bool NodeIsSelfRecursive(DSNode *N) {
  for (DSNode::iterator I = N->begin(), E = N->end(); I != E; ++I)
    if (*I == N) return true;
  return false;
}

/// POVisit - This implements functionality found in Support/PostOrderIterator.h
/// but in a way that allows multiple roots to be used. If PostOrderIterator
/// supported an external set like DepthFirstIterator does, this cruft could be
/// eliminated.
///
static void POVisit(DSNode *N, std::set<DSNode*> &Visited,
                    std::vector<DSNode*> &Order) {
  if (!Visited.insert(N).second) return;  // Already visited.

  // Visit all children before visiting this node.
  for (DSNode::iterator I = N->begin(), E = N->end(); I != E; ++I)
    if (DSNode *C = const_cast<DSNode*>(*I))
      POVisit(C, Visited, Order);

  // Now that we have visited all of our children, add ourself to the order.
  Order.push_back(N);
}

// Heuristic for building per-function pools.
switch (Heuristic) {
case SmartCoallesceNodes: {
  std::set<DSNode*> NodesToPASet(NodesToPA.begin(), NodesToPA.end());

  // DSGraphs only have unidirectional edges; to traverse or inspect the
  // predecessors of nodes, we must build a mapping of the inverse graph.
  std::map<DSNode*, std::vector<DSNode*> > InverseGraph;
  for (unsigned i = 0, e = NodesToPA.size(); i != e; ++i) {
    DSNode *Node = NodesToPA[i];
    for (DSNode::iterator CI = Node->begin(), E = Node->end(); CI != E; ++CI)
      if (DSNode *Child = const_cast<DSNode*>(*CI))
        if (NodesToPASet.count(Child))
          InverseGraph[Child].push_back(Node);
  }

  // Traverse the heap nodes in reverse post-order so that we are guaranteed
  // to visit all nodes pointing to another node before we visit that node
  // itself (except with cycles).
  // FIXME: This really should be using the PostOrderIterator.h file stuff,
  // but the routines there do not support external storage!
  std::set<DSNode*> Visited;
  std::vector<DSNode*> Order;
  for (unsigned i = 0, e = NodesToPA.size(); i != e; ++i)
    POVisit(NodesToPA[i], Visited, Order);

  // We want RPO, not PO, so reverse the order.
  std::reverse(Order.begin(), Order.end());

  // Okay, we have an ordering of the nodes in reverse post-order. Traverse
  // each node in this ordering, noting that there may be nodes in the order
  // that are not in our NodesToPA list.
  for (unsigned i = 0, e = Order.size(); i != e; ++i)
    if (NodesToPASet.count(Order[i])) {  // Only process nodes to pool allocate.
      DSNode *N = Order[i];

      // If this node has a backedge to itself, pool allocate it in a new
      // pool.
      if (NodeIsSelfRecursive(N)) {
        // Create a new alloca instruction for the pool...
        Value *AI = new AllocaInst(PoolDescType, 0, "PD", InsertPoint);

        // Void types in the DS graph are never used.
        if (N->isNodeCompletelyFolded())
          std::cerr << "Node collapsing in '" << F.getName() << "'\n";

        // Update the PoolDescriptors map.
        PoolDescriptors.insert(std::make_pair(N, AI));
#if 1
      } else if (N->isArray() && !N->isNodeCompletelyFolded()) {
        // We never pool allocate array nodes.
        PoolDescriptors[N] =
          Constant::getNullValue(PointerType::getUnqual(PoolDescType));
        ++NumNonprofit;
#endif
      } else {
        // Otherwise the node is not self recursive. If the node is not an
        // array, we can co-locate it with the pool of a predecessor node if
        // any has been pool allocated, and start a new pool if a predecessor
        // is an array. If there is a predecessor of this node that has not
        // been visited yet in this RPO traversal, that means there is a
        // cycle, so we choose to pool allocate this node right away.
        //
        // If there are multiple predecessors in multiple different pools, we
        // don't pool allocate this node at all.

        // Check out each of the predecessors of this node.
        std::vector<DSNode*> &Preds = InverseGraph[N];
        Value *PredPool = 0;
        bool HasUnvisitedPred = false;
        bool HasArrayPred = false;
        bool HasMultiplePredPools = false;
        for (unsigned p = 0, e = Preds.size(); p != e; ++p) {
          DSNode *Pred = Preds[p];
          if (!PoolDescriptors.count(Pred))
            HasUnvisitedPred = true;  // No pool assigned to predecessor?
          else if (Pred->isArray() && !Pred->isNodeCompletelyFolded())
            HasArrayPred = true;
          else if (PredPool && PoolDescriptors[Pred] != PredPool)
            HasMultiplePredPools = true;
          else if (!PredPool &&
                   !isa<ConstantPointerNull>(PoolDescriptors[Pred]))
            PredPool = PoolDescriptors[Pred];
          // Otherwise, this predecessor has the same pool as a previous one.
        }

        if (HasMultiplePredPools) {
          // If this node has predecessors that are in different pools, don't
          // pool allocate this node.
          PoolDescriptors[N] =
            Constant::getNullValue(PointerType::getUnqual(PoolDescType));
          ++NumNonprofit;
        } else if (PredPool) {
          // If all of the predecessors of this node are already in a pool,
          // colocate it with them.
          PoolDescriptors[N] = PredPool;
          ++NumColocated;
        } else if (HasArrayPred || HasUnvisitedPred) {
          // If this node has an array predecessor, or if there is a
          // predecessor that has not been visited yet, allocate a new pool
          // for it.
          Value *AI = new AllocaInst(PoolDescType, 0, "PD", InsertPoint);
          if (N->isNodeCompletelyFolded())
            std::cerr << "Node collapsing in '" << F.getName() << "'\n";
          PoolDescriptors[N] = AI;
        } else {
          // If this node has no pool allocated predecessors, and there is no
          // reason to pool allocate it, don't.
          assert(PredPool == 0);
          PoolDescriptors[N] =
            Constant::getNullValue(PointerType::getUnqual(PoolDescType));
          ++NumNonprofit;
        }
      }
    }
}  // End switch case.
}  // End switch.
#endif

//===-- AllInOneGlobalPool Heuristic --------------------------------------===//
//
// This heuristic puts all memory in the whole program into a single global
// pool. This is not safe, and is not good for performance, but can be used to
// evaluate how well the pool allocator runtime works as a "malloc replacement".
//
struct AllInOneGlobalPoolHeuristic : public Heuristic {
  // TheGlobalPD - This global pool is the one and only one used when running
  // with Heuristic=AllInOneGlobalPool.
  GlobalVariable *TheGlobalPD;

  AllInOneGlobalPoolHeuristic() : TheGlobalPD(0) {}

  virtual bool IsRealHeuristic() { return false; }

  void AssignToPools(const std::vector<const DSNode*> &NodesToPA,
                     Function *F, DSGraph* G,
                     std::vector<OnePool> &ResultPools) {
    if (TheGlobalPD == 0)
      TheGlobalPD = PA->CreateGlobalPool(0, 0);

    // All nodes allocate from the same global pool.
    OnePool Pool;
    Pool.NodesInPool = NodesToPA;
    Pool.PoolDesc = TheGlobalPD;
    ResultPools.push_back(Pool);
  }
};

//===-- OnlyOverhead Heuristic --------------------------------------------===//
//
// This heuristic is a hack to evaluate how much overhead pool allocation adds
// to a program. It adds all of the pool arguments, pool initializations, and
// pool destructions to the program, but dynamically passes only null into the
// pool alloc/free functions, causing them to allocate from the heap instead.
//
struct OnlyOverheadHeuristic : public Heuristic {
  virtual bool IsRealHeuristic() { return false; }

  void AssignToPools(const std::vector<const DSNode*> &NodesToPA,
                     Function *F, DSGraph* G,
                     std::vector<OnePool> &ResultPools) {
    // For this heuristic, we assign everything possible to its own pool.
    for (unsigned i = 0, e = NodesToPA.size(); i != e; ++i)
      ResultPools.push_back(OnePool(NodesToPA[i]));
  }

  void HackFunctionBody(Function &F, std::map<const DSNode*, Value*> &PDs);
};

/// getDynamicallyNullPool - Return a PoolDescriptor* that is always dynamically
/// null. Insert the code necessary to produce it before the specified
/// instruction.
static Value *getDynamicallyNullPool(BasicBlock::iterator I) {
  // Arrange to dynamically pass null into all of the pool functions if we are
  // only checking for overhead.
  static Value *NullGlobal = 0;
  if (!NullGlobal) {
    Module *M = I->getParent()->getParent()->getParent();
    const Type *PoolTy = PoolAllocate::PoolDescPtrTy;
    Constant *Init = ConstantAggregateZero::get(PoolTy);
    NullGlobal = new GlobalVariable(*M,
                                    PoolAllocate::PoolDescPtrTy, false,
                                    GlobalValue::ExternalLinkage,
                                    Init,
                                    "llvm-poolalloc-null-init");
  }
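
  // Skip past the allocas at the start of the block so that the load of the
  // null pool descriptor is inserted after them.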
  while (isa<AllocaInst>(I)) ++I;
  return new LoadInst(NullGlobal, "nullpd", I);
}

// HackFunctionBody - This method is called on every transformed function body.
// Basically it replaces all uses of real pool descriptors with dynamically null
// values. However, it leaves pool init/destroy alone.
void OnlyOverheadHeuristic::HackFunctionBody(Function &F,
                                             std::map<const DSNode*,
                                                      Value*> &PDs) {
  Constant *PoolInit = PA->PoolInit;
  Constant *PoolDestroy = PA->PoolDestroy;
  Value *NullPD = getDynamicallyNullPool(F.front().begin());

  for (std::map<const DSNode*, Value*>::iterator PDI = PDs.begin(),
       E = PDs.end(); PDI != E; ++PDI) {
    Value *OldPD = PDI->second;
    std::vector<User*> OldPDUsers(OldPD->use_begin(), OldPD->use_end());
    for (unsigned i = 0, e = OldPDUsers.size(); i != e; ++i) {
      CallSite PDUser = CallSite::get(cast<Instruction>(OldPDUsers[i]));
      if (PDUser.getCalledValue() != PoolInit &&
          PDUser.getCalledValue() != PoolDestroy) {
        assert(PDUser.getInstruction()->getParent()->getParent() == &F &&
               "Not in cur fn??");
        PDUser.getInstruction()->replaceUsesOfWith(OldPD, NullPD);
      }
    }
  }
}

//===-- NoNodes Heuristic -------------------------------------------------===//
//
// This dummy heuristic chooses not to pool allocate anything.
//
struct NoNodesHeuristic : public Heuristic {
  virtual bool IsRealHeuristic() { return false; }

  void AssignToPools(const std::vector<const DSNode*> &NodesToPA,
                     Function *F, DSGraph* G,
                     std::vector<OnePool> &ResultPools) {
    // Nothing to pool allocate here.
  }
};

//===----------------------------------------------------------------------===//
// Heuristic dispatch support
//

PA::Heuristic *Heuristic::create() {
  switch (TheHeuristic) {
  default: assert(0 && "Unknown heuristic!");
  case AllNodes:            return new AllNodesHeuristic();
  case AllButUnreachableFromMemory:
    return new AllButUnreachableFromMemoryHeuristic();
  case CyclicNodes:         return new CyclicNodesHeuristic();
  case SmartCoallesceNodes: return new SmartCoallesceNodesHeuristic();
  case AllInOneGlobalPool:  return new AllInOneGlobalPoolHeuristic();
  case OnlyOverhead:        return new OnlyOverheadHeuristic();
  case NoNodes:             return new NoNodesHeuristic();
  }
}