//===-- Heuristic.cpp - Interface to PA heuristics ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file was developed by the LLVM research group and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements the various pool allocation heuristics.
//
//===----------------------------------------------------------------------===//
#include "Heuristic.h"
#include "dsa/DSGraphTraits.h"
#include "poolalloc/PoolAllocate.h"
#include "llvm/Instructions.h"
#include "llvm/Module.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetData.h"
#include <iostream>
using namespace llvm;
using namespace PA;
namespace {
  enum PoolAllocHeuristic {
    NoNodes,
    OnlyOverhead,
    AllInOneGlobalPool,
    SmartCoallesceNodes,
    CyclicNodes,
    AllButUnreachableFromMemory,
    AllNodes
  };

  cl::opt<PoolAllocHeuristic>
  TheHeuristic("poolalloc-heuristic",
    cl::desc("Heuristic to choose which nodes to pool allocate"),
    cl::values(clEnumVal(AllNodes, " Pool allocate all nodes"),
               clEnumVal(AllButUnreachableFromMemory, " Pool allocate all nodes reachable from memory objects"),
               clEnumVal(CyclicNodes, " Pool allocate nodes with cycles"),
               clEnumVal(SmartCoallesceNodes, " Use the smart node merging heuristic"),
               clEnumVal(AllInOneGlobalPool, " Use pool library as replacement for malloc/free"),
               clEnumVal(OnlyOverhead, " Do not pool allocate anything, but induce all overhead from it"),
               clEnumVal(NoNodes, " Do not pool allocate anything"),
               clEnumValEnd),
    cl::init(AllButUnreachableFromMemory));

  cl::opt<bool>
  DisableAlignOpt("poolalloc-disable-alignopt",
                  cl::desc("Force all pool alignment to 8 bytes"));
}

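// The heuristic is selected with the -poolalloc-heuristic flag defined above.
// For example, a hypothetical invocation selecting the cyclic-nodes heuristic
// (the exact driver and pass-loading flags depend on how poolalloc is built):
//
//   opt -load libpoolalloc.so -poolalloc -poolalloc-heuristic=CyclicNodes \
//       input.bc -o output.bc
//
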
Heuristic::~Heuristic() {}
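
/// getRecommendedSize - Return the recommended pool object size for this
/// DSNode, or zero if no fixed object size is known.
///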
unsigned Heuristic::getRecommendedSize(const DSNode *N) {
  unsigned PoolSize = 0;
  // FIXME: types
  // if (!N->isArray() && N->getType()->isSized()) {
  //   PoolSize = N->getParentGraph()->getTargetData().getTypeAllocSize(N->getType());
  // }
  if (PoolSize == 1) PoolSize = 0;
  return PoolSize;
}

/// Wants8ByteAlignment - Return true if objects of this type should be 8-byte
/// aligned. FIXME: this is a complete hack for X86 right now.
static bool Wants8ByteAlignment(const Type *Ty, unsigned Offs,
                                const TargetData &TD) {
  if (DisableAlignOpt) return true;

  if ((Offs & 7) == 0) {
    //
    // Note:
    // The LLVM API has changed, and I do not know how to tell if a type is
    // a double. Furthermore, the 8-byte alignment of doubles appears to be
    // a hack, and it is not clear why it is done. Therefore, we simply
    // align all floating point types on an 8-byte boundary.
    //
#if 0
    // Doubles always want to be 8-byte aligned.
    if (Ty == Type::DoubleTy) return true;
#else
    if (Ty->isFloatingPointTy()) return true;
#endif

    // If we are on a 64-bit system, we want to align 8-byte integers and
    // pointers.
    if (TD.getPrefTypeAlignment(Ty) == 8)
      return true;
  }

  if (Ty->isFirstClassType())
    return false;

  if (const StructType *STy = dyn_cast<StructType>(Ty)) {
    const StructLayout *SL = TD.getStructLayout(STy);
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
      if (Wants8ByteAlignment(STy->getElementType(i),
                              Offs+SL->getElementOffset(i), TD))
        return true;
    }
  } else if (const SequentialType *STy = dyn_cast<SequentialType>(Ty)) {
    return Wants8ByteAlignment(STy->getElementType(), Offs, TD);
  } else {
    errs() << *Ty << "\n";
    assert(0 && "Unknown type!");
  }
  return false;
}
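
/// getRecommendedAlignment - Return the recommended alignment (in bytes) for
/// objects of this type, or 0 if no alignment is known and the runtime should
/// decide.
///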
unsigned Heuristic::getRecommendedAlignment(const Type *Ty,
                                            const TargetData &TD) {
  if (!Ty || Ty->isVoidTy())   // Is this void or collapsed?
    return 0;                  // No known alignment, let runtime decide.
  return Wants8ByteAlignment(Ty, 0, TD) ? 8 : 4;
}

/// getRecommendedAlignment - Return the recommended object alignment for this
/// DSNode.
///
unsigned Heuristic::getRecommendedAlignment(const DSNode *N) {
  // FIXME: Type
  //if (!N->getType() || N->getType()->isVoidTy()) // Is this void or collapsed?
  return 0; // No known alignment, let runtime decide.
  //const TargetData &TD = N->getParentGraph()->getTargetData();

  // If there are no doubles on an 8-byte boundary in this structure, there is
  // no reason to 8-byte align objects in the pool.
  //return Wants8ByteAlignment(N->getType(), 0, TD) ? 8 : 4;
}

//===-- AllNodes Heuristic ------------------------------------------------===//
//
// This heuristic pool allocates everything possible into separate pools.
//
struct AllNodesHeuristic : public Heuristic {
  void AssignToPools(const std::vector<const DSNode*> &NodesToPA,
                     Function *F, DSGraph* G,
                     std::vector<OnePool> &ResultPools) {
    for (unsigned i = 0, e = NodesToPA.size(); i != e; ++i)
      ResultPools.push_back(OnePool(NodesToPA[i]));
  }
};

//===-- AllButUnreachableFromMemory Heuristic -----------------------------===//
//
// This heuristic pool allocates everything possible into separate pools, except
// for nodes that are not reachable from other memory objects. This filters out
// objects that are not cyclic and are only pointed to by scalars: these tend to
// be singular memory allocations that are not worth creating a whole pool for.
//
struct AllButUnreachableFromMemoryHeuristic : public Heuristic {
  void AssignToPools(const std::vector<const DSNode*> &NodesToPA,
                     Function *F, DSGraph* G,
                     std::vector<OnePool> &ResultPools) {
    // Build a set of all nodes that are reachable from another node in the
    // graph. Here we ignore scalar nodes that are only globals, as they are
    // often global pointers to big arrays.
    std::set<const DSNode*> ReachableFromMemory;
    for (DSGraph::node_iterator I = G->node_begin(), E = G->node_end();
         I != E; ++I) {
      DSNode *N = I;
      // Ignore nodes that are just globals and not arrays.
      /*
      if (N->isArray() || N->isHeapNode() || N->isAllocaNode() ||
          N->isUnknownNode())
      */
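      // Walking with df_ext_begin/df_ext_end uses ReachableFromMemory as the
      // external visited set, inserting every node the traversal touches, so
      // the loop body below is intentionally empty: the iteration itself does
      // the marking.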
      // If a node is marked, all children are too.
      if (!ReachableFromMemory.count(N))
        for (DSNode::iterator NI = N->begin(), E = N->end(); NI != E; ++NI)
          for (df_ext_iterator<const DSNode*>
                 DI = df_ext_begin(*NI, ReachableFromMemory),
                 E = df_ext_end(*NI, ReachableFromMemory); DI != E; ++DI)
            /*empty*/;
    }

    // Only pool allocate a node if it is reachable from a memory object
    // (itself included).
    for (unsigned i = 0, e = NodesToPA.size(); i != e; ++i)
      if (ReachableFromMemory.count(NodesToPA[i]))
        ResultPools.push_back(OnePool(NodesToPA[i]));
  }
};

//===-- CyclicNodes Heuristic ---------------------------------------------===//
//
// This heuristic only pool allocates nodes in an SCC in the DSGraph.
//
struct CyclicNodesHeuristic : public Heuristic {
  void AssignToPools(const std::vector<const DSNode*> &NodesToPA,
                     Function *F, DSGraph* G,
                     std::vector<OnePool> &ResultPools);
};
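
/// NodeExistsInCycle - Return true if this node can reach itself by following
/// outgoing edges, i.e. it participates in a cycle in the DSGraph.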
static bool NodeExistsInCycle(const DSNode *N) {
  for (DSNode::const_iterator I = N->begin(), E = N->end(); I != E; ++I)
    if (*I && std::find(df_begin(*I), df_end(*I), N) != df_end(*I))
      return true;
  return false;
}

void CyclicNodesHeuristic::AssignToPools(const std::vector<const DSNode*> &NodesToPA,
                                         Function *F, DSGraph* G,
                                         std::vector<OnePool> &ResultPools) {
  for (unsigned i = 0, e = NodesToPA.size(); i != e; ++i)
    if (NodeExistsInCycle(NodesToPA[i]))
      ResultPools.push_back(OnePool(NodesToPA[i]));
}

//===-- SmartCoallesceNodes Heuristic -------------------------------------===//
//
// This heuristic attempts to be smart and coalesce nodes at times. In
// practice, it doesn't work very well.
//
struct SmartCoallesceNodesHeuristic : public Heuristic {
  void AssignToPools(const std::vector<const DSNode*> &NodesToPA,
                     Function *F, DSGraph* G,
                     std::vector<OnePool> &ResultPools) {
    // For globals, do not pool allocate unless the node is cyclic and not an
    // array (unless it's collapsed).
    if (F == 0) {
      for (unsigned i = 0, e = NodesToPA.size(); i != e; ++i) {
        const DSNode *Node = NodesToPA[i];
        if ((Node->isNodeCompletelyFolded() || !Node->isArrayNode()) &&
            NodeExistsInCycle(Node))
          ResultPools.push_back(OnePool(Node));
      }
    } else {
      // TODO
    }
  }
};
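
// NOTE: The disabled block below is an earlier sketch of the per-function
// version of this heuristic. It references state from its original context
// (e.g. PoolDescriptors, PoolDescType, InsertPoint) and is preserved for
// reference only.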
#if 0
/// NodeIsSelfRecursive - Return true if this node contains a pointer to itself.
static bool NodeIsSelfRecursive(DSNode *N) {
  for (DSNode::iterator I = N->begin(), E = N->end(); I != E; ++I)
    if (*I == N) return true;
  return false;
}

/// POVisit - This implements functionality found in Support/PostOrderIterator.h
/// but in a way that allows multiple roots to be used. If PostOrderIterator
/// supported an external set like DepthFirstIterator does, this cruft could be
/// eliminated.
///
static void POVisit(DSNode *N, std::set<DSNode*> &Visited,
                    std::vector<DSNode*> &Order) {
  if (!Visited.insert(N).second) return;  // Already visited.

  // Visit all children before visiting this node.
  for (DSNode::iterator I = N->begin(), E = N->end(); I != E; ++I)
    if (DSNode *C = const_cast<DSNode*>(*I))
      POVisit(C, Visited, Order);

  // Now that we have visited all of our children, add ourself to the order.
  Order.push_back(N);
}

// Heuristic for building per-function pools.
switch (Heuristic) {
case SmartCoallesceNodes: {
  std::set<DSNode*> NodesToPASet(NodesToPA.begin(), NodesToPA.end());

  // DSGraphs only have unidirectional edges; to traverse or inspect the
  // predecessors of nodes, we must build a mapping of the inverse graph.
  std::map<DSNode*, std::vector<DSNode*> > InverseGraph;
  for (unsigned i = 0, e = NodesToPA.size(); i != e; ++i) {
    DSNode *Node = NodesToPA[i];
    for (DSNode::iterator CI = Node->begin(), E = Node->end(); CI != E; ++CI)
      if (DSNode *Child = const_cast<DSNode*>(*CI))
        if (NodesToPASet.count(Child))
          InverseGraph[Child].push_back(Node);
  }

  // Traverse the heap nodes in reverse post-order so that we are guaranteed
  // to visit all nodes pointing to another node before we visit that node
  // itself (except with cycles).
  // FIXME: This really should be using the PostOrderIterator.h file stuff,
  // but the routines there do not support external storage!
  std::set<DSNode*> Visited;
  std::vector<DSNode*> Order;
  for (unsigned i = 0, e = NodesToPA.size(); i != e; ++i)
    POVisit(NodesToPA[i], Visited, Order);

  // We want RPO, not PO, so reverse the order.
  std::reverse(Order.begin(), Order.end());

  // Okay, we have an ordering of the nodes in reverse post-order. Traverse
  // each node in this ordering, noting that there may be nodes in the order
  // that are not in our NodesToPA list.
  for (unsigned i = 0, e = Order.size(); i != e; ++i)
    if (NodesToPASet.count(Order[i])) {  // Only process nodes to pool allocate.
      DSNode *N = Order[i];

      // If this node has a backedge to itself, pool allocate it in a new
      // pool.
      if (NodeIsSelfRecursive(N)) {
        // Create a new alloca instruction for the pool...
        Value *AI = new AllocaInst(PoolDescType, 0, "PD", InsertPoint);

        // Void types in the DS graph are never used.
        if (N->isNodeCompletelyFolded())
          std::cerr << "Node collapsing in '" << F.getName() << "'\n";

        // Update the PoolDescriptors map.
        PoolDescriptors.insert(std::make_pair(N, AI));
#if 1
      } else if (N->isArray() && !N->isNodeCompletelyFolded()) {
        // We never pool allocate array nodes.
        PoolDescriptors[N] =
          Constant::getNullValue(PointerType::getUnqual(PoolDescType));
        ++NumNonprofit;
#endif
      } else {
        // Otherwise the node is not self recursive. If the node is not an
        // array, we can co-locate it with the pool of a predecessor node if
        // any has been pool allocated, and start a new pool if a predecessor
        // is an array. If there is a predecessor of this node that has not
        // been visited yet in this RPO traversal, that means there is a
        // cycle, so we choose to pool allocate this node right away.
        //
        // If there are multiple predecessors in multiple different pools, we
        // don't pool allocate this node at all.

        // Check out each of the predecessors of this node.
        std::vector<DSNode*> &Preds = InverseGraph[N];
        Value *PredPool = 0;
        bool HasUnvisitedPred = false;
        bool HasArrayPred = false;
        bool HasMultiplePredPools = false;
        for (unsigned p = 0, e = Preds.size(); p != e; ++p) {
          DSNode *Pred = Preds[p];
          if (!PoolDescriptors.count(Pred))
            HasUnvisitedPred = true;  // No pool assigned to predecessor?
          else if (Pred->isArray() && !Pred->isNodeCompletelyFolded())
            HasArrayPred = true;
          else if (PredPool && PoolDescriptors[Pred] != PredPool)
            HasMultiplePredPools = true;
          else if (!PredPool &&
                   !isa<ConstantPointerNull>(PoolDescriptors[Pred]))
            PredPool = PoolDescriptors[Pred];
          // Otherwise, this predecessor has the same pool as a previous one.
        }

        if (HasMultiplePredPools) {
          // If this node has predecessors that are in different pools, don't
          // pool allocate this node.
          PoolDescriptors[N] =
            Constant::getNullValue(PointerType::getUnqual(PoolDescType));
          ++NumNonprofit;
        } else if (PredPool) {
          // If all of the predecessors of this node are already in a pool,
          // colocate it with them.
          PoolDescriptors[N] = PredPool;
          ++NumColocated;
        } else if (HasArrayPred || HasUnvisitedPred) {
          // If this node has an array predecessor, or if there is a
          // predecessor that has not been visited yet, allocate a new pool
          // for it.
          Value *AI = new AllocaInst(PoolDescType, 0, "PD", InsertPoint);
          if (N->isNodeCompletelyFolded())
            std::cerr << "Node collapsing in '" << F.getName() << "'\n";
          PoolDescriptors[N] = AI;
        } else {
          // If this node has no pool allocated predecessors, and there is no
          // reason to pool allocate it, don't.
          assert(PredPool == 0);
          PoolDescriptors[N] =
            Constant::getNullValue(PointerType::getUnqual(PoolDescType));
          ++NumNonprofit;
        }
      }
    }
}  // End switch case.
}  // End switch.
#endif

//===-- AllInOneGlobalPool Heuristic --------------------------------------===//
//
// This heuristic puts all memory in the whole program into a single global
// pool. This is not safe, and is not good for performance, but can be used to
// evaluate how well the pool allocator runtime works as a "malloc replacement".
//
struct AllInOneGlobalPoolHeuristic : public Heuristic {
  // TheGlobalPD - This global pool is the one and only one used when running
  // with Heuristic=AllInOneGlobalPool.
  GlobalVariable *TheGlobalPD;

  AllInOneGlobalPoolHeuristic() : TheGlobalPD(0) {}

  virtual bool IsRealHeuristic() { return false; }

  void AssignToPools(const std::vector<const DSNode*> &NodesToPA,
                     Function *F, DSGraph* G,
                     std::vector<OnePool> &ResultPools) {
    if (TheGlobalPD == 0)
      TheGlobalPD = PA->CreateGlobalPool(0, 0);

    // All nodes allocate from the same global pool.
    OnePool Pool;
    Pool.NodesInPool = NodesToPA;
    Pool.PoolDesc = TheGlobalPD;
    ResultPools.push_back(Pool);
  }
};

//===-- OnlyOverhead Heuristic --------------------------------------------===//
//
// This heuristic is a hack to evaluate how much overhead pool allocation adds
// to a program. It adds all of the pool arguments, pool initializations, and
// pool destructions to the program, but dynamically passes only null into the
// pool alloc/free functions, causing them to allocate from the heap instead.
//
struct OnlyOverheadHeuristic : public Heuristic {
  virtual bool IsRealHeuristic() { return false; }

  void AssignToPools(const std::vector<const DSNode*> &NodesToPA,
                     Function *F, DSGraph* G,
                     std::vector<OnePool> &ResultPools) {
    // For this heuristic, we assign everything possible to its own pool.
    for (unsigned i = 0, e = NodesToPA.size(); i != e; ++i)
      ResultPools.push_back(OnePool(NodesToPA[i]));
  }

  void HackFunctionBody(Function &F, std::map<const DSNode*, Value*> &PDs);
};

/// getDynamicallyNullPool - Return a PoolDescriptor* that is always dynamically
/// null. Insert the code necessary to produce it before the specified
/// instruction.
static Value *getDynamicallyNullPool(BasicBlock::iterator I) {
  // Arrange to dynamically pass null into all of the pool functions if we are
  // only checking for overhead.
  static Value *NullGlobal = 0;
  if (!NullGlobal) {
    Module *M = I->getParent()->getParent()->getParent();
    const Type *PoolTy = PoolAllocate::PoolDescPtrTy;
    Constant *Init = ConstantAggregateZero::get(PoolTy);
    NullGlobal = new GlobalVariable(*M,
                                    PoolAllocate::PoolDescPtrTy, false,
                                    GlobalValue::ExternalLinkage,
                                    Init,
                                    "llvm-poolalloc-null-init");
  }
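
  // Skip past the allocas at the start of the block so that the load of the
  // null pool descriptor is inserted after them.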
  while (isa<AllocaInst>(I)) ++I;
  return new LoadInst(NullGlobal, "nullpd", I);
}

// HackFunctionBody - This method is called on every transformed function body.
// Basically it replaces all uses of real pool descriptors with dynamically null
// values. However, it leaves pool init/destroy alone.
void OnlyOverheadHeuristic::HackFunctionBody(Function &F,
                                             std::map<const DSNode*,
                                                      Value*> &PDs) {
  Constant *PoolInit = PA->PoolInit;
  Constant *PoolDestroy = PA->PoolDestroy;
  Value *NullPD = getDynamicallyNullPool(F.front().begin());

  for (std::map<const DSNode*, Value*>::iterator PDI = PDs.begin(),
       E = PDs.end(); PDI != E; ++PDI) {
    Value *OldPD = PDI->second;
    std::vector<User*> OldPDUsers(OldPD->use_begin(), OldPD->use_end());
    for (unsigned i = 0, e = OldPDUsers.size(); i != e; ++i) {
      CallSite PDUser = CallSite::get(cast<Instruction>(OldPDUsers[i]));
      if (PDUser.getCalledValue() != PoolInit &&
          PDUser.getCalledValue() != PoolDestroy) {
        assert(PDUser.getInstruction()->getParent()->getParent() == &F &&
               "Not in cur fn??");
        PDUser.getInstruction()->replaceUsesOfWith(OldPD, NullPD);
      }
    }
  }
}

//===-- NoNodes Heuristic -------------------------------------------------===//
//
// This dummy heuristic chooses not to pool allocate anything.
//
struct NoNodesHeuristic : public Heuristic {
  virtual bool IsRealHeuristic() { return false; }

  void AssignToPools(const std::vector<const DSNode*> &NodesToPA,
                     Function *F, DSGraph* G,
                     std::vector<OnePool> &ResultPools) {
    // Nothing to pool allocate here.
  }
};

//===----------------------------------------------------------------------===//
// Heuristic dispatch support
//

PA::Heuristic *Heuristic::create() {
  switch (TheHeuristic) {
  default: assert(0 && "Unknown heuristic!");
  case AllNodes:            return new AllNodesHeuristic();
  case AllButUnreachableFromMemory:
    return new AllButUnreachableFromMemoryHeuristic();
  case CyclicNodes:         return new CyclicNodesHeuristic();
  case SmartCoallesceNodes: return new SmartCoallesceNodesHeuristic();
  case AllInOneGlobalPool:  return new AllInOneGlobalPoolHeuristic();
  case OnlyOverhead:        return new OnlyOverheadHeuristic();
  case NoNodes:             return new NoNodesHeuristic();
  }
}