//===-- Heuristic.cpp - Interface to PA heuristics ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file was developed by the LLVM research group and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements the various pool allocation heuristics.
//
// FIXME: It seems that the old alignment heuristics contained in this file
// were designed for 32-bit x86 processors (which makes sense as
// poolalloc was developed before x86-64). We need to revisit this
// code and decide what the heuristics should do today.
//
//===----------------------------------------------------------------------===//
#include "dsa/DSGraphTraits.h"
#include "poolalloc/Heuristic.h"
#include "poolalloc/PoolAllocate.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/IR/DataLayout.h"
#include <iostream>
using namespace llvm;
using namespace PA;
namespace {
cl::opt<bool>
DisableAlignOpt("poolalloc-disable-alignopt",
cl::desc("Force all pool alignment to 8 bytes"));
}
//
// Function: getRecommendedSize()
//
// Description:
// Determine the recommended allocation size for objects represented by this
// DSNode. This is used to determine the size of objects allocated by
// poolalloc().
//
// Inputs:
// N - The DSNode representing the objects whose recommended size the caller
// wants.
//
// Return value:
// The size, in bytes, that should be used in allocating most objects
// represented by this DSNode.
//
unsigned Heuristic::getRecommendedSize(const DSNode *N) {
unsigned PoolSize = 0;
//
// If the DSNode only represents singleton objects (i.e., not arrays), then
// find the size of the singleton object.
//
if (!N->isArrayNode()) {
PoolSize = N->getSize();
}
//
// If the object size is 1, just say that it's zero. The run-time will
// figure out how to deal with it.
//
if (PoolSize == 1) PoolSize = 0;
return PoolSize;
}
//
// Function: Wants8ByteAlignment ()
//
// Description:
// Determine if an object of the specified type should be allocated on an
// 8-byte boundary. This may either be required by the target platform or may
// merely improve performance by aligning data the way the processor wants
// it.
//
// Inputs:
// Ty - The type of the object for which alignment should be tested.
// Offs - The offset of the type within a derived type (e.g., a structure).
// We will try to align a structure on an 8-byte boundary if one of its
// elements can/needs to be.
// TD - A reference to the DataLayout pass.
//
// Return value:
// true - This type should be allocated on an 8-byte boundary.
// false - This type does not need to be allocated on an 8-byte boundary.
//
// Notes:
// FIXME: This is a complete hack for X86 right now.
// FIXME: This code needs to be updated for x86-64.
// FIXME: This code needs to handle LLVM first-class structures and vectors.
//
static bool
Wants8ByteAlignment(Type *Ty, unsigned Offs, const DataLayout &TD) {
//
// If the user has requested this optimization to be turned off, don't bother
// doing it.
//
if (DisableAlignOpt) return true;
//
// If this type is at an alignable offset within its larger data structure,
// see if we should 8-byte align it.
//
if ((Offs & 7) == 0) {
//
// Doubles always want to be 8-byte aligned regardless of what DataLayout
// claims.
//
if (Ty->isDoubleTy()) return true;
//
// If we are on a 64-bit system, we want to align 8-byte integers and
// pointers.
//
if (TD.getPrefTypeAlignment(Ty) == 8)
return true;
}
//
// If this is a first-class data type, but it is located at an offset within
// a structure that cannot be 8-byte aligned, then we cannot ever guarantee
// to 8-byte align it. Therefore, do not try to force it to 8-byte
// alignment.
if (Ty->isFirstClassType())
return false;
//
// If this is a structure or array type, check if any of its elements at
// 8-byte alignment desire to have 8-byte alignment. If so, then the entire
// object wants 8-byte alignment.
//
if (StructType *STy = dyn_cast<StructType>(Ty)) {
const StructLayout *SL = TD.getStructLayout(STy);
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
if (Wants8ByteAlignment(STy->getElementType(i),
Offs+SL->getElementOffset(i), TD))
return true;
}
} else if (SequentialType *STy = dyn_cast<SequentialType>(Ty)) {
return Wants8ByteAlignment(STy->getElementType(), Offs, TD);
} else {
errs() << *Ty << "\n";
assert(0 && "Unknown type!");
}
return false;
}
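//
// Method: getRecommendedAlignment()
//
// Description:
// Determine the recommended alignment, in bytes, for an object of the
// specified type. A null or void type (e.g., from a collapsed DSNode) has no
// known alignment; zero is returned so that the run-time can decide.
//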
unsigned Heuristic::getRecommendedAlignment(Type *Ty,
const DataLayout &TD) {
if (!Ty || Ty->isVoidTy()) // Is this void or collapsed?
return 0; // No known alignment, let runtime decide.
return Wants8ByteAlignment(Ty, 0, TD) ? 8 : 4;
}
///
/// getRecommendedAlignment - Return the recommended object alignment for this
/// DSNode.
///
/// @Node - The DSNode for which allocation alignment information is requested.
///
/// FIXME: This method assumes an object wants 4-byte or 8-byte alignment. The
/// new types in LLVM may want larger alignments such as 16-byte
/// alignment. Need to update this code to handle that.
///
unsigned
Heuristic::getRecommendedAlignment(const DSNode *Node) {
//
// Get the DataLayout information from the DSNode's DSGraph.
//
const DataLayout &TD = Node->getParentGraph()->getDataLayout();
//
// Iterate through all the types that the DSA type-inference algorithm
// found and determine if any of them should be 8-byte aligned. If so, then
// we'll 8-byte align the entire structure.
//
DSNode::const_type_iterator tyi;
for (tyi = Node->type_begin(); tyi != Node->type_end(); ++tyi) {
for (svset<Type*>::const_iterator tyii = tyi->second->begin(),
tyee = tyi->second->end(); tyii != tyee; ++tyii) {
//
// Get the type of object allocated. If there is no type, then it is
// implicitly of void type.
//
Type * TypeCreated = *tyii;
if (TypeCreated) {
//
// If this type wants 8-byte alignment, then 8-byte align the entire node.
//
if (Wants8ByteAlignment(TypeCreated, tyi->first, TD)) {
return 8;
}
}
}
}
return 4;
}
//
// Function: GetNodesReachableFromGlobals()
//
// Description:
// This function finds all DSNodes which are reachable from globals. It finds
// DSNodes both within the local DSGraph as well as in the Globals graph that
// are reachable from globals. It does, however, filter out those DSNodes
// which are of no interest to automatic pool allocation.
//
// Inputs:
// G - The DSGraph for which to find DSNodes which are reachable by globals.
// This DSGraph can either be a DSGraph associated with a function *or*
// it can be the globals graph itself.
//
// Outputs:
// NodesFromGlobals - A reference to a container object in which to record
// DSNodes reachable from globals. DSNodes are *added* to
// this container; it is not cleared by this function.
// DSNodes from both the local and globals graph are added.
static void
GetNodesReachableFromGlobals (DSGraph* G,
DenseSet<const DSNode*> &NodesFromGlobals) {
//
// Get the globals graph associated with this DSGraph. If the globals graph
// is NULL, then the graph that was passed in *is* the globals graph.
//
DSGraph * GlobalsGraph = G->getGlobalsGraph();
if (!GlobalsGraph)
GlobalsGraph = G;
//
// Find all DSNodes which are reachable in the globals graph.
//
for (DSGraph::node_iterator NI = GlobalsGraph->node_begin();
NI != GlobalsGraph->node_end();
++NI) {
NI->markReachableNodes(NodesFromGlobals);
}
//
// Remove those global nodes which we know will never be pool allocated.
//
std::vector<const DSNode *> toRemove;
for (DenseSet<const DSNode*>::iterator I = NodesFromGlobals.begin(),
E = NodesFromGlobals.end(); I != E; ) {
DenseSet<const DSNode*>::iterator Last = I; ++I;
//
// Nodes that escape to external code could be reachable from globals.
// Nodes that are incomplete could be heap nodes.
// Unknown nodes could be anything.
//
const DSNode *tmp = *Last;
if (!(tmp->isHeapNode()))
toRemove.push_back (tmp);
}
//
// Remove all globally reachable DSNodes which do not require pools.
//
for (unsigned index = 0; index < toRemove.size(); ++index) {
NodesFromGlobals.erase(toRemove[index]);
}
//
// Now the fun part. Find DSNodes in the local graph that correspond to
// those nodes reachable in the globals graph. Add them to the set of
// reachable nodes, too.
//
if (G->getGlobalsGraph()) {
//
// Compute a mapping between local DSNodes and DSNodes in the globals
// graph.
//
DSGraph::NodeMapTy NodeMap;
G->computeGToGGMapping (NodeMap);
//
// Scan through all DSNodes in the local graph. If a local DSNode has a
// corresponding DSNode in the globals graph that is reachable from a
// global, then add the local DSNode to the set of DSNodes reachable from a
// global.
//
// FIXME: A node's existence within the global DSGraph is probably
// sufficient evidence that it is reachable from a global.
//
DSGraph::node_iterator ni = G->node_begin();
for (; ni != G->node_end(); ++ni) {
DSNode * N = ni;
if (NodesFromGlobals.count (NodeMap[N].getNode()))
NodesFromGlobals.insert (N);
}
}
}
//
// Method: findGlobalPoolNodes()
//
// Description:
// This method finds DSNodes that are reachable from globals and that need a
// pool. The Automatic Pool Allocation transform will use the returned
// information to build global pools for the DSNodes in question.
//
// Note that this method does not assign DSNodes to pools; it merely decides
// which DSNodes are reachable from globals and will need a pool of global
// scope.
//
// Outputs:
// Nodes - The DSNodes that are both reachable from globals and which should
// have global pools will be *added* to this container.
//
void
Heuristic::findGlobalPoolNodes (DSNodeSet_t & Nodes) {
// Get the globals graph for the program.
DSGraph* GG = Graphs->getGlobalsGraph();
// Get all of the nodes reachable from globals.
DenseSet<const DSNode*> GlobalHeapNodes;
GetNodesReachableFromGlobals (GG, GlobalHeapNodes);
//
// Now find all DSNodes belonging to function-local DSGraphs which are
// mirrored in the globals graph. These DSNodes require a global pool, too.
//
for (Module::iterator F = M->begin(); F != M->end(); ++F) {
if (Graphs->hasDSGraph(*F)) {
DSGraph* G = Graphs->getDSGraph(*F);
GetNodesReachableFromGlobals (G, GlobalHeapNodes);
}
}
//
// Copy the values into the output container. Note that DenseSet has no
// iterator traits (or whatever allows us to treat DenseSet as a generic
// container), so we have to use a loop to copy values from the DenseSet into
// the output container.
//
for (DenseSet<const DSNode*>::iterator I = GlobalHeapNodes.begin(),
E = GlobalHeapNodes.end(); I != E; ++I) {
Nodes.insert (*I);
}
return;
}
//
// Method: getGlobalPoolNodes()
//
// Description:
// This method returns DSNodes that are reachable from globals and that need a
// pool. The Automatic Pool Allocation transform will use the returned
// information to build global pools for the DSNodes in question.
//
// Note that this method does not assign DSNodes to pools; it merely decides
// which DSNodes are reachable from globals and will need a pool of global
// scope.
//
// Outputs:
// Nodes - The DSNodes that are both reachable from globals and which should
// have global pools will be *added* to this container.
//
// Preconditions:
// This method assumes that findGlobalPoolNodes() has been called to find
// all of the global DSNodes needing a pool.
//
void
Heuristic::getGlobalPoolNodes (std::vector<const DSNode *> & Nodes) {
//
// Copy the DSNodes which are globally reachable and need a global pool into
// the set of DSNodes given to us by the caller.
//
Nodes.insert (Nodes.end(), GlobalPoolNodes.begin(), GlobalPoolNodes.end());
return;
}
//
// Method: getLocalPoolNodes()
//
// Description:
// For a given function, determine which DSNodes for that function should have
// local pools created for them.
//
void
Heuristic::getLocalPoolNodes (const Function & F, DSNodeList_t & Nodes) {
//
// Get the DSGraph of the specified function. If the DSGraph has no nodes,
// then there is nothing we need to do.
//
DSGraph* G = Graphs->getDSGraph(F);
if (G->node_begin() == G->node_end()) return;
//
// Calculate which DSNodes are reachable from globals. If a node is reachable
// from a global, we will create a global pool for it, so no pool argument
// needs to be passed into the function.
//
Graphs->getGlobalsGraph();
//
// Map all nodes reachable from this graph to the corresponding nodes in
// the globals graph.
//
DSGraph::NodeMapTy GlobalsGraphNodeMapping;
G->computeGToGGMapping(GlobalsGraphNodeMapping);
//
// Loop over all of the nodes which are non-escaping, adding pool-allocatable
// ones to the NodesToPA vector. In other words, scan over the DSGraph and
// find nodes for which a new pool must be created within this function.
//
for (DSGraph::node_iterator I = G->node_begin(), E = G->node_end();
I != E;
++I){
// Get the DSNode and, if applicable, its mirror in the globals graph
DSNode * N = I;
DSNode * GGN = GlobalsGraphNodeMapping[N].getNode();
//
// Only the following nodes are pool allocated:
// 1) Local Heap nodes
// 2) Nodes which are mirrored in the globals graph and, in the globals
// graph, are heap nodes.
//
if ((N->isHeapNode()) || (GGN && GGN->isHeapNode())) {
if (!(GlobalPoolNodes.count (N) || GlobalPoolNodes.count (GGN))) {
// Otherwise, if it was not passed in from outside the function, it must
// be a local pool!
assert((!N->isGlobalNode() || N->isPtrToIntNode()) && "Should be in global mapping!");
if(!N->isPtrToIntNode()) {
Nodes.push_back (N);
}
}
}
}
return;
}
//===-- AllButUnreachableFromMemoryHeuristic Heuristic --------------------===//
//
// This heuristic pool allocates everything possible into separate pools,
// except for objects that are not reachable from other memory objects. This
// filters out objects that are not cyclic and are only pointed to by scalars:
// these tend to be single memory allocations that are not worth creating a
// whole pool for.
//
bool
AllButUnreachableFromMemoryHeuristic::runOnModule (Module & Module) {
//
// Remember which module we are analyzing.
//
M = &Module;
//
// Get the reference to the DSA Graph.
//
Graphs = &getAnalysis<EQTDDataStructures>();
//
// Find DSNodes which are reachable from globals and should be pool
// allocated.
//
findGlobalPoolNodes (GlobalPoolNodes);
// We never modify anything in this pass
return false;
}
void
AllButUnreachableFromMemoryHeuristic::AssignToPools (
const DSNodeList_t &NodesToPA,
Function *F, DSGraph* G,
std::vector<OnePool> &ResultPools) {
// Build a set of all nodes that are reachable from another node in the
// graph. Here we ignore scalar nodes that are only globals as they are
// often global pointers to big arrays.
std::set<const DSNode*> ReachableFromMemory;
for (DSGraph::node_iterator I = G->node_begin(), E = G->node_end();
I != E; ++I) {
DSNode *N = I;
#if 0
//
// Ignore nodes that are just globals and not arrays.
//
if (N->isArray() || N->isHeapNode() || N->isAllocaNode() ||
N->isUnknownNode())
#endif
// If a node is marked, all children are too.
if (!ReachableFromMemory.count(N)) {
for (DSNode::iterator NI = N->begin(), E = N->end(); NI != E; ++NI) {
//
// Sometimes this results in a NULL DSNode. Skip it if that is the
// case.
//
if (!(*NI)) continue;
//
// Do a depth-first iteration over the DSGraph starting with this
// child node.
//
for (df_ext_iterator<const DSNode*>
DI = df_ext_begin(*NI, ReachableFromMemory),
E = df_ext_end(*NI, ReachableFromMemory); DI != E; ++DI)
/*empty*/;
}
}
}
// Only pool allocate a node if it is reachable from a memory object (itself
// included).
for (unsigned i = 0, e = NodesToPA.size(); i != e; ++i)
if (ReachableFromMemory.count(NodesToPA[i]))
ResultPools.push_back(OnePool(NodesToPA[i]));
}
//===-- CyclicNodes Heuristic ---------------------------------------------===//
//
// This heuristic only pool allocates nodes in an SCC in the DSGraph.
//
bool
CyclicNodesHeuristic::runOnModule (Module & Module) {
//
// Remember which module we are analyzing.
//
M = &Module;
//
// Get the reference to the DSA Graph.
//
Graphs = &getAnalysis<EQTDDataStructures>();
//
// Find DSNodes which are reachable from globals and should be pool
// allocated.
//
findGlobalPoolNodes (GlobalPoolNodes);
// We never modify anything in this pass
return false;
}
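//
// Function: NodeExistsInCycle()
//
// Description:
// Determine whether the specified DSNode participates in a cycle, i.e.,
// whether the node is reachable again by following edges out of one of its
// successors.
//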
static bool NodeExistsInCycle(const DSNode *N) {
for (DSNode::const_iterator I = N->begin(), E = N->end(); I != E; ++I)
if (*I && std::find(df_begin(*I), df_end(*I), N) != df_end(*I))
return true;
return false;
}
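//
// Method: AssignToPools()
//
// Description:
// Assign a separate pool to each candidate DSNode that participates in a
// cycle; acyclic candidates are not pool allocated by this heuristic.
//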
void
CyclicNodesHeuristic::AssignToPools(const DSNodeList_t &NodesToPA,
Function *F, DSGraph* G,
std::vector<OnePool> &ResultPools) {
for (unsigned i = 0, e = NodesToPA.size(); i != e; ++i)
if (NodeExistsInCycle(NodesToPA[i]))
ResultPools.push_back(OnePool(NodesToPA[i]));
}
//===-- SmartCoallesceNodes Heuristic -------------------------------------===//
//
// This heuristic attempts to be smart and coalesce nodes at times. In
// practice, it doesn't work very well.
//
bool
SmartCoallesceNodesHeuristic::runOnModule (Module & Module) {
//
// Remember which module we are analyzing.
//
M = &Module;
//
// Get the reference to the DSA Graph.
//
Graphs = &getAnalysis<EQTDDataStructures>();
//
// Find DSNodes which are reachable from globals and should be pool
// allocated.
//
findGlobalPoolNodes (GlobalPoolNodes);
// We never modify anything in this pass
return false;
}
void
SmartCoallesceNodesHeuristic::AssignToPools(const DSNodeList_t &NodesToPA,
Function *F, DSGraph* G,
std::vector<OnePool> &ResultPools) {
// For globals, do not pool allocate unless the node is cyclic and not an
// array (unless it's collapsed).
if (F == 0) {
for (unsigned i = 0, e = NodesToPA.size(); i != e; ++i) {
const DSNode *Node = NodesToPA[i];
if ((Node->isNodeCompletelyFolded() || !Node->isArrayNode()) &&
NodeExistsInCycle(Node))
ResultPools.push_back(OnePool(Node));
}
} else {
// TODO
}
}
#if 0
/// NodeIsSelfRecursive - Return true if this node contains a pointer to itself.
static bool NodeIsSelfRecursive(DSNode *N) {
for (DSNode::iterator I = N->begin(), E = N->end(); I != E; ++I)
if (*I == N) return true;
return false;
}
/// POVisit - This implements functionality found in Support/PostOrderIterator.h
/// but in a way that allows multiple roots to be used. If PostOrderIterator
// supported an external set like DepthFirstIterator did, I could eliminate this
/// cruft.
///
static void POVisit(DSNode *N, std::set<DSNode*> &Visited,
std::vector<DSNode*> &Order) {
if (!Visited.insert(N).second) return; // already visited
// Visit all children before visiting this node.
for (DSNode::iterator I = N->begin(), E = N->end(); I != E; ++I)
if (DSNode *C = const_cast<DSNode*>(*I))
POVisit(C, Visited, Order);
// Now that we have visited all of our children, add this node to the order.
Order.push_back(N);
}
// Heuristic for building per-function pools
switch (Heuristic) {
case SmartCoallesceNodes: {
std::set<DSNode*> NodesToPASet(NodesToPA.begin(), NodesToPA.end());
// DSGraphs only have unidirectional edges, to traverse or inspect the
// predecessors of nodes, we must build a mapping of the inverse graph.
std::map<DSNode*, std::vector<DSNode*> > InverseGraph;
for (unsigned i = 0, e = NodesToPA.size(); i != e; ++i) {
DSNode *Node = NodesToPA[i];
for (DSNode::iterator CI = Node->begin(), E = Node->end(); CI != E; ++CI)
if (DSNode *Child = const_cast<DSNode*>(*CI))
if (NodesToPASet.count(Child))
InverseGraph[Child].push_back(Node);
}
// Traverse the heap nodes in reverse-post-order so that we are guaranteed
// to visit all nodes pointing to another node before we visit that node
// itself (except with cycles).
// FIXME: This really should be using the PostOrderIterator.h file stuff,
// but the routines there do not support external storage!
std::set<DSNode*> Visited;
std::vector<DSNode*> Order;
for (unsigned i = 0, e = NodesToPA.size(); i != e; ++i)
POVisit(NodesToPA[i], Visited, Order);
// We want RPO, not PO, so reverse the order.
std::reverse(Order.begin(), Order.end());
// Okay, we have an ordering of the nodes in reverse post order. Traverse
// each node in this ordering, noting that there may be nodes in the order
// that are not in our NodesToPA list.
for (unsigned i = 0, e = Order.size(); i != e; ++i)
if (NodesToPASet.count(Order[i])) { // Only process pa nodes.
DSNode *N = Order[i];
// If this node has a backedge to itself, pool allocate it in a new
// pool.
if (NodeIsSelfRecursive(N)) {
// Create a new alloca instruction for the pool...
Value *AI = new AllocaInst(PoolDescType, 0, "PD", InsertPoint);
// Void types in DS graph are never used
if (N->isNodeCompletelyFolded())
std::cerr << "Node collapsing in '" << F.getName() << "'\n";
// Update the PoolDescriptors map
PoolDescriptors.insert(std::make_pair(N, AI));
#if 1
} else if (N->isArray() && !N->isNodeCompletelyFolded()) {
// We never pool allocate array nodes.
PoolDescriptors[N] =
Constant::getNullValue(PointerType::getUnqual(PoolDescType));
++NumNonprofit;
#endif
} else {
// Otherwise the node is not self recursive. If the node is not an
// array, we can co-locate it with the pool of a predecessor node if
// any has been pool allocated, and start a new pool if a predecessor
// is an array. If there is a predecessor of this node that has not
// been visited yet in this RPO traversal, that means there is a
// cycle, so we choose to pool allocate this node right away.
//
// If there are multiple predecessors in multiple different pools, we
// don't pool allocate this at all.
// Check out each of the predecessors of this node.
std::vector<DSNode*> &Preds = InverseGraph[N];
Value *PredPool = 0;
bool HasUnvisitedPred = false;
bool HasArrayPred = false;
bool HasMultiplePredPools = false;
for (unsigned p = 0, e = Preds.size(); p != e; ++p) {
DSNode *Pred = Preds[p];
if (!PoolDescriptors.count(Pred))
HasUnvisitedPred = true; // no pool assigned to predecessor?
else if (Pred->isArray() && !Pred->isNodeCompletelyFolded())
HasArrayPred = true;
else if (PredPool && PoolDescriptors[Pred] != PredPool)
HasMultiplePredPools = true;
else if (!PredPool &&
!isa<ConstantPointerNull>(PoolDescriptors[Pred]))
PredPool = PoolDescriptors[Pred];
// Otherwise, this predecessor has the same pool as a previous one.
}
if (HasMultiplePredPools) {
// If this node has predecessors that are in different pools, don't
// pool allocate this node.
PoolDescriptors[N] =
Constant::getNullValue(PointerType::getUnqual(PoolDescType));
++NumNonprofit;
} else if (PredPool) {
// If all of the predecessors of this node are already in a pool,
// colocate.
PoolDescriptors[N] = PredPool;
++NumColocated;
} else if (HasArrayPred || HasUnvisitedPred) {
// If this node has an array predecessor, or if there is a
// predecessor that has not been visited yet, allocate a new pool
// for it.
Value *AI = new AllocaInst(PoolDescType, 0, "PD", InsertPoint);
if (N->isNodeCompletelyFolded())
std::cerr << "Node collapsing in '" << F.getName() << "'\n";
PoolDescriptors[N] = AI;
} else {
// If this node has no pool allocated predecessors, and there is no
// reason to pool allocate it, don't.
assert(PredPool == 0);
PoolDescriptors[N] =
Constant::getNullValue(PointerType::getUnqual(PoolDescType));
++NumNonprofit;
}
}
}
} // End switch case
} // end switch
#endif
//===-- AllInOneGlobalPool Heuristic --------------------------------------===//
//
// This heuristic puts all memory in the whole program into a single global
// pool. This is not safe, and is not good for performance, but can be used to
// evaluate how well the pool allocator runtime works as a "malloc replacement".
//
bool
AllInOneGlobalPoolHeuristic::runOnModule (Module & Module) {
//
// Remember which module we are analyzing.
//
M = &Module;
//
// Get the reference to the DSA Graph.
//
Graphs = &getAnalysis<EQTDDataStructures>();
//
// Find DSNodes which are reachable from globals and should be pool
// allocated.
//
findGlobalPoolNodes (GlobalPoolNodes);
// We never modify anything in this pass
return false;
}
void
AllInOneGlobalPoolHeuristic::AssignToPools(const DSNodeList_t &NodesToPA,
Function *F, DSGraph* G,
std::vector<OnePool> &ResultPools) {
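// Lazily create the single global pool descriptor the first time any node is
// assigned to it.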
if (TheGlobalPD == 0)
TheGlobalPD = PA->CreateGlobalPool(0, 0);
// All nodes allocate from the same global pool.
OnePool Pool;
Pool.NodesInPool = NodesToPA;
Pool.PoolDesc = TheGlobalPD;
ResultPools.push_back(Pool);
}
//===-- OnlyOverhead Heuristic --------------------------------------------===//
//
// This heuristic is a hack to evaluate how much overhead pool allocation adds
// to a program. It adds all of the pool arguments, pool init calls, and pool
// destroy calls to the program, but dynamically only passes null into the
// pool alloc/free functions, causing them to allocate from the heap.
//
bool
OnlyOverheadHeuristic::runOnModule (Module & Module) {
//
// Remember which module we are analyzing.
//
M = &Module;
//
// Get the reference to the DSA Graph.
//
Graphs = &getAnalysis<EQTDDataStructures>();
//
// Find DSNodes which are reachable from globals and should be pool
// allocated.
//
findGlobalPoolNodes (GlobalPoolNodes);
// We never modify anything in this pass
return false;
}
void
OnlyOverheadHeuristic::AssignToPools(const DSNodeList_t &NodesToPA,
Function *F, DSGraph* G,
std::vector<OnePool> &ResultPools) {
// For this heuristic, we assign everything possible to its own pool.
for (unsigned i = 0, e = NodesToPA.size(); i != e; ++i)
ResultPools.push_back(OnePool(NodesToPA[i]));
}
/// getDynamicallyNullPool - Return a PoolDescriptor* that is always dynamically
/// null. Insert the code necessary to produce it before the specified
/// instruction.
static Value *
getDynamicallyNullPool(BasicBlock::iterator I) {
// Arrange to dynamically pass null into all of the pool functions if we are
// only checking for overhead.
static Value *NullGlobal = 0;
if (!NullGlobal) {
Module *M = I->getParent()->getParent()->getParent();
Type * PoolTy = PoolAllocate::PoolDescPtrTy;
Constant * Init = ConstantPointerNull::get(cast<PointerType>(PoolTy));
NullGlobal = new GlobalVariable(*M,
PoolAllocate::PoolDescPtrTy, false,
GlobalValue::ExternalLinkage,
Init,
"llvm-poolalloc-null-init");
}
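// Skip past any allocas at the start of the block so that the load of the
// null pool descriptor is inserted after them.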
while (isa<AllocaInst>(I)) ++I;
return new LoadInst(NullGlobal, "nullpd", I);
}
// HackFunctionBody - This method is called on every transformed function body.
// Basically it replaces all uses of real pool descriptors with dynamically null
// values. However, it leaves pool init/destroy alone.
void
OnlyOverheadHeuristic::HackFunctionBody(Function &F,
std::map<const DSNode*, Value*> &PDs) {
Constant *PoolInit = PA->PoolInit;
Constant *PoolDestroy = PA->PoolDestroy;
Value *NullPD = getDynamicallyNullPool(F.front().begin());
for (std::map<const DSNode*, Value*>::iterator PDI = PDs.begin(),
E = PDs.end(); PDI != E; ++PDI) {
Value *OldPD = PDI->second;
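// Copy the users into a vector first; replaceUsesOfWith() below mutates the
// use list while we would otherwise be iterating over it.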
std::vector<User*> OldPDUsers(OldPD->user_begin(), OldPD->user_end());
for (unsigned i = 0, e = OldPDUsers.size(); i != e; ++i) {
CallSite PDUser(cast<Instruction>(OldPDUsers[i]));
if (PDUser.getCalledValue() != PoolInit &&
PDUser.getCalledValue() != PoolDestroy) {
assert(PDUser.getInstruction()->getParent()->getParent() == &F &&
"Not in cur fn??");
PDUser.getInstruction()->replaceUsesOfWith(OldPD, NullPD);
}
}
}
}
//===-- NoNodes Heuristic -------------------------------------------------===//
//
// This dummy heuristic chooses to not pool allocate anything.
//
bool
NoNodesHeuristic::runOnModule (Module & Module) {
//
// Remember which module we are analyzing.
//
M = &Module;
// We never modify anything in this pass
return false;
}
//
// Register all of the heuristic passes.
//
static RegisterPass<AllButUnreachableFromMemoryHeuristic>
B ("paheur-AllButUnreachableFromMemory", "Pool allocate all reachable from memory objects");
static RegisterPass<CyclicNodesHeuristic>
C ("paheur-CyclicNodes", "Pool allocate nodes with cycles");
static RegisterPass<SmartCoallesceNodesHeuristic>
D ("paheur-SmartCoallesceNodes", "Pool allocate using the smart node merging heuristic ");
static RegisterPass<AllInOneGlobalPoolHeuristic>
E ("paheur-AllInOneGlobalPool", "Pool allocate using the pool library to replace malloc/free");
static RegisterPass<OnlyOverheadHeuristic>
F ("paheur-OnlyOverhead", "Do not pool allocate anything, but induce all overhead from it");
static RegisterPass<NoNodesHeuristic>
G ("paheur-NoNodes", "Pool allocate nothing heuristic");
//
// Create the heuristic analysis group.
//
static RegisterAnalysisGroup<Heuristic>
HeuristicGroup ("Pool Allocation Heuristic");
RegisterAnalysisGroup<Heuristic> Heuristic2(B);
RegisterAnalysisGroup<Heuristic> Heuristic3(C);
RegisterAnalysisGroup<Heuristic> Heuristic4(D);
RegisterAnalysisGroup<Heuristic> Heuristic5(E);
RegisterAnalysisGroup<Heuristic> Heuristic6(F);
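// NoNodesHeuristic is registered as the default implementation of the
// Heuristic analysis group.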
RegisterAnalysisGroup<Heuristic, true> Heuristic7(G);
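//
// Pass identifiers used by the LLVM pass infrastructure.
//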
char Heuristic::ID = 0;
char AllButUnreachableFromMemoryHeuristic::ID = 0;
char CyclicNodesHeuristic::ID = 0;
char SmartCoallesceNodesHeuristic::ID = 0;
char AllInOneGlobalPoolHeuristic::ID = 0;
char OnlyOverheadHeuristic::ID = 0;
char NoNodesHeuristic::ID = 0;