lib/Transforms/Utils/SampleProfileInference.cpp - llvm-project/llvm - Git at Google

 //===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 // This file implements a profile inference algorithm. Given an incomplete and
 // possibly imprecise block counts, the algorithm reconstructs realistic block
 // and edge counts that satisfy flow conservation rules, while minimally modify
 // input block counts.
 //
 //===----------------------------------------------------------------------===//

 #include "llvm/Transforms/Utils/SampleProfileInference.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include <queue>
 #include <set>
 #include <stack>

 using namespace llvm;
 #define DEBUG_TYPE "sample-profile-inference"

 namespace {

 static cl::opt<bool> SampleProfileEvenCountDistribution(
     "sample-profile-even-count-distribution", cl::init(true), cl::Hidden,
     cl::desc("Try to evenly distribute counts when there are multiple equally "
              "likely options."));

 static cl::opt<unsigned> SampleProfileMaxDfsCalls(
     "sample-profile-max-dfs-calls", cl::init(10), cl::Hidden,
     cl::desc("Maximum number of dfs iterations for even count distribution."));

 static cl::opt<unsigned> SampleProfileProfiCostInc(
     "sample-profile-profi-cost-inc", cl::init(10), cl::Hidden,
     cl::desc("A cost of increasing a block's count by one."));

 static cl::opt<unsigned> SampleProfileProfiCostDec(
     "sample-profile-profi-cost-dec", cl::init(20), cl::Hidden,
     cl::desc("A cost of decreasing a block's count by one."));

 static cl::opt<unsigned> SampleProfileProfiCostIncZero(
     "sample-profile-profi-cost-inc-zero", cl::init(11), cl::Hidden,
     cl::desc("A cost of increasing a count of zero-weight block by one."));

 static cl::opt<unsigned> SampleProfileProfiCostIncEntry(
     "sample-profile-profi-cost-inc-entry", cl::init(40), cl::Hidden,
     cl::desc("A cost of increasing the entry block's count by one."));

 static cl::opt<unsigned> SampleProfileProfiCostDecEntry(
     "sample-profile-profi-cost-dec-entry", cl::init(10), cl::Hidden,
     cl::desc("A cost of decreasing the entry block's count by one."));

 /// A value indicating an infinite flow/capacity/weight of a block/edge.
 /// Not using numeric_limits<int64_t>::max(), as the values can be summed up
 /// during the execution.
 static constexpr int64_t INF = ((int64_t)1) << 50;

 /// The minimum-cost maximum flow algorithm.
 ///
 /// The algorithm finds the maximum flow of minimum cost on a given (directed)
 /// network using a modified version of the classical Moore-Bellman-Ford
 /// approach. The algorithm applies a number of augmentation iterations in which
 /// flow is sent along paths of positive capacity from the source to the sink.
 /// The worst-case time complexity of the implementation is O(v(f)*m*n), where
 /// where m is the number of edges, n is the number of vertices, and v(f) is the
 /// value of the maximum flow. However, the observed running time on typical
 /// instances is sub-quadratic, that is, o(n^2).
 ///
 /// The input is a set of edges with specified costs and capacities, and a pair
 /// of nodes (source and sink). The output is the flow along each edge of the
 /// minimum total cost respecting the given edge capacities.
 class MinCostMaxFlow {
 public:
   // Initialize algorithm's data structures for a network of a given size.
   void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) {
     Source = SourceNode;
     Target = SinkNode;

     Nodes = std::vector<Node>(NodeCount);
     Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());
     if (SampleProfileEvenCountDistribution)
       AugmentingEdges =
           std::vector<std::vector<Edge *>>(NodeCount, std::vector<Edge *>());
   }

   // Run the algorithm.
   int64_t run() {
     // Iteratively find an augmentation path/dag in the network and send the
     // flow along its edges
     size_t AugmentationIters = applyFlowAugmentation();

     // Compute the total flow and its cost
     int64_t TotalCost = 0;
     int64_t TotalFlow = 0;
     for (uint64_t Src = 0; Src < Nodes.size(); Src++) {
       for (auto &Edge : Edges[Src]) {
         if (Edge.Flow > 0) {
           TotalCost += Edge.Cost * Edge.Flow;
           if (Src == Source)
             TotalFlow += Edge.Flow;
         }
       }
     }
     LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters
                       << " iterations with " << TotalFlow << " total flow"
                       << " of " << TotalCost << " cost\n");
     (void)TotalFlow;
     (void)AugmentationIters;
     return TotalCost;
   }

   /// Adding an edge to the network with a specified capacity and a cost.
   /// Multiple edges between a pair of nodes are allowed but self-edges
   /// are not supported.
   void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) {
     assert(Capacity > 0 && "adding an edge of zero capacity");
     assert(Src != Dst && "loop edge are not supported");

     Edge SrcEdge;
     SrcEdge.Dst = Dst;
     SrcEdge.Cost = Cost;
     SrcEdge.Capacity = Capacity;
     SrcEdge.Flow = 0;
     SrcEdge.RevEdgeIndex = Edges[Dst].size();

     Edge DstEdge;
     DstEdge.Dst = Src;
     DstEdge.Cost = -Cost;
     DstEdge.Capacity = 0;
     DstEdge.Flow = 0;
     DstEdge.RevEdgeIndex = Edges[Src].size();

     Edges[Src].push_back(SrcEdge);
     Edges[Dst].push_back(DstEdge);
   }

   /// Adding an edge to the network of infinite capacity and a given cost.
   void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) {
     addEdge(Src, Dst, INF, Cost);
   }

   /// Get the total flow from a given source node.
   /// Returns a list of pairs (target node, amount of flow to the target).
   const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const {
     std::vector<std::pair<uint64_t, int64_t>> Flow;
     for (auto &Edge : Edges[Src]) {
       if (Edge.Flow > 0)
         Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow));
     }
     return Flow;
   }

   /// Get the total flow between a pair of nodes.
   int64_t getFlow(uint64_t Src, uint64_t Dst) const {
     int64_t Flow = 0;
     for (auto &Edge : Edges[Src]) {
       if (Edge.Dst == Dst) {
         Flow += Edge.Flow;
       }
     }
     return Flow;
   }

   /// A cost of taking an unlikely jump.
   static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 30;
   /// Minimum BaseDistance for the jump distance values in island joining.
   static constexpr uint64_t MinBaseDistance = 10000;

 private:
   /// Iteratively find an augmentation path/dag in the network and send the
   /// flow along its edges. The method returns the number of applied iterations.
   size_t applyFlowAugmentation() {
     size_t AugmentationIters = 0;
     while (findAugmentingPath()) {
       uint64_t PathCapacity = computeAugmentingPathCapacity();
       while (PathCapacity > 0) {
         bool Progress = false;
         if (SampleProfileEvenCountDistribution) {
           // Identify node/edge candidates for augmentation
           identifyShortestEdges(PathCapacity);

           // Find an augmenting DAG
           auto AugmentingOrder = findAugmentingDAG();

           // Apply the DAG augmentation
           Progress = augmentFlowAlongDAG(AugmentingOrder);
           PathCapacity = computeAugmentingPathCapacity();
         }

         if (!Progress) {
           augmentFlowAlongPath(PathCapacity);
           PathCapacity = 0;
         }

         AugmentationIters++;
       }
     }
     return AugmentationIters;
   }

   /// Compute the capacity of the cannonical augmenting path. If the path is
   /// saturated (that is, no flow can be sent along the path), then return 0.
   uint64_t computeAugmentingPathCapacity() {
     uint64_t PathCapacity = INF;
     uint64_t Now = Target;
     while (Now != Source) {
       uint64_t Pred = Nodes[Now].ParentNode;
       auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];

       assert(Edge.Capacity >= Edge.Flow && "incorrect edge flow");
       uint64_t EdgeCapacity = uint64_t(Edge.Capacity - Edge.Flow);
       PathCapacity = std::min(PathCapacity, EdgeCapacity);

       Now = Pred;
     }
     return PathCapacity;
   }

   /// Check for existence of an augmenting path with a positive capacity.
   bool findAugmentingPath() {
     // Initialize data structures
     for (auto &Node : Nodes) {
       Node.Distance = INF;
       Node.ParentNode = uint64_t(-1);
       Node.ParentEdgeIndex = uint64_t(-1);
       Node.Taken = false;
     }

     std::queue<uint64_t> Queue;
     Queue.push(Source);
     Nodes[Source].Distance = 0;
     Nodes[Source].Taken = true;
     while (!Queue.empty()) {
       uint64_t Src = Queue.front();
       Queue.pop();
       Nodes[Src].Taken = false;
       // Although the residual network contains edges with negative costs
       // (in particular, backward edges), it can be shown that there are no
       // negative-weight cycles and the following two invariants are maintained:
       // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V,
       // where Dist is the length of the shortest path between two nodes. This
       // allows to prune the search-space of the path-finding algorithm using
       // the following early-stop criteria:
       // -- If we find a path with zero-distance from Source to Target, stop the
       //    search, as the path is the shortest since Dist[Source, Target] >= 0;
       // -- If we have Dist[Source, V] > Dist[Source, Target], then do not
       //    process node V, as it is guaranteed _not_ to be on a shortest path
       //    from Source to Target; it follows from inequalities
       //    Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target]
       //                         >= Dist[Source, V]
       if (!SampleProfileEvenCountDistribution && Nodes[Target].Distance == 0)
         break;
       if (Nodes[Src].Distance > Nodes[Target].Distance)
         continue;

       // Process adjacent edges
       for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) {
         auto &Edge = Edges[Src][EdgeIdx];
         if (Edge.Flow < Edge.Capacity) {
           uint64_t Dst = Edge.Dst;
           int64_t NewDistance = Nodes[Src].Distance + Edge.Cost;
           if (Nodes[Dst].Distance > NewDistance) {
             // Update the distance and the parent node/edge
             Nodes[Dst].Distance = NewDistance;
             Nodes[Dst].ParentNode = Src;
             Nodes[Dst].ParentEdgeIndex = EdgeIdx;
             // Add the node to the queue, if it is not there yet
             if (!Nodes[Dst].Taken) {
               Queue.push(Dst);
               Nodes[Dst].Taken = true;
             }
           }
         }
       }
     }

     return Nodes[Target].Distance != INF;
   }

   /// Update the current flow along the augmenting path.
   void augmentFlowAlongPath(uint64_t PathCapacity) {
     assert(PathCapacity > 0 && "found an incorrect augmenting path");
     uint64_t Now = Target;
     while (Now != Source) {
       uint64_t Pred = Nodes[Now].ParentNode;
       auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
       auto &RevEdge = Edges[Now][Edge.RevEdgeIndex];

       Edge.Flow += PathCapacity;
       RevEdge.Flow -= PathCapacity;

       Now = Pred;
     }
   }

   /// Find an Augmenting DAG order using a modified version of DFS in which we
   /// can visit a node multiple times. In the DFS search, when scanning each
   /// edge out of a node, continue search at Edge.Dst endpoint if it has not
   /// been discovered yet and its NumCalls < MaxDfsCalls. The algorithm
   /// runs in O(MaxDfsCalls * |Edges| + |Nodes|) time.
   /// It returns an Augmenting Order (Taken nodes in decreasing Finish time)
   /// that starts with Source and ends with Target.
   std::vector<uint64_t> findAugmentingDAG() {
     // We use a stack based implemenation of DFS to avoid recursion.
     // Defining DFS data structures:
     // A pair (NodeIdx, EdgeIdx) at the top of the Stack denotes that
     //  - we are currently visiting Nodes[NodeIdx] and
     //  - the next edge to scan is Edges[NodeIdx][EdgeIdx]
     typedef std::pair<uint64_t, uint64_t> StackItemType;
     std::stack<StackItemType> Stack;
     std::vector<uint64_t> AugmentingOrder;

     // Phase 0: Initialize Node attributes and Time for DFS run
     for (auto &Node : Nodes) {
       Node.Discovery = 0;
       Node.Finish = 0;
       Node.NumCalls = 0;
       Node.Taken = false;
     }
     uint64_t Time = 0;
     // Mark Target as Taken
     // Taken attribute will be propagated backwards from Target towards Source
     Nodes[Target].Taken = true;

     // Phase 1: Start DFS traversal from Source
     Stack.emplace(Source, 0);
     Nodes[Source].Discovery = ++Time;
     while (!Stack.empty()) {
       auto NodeIdx = Stack.top().first;
       auto EdgeIdx = Stack.top().second;

       // If we haven't scanned all edges out of NodeIdx, continue scanning
       if (EdgeIdx < Edges[NodeIdx].size()) {
         auto &Edge = Edges[NodeIdx][EdgeIdx];
         auto &Dst = Nodes[Edge.Dst];
         Stack.top().second++;

         if (Edge.OnShortestPath) {
           // If we haven't seen Edge.Dst so far, continue DFS search there
           if (Dst.Discovery == 0 && Dst.NumCalls < SampleProfileMaxDfsCalls) {
             Dst.Discovery = ++Time;
             Stack.emplace(Edge.Dst, 0);
             Dst.NumCalls++;
           } else if (Dst.Taken && Dst.Finish != 0) {
             // Else, if Edge.Dst already have a path to Target, so that NodeIdx
             Nodes[NodeIdx].Taken = true;
           }
         }
       } else {
         // If we are done scanning all edge out of NodeIdx
         Stack.pop();
         // If we haven't found a path from NodeIdx to Target, forget about it
         if (!Nodes[NodeIdx].Taken) {
           Nodes[NodeIdx].Discovery = 0;
         } else {
           // If we have found a path from NodeIdx to Target, then finish NodeIdx
           // and propagate Taken flag to DFS parent unless at the Source
           Nodes[NodeIdx].Finish = ++Time;
           // NodeIdx == Source if and only if the stack is empty
           if (NodeIdx != Source) {
             assert(!Stack.empty() && "empty stack while running dfs");
             Nodes[Stack.top().first].Taken = true;
           }
           AugmentingOrder.push_back(NodeIdx);
         }
       }
     }
     // Nodes are collected decreasing Finish time, so the order is reversed
     std::reverse(AugmentingOrder.begin(), AugmentingOrder.end());

     // Phase 2: Extract all forward (DAG) edges and fill in AugmentingEdges
     for (size_t Src : AugmentingOrder) {
       AugmentingEdges[Src].clear();
       for (auto &Edge : Edges[Src]) {
         uint64_t Dst = Edge.Dst;
         if (Edge.OnShortestPath && Nodes[Src].Taken && Nodes[Dst].Taken &&
             Nodes[Dst].Finish < Nodes[Src].Finish) {
           AugmentingEdges[Src].push_back(&Edge);
         }
       }
       assert((Src == Target || !AugmentingEdges[Src].empty()) &&
              "incorrectly constructed augmenting edges");
     }

     return AugmentingOrder;
   }

   /// Update the current flow along the given (acyclic) subgraph specified by
   /// the vertex order, AugmentingOrder. The objective is to send as much flow
   /// as possible while evenly distributing flow among successors of each node.
   /// After the update at least one edge is saturated.
   bool augmentFlowAlongDAG(const std::vector<uint64_t> &AugmentingOrder) {
     // Phase 0: Initialization
     for (uint64_t Src : AugmentingOrder) {
       Nodes[Src].FracFlow = 0;
       Nodes[Src].IntFlow = 0;
       for (auto &Edge : AugmentingEdges[Src]) {
         Edge->AugmentedFlow = 0;
       }
     }

     // Phase 1: Send a unit of fractional flow along the DAG
     uint64_t MaxFlowAmount = INF;
     Nodes[Source].FracFlow = 1.0;
     for (uint64_t Src : AugmentingOrder) {
       assert((Src == Target || Nodes[Src].FracFlow > 0.0) &&
              "incorrectly computed fractional flow");
       // Distribute flow evenly among successors of Src
       uint64_t Degree = AugmentingEdges[Src].size();
       for (auto &Edge : AugmentingEdges[Src]) {
         double EdgeFlow = Nodes[Src].FracFlow / Degree;
         Nodes[Edge->Dst].FracFlow += EdgeFlow;
         if (Edge->Capacity == INF)
           continue;
         uint64_t MaxIntFlow = double(Edge->Capacity - Edge->Flow) / EdgeFlow;
         MaxFlowAmount = std::min(MaxFlowAmount, MaxIntFlow);
       }
     }
     // Stop early if we cannot send any (integral) flow from Source to Target
     if (MaxFlowAmount == 0)
       return false;

     // Phase 2: Send an integral flow of MaxFlowAmount
     Nodes[Source].IntFlow = MaxFlowAmount;
     for (uint64_t Src : AugmentingOrder) {
       if (Src == Target)
         break;
       // Distribute flow evenly among successors of Src, rounding up to make
       // sure all flow is sent
       uint64_t Degree = AugmentingEdges[Src].size();
       // We are guaranteeed that Node[Src].IntFlow <= SuccFlow * Degree
       uint64_t SuccFlow = (Nodes[Src].IntFlow + Degree - 1) / Degree;
       for (auto &Edge : AugmentingEdges[Src]) {
         uint64_t Dst = Edge->Dst;
         uint64_t EdgeFlow = std::min(Nodes[Src].IntFlow, SuccFlow);
         EdgeFlow = std::min(EdgeFlow, uint64_t(Edge->Capacity - Edge->Flow));
         Nodes[Dst].IntFlow += EdgeFlow;
         Nodes[Src].IntFlow -= EdgeFlow;
         Edge->AugmentedFlow += EdgeFlow;
       }
     }
     assert(Nodes[Target].IntFlow <= MaxFlowAmount);
     Nodes[Target].IntFlow = 0;

     // Phase 3: Send excess flow back traversing the nodes backwards.
     // Because of rounding, not all flow can be sent along the edges of Src.
     // Hence, sending the remaining flow back to maintain flow conservation
     for (size_t Idx = AugmentingOrder.size() - 1; Idx > 0; Idx--) {
       uint64_t Src = AugmentingOrder[Idx - 1];
       // Try to send excess flow back along each edge.
       // Make sure we only send back flow we just augmented (AugmentedFlow).
       for (auto &Edge : AugmentingEdges[Src]) {
         uint64_t Dst = Edge->Dst;
         if (Nodes[Dst].IntFlow == 0)
           continue;
         uint64_t EdgeFlow = std::min(Nodes[Dst].IntFlow, Edge->AugmentedFlow);
         Nodes[Dst].IntFlow -= EdgeFlow;
         Nodes[Src].IntFlow += EdgeFlow;
         Edge->AugmentedFlow -= EdgeFlow;
       }
     }

     // Phase 4: Update flow values along all edges
     bool HasSaturatedEdges = false;
     for (uint64_t Src : AugmentingOrder) {
       // Verify that we have sent all the excess flow from the node
       assert(Src == Source || Nodes[Src].IntFlow == 0);
       for (auto &Edge : AugmentingEdges[Src]) {
         assert(uint64_t(Edge->Capacity - Edge->Flow) >= Edge->AugmentedFlow);
         // Update flow values along the edge and its reverse copy
         auto &RevEdge = Edges[Edge->Dst][Edge->RevEdgeIndex];
         Edge->Flow += Edge->AugmentedFlow;
         RevEdge.Flow -= Edge->AugmentedFlow;
         if (Edge->Capacity == Edge->Flow && Edge->AugmentedFlow > 0)
           HasSaturatedEdges = true;
       }
     }

     // The augmentation is successful iff at least one edge becomes saturated
     return HasSaturatedEdges;
   }

   /// Identify candidate (shortest) edges for augmentation.
   void identifyShortestEdges(uint64_t PathCapacity) {
     assert(PathCapacity > 0 && "found an incorrect augmenting DAG");
     // To make sure the augmentation DAG contains only edges with large residual
     // capacity, we prune all edges whose capacity is below a fraction of
     // the capacity of the augmented path.
     // (All edges of the path itself are always in the DAG)
     uint64_t MinCapacity = std::max(PathCapacity / 2, uint64_t(1));

     // Decide which edges are on a shortest path from Source to Target
     for (size_t Src = 0; Src < Nodes.size(); Src++) {
       // An edge cannot be augmenting if the endpoint has large distance
       if (Nodes[Src].Distance > Nodes[Target].Distance)
         continue;

       for (auto &Edge : Edges[Src]) {
         uint64_t Dst = Edge.Dst;
         Edge.OnShortestPath =
             Src != Target && Dst != Source &&
             Nodes[Dst].Distance <= Nodes[Target].Distance &&
             Nodes[Dst].Distance == Nodes[Src].Distance + Edge.Cost &&
             Edge.Capacity > Edge.Flow &&
             uint64_t(Edge.Capacity - Edge.Flow) >= MinCapacity;
       }
     }
   }

   /// A node in a flow network.
   struct Node {
     /// The cost of the cheapest path from the source to the current node.
     int64_t Distance;
     /// The node preceding the current one in the path.
     uint64_t ParentNode;
     /// The index of the edge between ParentNode and the current node.
     uint64_t ParentEdgeIndex;
     /// An indicator of whether the current node is in a queue.
     bool Taken;

     /// Data fields utilized in DAG-augmentation:
     /// Fractional flow.
     double FracFlow;
     /// Integral flow.
     uint64_t IntFlow;
     /// Discovery time.
     uint64_t Discovery;
     /// Finish time.
     uint64_t Finish;
     /// NumCalls.
     uint64_t NumCalls;
   };

   /// An edge in a flow network.
   struct Edge {
     /// The cost of the edge.
     int64_t Cost;
     /// The capacity of the edge.
     int64_t Capacity;
     /// The current flow on the edge.
     int64_t Flow;
     /// The destination node of the edge.
     uint64_t Dst;
     /// The index of the reverse edge between Dst and the current node.
     uint64_t RevEdgeIndex;

     /// Data fields utilized in DAG-augmentation:
     /// Whether the edge is currently on a shortest path from Source to Target.
     bool OnShortestPath;
     /// Extra flow along the edge.
     uint64_t AugmentedFlow;
   };

   /// The set of network nodes.
   std::vector<Node> Nodes;
   /// The set of network edges.
   std::vector<std::vector<Edge>> Edges;
   /// Source node of the flow.
   uint64_t Source;
   /// Target (sink) node of the flow.
   uint64_t Target;
   /// Augmenting edges.
   std::vector<std::vector<Edge *>> AugmentingEdges;
 };

 constexpr int64_t MinCostMaxFlow::AuxCostUnlikely;
 constexpr uint64_t MinCostMaxFlow::MinBaseDistance;

 /// A post-processing adjustment of control flow. It applies two steps by
 /// rerouting some flow and making it more realistic:
 ///
 /// - First, it removes all isolated components ("islands") with a positive flow
 ///   that are unreachable from the entry block. For every such component, we
 ///   find the shortest from the entry to an exit passing through the component,
 ///   and increase the flow by one unit along the path.
 ///
 /// - Second, it identifies all "unknown subgraphs" consisting of basic blocks
 ///   with no sampled counts. Then it rebalnces the flow that goes through such
 ///   a subgraph so that each branch is taken with probability 50%.
 ///   An unknown subgraph is such that for every two nodes u and v:
 ///     - u dominates v and u is not unknown;
 ///     - v post-dominates u; and
 ///     - all inner-nodes of all (u,v)-paths are unknown.
 ///
 class FlowAdjuster {
 public:
   FlowAdjuster(FlowFunction &Func) : Func(Func) {
     assert(Func.Blocks[Func.Entry].isEntry() &&
            "incorrect index of the entry block");
   }

   // Run the post-processing
   void run() {
     /// Adjust the flow to get rid of isolated components.
     joinIsolatedComponents();

     /// Rebalance the flow inside unknown subgraphs.
     rebalanceUnknownSubgraphs();
   }

 private:
   void joinIsolatedComponents() {
     // Find blocks that are reachable from the source
     auto Visited = BitVector(NumBlocks(), false);
     findReachable(Func.Entry, Visited);

     // Iterate over all non-reachable blocks and adjust their weights
     for (uint64_t I = 0; I < NumBlocks(); I++) {
       auto &Block = Func.Blocks[I];
       if (Block.Flow > 0 && !Visited[I]) {
         // Find a path from the entry to an exit passing through the block I
         auto Path = findShortestPath(I);
         // Increase the flow along the path
         assert(Path.size() > 0 && Path[0]->Source == Func.Entry &&
                "incorrectly computed path adjusting control flow");
         Func.Blocks[Func.Entry].Flow += 1;
         for (auto &Jump : Path) {
           Jump->Flow += 1;
           Func.Blocks[Jump->Target].Flow += 1;
           // Update reachability
           findReachable(Jump->Target, Visited);
         }
       }
     }
   }

   /// Run BFS from a given block along the jumps with a positive flow and mark
   /// all reachable blocks.
   void findReachable(uint64_t Src, BitVector &Visited) {
     if (Visited[Src])
       return;
     std::queue<uint64_t> Queue;
     Queue.push(Src);
     Visited[Src] = true;
     while (!Queue.empty()) {
       Src = Queue.front();
       Queue.pop();
       for (auto Jump : Func.Blocks[Src].SuccJumps) {
         uint64_t Dst = Jump->Target;
         if (Jump->Flow > 0 && !Visited[Dst]) {
           Queue.push(Dst);
           Visited[Dst] = true;
         }
       }
     }
   }

   /// Find the shortest path from the entry block to an exit block passing
   /// through a given block.
   std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) {
     // A path from the entry block to BlockIdx
     auto ForwardPath = findShortestPath(Func.Entry, BlockIdx);
     // A path from BlockIdx to an exit block
     auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock);

     // Concatenate the two paths
     std::vector<FlowJump *> Result;
     Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end());
     Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end());
     return Result;
   }

   /// Apply the Dijkstra algorithm to find the shortest path from a given
   /// Source to a given Target block.
   /// If Target == -1, then the path ends at an exit block.
   std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) {
     // Quit early, if possible
     if (Source == Target)
       return std::vector<FlowJump *>();
     if (Func.Blocks[Source].isExit() && Target == AnyExitBlock)
       return std::vector<FlowJump *>();

     // Initialize data structures
     auto Distance = std::vector<int64_t>(NumBlocks(), INF);
     auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr);
     Distance[Source] = 0;
     std::set<std::pair<uint64_t, uint64_t>> Queue;
     Queue.insert(std::make_pair(Distance[Source], Source));

     // Run the Dijkstra algorithm
     while (!Queue.empty()) {
       uint64_t Src = Queue.begin()->second;
       Queue.erase(Queue.begin());
       // If we found a solution, quit early
       if (Src == Target ||
           (Func.Blocks[Src].isExit() && Target == AnyExitBlock))
         break;

       for (auto Jump : Func.Blocks[Src].SuccJumps) {
         uint64_t Dst = Jump->Target;
         int64_t JumpDist = jumpDistance(Jump);
         if (Distance[Dst] > Distance[Src] + JumpDist) {
           Queue.erase(std::make_pair(Distance[Dst], Dst));

           Distance[Dst] = Distance[Src] + JumpDist;
           Parent[Dst] = Jump;

           Queue.insert(std::make_pair(Distance[Dst], Dst));
         }
       }
     }
     // If Target is not provided, find the closest exit block
     if (Target == AnyExitBlock) {
       for (uint64_t I = 0; I < NumBlocks(); I++) {
         if (Func.Blocks[I].isExit() && Parent[I] != nullptr) {
           if (Target == AnyExitBlock || Distance[Target] > Distance[I]) {
             Target = I;
           }
         }
       }
     }
     assert(Parent[Target] != nullptr && "a path does not exist");

     // Extract the constructed path
     std::vector<FlowJump *> Result;
     uint64_t Now = Target;
     while (Now != Source) {
       assert(Now == Parent[Now]->Target && "incorrect parent jump");
       Result.push_back(Parent[Now]);
       Now = Parent[Now]->Source;
     }
     // Reverse the path, since it is extracted from Target to Source
     std::reverse(Result.begin(), Result.end());
     return Result;
   }

   /// A distance of a path for a given jump.
   /// In order to incite the path to use blocks/jumps with large positive flow,
   /// and avoid changing branch probability of outgoing edges drastically,
   /// set the jump distance so as:
   ///   - to minimize the number of unlikely jumps used and subject to that,
   ///   - to minimize the number of Flow == 0 jumps used and subject to that,
   ///   - minimizes total multiplicative Flow increase for the remaining edges.
   /// To capture this objective with integer distances, we round off fractional
   /// parts to a multiple of 1 / BaseDistance.
   int64_t jumpDistance(FlowJump *Jump) const {
     uint64_t BaseDistance =
         std::max(MinCostMaxFlow::MinBaseDistance,
                  std::min(Func.Blocks[Func.Entry].Flow,
                           MinCostMaxFlow::AuxCostUnlikely / NumBlocks()));
     if (Jump->IsUnlikely)
       return MinCostMaxFlow::AuxCostUnlikely;
     if (Jump->Flow > 0)
       return BaseDistance + BaseDistance / Jump->Flow;
     return BaseDistance * NumBlocks();
   };

   uint64_t NumBlocks() const { return Func.Blocks.size(); }

   /// Rebalance unknown subgraphs so that the flow is split evenly across the
   /// outgoing branches of every block of the subgraph. The method iterates over
   /// blocks with known weight and identifies unknown subgraphs rooted at the
   /// blocks. Then it verifies if flow rebalancing is feasible and applies it.
   void rebalanceUnknownSubgraphs() {
     // Try to find unknown subgraphs from each block
     for (uint64_t I = 0; I < Func.Blocks.size(); I++) {
       auto SrcBlock = &Func.Blocks[I];
       // Verify if rebalancing rooted at SrcBlock is feasible
       if (!canRebalanceAtRoot(SrcBlock))
         continue;

       // Find an unknown subgraphs starting at SrcBlock. Along the way,
       // fill in known destinations and intermediate unknown blocks.
       std::vector<FlowBlock *> UnknownBlocks;
       std::vector<FlowBlock *> KnownDstBlocks;
       findUnknownSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks);

       // Verify if rebalancing of the subgraph is feasible. If the search is
       // successful, find the unique destination block (which can be null)
       FlowBlock *DstBlock = nullptr;
       if (!canRebalanceSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks,
                                 DstBlock))
         continue;

       // We cannot rebalance subgraphs containing cycles among unknown blocks
       if (!isAcyclicSubgraph(SrcBlock, DstBlock, UnknownBlocks))
         continue;

       // Rebalance the flow
       rebalanceUnknownSubgraph(SrcBlock, DstBlock, UnknownBlocks);
     }
   }

   /// Verify if rebalancing rooted at a given block is possible.
   bool canRebalanceAtRoot(const FlowBlock *SrcBlock) {
     // Do not attempt to find unknown subgraphs from an unknown or a
     // zero-flow block
     if (SrcBlock->UnknownWeight || SrcBlock->Flow == 0)
       return false;

     // Do not attempt to process subgraphs from a block w/o unknown sucessors
     bool HasUnknownSuccs = false;
     for (auto Jump : SrcBlock->SuccJumps) {
       if (Func.Blocks[Jump->Target].UnknownWeight) {
         HasUnknownSuccs = true;
         break;
       }
     }
     if (!HasUnknownSuccs)
       return false;

     return true;
   }

   /// Find an unknown subgraph starting at block SrcBlock. The method sets
   /// identified destinations, KnownDstBlocks, and intermediate UnknownBlocks.
   void findUnknownSubgraph(const FlowBlock *SrcBlock,
                            std::vector<FlowBlock *> &KnownDstBlocks,
                            std::vector<FlowBlock *> &UnknownBlocks) {
     // Run BFS from SrcBlock and make sure all paths are going through unknown
     // blocks and end at a known DstBlock
     auto Visited = BitVector(NumBlocks(), false);
     std::queue<uint64_t> Queue;

     Queue.push(SrcBlock->Index);
     Visited[SrcBlock->Index] = true;
     while (!Queue.empty()) {
       auto &Block = Func.Blocks[Queue.front()];
       Queue.pop();
       // Process blocks reachable from Block
       for (auto Jump : Block.SuccJumps) {
         // If Jump can be ignored, skip it
         if (ignoreJump(SrcBlock, nullptr, Jump))
           continue;

         uint64_t Dst = Jump->Target;
         // If Dst has been visited, skip Jump
         if (Visited[Dst])
           continue;
         // Process block Dst
         Visited[Dst] = true;
         if (!Func.Blocks[Dst].UnknownWeight) {
           KnownDstBlocks.push_back(&Func.Blocks[Dst]);
         } else {
           Queue.push(Dst);
           UnknownBlocks.push_back(&Func.Blocks[Dst]);
         }
       }
     }
   }

   /// Verify if rebalancing of the subgraph is feasible. If the checks are
   /// successful, set the unique destination block, DstBlock (can be null).
   bool canRebalanceSubgraph(const FlowBlock *SrcBlock,
                             const std::vector<FlowBlock *> &KnownDstBlocks,
                             const std::vector<FlowBlock *> &UnknownBlocks,
                             FlowBlock *&DstBlock) {
     // If the list of unknown blocks is empty, we don't need rebalancing
     if (UnknownBlocks.empty())
       return false;

     // If there are multiple known sinks, we can't rebalance
     if (KnownDstBlocks.size() > 1)
       return false;
     DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front();

     // Verify sinks of the subgraph
     for (auto Block : UnknownBlocks) {
       if (Block->SuccJumps.empty()) {
         // If there are multiple (known and unknown) sinks, we can't rebalance
         if (DstBlock != nullptr)
           return false;
         continue;
       }
       size_t NumIgnoredJumps = 0;
       for (auto Jump : Block->SuccJumps) {
         if (ignoreJump(SrcBlock, DstBlock, Jump))
           NumIgnoredJumps++;
       }
       // If there is a non-sink block in UnknownBlocks with all jumps ignored,
       // then we can't rebalance
       if (NumIgnoredJumps == Block->SuccJumps.size())
         return false;
     }

     return true;
   }

   /// Decide whether the Jump is ignored while processing an unknown subgraphs
   /// rooted at basic block SrcBlock with the destination block, DstBlock.
   bool ignoreJump(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
                   const FlowJump *Jump) {
     // Ignore unlikely jumps with zero flow
     if (Jump->IsUnlikely && Jump->Flow == 0)
       return true;

     auto JumpSource = &Func.Blocks[Jump->Source];
     auto JumpTarget = &Func.Blocks[Jump->Target];

     // Do not ignore jumps coming into DstBlock
     if (DstBlock != nullptr && JumpTarget == DstBlock)
       return false;

     // Ignore jumps out of SrcBlock to known blocks
     if (!JumpTarget->UnknownWeight && JumpSource == SrcBlock)
       return true;

     // Ignore jumps to known blocks with zero flow
     if (!JumpTarget->UnknownWeight && JumpTarget->Flow == 0)
       return true;

     return false;
   }

   /// Verify if the given unknown subgraph is acyclic, and if yes, reorder
   /// UnknownBlocks in the topological order (so that all jumps are "forward").
   bool isAcyclicSubgraph(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
                          std::vector<FlowBlock *> &UnknownBlocks) {
     // Extract local in-degrees in the considered subgraph
     auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0);
     auto fillInDegree = [&](const FlowBlock *Block) {
       for (auto Jump : Block->SuccJumps) {
         if (ignoreJump(SrcBlock, DstBlock, Jump))
           continue;
         LocalInDegree[Jump->Target]++;
       }
     };
     fillInDegree(SrcBlock);
     for (auto Block : UnknownBlocks) {
       fillInDegree(Block);
     }
     // A loop containing SrcBlock
     if (LocalInDegree[SrcBlock->Index] > 0)
       return false;

     std::vector<FlowBlock *> AcyclicOrder;
     std::queue<uint64_t> Queue;
     Queue.push(SrcBlock->Index);
     while (!Queue.empty()) {
       FlowBlock *Block = &Func.Blocks[Queue.front()];
       Queue.pop();
       // Stop propagation once we reach DstBlock, if any
       if (DstBlock != nullptr && Block == DstBlock)
         break;

       // Keep an acyclic order of unknown blocks
       if (Block->UnknownWeight && Block != SrcBlock)
         AcyclicOrder.push_back(Block);

       // Add to the queue all successors with zero local in-degree
       for (auto Jump : Block->SuccJumps) {
         if (ignoreJump(SrcBlock, DstBlock, Jump))
           continue;
         uint64_t Dst = Jump->Target;
         LocalInDegree[Dst]--;
         if (LocalInDegree[Dst] == 0) {
           Queue.push(Dst);
         }
       }
     }

     // If there is a cycle in the subgraph, AcyclicOrder contains only a subset
     // of all blocks
     if (UnknownBlocks.size() != AcyclicOrder.size())
       return false;
     UnknownBlocks = AcyclicOrder;
     return true;
   }

   /// Rebalance a given subgraph rooted at SrcBlock, ending at DstBlock and
   /// having UnknownBlocks intermediate blocks.
   void rebalanceUnknownSubgraph(const FlowBlock *SrcBlock,
                                 const FlowBlock *DstBlock,
                                 const std::vector<FlowBlock *> &UnknownBlocks) {
     assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph");

     // Ditribute flow from the source block
     uint64_t BlockFlow = 0;
     // SrcBlock's flow is the sum of outgoing flows along non-ignored jumps
     for (auto Jump : SrcBlock->SuccJumps) {
       if (ignoreJump(SrcBlock, DstBlock, Jump))
         continue;
       BlockFlow += Jump->Flow;
     }
     rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow);

     // Ditribute flow from the remaining blocks
     for (auto Block : UnknownBlocks) {
       assert(Block->UnknownWeight && "incorrect unknown subgraph");
       uint64_t BlockFlow = 0;
       // Block's flow is the sum of incoming flows
       for (auto Jump : Block->PredJumps) {
         BlockFlow += Jump->Flow;
       }
       Block->Flow = BlockFlow;
       rebalanceBlock(SrcBlock, DstBlock, Block, BlockFlow);
     }
   }

   /// Redistribute flow for a block in a subgraph rooted at SrcBlock,
   /// and ending at DstBlock.
   void rebalanceBlock(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
                       const FlowBlock *Block, uint64_t BlockFlow) {
     // Process all successor jumps and update corresponding flow values
     size_t BlockDegree = 0;
     for (auto Jump : Block->SuccJumps) {
       if (ignoreJump(SrcBlock, DstBlock, Jump))
         continue;
       BlockDegree++;
     }
     // If all successor jumps of the block are ignored, skip it
     if (DstBlock == nullptr && BlockDegree == 0)
       return;
     assert(BlockDegree > 0 && "all outgoing jumps are ignored");

     // Each of the Block's successors gets the following amount of flow.
     // Rounding the value up so that all flow is propagated
     uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree;
     for (auto Jump : Block->SuccJumps) {
       if (ignoreJump(SrcBlock, DstBlock, Jump))
         continue;
       uint64_t Flow = std::min(SuccFlow, BlockFlow);
       Jump->Flow = Flow;
       BlockFlow -= Flow;
     }
     assert(BlockFlow == 0 && "not all flow is propagated");
   }

   /// A constant indicating an arbitrary exit block of a function.
   static constexpr uint64_t AnyExitBlock = uint64_t(-1);

   /// The function.
   FlowFunction &Func;
 };

 /// Initializing flow network for a given function.
 ///
 /// Every block is split into three nodes that are responsible for (i) an
 /// incoming flow, (ii) an outgoing flow, and (iii) penalizing an increase or
 /// reduction of the block weight.
 void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) {
   uint64_t NumBlocks = Func.Blocks.size();
   assert(NumBlocks > 1 && "Too few blocks in a function");
   LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n");

   // Pre-process data: make sure the entry weight is at least 1
   if (Func.Blocks[Func.Entry].Weight == 0) {
     Func.Blocks[Func.Entry].Weight = 1;
   }
   // Introducing dummy source/sink pairs to allow flow circulation.
   // The nodes corresponding to blocks of Func have indicies in the range
   // [0..3 * NumBlocks); the dummy nodes are indexed by the next four values.
   uint64_t S = 3 * NumBlocks;
   uint64_t T = S + 1;
   uint64_t S1 = S + 2;
   uint64_t T1 = S + 3;

   Network.initialize(3 * NumBlocks + 4, S1, T1);

   // Create three nodes for every block of the function
   for (uint64_t B = 0; B < NumBlocks; B++) {
     auto &Block = Func.Blocks[B];
     assert((!Block.UnknownWeight || Block.Weight == 0 || Block.isEntry()) &&
            "non-zero weight of a block w/o weight except for an entry");

     // Split every block into two nodes
     uint64_t Bin = 3 * B;
     uint64_t Bout = 3 * B + 1;
     uint64_t Baux = 3 * B + 2;
     if (Block.Weight > 0) {
       Network.addEdge(S1, Bout, Block.Weight, 0);
       Network.addEdge(Bin, T1, Block.Weight, 0);
     }

     // Edges from S and to T
     assert((!Block.isEntry() || !Block.isExit()) &&
            "a block cannot be an entry and an exit");
     if (Block.isEntry()) {
       Network.addEdge(S, Bin, 0);
     } else if (Block.isExit()) {
       Network.addEdge(Bout, T, 0);
     }

     // An auxiliary node to allow increase/reduction of block counts:
     // We assume that decreasing block counts is more expensive than increasing,
     // and thus, setting separate costs here. In the future we may want to tune
     // the relative costs so as to maximize the quality of generated profiles.
     int64_t AuxCostInc = SampleProfileProfiCostInc;
     int64_t AuxCostDec = SampleProfileProfiCostDec;
     if (Block.UnknownWeight) {
       // Do not penalize changing weights of blocks w/o known profile count
       AuxCostInc = 0;
       AuxCostDec = 0;
     } else {
       // Increasing the count for "cold" blocks with zero initial count is more
       // expensive than for "hot" ones
       if (Block.Weight == 0) {
         AuxCostInc = SampleProfileProfiCostIncZero;
       }
       // Modifying the count of the entry block is expensive
       if (Block.isEntry()) {
         AuxCostInc = SampleProfileProfiCostIncEntry;
         AuxCostDec = SampleProfileProfiCostDecEntry;
       }
     }
     // For blocks with self-edges, do not penalize a reduction of the count,
     // as all of the increase can be attributed to the self-edge
     if (Block.HasSelfEdge) {
       AuxCostDec = 0;
     }

     Network.addEdge(Bin, Baux, AuxCostInc);
     Network.addEdge(Baux, Bout, AuxCostInc);
     if (Block.Weight > 0) {
       Network.addEdge(Bout, Baux, AuxCostDec);
       Network.addEdge(Baux, Bin, AuxCostDec);
     }
   }

   // Creating edges for every jump
   for (auto &Jump : Func.Jumps) {
     uint64_t Src = Jump.Source;
     uint64_t Dst = Jump.Target;
     if (Src != Dst) {
       uint64_t SrcOut = 3 * Src + 1;
       uint64_t DstIn = 3 * Dst;
       uint64_t Cost = Jump.IsUnlikely ? MinCostMaxFlow::AuxCostUnlikely : 0;
       Network.addEdge(SrcOut, DstIn, Cost);
     }
   }

   // Make sure we have a valid flow circulation
   Network.addEdge(T, S, 0);
 }

 /// Extract resulting block and edge counts from the flow network.
 void extractWeights(MinCostMaxFlow &Network, FlowFunction &Func) {
   uint64_t NumBlocks = Func.Blocks.size();

   // Extract resulting block counts
   for (uint64_t Src = 0; Src < NumBlocks; Src++) {
     auto &Block = Func.Blocks[Src];
     uint64_t SrcOut = 3 * Src + 1;
     int64_t Flow = 0;
     for (auto &Adj : Network.getFlow(SrcOut)) {
       uint64_t DstIn = Adj.first;
       int64_t DstFlow = Adj.second;
       bool IsAuxNode = (DstIn < 3 * NumBlocks && DstIn % 3 == 2);
       if (!IsAuxNode || Block.HasSelfEdge) {
         Flow += DstFlow;
       }
     }
     Block.Flow = Flow;
     assert(Flow >= 0 && "negative block flow");
   }

   // Extract resulting jump counts
   for (auto &Jump : Func.Jumps) {
     uint64_t Src = Jump.Source;
     uint64_t Dst = Jump.Target;
     int64_t Flow = 0;
     if (Src != Dst) {
       uint64_t SrcOut = 3 * Src + 1;
       uint64_t DstIn = 3 * Dst;
       Flow = Network.getFlow(SrcOut, DstIn);
     } else {
       uint64_t SrcOut = 3 * Src + 1;
       uint64_t SrcAux = 3 * Src + 2;
       int64_t AuxFlow = Network.getFlow(SrcOut, SrcAux);
       if (AuxFlow > 0)
         Flow = AuxFlow;
     }
     Jump.Flow = Flow;
     assert(Flow >= 0 && "negative jump flow");
   }
 }

 #ifndef NDEBUG
 /// Verify that the computed flow values satisfy flow conservation rules
 void verifyWeights(const FlowFunction &Func) {
   const uint64_t NumBlocks = Func.Blocks.size();
   auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
   auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
   for (auto &Jump : Func.Jumps) {
     InFlow[Jump.Target] += Jump.Flow;
     OutFlow[Jump.Source] += Jump.Flow;
   }

   uint64_t TotalInFlow = 0;
   uint64_t TotalOutFlow = 0;
   for (uint64_t I = 0; I < NumBlocks; I++) {
     auto &Block = Func.Blocks[I];
     if (Block.isEntry()) {
       TotalInFlow += Block.Flow;
       assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
     } else if (Block.isExit()) {
       TotalOutFlow += Block.Flow;
       assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
     } else {
       assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
       assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
     }
   }
   assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow");

   // Verify that there are no isolated flow components
   // One could modify FlowFunction to hold edges indexed by the sources, which
   // will avoid a creation of the object
   auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks);
   for (auto &Jump : Func.Jumps) {
     if (Jump.Flow > 0) {
       PositiveFlowEdges[Jump.Source].push_back(Jump.Target);
     }
   }

   // Run BFS from the source along edges with positive flow
   std::queue<uint64_t> Queue;
   auto Visited = BitVector(NumBlocks, false);
   Queue.push(Func.Entry);
   Visited[Func.Entry] = true;
   while (!Queue.empty()) {
     uint64_t Src = Queue.front();
     Queue.pop();
     for (uint64_t Dst : PositiveFlowEdges[Src]) {
       if (!Visited[Dst]) {
         Queue.push(Dst);
         Visited[Dst] = true;
       }
     }
   }

   // Verify that every block that has a positive flow is reached from the source
   // along edges with a positive flow
   for (uint64_t I = 0; I < NumBlocks; I++) {
     auto &Block = Func.Blocks[I];
     assert((Visited[I] || Block.Flow == 0) && "an isolated flow component");
   }
 }
 #endif

 } // end of anonymous namespace

 /// Apply the profile inference algorithm for a given flow function
 void llvm::applyFlowInference(FlowFunction &Func) {
   // Create and apply an inference network model
   auto InferenceNetwork = MinCostMaxFlow();
   initializeNetwork(InferenceNetwork, Func);
   InferenceNetwork.run();

   // Extract flow values for every block and every edge
   extractWeights(InferenceNetwork, Func);

   // Post-processing adjustments to the flow
   auto Adjuster = FlowAdjuster(Func);
   Adjuster.run();

 #ifndef NDEBUG
   // Verify the result
   verifyWeights(Func);
 #endif
 }