blob: b3d372d1c90ab83ad10d5064de0c618a8baab460 [file] [log] [blame]
//===- bolt/Passes/ReorderAlgorithm.h - Basic block reorderng ---*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Interface to different basic block reordering algorithms.
//
//===----------------------------------------------------------------------===//
#ifndef BOLT_PASSES_REORDER_ALGORITHM_H
#define BOLT_PASSES_REORDER_ALGORITHM_H
#include "bolt/Core/BinaryFunction.h"
#include <memory>
#include <unordered_map>
#include <vector>
namespace llvm {
class raw_ostream;
namespace bolt {
/// Objects of this class implement various basic block clustering algorithms.
/// Basic block clusters are chains of basic blocks that should be laid out
/// in this order to maximize performace. These algorithms group basic blocks
/// into clusters using execution profile data and various heuristics.
class ClusterAlgorithm {
public:
using ClusterTy = std::vector<BinaryBasicBlock *>;
std::vector<ClusterTy> Clusters;
std::vector<std::unordered_map<uint32_t, uint64_t>> ClusterEdges;
std::vector<double> AvgFreq;
/// Group the basic blocks in the given function into clusters stored in the
/// Clusters vector. Also encode relative weights between two clusters in
/// the ClusterEdges vector if requested. This vector is indexed by
/// the clusters indices in the Clusters vector.
virtual void clusterBasicBlocks(BinaryFunction &BF,
bool ComputeEdges = false) = 0;
/// Compute for each cluster its averagae execution frequency, that is
/// the sum of average frequencies of its blocks (execution count / # instrs).
/// The average frequencies are stored in the AvgFreq vector, index by the
/// cluster indices in the Clusters vector.
void computeClusterAverageFrequency(const BinaryContext &BC);
/// Clear clusters and related info.
virtual void reset();
void printClusters() const;
virtual ~ClusterAlgorithm() {}
};
/// Base class for a greedy clustering algorithm that selects edges in order
/// based on some heuristic and uses them to join basic blocks into clusters.
class GreedyClusterAlgorithm : public ClusterAlgorithm {
protected:
// Represents an edge between two basic blocks, with source, destination, and
// profile count.
struct EdgeTy {
const BinaryBasicBlock *Src;
const BinaryBasicBlock *Dst;
uint64_t Count;
EdgeTy(const BinaryBasicBlock *Src, const BinaryBasicBlock *Dst,
uint64_t Count)
: Src(Src), Dst(Dst), Count(Count) {}
void print(raw_ostream &OS) const;
};
struct EdgeHash {
size_t operator()(const EdgeTy &E) const;
};
struct EdgeEqual {
bool operator()(const EdgeTy &A, const EdgeTy &B) const;
};
// Virtual methods that allow custom specialization of the heuristic used by
// the algorithm to select edges.
virtual void initQueue(std::vector<EdgeTy> &Queue,
const BinaryFunction &BF) = 0;
virtual void adjustQueue(std::vector<EdgeTy> &Queue,
const BinaryFunction &BF) = 0;
virtual bool areClustersCompatible(const ClusterTy &Front,
const ClusterTy &Back,
const EdgeTy &E) const = 0;
// Map from basic block to owning cluster index.
using BBToClusterMapTy =
std::unordered_map<const BinaryBasicBlock *, unsigned>;
BBToClusterMapTy BBToClusterMap;
public:
void clusterBasicBlocks(BinaryFunction &BF,
bool ComputeEdges = false) override;
void reset() override;
};
/// This clustering algorithm is based on a greedy heuristic suggested by
/// Pettis and Hansen (PLDI '90).
class PHGreedyClusterAlgorithm : public GreedyClusterAlgorithm {
protected:
void initQueue(std::vector<EdgeTy> &Queue, const BinaryFunction &BF) override;
void adjustQueue(std::vector<EdgeTy> &Queue,
const BinaryFunction &BF) override;
bool areClustersCompatible(const ClusterTy &Front, const ClusterTy &Back,
const EdgeTy &E) const override;
};
/// This clustering algorithm is based on a greedy heuristic that is a
/// modification of the heuristic suggested by Pettis (PLDI '90). It is
/// geared towards minimizing branches.
class MinBranchGreedyClusterAlgorithm : public GreedyClusterAlgorithm {
private:
// Map from an edge to its weight which is used by the algorithm to sort the
// edges.
std::unordered_map<EdgeTy, int64_t, EdgeHash, EdgeEqual> Weight;
// The weight of an edge is calculated as the win in branches if we choose
// to layout this edge as a fall-through. For example, consider the edges
// A -> B with execution count 500,
// A -> C with execution count 100, and
// D -> B with execution count 150
// wher B, C are the only successors of A and A, D are thr only predessecors
// of B. Then if we choose to layout edge A -> B as a fallthrough, the win in
// branches would be 500 - 100 - 150 = 250. That is the weight of edge A->B.
int64_t calculateWeight(const EdgeTy &E, const BinaryFunction &BF) const;
protected:
void initQueue(std::vector<EdgeTy> &Queue, const BinaryFunction &BF) override;
void adjustQueue(std::vector<EdgeTy> &Queue,
const BinaryFunction &BF) override;
bool areClustersCompatible(const ClusterTy &Front, const ClusterTy &Back,
const EdgeTy &E) const override;
public:
void reset() override;
};
/// Objects of this class implement various basic block reordering alogrithms.
/// Most of these algorithms depend on a clustering alogrithm.
/// Here we have 3 conflicting goals as to how to layout clusters. If we want
/// to minimize jump offsets, we should put clusters with heavy inter-cluster
/// dependence as close as possible. If we want to maximize the probability
/// that all inter-cluster edges are predicted as not-taken, we should enforce
/// a topological order to make targets appear after sources, creating forward
/// branches. If we want to separate hot from cold blocks to maximize the
/// probability that unfrequently executed code doesn't pollute the cache, we
/// should put clusters in descending order of hotness.
class ReorderAlgorithm {
protected:
std::unique_ptr<ClusterAlgorithm> CAlgo;
public:
ReorderAlgorithm() {}
explicit ReorderAlgorithm(std::unique_ptr<ClusterAlgorithm> CAlgo)
: CAlgo(std::move(CAlgo)) {}
using BasicBlockOrder = BinaryFunction::BasicBlockOrderType;
/// Reorder the basic blocks of the given function and store the new order in
/// the new Clusters vector.
virtual void reorderBasicBlocks(BinaryFunction &BF,
BasicBlockOrder &Order) const = 0;
void setClusterAlgorithm(ClusterAlgorithm *CAlgo) {
this->CAlgo.reset(CAlgo);
}
virtual ~ReorderAlgorithm() {}
};
/// Dynamic programming implementation for the TSP, applied to BB layout. Find
/// the optimal way to maximize weight during a path traversing all BBs. In
/// this way, we will convert the hottest branches into fall-throughs.
///
/// Uses exponential amount of memory on the number of basic blocks and should
/// only be used for small functions.
class TSPReorderAlgorithm : public ReorderAlgorithm {
public:
void reorderBasicBlocks(BinaryFunction &BF,
BasicBlockOrder &Order) const override;
};
/// Simple algorithm that groups basic blocks into clusters and then
/// lays them out cluster after cluster.
class OptimizeReorderAlgorithm : public ReorderAlgorithm {
public:
explicit OptimizeReorderAlgorithm(std::unique_ptr<ClusterAlgorithm> CAlgo)
: ReorderAlgorithm(std::move(CAlgo)) {}
void reorderBasicBlocks(BinaryFunction &BF,
BasicBlockOrder &Order) const override;
};
/// This reorder algorithm tries to ensure that all inter-cluster edges are
/// predicted as not-taken, by enforcing a topological order to make
/// targets appear after sources, creating forward branches.
class OptimizeBranchReorderAlgorithm : public ReorderAlgorithm {
public:
explicit OptimizeBranchReorderAlgorithm(
std::unique_ptr<ClusterAlgorithm> CAlgo)
: ReorderAlgorithm(std::move(CAlgo)) {}
void reorderBasicBlocks(BinaryFunction &BF,
BasicBlockOrder &Order) const override;
};
/// This reorder tries to separate hot from cold blocks to maximize the
/// probability that unfrequently executed code doesn't pollute the cache, by
/// putting clusters in descending order of hotness.
class OptimizeCacheReorderAlgorithm : public ReorderAlgorithm {
public:
explicit OptimizeCacheReorderAlgorithm(
std::unique_ptr<ClusterAlgorithm> CAlgo)
: ReorderAlgorithm(std::move(CAlgo)) {}
void reorderBasicBlocks(BinaryFunction &BF,
BasicBlockOrder &Order) const override;
};
/// A new reordering algorithm for basic blocks, ext-tsp
class ExtTSPReorderAlgorithm : public ReorderAlgorithm {
public:
void reorderBasicBlocks(BinaryFunction &BF,
BasicBlockOrder &Order) const override;
};
/// Toy example that simply reverses the original basic block order.
class ReverseReorderAlgorithm : public ReorderAlgorithm {
public:
void reorderBasicBlocks(BinaryFunction &BF,
BasicBlockOrder &Order) const override;
};
/// Create clusters as usual and place them in random order.
class RandomClusterReorderAlgorithm : public ReorderAlgorithm {
public:
explicit RandomClusterReorderAlgorithm(
std::unique_ptr<ClusterAlgorithm> CAlgo)
: ReorderAlgorithm(std::move(CAlgo)) {}
void reorderBasicBlocks(BinaryFunction &BF,
BasicBlockOrder &Order) const override;
};
} // namespace bolt
} // namespace llvm
#endif