blob: 4121c7e28a0284e2d729781a2008e65e32e2a812 [file] [log] [blame]
//===- bolt/Passes/TailDuplication.h ----------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass founds cases when BBs have layout:
// #BB0:
// <body>
// jmp #BB2
// ....
// #BB1
// <body>
// #BB2:
// <body>
//
// And duplicates #BB2 and puts it after #BB0:
// #BB0:
// <body>
// #BB2:
// <body>
// ....
// #BB1
// <body>
// #BB2:
// <body>
//
// The advantage is getting rid of an unconditional branch and hopefully to
// improve i-cache performance by reducing fragmentation The disadvantage is
// that if there is too much code duplication, we may end up evicting hot cache
// lines and causing the opposite effect, hurting i-cache performance This needs
// to be well balanced to achieve the optimal effect
//
//===----------------------------------------------------------------------===//
#ifndef BOLT_PASSES_TAILDUPLICATION_H
#define BOLT_PASSES_TAILDUPLICATION_H
#include "bolt/Passes/BinaryPasses.h"
namespace llvm {
namespace bolt {
/// Pass for duplicating blocks that would require a jump.
class TailDuplication : public BinaryFunctionPass {
/// Record how many possible tail duplications there can be.
uint64_t ModifiedFunctions = 0;
/// The number of duplicated basic blocks.
uint64_t DuplicatedBlockCount = 0;
/// The size (in bytes) of duplicated basic blocks.
uint64_t DuplicatedByteCount = 0;
/// Record how many times these duplications would get used.
uint64_t DuplicationsDynamicCount = 0;
/// Record the execution count of all blocks.
uint64_t AllDynamicCount = 0;
/// Record the number of instructions deleted because of propagation
uint64_t StaticInstructionDeletionCount = 0;
/// Record the number of instructions deleted because of propagation
uint64_t DynamicInstructionDeletionCount = 0;
/// Sets Regs with the caller saved registers
void getCallerSavedRegs(const MCInst &Inst, BitVector &Regs,
BinaryContext &BC) const;
/// Returns true if Reg is possibly overwritten by Inst
bool regIsPossiblyOverwritten(const MCInst &Inst, unsigned Reg,
BinaryContext &BC) const;
/// Returns true if Reg is definitely overwritten by Inst
bool regIsDefinitelyOverwritten(const MCInst &Inst, unsigned Reg,
BinaryContext &BC) const;
/// Returns true if Reg is used by Inst
bool regIsUsed(const MCInst &Inst, unsigned Reg, BinaryContext &BC) const;
/// Returns true if Reg is overwritten before its used by StartBB's sucessors
bool isOverwrittenBeforeUsed(BinaryBasicBlock &StartBB, unsigned Reg) const;
/// Constant and Copy Propagate for the block formed by OriginalBB and
/// BlocksToPropagate
void
constantAndCopyPropagate(BinaryBasicBlock &OriginalBB,
std::vector<BinaryBasicBlock *> &BlocksToPropagate);
/// True if Tail is in the same cache line as BB (approximately)
bool isInCacheLine(const BinaryBasicBlock &BB,
const BinaryBasicBlock &Tail) const;
/// Duplicates BlocksToDuplicate and places them after BB.
std::vector<BinaryBasicBlock *> duplicateBlocks(
BinaryBasicBlock &BB,
const std::vector<BinaryBasicBlock *> &BlocksToDuplicate) const;
/// Decide whether the tail basic blocks should be duplicated after BB.
bool shouldDuplicate(BinaryBasicBlock *BB, BinaryBasicBlock *Tail) const;
/// Compute the cache score for a jump (Src, Dst) with frequency Count.
/// The value is in the range [0..1] and quantifies how "cache-friendly"
/// the jump is. The score is close to 1 for "short" forward jumps and
/// it is 0 for "long" jumps exceeding a specified threshold; between the
/// bounds, the value decreases linearly. For backward jumps, the value is
/// scaled by a specified factor.
double cacheScore(uint64_t SrcAddr, uint64_t SrcSize, uint64_t DstAddr,
uint64_t DstSize, uint64_t Count) const;
/// Decide whether the cache score has been improved after duplication.
bool cacheScoreImproved(const MCCodeEmitter *Emitter, BinaryFunction &BF,
BinaryBasicBlock *Pred, BinaryBasicBlock *Tail) const;
/// A moderate strategy for tail duplication.
/// Returns a vector of BinaryBasicBlock to copy after BB. If it's empty,
/// nothing should be duplicated.
std::vector<BinaryBasicBlock *>
moderateDuplicate(BinaryBasicBlock &BB, BinaryBasicBlock &Tail) const;
/// An aggressive strategy for tail duplication.
std::vector<BinaryBasicBlock *>
aggressiveDuplicate(BinaryBasicBlock &BB, BinaryBasicBlock &Tail) const;
/// A cache-aware strategy for tail duplication.
std::vector<BinaryBasicBlock *> cacheDuplicate(const MCCodeEmitter *Emitter,
BinaryFunction &BF,
BinaryBasicBlock *BB,
BinaryBasicBlock *Tail) const;
void runOnFunction(BinaryFunction &Function);
public:
enum DuplicationMode : char {
TD_NONE = 0,
TD_AGGRESSIVE,
TD_MODERATE,
TD_CACHE
};
explicit TailDuplication() : BinaryFunctionPass(false) {}
const char *getName() const override { return "tail duplication"; }
void runOnFunctions(BinaryContext &BC) override;
};
} // namespace bolt
} // namespace llvm
#endif