bolt/lib/Passes/Inliner.cpp - llvm-project - Git at Google

 //===- bolt/Passes/Inliner.cpp - Inlining pass for low-level binary IR ----===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 // This file implements the Inliner class used for inlining binary functions.
 //
 // The current inliner has a limited callee support
 // (see Inliner::getInliningInfo() for the most up-to-date details):
 //
 //  * No exception handling
 //  * No jump tables
 //  * Single entry point
 //  * CFI update not supported - breaks unwinding
 //  * Regular Call Sites:
 //    - only leaf functions (or callees with only tail calls)
 //      * no invokes (they can't be tail calls)
 //    - no direct use of %rsp
 //  * Tail Call Sites:
 //    - since the stack is unmodified, the regular call limitations are lifted
 //
 //===----------------------------------------------------------------------===//

 #include "bolt/Passes/Inliner.h"
 #include "bolt/Core/MCPlus.h"
 #include "llvm/Support/CommandLine.h"
 #include <map>

 #define DEBUG_TYPE "bolt-inliner"

 using namespace llvm;

 namespace opts {

 extern cl::OptionCategory BoltOptCategory;

 static cl::opt<bool>
     AdjustProfile("inline-ap",
                   cl::desc("adjust function profile after inlining"),
                   cl::cat(BoltOptCategory));

 static cl::list<std::string>
 ForceInlineFunctions("force-inline",
   cl::CommaSeparated,
   cl::desc("list of functions to always consider for inlining"),
   cl::value_desc("func1,func2,func3,..."),
   cl::Hidden,
   cl::cat(BoltOptCategory));

 static cl::opt<bool> InlineAll("inline-all", cl::desc("inline all functions"),
                                cl::cat(BoltOptCategory));

 static cl::opt<bool> InlineIgnoreLeafCFI(
     "inline-ignore-leaf-cfi",
     cl::desc("inline leaf functions with CFI programs (can break unwinding)"),
     cl::init(true), cl::ReallyHidden, cl::cat(BoltOptCategory));

 static cl::opt<bool> InlineIgnoreCFI(
     "inline-ignore-cfi",
     cl::desc(
         "inline functions with CFI programs (can break exception handling)"),
     cl::ReallyHidden, cl::cat(BoltOptCategory));

 static cl::opt<unsigned>
     InlineLimit("inline-limit",
                 cl::desc("maximum number of call sites to inline"), cl::init(0),
                 cl::Hidden, cl::cat(BoltOptCategory));

 static cl::opt<unsigned>
     InlineMaxIters("inline-max-iters",
                    cl::desc("maximum number of inline iterations"), cl::init(3),
                    cl::Hidden, cl::cat(BoltOptCategory));

 static cl::opt<bool> InlineSmallFunctions(
     "inline-small-functions",
     cl::desc("inline functions if increase in size is less than defined by "
              "-inline-small-functions-bytes"),
     cl::cat(BoltOptCategory));

 static cl::opt<unsigned> InlineSmallFunctionsBytes(
     "inline-small-functions-bytes",
     cl::desc("max number of bytes for the function to be considered small for "
              "inlining purposes"),
     cl::init(4), cl::Hidden, cl::cat(BoltOptCategory));

 static cl::opt<bool> NoInline(
     "no-inline",
     cl::desc("disable all inlining (overrides other inlining options)"),
     cl::cat(BoltOptCategory));

 /// This function returns true if any of inlining options are specified and the
 /// inlining pass should be executed. Whenever a new inlining option is added,
 /// this function should reflect the change.
 bool inliningEnabled() {
   return !NoInline &&
          (InlineAll || InlineSmallFunctions || !ForceInlineFunctions.empty());
 }

 bool mustConsider(const llvm::bolt::BinaryFunction &Function) {
   for (std::string &Name : opts::ForceInlineFunctions)
     if (Function.hasName(Name))
       return true;
   return false;
 }

 void syncOptions() {
   if (opts::InlineIgnoreCFI)
     opts::InlineIgnoreLeafCFI = true;

   if (opts::InlineAll)
     opts::InlineSmallFunctions = true;
 }

 } // namespace opts

 namespace llvm {
 namespace bolt {

 uint64_t Inliner::SizeOfCallInst;
 uint64_t Inliner::SizeOfTailCallInst;

 uint64_t Inliner::getSizeOfCallInst(const BinaryContext &BC) {
   if (SizeOfCallInst)
     return SizeOfCallInst;

   MCInst Inst;
   BC.MIB->createCall(Inst, BC.Ctx->createNamedTempSymbol(), BC.Ctx.get());
   SizeOfCallInst = BC.computeInstructionSize(Inst);

   return SizeOfCallInst;
 }

 uint64_t Inliner::getSizeOfTailCallInst(const BinaryContext &BC) {
   if (SizeOfTailCallInst)
     return SizeOfTailCallInst;

   MCInst Inst;
   BC.MIB->createTailCall(Inst, BC.Ctx->createNamedTempSymbol(), BC.Ctx.get());
   SizeOfTailCallInst = BC.computeInstructionSize(Inst);

   return SizeOfTailCallInst;
 }

 InliningInfo getInliningInfo(const BinaryFunction &BF) {
   const BinaryContext &BC = BF.getBinaryContext();
   bool DirectSP = false;
   bool HasCFI = false;
   bool IsLeaf = true;

   // Perform necessary checks unless the option overrides it.
   if (!opts::mustConsider(BF)) {
     if (BF.hasSDTMarker())
       return INL_NONE;

     if (BF.hasEHRanges())
       return INL_NONE;

     if (BF.isMultiEntry())
       return INL_NONE;

     if (BF.hasJumpTables())
       return INL_NONE;

     const MCPhysReg SPReg = BC.MIB->getStackPointer();
     for (const BinaryBasicBlock &BB : BF) {
       for (const MCInst &Inst : BB) {
         // Tail calls are marked as implicitly using the stack pointer and they
         // could be inlined.
         if (BC.MIB->isTailCall(Inst))
           break;

         if (BC.MIB->isCFI(Inst)) {
           HasCFI = true;
           continue;
         }

         if (BC.MIB->isCall(Inst))
           IsLeaf = false;

         // Push/pop instructions are straightforward to handle.
         if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
           continue;

         DirectSP |= BC.MIB->hasDefOfPhysReg(Inst, SPReg) ||
                     BC.MIB->hasUseOfPhysReg(Inst, SPReg);
       }
     }
   }

   if (HasCFI) {
     if (!opts::InlineIgnoreLeafCFI)
       return INL_NONE;

     if (!IsLeaf && !opts::InlineIgnoreCFI)
       return INL_NONE;
   }

   InliningInfo Info(DirectSP ? INL_TAILCALL : INL_ANY);

   size_t Size = BF.estimateSize();

   Info.SizeAfterInlining = Size;
   Info.SizeAfterTailCallInlining = Size;

   // Handle special case of the known size reduction.
   if (BF.size() == 1) {
     // For a regular call the last return instruction could be removed
     // (or converted to a branch).
     const MCInst *LastInst = BF.back().getLastNonPseudoInstr();
     if (LastInst && BC.MIB->isReturn(*LastInst) &&
         !BC.MIB->isTailCall(*LastInst)) {
       const uint64_t RetInstSize = BC.computeInstructionSize(*LastInst);
       assert(Size >= RetInstSize);
       Info.SizeAfterInlining -= RetInstSize;
     }
   }

   return Info;
 }

 void Inliner::findInliningCandidates(BinaryContext &BC) {
   for (const auto &BFI : BC.getBinaryFunctions()) {
     const BinaryFunction &Function = BFI.second;
     if (!shouldOptimize(Function))
       continue;
     const InliningInfo InlInfo = getInliningInfo(Function);
     if (InlInfo.Type != INL_NONE)
       InliningCandidates[&Function] = InlInfo;
   }
 }

 std::pair<BinaryBasicBlock *, BinaryBasicBlock::iterator>
 Inliner::inlineCall(BinaryBasicBlock &CallerBB,
                     BinaryBasicBlock::iterator CallInst,
                     const BinaryFunction &Callee) {
   BinaryFunction &CallerFunction = *CallerBB.getFunction();
   BinaryContext &BC = CallerFunction.getBinaryContext();
   auto &MIB = *BC.MIB;

   assert(MIB.isCall(*CallInst) && "can only inline a call or a tail call");
   assert(!Callee.isMultiEntry() &&
          "cannot inline function with multiple entries");
   assert(!Callee.hasJumpTables() &&
          "cannot inline function with jump table(s)");

   // Get information about the call site.
   const bool CSIsInvoke = BC.MIB->isInvoke(*CallInst);
   const bool CSIsTailCall = BC.MIB->isTailCall(*CallInst);
   const int64_t CSGNUArgsSize = BC.MIB->getGnuArgsSize(*CallInst);
   const std::optional<MCPlus::MCLandingPad> CSEHInfo =
       BC.MIB->getEHInfo(*CallInst);

   // Split basic block at the call site if there will be more incoming edges
   // coming from the callee.
   BinaryBasicBlock *FirstInlinedBB = &CallerBB;
   if (Callee.front().pred_size() && CallInst != CallerBB.begin()) {
     FirstInlinedBB = CallerBB.splitAt(CallInst);
     CallInst = FirstInlinedBB->begin();
   }

   // Split basic block after the call instruction unless the callee is trivial
   // (i.e. consists of a single basic block). If necessary, obtain a basic block
   // for return instructions in the callee to redirect to.
   BinaryBasicBlock *NextBB = nullptr;
   if (Callee.size() > 1) {
     if (std::next(CallInst) != FirstInlinedBB->end())
       NextBB = FirstInlinedBB->splitAt(std::next(CallInst));
     else
       NextBB = FirstInlinedBB->getSuccessor();
   }
   if (NextBB)
     FirstInlinedBB->removeSuccessor(NextBB);

   // Remove the call instruction.
   auto InsertII = FirstInlinedBB->eraseInstruction(CallInst);

   double ProfileRatio = 0;
   if (uint64_t CalleeExecCount = Callee.getKnownExecutionCount())
     ProfileRatio =
         (double)FirstInlinedBB->getKnownExecutionCount() / CalleeExecCount;

   // Save execution count of the first block as we don't want it to change
   // later due to profile adjustment rounding errors.
   const uint64_t FirstInlinedBBCount = FirstInlinedBB->getKnownExecutionCount();

   // Copy basic blocks and maintain a map from their origin.
   std::unordered_map<const BinaryBasicBlock *, BinaryBasicBlock *> InlinedBBMap;
   InlinedBBMap[&Callee.front()] = FirstInlinedBB;
   for (const BinaryBasicBlock &BB : llvm::drop_begin(Callee)) {
     BinaryBasicBlock *InlinedBB = CallerFunction.addBasicBlock();
     InlinedBBMap[&BB] = InlinedBB;
     InlinedBB->setCFIState(FirstInlinedBB->getCFIState());
     if (Callee.hasValidProfile())
       InlinedBB->setExecutionCount(BB.getKnownExecutionCount());
     else
       InlinedBB->setExecutionCount(FirstInlinedBBCount);
   }

   // Copy over instructions and edges.
   for (const BinaryBasicBlock &BB : Callee) {
     BinaryBasicBlock *InlinedBB = InlinedBBMap[&BB];

     if (InlinedBB != FirstInlinedBB)
       InsertII = InlinedBB->begin();

     // Copy over instructions making any necessary mods.
     for (MCInst Inst : BB) {
       if (MIB.isPseudo(Inst))
         continue;

       MIB.stripAnnotations(Inst, /*KeepTC=*/BC.isX86());

       // Fix branch target. Strictly speaking, we don't have to do this as
       // targets of direct branches will be fixed later and don't matter
       // in the CFG state. However, disassembly may look misleading, and
       // hence we do the fixing.
       if (MIB.isBranch(Inst)) {
         assert(!MIB.isIndirectBranch(Inst) &&
                "unexpected indirect branch in callee");
         const BinaryBasicBlock *TargetBB =
             Callee.getBasicBlockForLabel(MIB.getTargetSymbol(Inst));
         assert(TargetBB && "cannot find target block in callee");
         MIB.replaceBranchTarget(Inst, InlinedBBMap[TargetBB]->getLabel(),
                                 BC.Ctx.get());
       }

       if (CSIsTailCall || (!MIB.isCall(Inst) && !MIB.isReturn(Inst))) {
         InsertII =
             std::next(InlinedBB->insertInstruction(InsertII, std::move(Inst)));
         continue;
       }

       // Handle special instructions for a non-tail call site.
       if (!MIB.isCall(Inst)) {
         // Returns are removed.
         break;
       }

       MIB.convertTailCallToCall(Inst);

       // Propagate EH-related info to call instructions.
       if (CSIsInvoke) {
         MIB.addEHInfo(Inst, *CSEHInfo);
         if (CSGNUArgsSize >= 0)
           MIB.addGnuArgsSize(Inst, CSGNUArgsSize);
       }

       InsertII =
           std::next(InlinedBB->insertInstruction(InsertII, std::move(Inst)));
     }

     // Add CFG edges to the basic blocks of the inlined instance.
     std::vector<BinaryBasicBlock *> Successors(BB.succ_size());
     llvm::transform(BB.successors(), Successors.begin(),
                     [&InlinedBBMap](const BinaryBasicBlock *BB) {
                       return InlinedBBMap.at(BB);
                     });

     if (CallerFunction.hasValidProfile() && Callee.hasValidProfile())
       InlinedBB->addSuccessors(Successors.begin(), Successors.end(),
                                BB.branch_info_begin(), BB.branch_info_end());
     else
       InlinedBB->addSuccessors(Successors.begin(), Successors.end());

     if (!CSIsTailCall && BB.succ_size() == 0 && NextBB) {
       // Either it's a return block or the last instruction never returns.
       InlinedBB->addSuccessor(NextBB, InlinedBB->getExecutionCount());
     }

     // Scale profiling info for blocks and edges after inlining.
     if (CallerFunction.hasValidProfile() && Callee.size() > 1) {
       if (opts::AdjustProfile)
         InlinedBB->adjustExecutionCount(ProfileRatio);
       else
         InlinedBB->setExecutionCount(InlinedBB->getKnownExecutionCount() *
                                      ProfileRatio);
     }
   }

   // Restore the original execution count of the first inlined basic block.
   FirstInlinedBB->setExecutionCount(FirstInlinedBBCount);

   CallerFunction.recomputeLandingPads();

   if (NextBB)
     return std::make_pair(NextBB, NextBB->begin());

   if (Callee.size() == 1)
     return std::make_pair(FirstInlinedBB, InsertII);

   return std::make_pair(FirstInlinedBB, FirstInlinedBB->end());
 }

 bool Inliner::inlineCallsInFunction(BinaryFunction &Function) {
   BinaryContext &BC = Function.getBinaryContext();
   std::vector<BinaryBasicBlock *> Blocks(Function.getLayout().block_begin(),
                                          Function.getLayout().block_end());
   llvm::sort(
       Blocks, [](const BinaryBasicBlock *BB1, const BinaryBasicBlock *BB2) {
         return BB1->getKnownExecutionCount() > BB2->getKnownExecutionCount();
       });

   bool DidInlining = false;
   for (BinaryBasicBlock *BB : Blocks) {
     for (auto InstIt = BB->begin(); InstIt != BB->end();) {
       MCInst &Inst = *InstIt;
       if (!BC.MIB->isCall(Inst) || MCPlus::getNumPrimeOperands(Inst) != 1 ||
           !Inst.getOperand(0).isExpr()) {
         ++InstIt;
         continue;
       }

       const MCSymbol *TargetSymbol = BC.MIB->getTargetSymbol(Inst);
       assert(TargetSymbol && "target symbol expected for direct call");

       // Don't inline calls to a secondary entry point in a target function.
       uint64_t EntryID = 0;
       BinaryFunction *TargetFunction =
           BC.getFunctionForSymbol(TargetSymbol, &EntryID);
       if (!TargetFunction || EntryID != 0) {
         ++InstIt;
         continue;
       }

       // Don't do recursive inlining.
       if (TargetFunction == &Function) {
         ++InstIt;
         continue;
       }

       auto IInfo = InliningCandidates.find(TargetFunction);
       if (IInfo == InliningCandidates.end()) {
         ++InstIt;
         continue;
       }

       const bool IsTailCall = BC.MIB->isTailCall(Inst);
       if (!IsTailCall && IInfo->second.Type == INL_TAILCALL) {
         ++InstIt;
         continue;
       }

       int64_t SizeAfterInlining;
       if (IsTailCall)
         SizeAfterInlining =
             IInfo->second.SizeAfterTailCallInlining - getSizeOfTailCallInst(BC);
       else
         SizeAfterInlining =
             IInfo->second.SizeAfterInlining - getSizeOfCallInst(BC);

       if (!opts::InlineAll && !opts::mustConsider(*TargetFunction)) {
         if (!opts::InlineSmallFunctions ||
             SizeAfterInlining > opts::InlineSmallFunctionsBytes) {
           ++InstIt;
           continue;
         }
       }

       LLVM_DEBUG(dbgs() << "BOLT-DEBUG: inlining call to " << *TargetFunction
                         << " in " << Function << " : " << BB->getName()
                         << ". Count: " << BB->getKnownExecutionCount()
                         << ". Size change: " << SizeAfterInlining
                         << " bytes.\n");

       std::tie(BB, InstIt) = inlineCall(*BB, InstIt, *TargetFunction);

       DidInlining = true;
       TotalInlinedBytes += SizeAfterInlining;

       ++NumInlinedCallSites;
       NumInlinedDynamicCalls += BB->getExecutionCount();

       // Subtract basic block execution count from the callee execution count.
       if (opts::AdjustProfile)
         TargetFunction->adjustExecutionCount(BB->getKnownExecutionCount());

       // Check if the caller inlining status has to be adjusted.
       if (IInfo->second.Type == INL_TAILCALL) {
         auto CallerIInfo = InliningCandidates.find(&Function);
         if (CallerIInfo != InliningCandidates.end() &&
             CallerIInfo->second.Type == INL_ANY) {
           LLVM_DEBUG(dbgs() << "adjusting inlining status for function "
                             << Function << '\n');
           CallerIInfo->second.Type = INL_TAILCALL;
         }
       }

       if (NumInlinedCallSites == opts::InlineLimit)
         return true;
     }
   }

   return DidInlining;
 }

 void Inliner::runOnFunctions(BinaryContext &BC) {
   opts::syncOptions();

   if (!opts::inliningEnabled())
     return;

   bool InlinedOnce;
   unsigned NumIters = 0;
   do {
     if (opts::InlineLimit && NumInlinedCallSites >= opts::InlineLimit)
       break;

     InlinedOnce = false;

     InliningCandidates.clear();
     findInliningCandidates(BC);

     std::vector<BinaryFunction *> ConsideredFunctions;
     for (auto &BFI : BC.getBinaryFunctions()) {
       BinaryFunction &Function = BFI.second;
       if (!shouldOptimize(Function))
         continue;
       ConsideredFunctions.push_back(&Function);
     }
     llvm::sort(ConsideredFunctions, [](const BinaryFunction *A,
                                        const BinaryFunction *B) {
       return B->getKnownExecutionCount() < A->getKnownExecutionCount();
     });
     for (BinaryFunction *Function : ConsideredFunctions) {
       if (opts::InlineLimit && NumInlinedCallSites >= opts::InlineLimit)
         break;

       const bool DidInline = inlineCallsInFunction(*Function);

       if (DidInline)
         Modified.insert(Function);

       InlinedOnce |= DidInline;
     }

     ++NumIters;
   } while (InlinedOnce && NumIters < opts::InlineMaxIters);

   if (NumInlinedCallSites)
     outs() << "BOLT-INFO: inlined " << NumInlinedDynamicCalls << " calls at "
            << NumInlinedCallSites << " call sites in " << NumIters
            << " iteration(s). Change in binary size: " << TotalInlinedBytes
            << " bytes.\n";
 }

 } // namespace bolt
 } // namespace llvm
	//===- bolt/Passes/Inliner.cpp - Inlining pass for low-level binary IR ----===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//
	//
	// This file implements the Inliner class used for inlining binary functions.
	//
	// The current inliner has a limited callee support
	// (see Inliner::getInliningInfo() for the most up-to-date details):
	//
	// * No exception handling
	// * No jump tables
	// * Single entry point
	// * CFI update not supported - breaks unwinding
	// * Regular Call Sites:
	// - only leaf functions (or callees with only tail calls)
	// * no invokes (they can't be tail calls)
	// - no direct use of %rsp
	// * Tail Call Sites:
	// - since the stack is unmodified, the regular call limitations are lifted
	//
	//===----------------------------------------------------------------------===//

	#include "bolt/Passes/Inliner.h"
	#include "bolt/Core/MCPlus.h"
	#include "llvm/Support/CommandLine.h"
	#include <map>

	#define DEBUG_TYPE "bolt-inliner"

	using namespace llvm;

	namespace opts {

	extern cl::OptionCategory BoltOptCategory;

	static cl::opt<bool>
	AdjustProfile("inline-ap",
	cl::desc("adjust function profile after inlining"),
	cl::cat(BoltOptCategory));

	static cl::list<std::string>
	ForceInlineFunctions("force-inline",
	cl::CommaSeparated,
	cl::desc("list of functions to always consider for inlining"),
	cl::value_desc("func1,func2,func3,..."),
	cl::Hidden,
	cl::cat(BoltOptCategory));

	static cl::opt<bool> InlineAll("inline-all", cl::desc("inline all functions"),
	cl::cat(BoltOptCategory));

	static cl::opt<bool> InlineIgnoreLeafCFI(
	"inline-ignore-leaf-cfi",
	cl::desc("inline leaf functions with CFI programs (can break unwinding)"),
	cl::init(true), cl::ReallyHidden, cl::cat(BoltOptCategory));

	static cl::opt<bool> InlineIgnoreCFI(
	"inline-ignore-cfi",
	cl::desc(
	"inline functions with CFI programs (can break exception handling)"),
	cl::ReallyHidden, cl::cat(BoltOptCategory));

	static cl::opt<unsigned>
	InlineLimit("inline-limit",
	cl::desc("maximum number of call sites to inline"), cl::init(0),
	cl::Hidden, cl::cat(BoltOptCategory));

	static cl::opt<unsigned>
	InlineMaxIters("inline-max-iters",
	cl::desc("maximum number of inline iterations"), cl::init(3),
	cl::Hidden, cl::cat(BoltOptCategory));

	static cl::opt<bool> InlineSmallFunctions(
	"inline-small-functions",
	cl::desc("inline functions if increase in size is less than defined by "
	"-inline-small-functions-bytes"),
	cl::cat(BoltOptCategory));

	static cl::opt<unsigned> InlineSmallFunctionsBytes(
	"inline-small-functions-bytes",
	cl::desc("max number of bytes for the function to be considered small for "
	"inlining purposes"),
	cl::init(4), cl::Hidden, cl::cat(BoltOptCategory));

	static cl::opt<bool> NoInline(
	"no-inline",
	cl::desc("disable all inlining (overrides other inlining options)"),
	cl::cat(BoltOptCategory));

	/// This function returns true if any of inlining options are specified and the
	/// inlining pass should be executed. Whenever a new inlining option is added,
	/// this function should reflect the change.
	bool inliningEnabled() {
	return !NoInline &&
	(InlineAll \|\| InlineSmallFunctions \|\| !ForceInlineFunctions.empty());
	}

	bool mustConsider(const llvm::bolt::BinaryFunction &Function) {
	for (std::string &Name : opts::ForceInlineFunctions)
	if (Function.hasName(Name))
	return true;
	return false;
	}

	void syncOptions() {
	if (opts::InlineIgnoreCFI)
	opts::InlineIgnoreLeafCFI = true;

	if (opts::InlineAll)
	opts::InlineSmallFunctions = true;
	}

	} // namespace opts

	namespace llvm {
	namespace bolt {

	uint64_t Inliner::SizeOfCallInst;
	uint64_t Inliner::SizeOfTailCallInst;

	uint64_t Inliner::getSizeOfCallInst(const BinaryContext &BC) {
	if (SizeOfCallInst)
	return SizeOfCallInst;

	MCInst Inst;
	BC.MIB->createCall(Inst, BC.Ctx->createNamedTempSymbol(), BC.Ctx.get());
	SizeOfCallInst = BC.computeInstructionSize(Inst);

	return SizeOfCallInst;
	}

	uint64_t Inliner::getSizeOfTailCallInst(const BinaryContext &BC) {
	if (SizeOfTailCallInst)
	return SizeOfTailCallInst;

	MCInst Inst;
	BC.MIB->createTailCall(Inst, BC.Ctx->createNamedTempSymbol(), BC.Ctx.get());
	SizeOfTailCallInst = BC.computeInstructionSize(Inst);

	return SizeOfTailCallInst;
	}

	InliningInfo getInliningInfo(const BinaryFunction &BF) {
	const BinaryContext &BC = BF.getBinaryContext();
	bool DirectSP = false;
	bool HasCFI = false;
	bool IsLeaf = true;

	// Perform necessary checks unless the option overrides it.
	if (!opts::mustConsider(BF)) {
	if (BF.hasSDTMarker())
	return INL_NONE;

	if (BF.hasEHRanges())
	return INL_NONE;

	if (BF.isMultiEntry())
	return INL_NONE;

	if (BF.hasJumpTables())
	return INL_NONE;

	const MCPhysReg SPReg = BC.MIB->getStackPointer();
	for (const BinaryBasicBlock &BB : BF) {
	for (const MCInst &Inst : BB) {
	// Tail calls are marked as implicitly using the stack pointer and they
	// could be inlined.
	if (BC.MIB->isTailCall(Inst))
	break;

	if (BC.MIB->isCFI(Inst)) {
	HasCFI = true;
	continue;
	}

	if (BC.MIB->isCall(Inst))
	IsLeaf = false;

	// Push/pop instructions are straightforward to handle.
	if (BC.MIB->isPush(Inst) \|\| BC.MIB->isPop(Inst))
	continue;

	DirectSP \|= BC.MIB->hasDefOfPhysReg(Inst, SPReg) \|\|
	BC.MIB->hasUseOfPhysReg(Inst, SPReg);
	}
	}
	}

	if (HasCFI) {
	if (!opts::InlineIgnoreLeafCFI)
	return INL_NONE;

	if (!IsLeaf && !opts::InlineIgnoreCFI)
	return INL_NONE;
	}

	InliningInfo Info(DirectSP ? INL_TAILCALL : INL_ANY);

	size_t Size = BF.estimateSize();

	Info.SizeAfterInlining = Size;
	Info.SizeAfterTailCallInlining = Size;

	// Handle special case of the known size reduction.
	if (BF.size() == 1) {
	// For a regular call the last return instruction could be removed
	// (or converted to a branch).
	const MCInst *LastInst = BF.back().getLastNonPseudoInstr();
	if (LastInst && BC.MIB->isReturn(*LastInst) &&
	!BC.MIB->isTailCall(*LastInst)) {
	const uint64_t RetInstSize = BC.computeInstructionSize(*LastInst);
	assert(Size >= RetInstSize);
	Info.SizeAfterInlining -= RetInstSize;
	}
	}

	return Info;
	}

	void Inliner::findInliningCandidates(BinaryContext &BC) {
	for (const auto &BFI : BC.getBinaryFunctions()) {
	const BinaryFunction &Function = BFI.second;
	if (!shouldOptimize(Function))
	continue;
	const InliningInfo InlInfo = getInliningInfo(Function);
	if (InlInfo.Type != INL_NONE)
	InliningCandidates[&Function] = InlInfo;
	}
	}

	std::pair<BinaryBasicBlock *, BinaryBasicBlock::iterator>
	Inliner::inlineCall(BinaryBasicBlock &CallerBB,
	BinaryBasicBlock::iterator CallInst,
	const BinaryFunction &Callee) {
	BinaryFunction &CallerFunction = *CallerBB.getFunction();
	BinaryContext &BC = CallerFunction.getBinaryContext();
	auto &MIB = *BC.MIB;

	assert(MIB.isCall(*CallInst) && "can only inline a call or a tail call");
	assert(!Callee.isMultiEntry() &&
	"cannot inline function with multiple entries");
	assert(!Callee.hasJumpTables() &&
	"cannot inline function with jump table(s)");

	// Get information about the call site.
	const bool CSIsInvoke = BC.MIB->isInvoke(*CallInst);
	const bool CSIsTailCall = BC.MIB->isTailCall(*CallInst);
	const int64_t CSGNUArgsSize = BC.MIB->getGnuArgsSize(*CallInst);
	const std::optional<MCPlus::MCLandingPad> CSEHInfo =
	BC.MIB->getEHInfo(*CallInst);

	// Split basic block at the call site if there will be more incoming edges
	// coming from the callee.
	BinaryBasicBlock *FirstInlinedBB = &CallerBB;
	if (Callee.front().pred_size() && CallInst != CallerBB.begin()) {
	FirstInlinedBB = CallerBB.splitAt(CallInst);
	CallInst = FirstInlinedBB->begin();
	}

	// Split basic block after the call instruction unless the callee is trivial
	// (i.e. consists of a single basic block). If necessary, obtain a basic block
	// for return instructions in the callee to redirect to.
	BinaryBasicBlock *NextBB = nullptr;
	if (Callee.size() > 1) {
	if (std::next(CallInst) != FirstInlinedBB->end())
	NextBB = FirstInlinedBB->splitAt(std::next(CallInst));
	else
	NextBB = FirstInlinedBB->getSuccessor();
	}
	if (NextBB)
	FirstInlinedBB->removeSuccessor(NextBB);

	// Remove the call instruction.
	auto InsertII = FirstInlinedBB->eraseInstruction(CallInst);

	double ProfileRatio = 0;
	if (uint64_t CalleeExecCount = Callee.getKnownExecutionCount())
	ProfileRatio =
	(double)FirstInlinedBB->getKnownExecutionCount() / CalleeExecCount;

	// Save execution count of the first block as we don't want it to change
	// later due to profile adjustment rounding errors.
	const uint64_t FirstInlinedBBCount = FirstInlinedBB->getKnownExecutionCount();

	// Copy basic blocks and maintain a map from their origin.
	std::unordered_map<const BinaryBasicBlock , BinaryBasicBlock > InlinedBBMap;
	InlinedBBMap[&Callee.front()] = FirstInlinedBB;
	for (const BinaryBasicBlock &BB : llvm::drop_begin(Callee)) {
	BinaryBasicBlock *InlinedBB = CallerFunction.addBasicBlock();
	InlinedBBMap[&BB] = InlinedBB;
	InlinedBB->setCFIState(FirstInlinedBB->getCFIState());
	if (Callee.hasValidProfile())
	InlinedBB->setExecutionCount(BB.getKnownExecutionCount());
	else
	InlinedBB->setExecutionCount(FirstInlinedBBCount);
	}

	// Copy over instructions and edges.
	for (const BinaryBasicBlock &BB : Callee) {
	BinaryBasicBlock *InlinedBB = InlinedBBMap[&BB];

	if (InlinedBB != FirstInlinedBB)
	InsertII = InlinedBB->begin();

	// Copy over instructions making any necessary mods.
	for (MCInst Inst : BB) {
	if (MIB.isPseudo(Inst))
	continue;

	MIB.stripAnnotations(Inst, /KeepTC=/BC.isX86());

	// Fix branch target. Strictly speaking, we don't have to do this as
	// targets of direct branches will be fixed later and don't matter
	// in the CFG state. However, disassembly may look misleading, and
	// hence we do the fixing.
	if (MIB.isBranch(Inst)) {
	assert(!MIB.isIndirectBranch(Inst) &&
	"unexpected indirect branch in callee");
	const BinaryBasicBlock *TargetBB =
	Callee.getBasicBlockForLabel(MIB.getTargetSymbol(Inst));
	assert(TargetBB && "cannot find target block in callee");
	MIB.replaceBranchTarget(Inst, InlinedBBMap[TargetBB]->getLabel(),
	BC.Ctx.get());
	}

	if (CSIsTailCall \|\| (!MIB.isCall(Inst) && !MIB.isReturn(Inst))) {
	InsertII =
	std::next(InlinedBB->insertInstruction(InsertII, std::move(Inst)));
	continue;
	}

	// Handle special instructions for a non-tail call site.
	if (!MIB.isCall(Inst)) {
	// Returns are removed.
	break;
	}

	MIB.convertTailCallToCall(Inst);

	// Propagate EH-related info to call instructions.
	if (CSIsInvoke) {
	MIB.addEHInfo(Inst, *CSEHInfo);
	if (CSGNUArgsSize >= 0)
	MIB.addGnuArgsSize(Inst, CSGNUArgsSize);
	}

	InsertII =
	std::next(InlinedBB->insertInstruction(InsertII, std::move(Inst)));
	}

	// Add CFG edges to the basic blocks of the inlined instance.
	std::vector<BinaryBasicBlock *> Successors(BB.succ_size());
	llvm::transform(BB.successors(), Successors.begin(),
	[&InlinedBBMap](const BinaryBasicBlock *BB) {
	return InlinedBBMap.at(BB);
	});

	if (CallerFunction.hasValidProfile() && Callee.hasValidProfile())
	InlinedBB->addSuccessors(Successors.begin(), Successors.end(),
	BB.branch_info_begin(), BB.branch_info_end());
	else
	InlinedBB->addSuccessors(Successors.begin(), Successors.end());

	if (!CSIsTailCall && BB.succ_size() == 0 && NextBB) {
	// Either it's a return block or the last instruction never returns.
	InlinedBB->addSuccessor(NextBB, InlinedBB->getExecutionCount());
	}

	// Scale profiling info for blocks and edges after inlining.
	if (CallerFunction.hasValidProfile() && Callee.size() > 1) {
	if (opts::AdjustProfile)
	InlinedBB->adjustExecutionCount(ProfileRatio);
	else
	InlinedBB->setExecutionCount(InlinedBB->getKnownExecutionCount() *
	ProfileRatio);
	}
	}

	// Restore the original execution count of the first inlined basic block.
	FirstInlinedBB->setExecutionCount(FirstInlinedBBCount);

	CallerFunction.recomputeLandingPads();

	if (NextBB)
	return std::make_pair(NextBB, NextBB->begin());

	if (Callee.size() == 1)
	return std::make_pair(FirstInlinedBB, InsertII);

	return std::make_pair(FirstInlinedBB, FirstInlinedBB->end());
	}

	bool Inliner::inlineCallsInFunction(BinaryFunction &Function) {
	BinaryContext &BC = Function.getBinaryContext();
	std::vector<BinaryBasicBlock *> Blocks(Function.getLayout().block_begin(),
	Function.getLayout().block_end());
	llvm::sort(
	Blocks, [](const BinaryBasicBlock BB1, const BinaryBasicBlock BB2) {
	return BB1->getKnownExecutionCount() > BB2->getKnownExecutionCount();
	});

	bool DidInlining = false;
	for (BinaryBasicBlock *BB : Blocks) {
	for (auto InstIt = BB->begin(); InstIt != BB->end();) {
	MCInst &Inst = *InstIt;
	if (!BC.MIB->isCall(Inst) \|\| MCPlus::getNumPrimeOperands(Inst) != 1 \|\|
	!Inst.getOperand(0).isExpr()) {
	++InstIt;
	continue;
	}

	const MCSymbol *TargetSymbol = BC.MIB->getTargetSymbol(Inst);
	assert(TargetSymbol && "target symbol expected for direct call");

	// Don't inline calls to a secondary entry point in a target function.
	uint64_t EntryID = 0;
	BinaryFunction *TargetFunction =
	BC.getFunctionForSymbol(TargetSymbol, &EntryID);
	if (!TargetFunction \|\| EntryID != 0) {
	++InstIt;
	continue;
	}

	// Don't do recursive inlining.
	if (TargetFunction == &Function) {
	++InstIt;
	continue;
	}

	auto IInfo = InliningCandidates.find(TargetFunction);
	if (IInfo == InliningCandidates.end()) {
	++InstIt;
	continue;
	}

	const bool IsTailCall = BC.MIB->isTailCall(Inst);
	if (!IsTailCall && IInfo->second.Type == INL_TAILCALL) {
	++InstIt;
	continue;
	}

	int64_t SizeAfterInlining;
	if (IsTailCall)
	SizeAfterInlining =
	IInfo->second.SizeAfterTailCallInlining - getSizeOfTailCallInst(BC);
	else
	SizeAfterInlining =
	IInfo->second.SizeAfterInlining - getSizeOfCallInst(BC);

	if (!opts::InlineAll && !opts::mustConsider(*TargetFunction)) {
	if (!opts::InlineSmallFunctions \|\|
	SizeAfterInlining > opts::InlineSmallFunctionsBytes) {
	++InstIt;
	continue;
	}
	}

	LLVM_DEBUG(dbgs() << "BOLT-DEBUG: inlining call to " << *TargetFunction
	<< " in " << Function << " : " << BB->getName()
	<< ". Count: " << BB->getKnownExecutionCount()
	<< ". Size change: " << SizeAfterInlining
	<< " bytes.\n");

	std::tie(BB, InstIt) = inlineCall(BB, InstIt, TargetFunction);

	DidInlining = true;
	TotalInlinedBytes += SizeAfterInlining;

	++NumInlinedCallSites;
	NumInlinedDynamicCalls += BB->getExecutionCount();

	// Subtract basic block execution count from the callee execution count.
	if (opts::AdjustProfile)
	TargetFunction->adjustExecutionCount(BB->getKnownExecutionCount());

	// Check if the caller inlining status has to be adjusted.
	if (IInfo->second.Type == INL_TAILCALL) {
	auto CallerIInfo = InliningCandidates.find(&Function);
	if (CallerIInfo != InliningCandidates.end() &&
	CallerIInfo->second.Type == INL_ANY) {
	LLVM_DEBUG(dbgs() << "adjusting inlining status for function "
	<< Function << '\n');
	CallerIInfo->second.Type = INL_TAILCALL;
	}
	}

	if (NumInlinedCallSites == opts::InlineLimit)
	return true;
	}
	}

	return DidInlining;
	}

	void Inliner::runOnFunctions(BinaryContext &BC) {
	opts::syncOptions();

	if (!opts::inliningEnabled())
	return;

	bool InlinedOnce;
	unsigned NumIters = 0;
	do {
	if (opts::InlineLimit && NumInlinedCallSites >= opts::InlineLimit)
	break;

	InlinedOnce = false;

	InliningCandidates.clear();
	findInliningCandidates(BC);

	std::vector<BinaryFunction *> ConsideredFunctions;
	for (auto &BFI : BC.getBinaryFunctions()) {
	BinaryFunction &Function = BFI.second;
	if (!shouldOptimize(Function))
	continue;
	ConsideredFunctions.push_back(&Function);
	}
	llvm::sort(ConsideredFunctions, [](const BinaryFunction *A,
	const BinaryFunction *B) {
	return B->getKnownExecutionCount() < A->getKnownExecutionCount();
	});
	for (BinaryFunction *Function : ConsideredFunctions) {
	if (opts::InlineLimit && NumInlinedCallSites >= opts::InlineLimit)
	break;

	const bool DidInline = inlineCallsInFunction(*Function);

	if (DidInline)
	Modified.insert(Function);

	InlinedOnce \|= DidInline;
	}

	++NumIters;
	} while (InlinedOnce && NumIters < opts::InlineMaxIters);

	if (NumInlinedCallSites)
	outs() << "BOLT-INFO: inlined " << NumInlinedDynamicCalls << " calls at "
	<< NumInlinedCallSites << " call sites in " << NumIters
	<< " iteration(s). Change in binary size: " << TotalInlinedBytes
	<< " bytes.\n";
	}

	} // namespace bolt
	} // namespace llvm