[PGO][PGSO] SizeOpts changes.

Summary:
(Split off of D67120)

SizeOpts/MachineSizeOpts changes for profile guided size optimization.

Reviewers: davidxl

Subscribers: mgorny, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D69070

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@375254 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/include/llvm/CodeGen/MachineBlockFrequencyInfo.h b/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
index a438ecf..2a826d0 100644
--- a/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
+++ b/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
@@ -38,6 +38,9 @@
   static char ID;
 
   MachineBlockFrequencyInfo();
+  explicit MachineBlockFrequencyInfo(MachineFunction &F,
+                                     MachineBranchProbabilityInfo &MBPI,
+                                     MachineLoopInfo &MLI);
   ~MachineBlockFrequencyInfo() override;
 
   void getAnalysisUsage(AnalysisUsage &AU) const override;
diff --git a/include/llvm/CodeGen/MachineDominators.h b/include/llvm/CodeGen/MachineDominators.h
index e4d7a02..9d31232 100644
--- a/include/llvm/CodeGen/MachineDominators.h
+++ b/include/llvm/CodeGen/MachineDominators.h
@@ -81,6 +81,9 @@
   static char ID; // Pass ID, replacement for typeid
 
   MachineDominatorTree();
+  explicit MachineDominatorTree(MachineFunction &MF) : MachineFunctionPass(ID) {
+    calculate(MF);
+  }
 
   DomTreeT &getBase() {
     if (!DT) DT.reset(new DomTreeT());
@@ -111,6 +114,8 @@
 
   bool runOnMachineFunction(MachineFunction &F) override;
 
+  void calculate(MachineFunction &F);
+
   bool dominates(const MachineDomTreeNode *A,
                  const MachineDomTreeNode *B) const {
     applySplitCriticalEdges();
diff --git a/include/llvm/CodeGen/MachineLoopInfo.h b/include/llvm/CodeGen/MachineLoopInfo.h
index da6df59..1612c8b 100644
--- a/include/llvm/CodeGen/MachineLoopInfo.h
+++ b/include/llvm/CodeGen/MachineLoopInfo.h
@@ -37,6 +37,7 @@
 
 namespace llvm {
 
+class MachineDominatorTree;
 // Implementation in LoopInfoImpl.h
 class MachineLoop;
 extern template class LoopBase<MachineBasicBlock, MachineLoop>;
@@ -91,6 +92,10 @@
   MachineLoopInfo() : MachineFunctionPass(ID) {
     initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
   }
+  explicit MachineLoopInfo(MachineDominatorTree &MDT)
+      : MachineFunctionPass(ID) {
+    calculate(MDT);
+  }
   MachineLoopInfo(const MachineLoopInfo &) = delete;
   MachineLoopInfo &operator=(const MachineLoopInfo &) = delete;
 
@@ -133,6 +138,7 @@
 
   /// Calculate the natural loop information.
   bool runOnMachineFunction(MachineFunction &F) override;
+  void calculate(MachineDominatorTree &MDT);
 
   void releaseMemory() override { LI.releaseMemory(); }
 
diff --git a/include/llvm/CodeGen/MachineSizeOpts.h b/include/llvm/CodeGen/MachineSizeOpts.h
new file mode 100644
index 0000000..75e871d
--- /dev/null
+++ b/include/llvm/CodeGen/MachineSizeOpts.h
@@ -0,0 +1,37 @@
+//===- MachineSizeOpts.h - machine size optimization ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains some shared machine IR code size optimization related
+// code.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CODEGEN_MACHINE_SIZEOPTS_H
+#define LLVM_CODEGEN_MACHINE_SIZEOPTS_H
+
+#include "llvm/Transforms/Utils/SizeOpts.h"
+
+namespace llvm {
+
+class ProfileSummaryInfo;
+class MachineBasicBlock;
+class MachineBlockFrequencyInfo;
+class MachineFunction;
+
+/// Returns true if the profile suggests size-optimizing machine function
+/// \p MF; false if \p PSI or \p MBFI is null or there is no profile summary.
+bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI,
+                           const MachineBlockFrequencyInfo *MBFI);
+/// Returns true if the profile suggests size-optimizing machine basic block
+/// \p MBB; false if \p PSI or \p MBFI is null or there is no profile summary.
+bool shouldOptimizeForSize(const MachineBasicBlock *MBB,
+                           ProfileSummaryInfo *PSI,
+                           const MachineBlockFrequencyInfo *MBFI);
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_MACHINE_SIZEOPTS_H
diff --git a/include/llvm/Transforms/Utils/SizeOpts.h b/include/llvm/Transforms/Utils/SizeOpts.h
index 1a052c6..1c56da0 100644
--- a/include/llvm/Transforms/Utils/SizeOpts.h
+++ b/include/llvm/Transforms/Utils/SizeOpts.h
@@ -13,6 +13,18 @@
 #ifndef LLVM_TRANSFORMS_UTILS_SIZEOPTS_H
 #define LLVM_TRANSFORMS_UTILS_SIZEOPTS_H
 
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include <cassert>
+
+// PGSO flags (defined in SizeOpts.cpp); qualified, so no using-directive here.
+extern llvm::cl::opt<bool> EnablePGSO;
+extern llvm::cl::opt<bool> PGSOLargeWorkingSetSizeOnly;
+extern llvm::cl::opt<bool> ForcePGSO;
+extern llvm::cl::opt<int> PgsoCutoffInstrProf;
+extern llvm::cl::opt<int> PgsoCutoffSampleProf;
+
 namespace llvm {
 
 class BasicBlock;
@@ -20,13 +32,52 @@
 class Function;
 class ProfileSummaryInfo;
 
+template<typename AdapterT, typename FuncT, typename BFIT>
+bool shouldFuncOptimizeForSizeImpl(const FuncT *F, ProfileSummaryInfo *PSI,
+                                   BFIT *BFI) {
+  assert(F);
+  if (!PSI || !BFI || !PSI->hasProfileSummary())
+    return false;
+  if (ForcePGSO)
+    return true;
+  if (!EnablePGSO)
+    return false;
+  if (PGSOLargeWorkingSetSizeOnly && !PSI->hasLargeWorkingSetSize()) {
+    // Even if the working set size isn't large, size-optimize cold code.
+    return AdapterT::isFunctionColdInCallGraph(F, PSI, *BFI);
+  }
+  return !AdapterT::isFunctionHotInCallGraphNthPercentile(
+      PSI->hasSampleProfile() ? PgsoCutoffSampleProf : PgsoCutoffInstrProf,
+      F, PSI, *BFI);
+}
+
+template<typename AdapterT, typename BlockT, typename BFIT>
+bool shouldOptimizeForSizeImpl(const BlockT *BB, ProfileSummaryInfo *PSI,
+                               BFIT *BFI) {
+  assert(BB);
+  if (!PSI || !BFI || !PSI->hasProfileSummary())
+    return false;
+  if (ForcePGSO)
+    return true;
+  if (!EnablePGSO)
+    return false;
+  if (PGSOLargeWorkingSetSizeOnly && !PSI->hasLargeWorkingSetSize()) {
+    // Even if the working set size isn't large, size-optimize cold code.
+    return AdapterT::isColdBlock(BB, PSI, BFI);
+  }
+  return !AdapterT::isHotBlockNthPercentile(
+      PSI->hasSampleProfile() ? PgsoCutoffSampleProf : PgsoCutoffInstrProf,
+      BB, PSI, BFI);
+}
+
 /// Returns true if function \p F is suggested to be size-optimized base on the
 /// profile.
-bool shouldOptimizeForSize(Function *F, ProfileSummaryInfo *PSI,
+bool shouldOptimizeForSize(const Function *F, ProfileSummaryInfo *PSI,
                            BlockFrequencyInfo *BFI);
+
 /// Returns true if basic block \p BB is suggested to be size-optimized base
 /// on the profile.
-bool shouldOptimizeForSize(BasicBlock *BB, ProfileSummaryInfo *PSI,
+bool shouldOptimizeForSize(const BasicBlock *BB, ProfileSummaryInfo *PSI,
                            BlockFrequencyInfo *BFI);
 
 } // end namespace llvm
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 50b469d..f5c18f8 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -92,6 +92,7 @@
   MachineRegisterInfo.cpp
   MachineScheduler.cpp
   MachineSink.cpp
+  MachineSizeOpts.cpp
   MachineSSAUpdater.cpp
   MachineTraceMetrics.cpp
   MachineVerifier.cpp
diff --git a/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/lib/CodeGen/MachineBlockFrequencyInfo.cpp
index 53a35b7..889fde6 100644
--- a/lib/CodeGen/MachineBlockFrequencyInfo.cpp
+++ b/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -172,6 +172,13 @@
   initializeMachineBlockFrequencyInfoPass(*PassRegistry::getPassRegistry());
 }
 
+MachineBlockFrequencyInfo::MachineBlockFrequencyInfo(
+      MachineFunction &F,
+      MachineBranchProbabilityInfo &MBPI,
+      MachineLoopInfo &MLI) : MachineFunctionPass(ID) {
+  calculate(F, MBPI, MLI);
+}
+
 MachineBlockFrequencyInfo::~MachineBlockFrequencyInfo() = default;
 
 void MachineBlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const {
diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp
index 706c706..0d57bca 100644
--- a/lib/CodeGen/MachineDominators.cpp
+++ b/lib/CodeGen/MachineDominators.cpp
@@ -49,11 +49,15 @@
 }
 
 bool MachineDominatorTree::runOnMachineFunction(MachineFunction &F) {
+  calculate(F);
+  return false;
+}
+
+void MachineDominatorTree::calculate(MachineFunction &F) {
   CriticalEdgesToSplit.clear();
   NewBBs.clear();
   DT.reset(new DomTreeBase<MachineBasicBlock>());
   DT->recalculate(F);
-  return false;
 }
 
 MachineDominatorTree::MachineDominatorTree()
diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp
index 3b8b430..85822a6 100644
--- a/lib/CodeGen/MachineLoopInfo.cpp
+++ b/lib/CodeGen/MachineLoopInfo.cpp
@@ -36,11 +36,15 @@
 char &llvm::MachineLoopInfoID = MachineLoopInfo::ID;
 
 bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) {
-  releaseMemory();
-  LI.analyze(getAnalysis<MachineDominatorTree>().getBase());
+  calculate(getAnalysis<MachineDominatorTree>());
   return false;
 }
 
+void MachineLoopInfo::calculate(MachineDominatorTree &MDT) {
+  releaseMemory();
+  LI.analyze(MDT.getBase());
+}
+
 void MachineLoopInfo::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesAll();
   AU.addRequired<MachineDominatorTree>();
diff --git a/lib/CodeGen/MachineSizeOpts.cpp b/lib/CodeGen/MachineSizeOpts.cpp
new file mode 100644
index 0000000..0c2ef33
--- /dev/null
+++ b/lib/CodeGen/MachineSizeOpts.cpp
@@ -0,0 +1,120 @@
+//===- MachineSizeOpts.cpp - code size optimization related code ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains some shared machine IR code size optimization related
+// code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineSizeOpts.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+
+using namespace llvm;
+
+extern cl::opt<bool> EnablePGSO;
+extern cl::opt<bool> PGSOLargeWorkingSetSizeOnly;
+extern cl::opt<bool> ForcePGSO;
+extern cl::opt<int> PgsoCutoffInstrProf;
+extern cl::opt<int> PgsoCutoffSampleProf;
+
+namespace machine_size_opts_detail {
+
+/// Like ProfileSummaryInfo::isColdBlock but for MachineBasicBlock.
+static bool isColdBlock(const MachineBasicBlock *MBB,
+                        ProfileSummaryInfo *PSI,
+                        const MachineBlockFrequencyInfo *MBFI) {
+  auto Count = MBFI->getBlockProfileCount(MBB);
+  return Count && PSI->isColdCount(*Count); // No count => not cold.
+}
+
+/// MachineBasicBlock analogue of ProfileSummaryInfo::isHotBlockNthPercentile.
+static bool isHotBlockNthPercentile(
+    int PercentileCutoff, const MachineBasicBlock *MBB, ProfileSummaryInfo *PSI,
+    const MachineBlockFrequencyInfo *MBFI) {
+  if (auto BlockCount = MBFI->getBlockProfileCount(MBB))
+    return PSI->isHotCountNthPercentile(PercentileCutoff, *BlockCount);
+  return false;
+}
+
+/// Like ProfileSummaryInfo::isFunctionColdInCallGraph but for MachineFunction:
+/// true only if the entry count (if any) and every block are cold.
+static bool isFunctionColdInCallGraph(
+    const MachineFunction *MF,
+    ProfileSummaryInfo *PSI,
+    const MachineBlockFrequencyInfo &MBFI) {
+  if (auto FunctionCount = MF->getFunction().getEntryCount())
+    if (!PSI->isColdCount(FunctionCount.getCount()))
+      return false;
+  for (const auto &MBB : *MF)
+    if (!isColdBlock(&MBB, PSI, &MBFI))
+      return false;
+  return true;
+}
+
+/// Like ProfileSummaryInfo::isFunctionHotInCallGraphNthPercentile but for
+/// MachineFunction: true if the entry count (if any) or any block is hot.
+static bool isFunctionHotInCallGraphNthPercentile(
+    int PercentileCutoff,
+    const MachineFunction *MF,
+    ProfileSummaryInfo *PSI,
+    const MachineBlockFrequencyInfo &MBFI) {
+  if (auto FunctionCount = MF->getFunction().getEntryCount())
+    if (PSI->isHotCountNthPercentile(PercentileCutoff,
+                                     FunctionCount.getCount()))
+      return true;
+  for (const auto &MBB : *MF)
+    if (isHotBlockNthPercentile(PercentileCutoff, &MBB, PSI, &MBFI))
+      return true;
+  return false;
+}
+} // namespace machine_size_opts_detail
+
+namespace {
+// Forwards each hook used by the SizeOpts.h templates to the matching
+// machine_size_opts_detail helper.
+struct MachineBasicBlockBFIAdapter {
+  static bool
+  isFunctionColdInCallGraph(const MachineFunction *Fn, ProfileSummaryInfo *PSI,
+                            const MachineBlockFrequencyInfo &Freq) {
+    return machine_size_opts_detail::isFunctionColdInCallGraph(Fn, PSI, Freq);
+  }
+  static bool isFunctionHotInCallGraphNthPercentile(
+      int Percentile, const MachineFunction *Fn, ProfileSummaryInfo *PSI,
+      const MachineBlockFrequencyInfo &Freq) {
+    return machine_size_opts_detail::isFunctionHotInCallGraphNthPercentile(
+        Percentile, Fn, PSI, Freq);
+  }
+  static bool isColdBlock(const MachineBasicBlock *Block,
+                          ProfileSummaryInfo *PSI,
+                          const MachineBlockFrequencyInfo *Freq) {
+    return machine_size_opts_detail::isColdBlock(Block, PSI, Freq);
+  }
+  static bool
+  isHotBlockNthPercentile(int Percentile, const MachineBasicBlock *Block,
+                          ProfileSummaryInfo *PSI,
+                          const MachineBlockFrequencyInfo *Freq) {
+    return machine_size_opts_detail::isHotBlockNthPercentile(Percentile, Block,
+                                                             PSI, Freq);
+  }
+};
+} // end anonymous namespace
+
+bool llvm::shouldOptimizeForSize(const MachineFunction *MF,
+                                 ProfileSummaryInfo *PSI,
+                                 const MachineBlockFrequencyInfo *MBFI) {
+  using Adapter = MachineBasicBlockBFIAdapter; // machine-IR hotness queries
+  return shouldFuncOptimizeForSizeImpl<Adapter>(MF, PSI, MBFI);
+}
+
+bool llvm::shouldOptimizeForSize(const MachineBasicBlock *MBB,
+                                 ProfileSummaryInfo *PSI,
+                                 const MachineBlockFrequencyInfo *MBFI) {
+  using Adapter = MachineBasicBlockBFIAdapter; // machine-IR hotness queries
+  return shouldOptimizeForSizeImpl<Adapter>(MBB, PSI, MBFI);
+}
diff --git a/lib/Transforms/Utils/SizeOpts.cpp b/lib/Transforms/Utils/SizeOpts.cpp
index 1519751..f819c67 100644
--- a/lib/Transforms/Utils/SizeOpts.cpp
+++ b/lib/Transforms/Utils/SizeOpts.cpp
@@ -10,28 +10,66 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Analysis/BlockFrequencyInfo.h"
-#include "llvm/Analysis/ProfileSummaryInfo.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Transforms/Utils/SizeOpts.h"
+
 using namespace llvm;
 
-static cl::opt<bool> ProfileGuidedSizeOpt(
+cl::opt<bool> EnablePGSO(
     "pgso", cl::Hidden, cl::init(true),
-    cl::desc("Enable the profile guided size optimization. "));
+    cl::desc("Enable the profile guided size optimizations. "));
 
-bool llvm::shouldOptimizeForSize(Function *F, ProfileSummaryInfo *PSI,
+cl::opt<bool> PGSOLargeWorkingSetSizeOnly(
+    "pgso-lwss-only", cl::Hidden, cl::init(true),
+    cl::desc("Apply the profile guided size optimizations only "
+             "if the working set size is large (except for cold code.)"));
+
+cl::opt<bool> ForcePGSO(
+    "force-pgso", cl::Hidden, cl::init(false),
+    cl::desc("Force the (profiled-guided) size optimizations. "));
+
+cl::opt<int> PgsoCutoffInstrProf(
+    "pgso-cutoff-instr-prof", cl::Hidden, cl::init(250000), cl::ZeroOrMore,
+    cl::desc("The profile guided size optimization profile summary cutoff "
+             "for instrumentation profile."));
+
+cl::opt<int> PgsoCutoffSampleProf(
+    "pgso-cutoff-sample-prof", cl::Hidden, cl::init(800000), cl::ZeroOrMore,
+    cl::desc("The profile guided size optimization profile summary cutoff "
+             "for sample profile."));
+
+namespace {
+struct BasicBlockBFIAdapter {
+  static bool isFunctionColdInCallGraph(const Function *F,
+                                        ProfileSummaryInfo *PSI,
+                                        BlockFrequencyInfo &BFI) {
+    return PSI->isFunctionColdInCallGraph(F, BFI);
+  }
+  static bool isFunctionHotInCallGraphNthPercentile(int CutOff,
+                                                    const Function *F,
+                                                    ProfileSummaryInfo *PSI,
+                                                    BlockFrequencyInfo &BFI) {
+    return PSI->isFunctionHotInCallGraphNthPercentile(CutOff, F, BFI);
+  }
+  static bool isColdBlock(const BasicBlock *BB,
+                          ProfileSummaryInfo *PSI,
+                          BlockFrequencyInfo *BFI) {
+    return PSI->isColdBlock(BB, BFI);
+  }
+  static bool isHotBlockNthPercentile(int CutOff,
+                                      const BasicBlock *BB,
+                                      ProfileSummaryInfo *PSI,
+                                      BlockFrequencyInfo *BFI) {
+    return PSI->isHotBlockNthPercentile(CutOff, BB, BFI);
+  }
+};
+} // end anonymous namespace
+
+bool llvm::shouldOptimizeForSize(const Function *F, ProfileSummaryInfo *PSI,
                                  BlockFrequencyInfo *BFI) {
-  assert(F);
-  if (!PSI || !BFI || !PSI->hasProfileSummary())
-    return false;
-  return ProfileGuidedSizeOpt && PSI->isFunctionColdInCallGraph(F, *BFI);
+  return shouldFuncOptimizeForSizeImpl<BasicBlockBFIAdapter>(F, PSI, BFI);
 }
 
-bool llvm::shouldOptimizeForSize(BasicBlock *BB, ProfileSummaryInfo *PSI,
+bool llvm::shouldOptimizeForSize(const BasicBlock *BB, ProfileSummaryInfo *PSI,
                                  BlockFrequencyInfo *BFI) {
-  assert(BB);
-  if (!PSI || !BFI || !PSI->hasProfileSummary())
-    return false;
-  return ProfileGuidedSizeOpt && PSI->isColdBlock(BB, BFI);
+  return shouldOptimizeForSizeImpl<BasicBlockBFIAdapter>(BB, PSI, BFI);
 }
diff --git a/unittests/CodeGen/CMakeLists.txt b/unittests/CodeGen/CMakeLists.txt
index 706a65b..1553641 100644
--- a/unittests/CodeGen/CMakeLists.txt
+++ b/unittests/CodeGen/CMakeLists.txt
@@ -18,6 +18,7 @@
   MachineInstrBundleIteratorTest.cpp
   MachineInstrTest.cpp
   MachineOperandTest.cpp
+  MachineSizeOptsTest.cpp
   ScalableVectorMVTsTest.cpp
   TypeTraitsTest.cpp
   TargetOptionsTest.cpp
diff --git a/unittests/CodeGen/MachineSizeOptsTest.cpp b/unittests/CodeGen/MachineSizeOptsTest.cpp
new file mode 100644
index 0000000..f8b0c23
--- /dev/null
+++ b/unittests/CodeGen/MachineSizeOptsTest.cpp
@@ -0,0 +1,234 @@
+//===- MachineSizeOptsTest.cpp --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineSizeOpts.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/CodeGen/MIRParser/MIRParser.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Target/TargetMachine.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+
+std::unique_ptr<LLVMTargetMachine> createTargetMachine() {
+  std::string Error, TT = Triple::normalize("x86_64--");
+  const Target *TheTarget = TargetRegistry::lookupTarget(TT, Error);
+  // A null target means lookup failed; report it instead of dereferencing.
+  if (!TheTarget) report_fatal_error("x86_64 target lookup failed: " + Error);
+  return std::unique_ptr<LLVMTargetMachine>(static_cast<LLVMTargetMachine*>(
+      TheTarget->createTargetMachine(TT, "", "", TargetOptions(), None, None,
+                                     CodeGenOpt::Default)));
+}
+
+class MachineSizeOptsTest : public testing::Test {
+ protected:
+  static const char* MIRString;
+  LLVMContext Context;
+  std::unique_ptr<LLVMTargetMachine> TM;
+  std::unique_ptr<MachineModuleInfo> MMI;
+  std::unique_ptr<MIRParser> Parser;
+  std::unique_ptr<Module> M;
+  struct BFIData {
+    std::unique_ptr<MachineDominatorTree> MDT;
+    std::unique_ptr<MachineLoopInfo> MLI;
+    std::unique_ptr<MachineBranchProbabilityInfo> MBPI;
+    std::unique_ptr<MachineBlockFrequencyInfo> MBFI;
+    BFIData(MachineFunction &MF) {
+      MDT.reset(new MachineDominatorTree(MF));
+      MLI.reset(new MachineLoopInfo(*MDT));
+      MBPI.reset(new MachineBranchProbabilityInfo());
+      MBFI.reset(new MachineBlockFrequencyInfo(MF, *MBPI, *MLI));
+    }
+    MachineBlockFrequencyInfo *get() { return MBFI.get(); }
+  };
+
+  static void SetUpTestCase() {
+    InitializeAllTargets();
+    InitializeAllTargetMCs();
+  }
+
+  void SetUp() override {
+    TM = createTargetMachine();
+    std::unique_ptr<MemoryBuffer> MBuffer =
+        MemoryBuffer::getMemBuffer(MIRString);
+    Parser = createMIRParser(std::move(MBuffer), Context);
+    if (!Parser)
+      report_fatal_error("null MIRParser");
+    M = Parser->parseIRModule();
+    if (!M)
+      report_fatal_error("parseIRModule failed");
+    M->setTargetTriple(TM->getTargetTriple().getTriple());
+    M->setDataLayout(TM->createDataLayout());
+    MMI = std::make_unique<MachineModuleInfo>(TM.get());
+    if (Parser->parseMachineFunctions(*M, *MMI.get()))
+      report_fatal_error("parseMachineFunctions failed");
+  }
+
+  MachineFunction *getMachineFunction(Module *M, StringRef Name) {
+    auto F = M->getFunction(Name);
+    if (!F)
+      report_fatal_error("null Function");
+    auto &MF = MMI->getOrCreateMachineFunction(*F);
+    return &MF;
+  }
+};
+
+TEST_F(MachineSizeOptsTest, Test) {
+  MachineFunction *F = getMachineFunction(M.get(), "f");
+  ASSERT_TRUE(F != nullptr);
+  MachineFunction *G = getMachineFunction(M.get(), "g");
+  ASSERT_TRUE(G != nullptr);
+  MachineFunction *H = getMachineFunction(M.get(), "h");
+  ASSERT_TRUE(H != nullptr);
+  ProfileSummaryInfo PSI = ProfileSummaryInfo(*M.get());
+  ASSERT_TRUE(PSI.hasProfileSummary());
+  BFIData BFID_F(*F);
+  BFIData BFID_G(*G);
+  BFIData BFID_H(*H);
+  MachineBlockFrequencyInfo *MBFI_F = BFID_F.get();
+  MachineBlockFrequencyInfo *MBFI_G = BFID_G.get();
+  MachineBlockFrequencyInfo *MBFI_H = BFID_H.get();
+  MachineBasicBlock &BB0 = F->front();
+  auto iter = BB0.succ_begin();
+  MachineBasicBlock *BB1 = *iter;
+  iter++;
+  MachineBasicBlock *BB2 = *iter;
+  iter++;
+  ASSERT_TRUE(iter == BB0.succ_end());
+  MachineBasicBlock *BB3 = *BB1->succ_begin();
+  ASSERT_TRUE(BB3 == *BB2->succ_begin());
+  EXPECT_FALSE(shouldOptimizeForSize(F, &PSI, MBFI_F));
+  EXPECT_TRUE(shouldOptimizeForSize(G, &PSI, MBFI_G));
+  EXPECT_FALSE(shouldOptimizeForSize(H, &PSI, MBFI_H));
+  EXPECT_FALSE(shouldOptimizeForSize(&BB0, &PSI, MBFI_F));
+  EXPECT_FALSE(shouldOptimizeForSize(BB1, &PSI, MBFI_F));
+  EXPECT_TRUE(shouldOptimizeForSize(BB2, &PSI, MBFI_F));
+  EXPECT_FALSE(shouldOptimizeForSize(BB3, &PSI, MBFI_F));
+}
+
+const char* MachineSizeOptsTest::MIRString = R"MIR(
+--- |
+  define i32 @g(i32 %x) !prof !14 {
+    ret i32 0
+  }
+
+  define i32 @h(i32 %x) !prof !15 {
+    ret i32 0
+  }
+
+  define i32 @f(i32 %x) !prof !16 {
+  bb0:
+    %y1 = icmp eq i32 %x, 0
+    br i1 %y1, label %bb1, label %bb2, !prof !17
+
+  bb1:                                              ; preds = %bb0
+    %z1 = call i32 @g(i32 %x)
+    br label %bb3
+
+  bb2:                                              ; preds = %bb0
+    %z2 = call i32 @h(i32 %x)
+    br label %bb3
+
+  bb3:                                              ; preds = %bb2, %bb1
+    %y2 = phi i32 [ 0, %bb1 ], [ 1, %bb2 ]
+    ret i32 %y2
+  }
+
+  !llvm.module.flags = !{!0}
+
+  !0 = !{i32 1, !"ProfileSummary", !1}
+  !1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
+  !2 = !{!"ProfileFormat", !"InstrProf"}
+  !3 = !{!"TotalCount", i64 10000}
+  !4 = !{!"MaxCount", i64 10}
+  !5 = !{!"MaxInternalCount", i64 1}
+  !6 = !{!"MaxFunctionCount", i64 1000}
+  !7 = !{!"NumCounts", i64 3}
+  !8 = !{!"NumFunctions", i64 3}
+  !9 = !{!"DetailedSummary", !10}
+  !10 = !{!11, !12, !13}
+  !11 = !{i32 10000, i64 1000, i32 1}
+  !12 = !{i32 999000, i64 300, i32 3}
+  !13 = !{i32 999999, i64 5, i32 10}
+  !14 = !{!"function_entry_count", i64 1}
+  !15 = !{!"function_entry_count", i64 100}
+  !16 = !{!"function_entry_count", i64 400}
+  !17 = !{!"branch_weights", i32 100, i32 1}
+
+...
+---
+name:            g
+body:             |
+  bb.0:
+    %1:gr32 = MOV32r0 implicit-def dead $eflags
+    $eax = COPY %1
+    RET 0, $eax
+
+...
+---
+name:            h
+body:             |
+  bb.0:
+    %1:gr32 = MOV32r0 implicit-def dead $eflags
+    $eax = COPY %1
+    RET 0, $eax
+
+...
+---
+name:            f
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    successors: %bb.1(0x7ebb907a), %bb.2(0x01446f86)
+    liveins: $edi
+
+    %1:gr32 = COPY $edi
+    TEST32rr %1, %1, implicit-def $eflags
+    JCC_1 %bb.2, 5, implicit $eflags
+    JMP_1 %bb.1
+
+  bb.1:
+    successors: %bb.3(0x80000000)
+
+    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    $edi = COPY %1
+    CALL64pcrel32 @g, csr_64, implicit $rsp, implicit $ssp, implicit $edi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
+    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    %5:gr32 = COPY $eax
+    %4:gr32 = MOV32r0 implicit-def dead $eflags
+    JMP_1 %bb.3
+
+  bb.2:
+    successors: %bb.3(0x80000000)
+
+    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    $edi = COPY %1
+    CALL64pcrel32 @h, csr_64, implicit $rsp, implicit $ssp, implicit $edi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
+    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    %3:gr32 = COPY $eax
+    %2:gr32 = MOV32ri 1
+
+  bb.3:
+    %0:gr32 = PHI %2, %bb.2, %4, %bb.1
+    $eax = COPY %0
+    RET 0, $eax
+
+...
+)MIR";
+
+} // anonymous namespace
diff --git a/unittests/Transforms/Utils/CMakeLists.txt b/unittests/Transforms/Utils/CMakeLists.txt
index 785b798..bc993a8 100644
--- a/unittests/Transforms/Utils/CMakeLists.txt
+++ b/unittests/Transforms/Utils/CMakeLists.txt
@@ -14,6 +14,7 @@
   FunctionComparatorTest.cpp
   IntegerDivisionTest.cpp
   LocalTest.cpp
+  SizeOptsTest.cpp
   SSAUpdaterBulkTest.cpp
   UnrollLoopTest.cpp
   ValueMapperTest.cpp
diff --git a/unittests/Transforms/Utils/SizeOptsTest.cpp b/unittests/Transforms/Utils/SizeOptsTest.cpp
new file mode 100644
index 0000000..55ca786
--- /dev/null
+++ b/unittests/Transforms/Utils/SizeOptsTest.cpp
@@ -0,0 +1,129 @@
+//===- SizeOptsTest.cpp - SizeOpts unit tests -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SizeOpts.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/AsmParser/Parser.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/SourceMgr.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+
+class SizeOptsTest : public testing::Test {
+protected:
+  static const char* IRString;
+  LLVMContext C;
+  std::unique_ptr<Module> M;
+  struct BFIData {
+    std::unique_ptr<DominatorTree> DT;
+    std::unique_ptr<LoopInfo> LI;
+    std::unique_ptr<BranchProbabilityInfo> BPI;
+    std::unique_ptr<BlockFrequencyInfo> BFI;
+    BFIData(Function &F) {
+      DT.reset(new DominatorTree(F));
+      LI.reset(new LoopInfo(*DT));
+      BPI.reset(new BranchProbabilityInfo(F, *LI));
+      BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
+    }
+    BlockFrequencyInfo *get() { return BFI.get(); }
+  };
+
+  void SetUp() override {
+    SMDiagnostic Err;
+    M = parseAssemblyString(IRString, Err, C);
+  }
+};
+
+TEST_F(SizeOptsTest, Test) {
+  Function *F = M->getFunction("f");
+  Function *G = M->getFunction("g");
+  Function *H = M->getFunction("h");
+
+  ProfileSummaryInfo PSI(*M.get());
+  BFIData BFID_F(*F);
+  BFIData BFID_G(*G);
+  BFIData BFID_H(*H);
+  BlockFrequencyInfo *BFI_F = BFID_F.get();
+  BlockFrequencyInfo *BFI_G = BFID_G.get();
+  BlockFrequencyInfo *BFI_H = BFID_H.get();
+  BasicBlock &BB0 = F->getEntryBlock();
+  BasicBlock *BB1 = BB0.getTerminator()->getSuccessor(0);
+  BasicBlock *BB2 = BB0.getTerminator()->getSuccessor(1);
+  BasicBlock *BB3 = BB1->getSingleSuccessor();
+
+  EXPECT_TRUE(PSI.hasProfileSummary());
+  EXPECT_FALSE(shouldOptimizeForSize(F, &PSI, BFI_F));
+  EXPECT_TRUE(shouldOptimizeForSize(G, &PSI, BFI_G));
+  EXPECT_FALSE(shouldOptimizeForSize(H, &PSI, BFI_H));
+  EXPECT_FALSE(shouldOptimizeForSize(&BB0, &PSI, BFI_F));
+  EXPECT_FALSE(shouldOptimizeForSize(BB1, &PSI, BFI_F));
+  EXPECT_TRUE(shouldOptimizeForSize(BB2, &PSI, BFI_F));
+  EXPECT_FALSE(shouldOptimizeForSize(BB3, &PSI, BFI_F));
+}
+
+const char* SizeOptsTest::IRString = R"IR(
+  define i32 @g(i32 %x) !prof !14 {
+    ret i32 0
+  }
+
+  define i32 @h(i32 %x) !prof !15 {
+    ret i32 0
+  }
+
+  define i32 @f(i32 %x) !prof !16 {
+  bb0:
+    %y1 = icmp eq i32 %x, 0
+    br i1 %y1, label %bb1, label %bb2, !prof !17
+
+  bb1:                                              ; preds = %bb0
+    %z1 = call i32 @g(i32 %x)
+    br label %bb3
+
+  bb2:                                              ; preds = %bb0
+    %z2 = call i32 @h(i32 %x)
+    br label %bb3
+
+  bb3:                                              ; preds = %bb2, %bb1
+    %y2 = phi i32 [ 0, %bb1 ], [ 1, %bb2 ]
+    ret i32 %y2
+  }
+
+  !llvm.module.flags = !{!0}
+
+  !0 = !{i32 1, !"ProfileSummary", !1}
+  !1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
+  !2 = !{!"ProfileFormat", !"InstrProf"}
+  !3 = !{!"TotalCount", i64 10000}
+  !4 = !{!"MaxCount", i64 10}
+  !5 = !{!"MaxInternalCount", i64 1}
+  !6 = !{!"MaxFunctionCount", i64 1000}
+  !7 = !{!"NumCounts", i64 3}
+  !8 = !{!"NumFunctions", i64 3}
+  !9 = !{!"DetailedSummary", !10}
+  !10 = !{!11, !12, !13}
+  !11 = !{i32 10000, i64 1000, i32 1}
+  !12 = !{i32 999000, i64 300, i32 3}
+  !13 = !{i32 999999, i64 5, i32 10}
+  !14 = !{!"function_entry_count", i64 1}
+  !15 = !{!"function_entry_count", i64 100}
+  !16 = !{!"function_entry_count", i64 400}
+  !17 = !{!"branch_weights", i32 100, i32 1}
+)IR";
+
+} // end anonymous namespace