[LTO] Record whether LTOUnit splitting is enabled in index

Summary:
Records in the module summary index whether the bitcode was compiled
with the option necessary to enable splitting the LTO unit
(e.g. -fsanitize=cfi, -fwhole-program-vtables, or -fsplit-lto-unit).

The information is passed down to the ModuleSummaryIndex builder via a
new module flag "EnableSplitLTOUnit", which is propagated onto a flag
on the summary index.

This is then used during the LTO link to check whether all linked
summaries were built with the same value of this flag. If not, an error
is issued when we detect a situation requiring whole program visibility
of the class hierarchy. This is the case when both of the following
conditions are met:
1) We are performing LowerTypeTests or Whole Program Devirtualization.
2) There are type tests or type checked loads in the code.

Note that I have also changed the ThinLTOBitcodeWriter to gate module
splitting on the value of this flag.

Reviewers: pcc

Subscribers: ormris, mehdi_amini, Prazek, inglorion, eraman, steven_wu, dexonsmith, arphaman, dang, llvm-commits

Differential Revision: https://reviews.llvm.org/D53890

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@350948 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/include/llvm/Bitcode/BitcodeReader.h b/include/llvm/Bitcode/BitcodeReader.h
index ce8bdd9..0d7cc14 100644
--- a/include/llvm/Bitcode/BitcodeReader.h
+++ b/include/llvm/Bitcode/BitcodeReader.h
@@ -51,6 +51,7 @@
   struct BitcodeLTOInfo {
     bool IsThinLTO;
     bool HasSummary;
+    bool EnableSplitLTOUnit;
   };
 
   /// Represents a module in a bitcode file.
diff --git a/include/llvm/IR/ModuleSummaryIndex.h b/include/llvm/IR/ModuleSummaryIndex.h
index 6653795..a1acee4 100644
--- a/include/llvm/IR/ModuleSummaryIndex.h
+++ b/include/llvm/IR/ModuleSummaryIndex.h
@@ -831,6 +831,13 @@
   /// union.
   bool HaveGVs;
 
+  // True if the index was created for a module compiled with -fsplit-lto-unit.
+  bool EnableSplitLTOUnit;
+
+  // True if some of the modules were compiled with -fsplit-lto-unit and
+  // some were not. Set when the combined index is created during the thin link.
+  bool PartiallySplitLTOUnits = false;
+
   std::set<std::string> CfiFunctionDefs;
   std::set<std::string> CfiFunctionDecls;
 
@@ -850,7 +857,9 @@
 
 public:
   // See HaveGVs variable comment.
-  ModuleSummaryIndex(bool HaveGVs) : HaveGVs(HaveGVs), Saver(Alloc) {}
+  ModuleSummaryIndex(bool HaveGVs, bool EnableSplitLTOUnit = false)
+      : HaveGVs(HaveGVs), EnableSplitLTOUnit(EnableSplitLTOUnit), Saver(Alloc) {
+  }
 
   bool haveGVs() const { return HaveGVs; }
 
@@ -940,6 +949,12 @@
     SkipModuleByDistributedBackend = true;
   }
 
+  bool enableSplitLTOUnit() const { return EnableSplitLTOUnit; }
+  void setEnableSplitLTOUnit() { EnableSplitLTOUnit = true; }
+
+  bool partiallySplitLTOUnits() const { return PartiallySplitLTOUnits; }
+  void setPartiallySplitLTOUnits() { PartiallySplitLTOUnits = true; }
+
   bool isGlobalValueLive(const GlobalValueSummary *GVS) const {
     return !WithGlobalValueDeadStripping || GVS->isLive();
   }
diff --git a/include/llvm/LTO/LTO.h b/include/llvm/LTO/LTO.h
index 1539087..534d9b6 100644
--- a/include/llvm/LTO/LTO.h
+++ b/include/llvm/LTO/LTO.h
@@ -400,6 +400,9 @@
   Error runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache);
 
   mutable bool CalledGetMaxTasks = false;
+
+  // Use Optional to distinguish false from not yet initialized.
+  Optional<bool> EnableSplitLTOUnit;
 };
 
 /// The resolution for a symbol. The linker must provide a SymbolResolution for
diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp b/lib/Analysis/ModuleSummaryAnalysis.cpp
index 6bda1d1..87f76d4 100644
--- a/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -457,7 +457,11 @@
     std::function<BlockFrequencyInfo *(const Function &F)> GetBFICallback,
     ProfileSummaryInfo *PSI) {
   assert(PSI);
-  ModuleSummaryIndex Index(/*HaveGVs=*/true);
+  bool EnableSplitLTOUnit = false;
+  if (auto *MD = mdconst::extract_or_null<ConstantInt>(
+          M.getModuleFlag("EnableSplitLTOUnit")))
+    EnableSplitLTOUnit = MD->getZExtValue();
+  ModuleSummaryIndex Index(/*HaveGVs=*/true, EnableSplitLTOUnit);
 
   // Identify the local values in the llvm.used and llvm.compiler.used sets,
   // which should not be exported as they would then require renaming and
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 2f3d2f3..fe051e7 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -5294,18 +5294,30 @@
       break;
     case bitc::FS_FLAGS: {  // [flags]
       uint64_t Flags = Record[0];
-      // Scan flags (set only on the combined index).
-      assert(Flags <= 0x3 && "Unexpected bits in flag");
+      // Scan flags.
+      assert(Flags <= 0x1f && "Unexpected bits in flag");
 
       // 1 bit: WithGlobalValueDeadStripping flag.
+      // Set on combined index only.
       if (Flags & 0x1)
         TheIndex.setWithGlobalValueDeadStripping();
       // 1 bit: SkipModuleByDistributedBackend flag.
+      // Set on combined index only.
       if (Flags & 0x2)
         TheIndex.setSkipModuleByDistributedBackend();
       // 1 bit: HasSyntheticEntryCounts flag.
+      // Set on combined index only.
       if (Flags & 0x4)
         TheIndex.setHasSyntheticEntryCounts();
+      // 1 bit: EnableSplitLTOUnit flag.
+      // Set on per module indexes. It is up to the client to validate
+      // the consistency of this flag across modules being linked.
+      if (Flags & 0x8)
+        TheIndex.setEnableSplitLTOUnit();
+      // 1 bit: PartiallySplitLTOUnits flag.
+      // Set on combined index only.
+      if (Flags & 0x10)
+        TheIndex.setPartiallySplitLTOUnits();
       break;
     }
     case bitc::FS_VALUE_GUID: { // [valueid, refguid]
@@ -5917,6 +5929,46 @@
   return std::move(Index);
 }
 
+static Expected<bool> getEnableSplitLTOUnitFlag(BitstreamCursor &Stream,
+                                                unsigned ID) {
+  if (Stream.EnterSubBlock(ID))
+    return error("Invalid record");
+  SmallVector<uint64_t, 64> Record;
+
+  while (true) {
+    BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+
+    switch (Entry.Kind) {
+    case BitstreamEntry::SubBlock: // Handled for us already.
+    case BitstreamEntry::Error:
+      return error("Malformed block");
+    case BitstreamEntry::EndBlock:
+      // If no flags record found, conservatively return true to mimic
+      // behavior before this flag was added.
+      return true;
+    case BitstreamEntry::Record:
+      // The interesting case.
+      break;
+    }
+
+    // Look for the FS_FLAGS record.
+    Record.clear();
+    auto BitCode = Stream.readRecord(Entry.ID, Record);
+    switch (BitCode) {
+    default: // Default behavior: ignore.
+      break;
+    case bitc::FS_FLAGS: { // [flags]
+      uint64_t Flags = Record[0];
+      // Scan flags.
+      assert(Flags <= 0x1f && "Unexpected bits in flag");
+
+      return Flags & 0x8;
+    }
+    }
+  }
+  llvm_unreachable("Exit infinite loop");
+}
+
 // Check if the given bitcode buffer contains a global value summary block.
 Expected<BitcodeLTOInfo> BitcodeModule::getLTOInfo() {
   BitstreamCursor Stream(Buffer);
@@ -5932,14 +5984,27 @@
     case BitstreamEntry::Error:
       return error("Malformed block");
     case BitstreamEntry::EndBlock:
-      return BitcodeLTOInfo{/*IsThinLTO=*/false, /*HasSummary=*/false};
+      return BitcodeLTOInfo{/*IsThinLTO=*/false, /*HasSummary=*/false,
+                            /*EnableSplitLTOUnit=*/false};
 
     case BitstreamEntry::SubBlock:
-      if (Entry.ID == bitc::GLOBALVAL_SUMMARY_BLOCK_ID)
-        return BitcodeLTOInfo{/*IsThinLTO=*/true, /*HasSummary=*/true};
+      if (Entry.ID == bitc::GLOBALVAL_SUMMARY_BLOCK_ID) {
+        Expected<bool> EnableSplitLTOUnit =
+            getEnableSplitLTOUnitFlag(Stream, Entry.ID);
+        if (!EnableSplitLTOUnit)
+          return EnableSplitLTOUnit.takeError();
+        return BitcodeLTOInfo{/*IsThinLTO=*/true, /*HasSummary=*/true,
+                              *EnableSplitLTOUnit};
+      }
 
-      if (Entry.ID == bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID)
-        return BitcodeLTOInfo{/*IsThinLTO=*/false, /*HasSummary=*/true};
+      if (Entry.ID == bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID) {
+        Expected<bool> EnableSplitLTOUnit =
+            getEnableSplitLTOUnitFlag(Stream, Entry.ID);
+        if (!EnableSplitLTOUnit)
+          return EnableSplitLTOUnit.takeError();
+        return BitcodeLTOInfo{/*IsThinLTO=*/false, /*HasSummary=*/true,
+                              *EnableSplitLTOUnit};
+      }
 
       // Ignore other sub-blocks.
       if (Stream.SkipBlock())
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index 68d79ed..ba4f932 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -3618,6 +3618,13 @@
 
   Stream.EmitRecord(bitc::FS_VERSION, ArrayRef<uint64_t>{INDEX_VERSION});
 
+  // Write the index flags.
+  uint64_t Flags = 0;
+  // Bits 1-3 are set only in the combined index, skip them.
+  if (Index->enableSplitLTOUnit())
+    Flags |= 0x8;
+  Stream.EmitRecord(bitc::FS_FLAGS, ArrayRef<uint64_t>{Flags});
+
   if (Index->begin() == Index->end()) {
     Stream.ExitBlock();
     return;
@@ -3734,6 +3741,10 @@
     Flags |= 0x2;
   if (Index.hasSyntheticEntryCounts())
     Flags |= 0x4;
+  if (Index.enableSplitLTOUnit())
+    Flags |= 0x8;
+  if (Index.partiallySplitLTOUnits())
+    Flags |= 0x10;
   Stream.EmitRecord(bitc::FS_FLAGS, ArrayRef<uint64_t>{Flags});
 
   for (const auto &GVI : valueIds()) {
diff --git a/lib/LTO/LTO.cpp b/lib/LTO/LTO.cpp
index f736ef8..3a95506 100644
--- a/lib/LTO/LTO.cpp
+++ b/lib/LTO/LTO.cpp
@@ -546,6 +546,15 @@
   if (!LTOInfo)
     return LTOInfo.takeError();
 
+  if (EnableSplitLTOUnit.hasValue()) {
+    // If only some modules were split, flag this in the index so that
+    // we can skip or error on optimizations that need consistently split
+    // modules (whole program devirt and lower type tests).
+    if (EnableSplitLTOUnit.getValue() != LTOInfo->EnableSplitLTOUnit)
+      ThinLTO.CombinedIndex.setPartiallySplitLTOUnits();
+  } else
+    EnableSplitLTOUnit = LTOInfo->EnableSplitLTOUnit;
+
   BitcodeModule BM = Input.Mods[ModI];
   auto ModSyms = Input.module_symbols(ModI);
   addModuleToGlobalRes(ModSyms, {ResI, ResE},
diff --git a/lib/Transforms/IPO/LowerTypeTests.cpp b/lib/Transforms/IPO/LowerTypeTests.cpp
index e4dcd4d..87c65db 100644
--- a/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -1702,6 +1702,13 @@
       !ExportSummary && !ImportSummary)
     return false;
 
+  // If only some of the modules were split, we cannot correctly handle
+  // code that contains type tests.
+  if (TypeTestFunc && !TypeTestFunc->use_empty() &&
+      ((ExportSummary && ExportSummary->partiallySplitLTOUnits()) ||
+       (ImportSummary && ImportSummary->partiallySplitLTOUnits())))
+    report_fatal_error("inconsistent LTO Unit splitting with llvm.type.test");
+
   if (ImportSummary) {
     if (TypeTestFunc) {
       for (auto UI = TypeTestFunc->use_begin(), UE = TypeTestFunc->use_end();
diff --git a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index a5382c4..510ecb5 100644
--- a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -418,8 +418,18 @@
   }
 }
 
-// Returns whether this module needs to be split because it uses type metadata.
+// Returns whether this module needs to be split because splitting is
+// enabled and it uses type metadata.
 bool requiresSplit(Module &M) {
+  // First check if the LTO Unit splitting has been enabled.
+  bool EnableSplitLTOUnit = false;
+  if (auto *MD = mdconst::extract_or_null<ConstantInt>(
+          M.getModuleFlag("EnableSplitLTOUnit")))
+    EnableSplitLTOUnit = MD->getZExtValue();
+  if (!EnableSplitLTOUnit)
+    return false;
+
+  // Module only needs to be split if it contains type metadata.
   for (auto &GO : M.global_objects()) {
     if (GO.hasMetadata(LLVMContext::MD_type))
       return true;
@@ -431,7 +441,7 @@
 void writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS,
                          function_ref<AAResults &(Function &)> AARGetter,
                          Module &M, const ModuleSummaryIndex *Index) {
-  // See if this module has any type metadata. If so, we need to split it.
+  // Split module if splitting is enabled and it contains any type metadata.
   if (requiresSplit(M))
     return splitAndWriteThinLTOBitcode(OS, ThinLinkOS, AARGetter, M);
 
diff --git a/lib/Transforms/IPO/WholeProgramDevirt.cpp b/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 37905da..48bd0cd 100644
--- a/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -1563,6 +1563,17 @@
       M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load));
   Function *AssumeFunc = M.getFunction(Intrinsic::getName(Intrinsic::assume));
 
+  // If only some of the modules were split, we cannot correctly handle
+  // code that contains type tests or type checked loads.
+  if ((ExportSummary && ExportSummary->partiallySplitLTOUnits()) ||
+      (ImportSummary && ImportSummary->partiallySplitLTOUnits())) {
+    if ((TypeTestFunc && !TypeTestFunc->use_empty()) ||
+        (TypeCheckedLoadFunc && !TypeCheckedLoadFunc->use_empty()))
+      report_fatal_error("inconsistent LTO Unit splitting with llvm.type.test "
+                         "or llvm.type.checked.load");
+    return false;
+  }
+
   // Normally if there are no users of the devirtualization intrinsics in the
   // module, this pass has nothing to do. But if we are exporting, we also need
   // to handle any users that appear only in the function summaries.
diff --git a/test/Bitcode/thinlto-alias.ll b/test/Bitcode/thinlto-alias.ll
index 835d720..f3896d2 100644
--- a/test/Bitcode/thinlto-alias.ll
+++ b/test/Bitcode/thinlto-alias.ll
@@ -18,6 +18,7 @@
 ; CHECK-NEXT: <FUNCTION op0=4 op1=7
 ; CHECK:       <GLOBALVAL_SUMMARY_BLOCK
 ; CHECK-NEXT:    <VERSION
+; CHECK-NEXT:    <FLAGS
 ; See if the call to func is registered.
 ; The value id 1 matches the second FUNCTION record above.
 ; CHECK-NEXT:    <PERMODULE {{.*}} op6=1/>
diff --git a/test/Bitcode/thinlto-alias2.ll b/test/Bitcode/thinlto-alias2.ll
index 3d68e3f..8b04ee7 100644
--- a/test/Bitcode/thinlto-alias2.ll
+++ b/test/Bitcode/thinlto-alias2.ll
@@ -4,6 +4,7 @@
 
 ; CHECK:       <GLOBALVAL_SUMMARY_BLOCK
 ; CHECK-NEXT:    <VERSION
+; CHECK-NEXT:    <FLAGS
 ; CHECK-NEXT:    <PERMODULE {{.*}} op4=0 op5=0 op6=[[ALIASID:[0-9]+]]/>
 ; CHECK-NEXT:    <PERMODULE {{.*}} op0=[[ALIASEEID:[0-9]+]]
 ; CHECK-NEXT:    <ALIAS {{.*}} op0=[[ALIASID]] {{.*}} op2=[[ALIASEEID]]/>
diff --git a/test/Bitcode/thinlto-function-summary-callgraph-cast.ll b/test/Bitcode/thinlto-function-summary-callgraph-cast.ll
index 7964440..d4b4d54 100644
--- a/test/Bitcode/thinlto-function-summary-callgraph-cast.ll
+++ b/test/Bitcode/thinlto-function-summary-callgraph-cast.ll
@@ -5,6 +5,7 @@
 
 ; CHECK:       <GLOBALVAL_SUMMARY_BLOCK
 ; CHECK-NEXT:    <VERSION
+; CHECK-NEXT:    <FLAGS
 ; "op7" is a call to "callee" function.
 ; CHECK-NEXT:    <PERMODULE {{.*}} op8=3 op9=[[ALIASID:[0-9]+]]/>
 ; "another_caller" has only references but no calls.
diff --git a/test/Bitcode/thinlto-function-summary-callgraph-pgo.ll b/test/Bitcode/thinlto-function-summary-callgraph-pgo.ll
index e332224..b9613f7 100644
--- a/test/Bitcode/thinlto-function-summary-callgraph-pgo.ll
+++ b/test/Bitcode/thinlto-function-summary-callgraph-pgo.ll
@@ -16,6 +16,7 @@
 ; CHECK-NEXT: <FUNCTION op0=4 op1=4
 ; CHECK:       <GLOBALVAL_SUMMARY_BLOCK
 ; CHECK-NEXT:    <VERSION
+; CHECK-NEXT:    <FLAGS
 ; See if the call to func is registered, using the expected hotness type.
 ; CHECK-NEXT:    <PERMODULE_PROFILE {{.*}} op6=1 op7=2/>
 ; CHECK-NEXT:  </GLOBALVAL_SUMMARY_BLOCK>
diff --git a/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll b/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll
index 31c99c18..0cd1098 100644
--- a/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll
+++ b/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll
@@ -46,6 +46,7 @@
 ; CHECK-NEXT: <FUNCTION op0=42 op1=5
 ; CHECK-LABEL:       <GLOBALVAL_SUMMARY_BLOCK
 ; CHECK-NEXT:    <VERSION
+; CHECK-NEXT:    <FLAGS
 ; CHECK-NEXT:    <VALUE_GUID op0=25 op1=123/>
 ; op4=hot1 op6=cold op8=hot2 op10=hot4 op12=none1 op14=hot3 op16=none2 op18=none3 op20=123
 ; CHECK-NEXT:    <PERMODULE_PROFILE {{.*}} op6=1 op7=3 op8=5 op9=1 op10=2 op11=3 op12=4 op13=1 op14=6 op15=2 op16=3 op17=3 op18=7 op19=2 op20=8 op21=2 op22=25 op23=4/>
diff --git a/test/Bitcode/thinlto-function-summary-callgraph-relbf.ll b/test/Bitcode/thinlto-function-summary-callgraph-relbf.ll
index 6c14465..7c7a6f6 100644
--- a/test/Bitcode/thinlto-function-summary-callgraph-relbf.ll
+++ b/test/Bitcode/thinlto-function-summary-callgraph-relbf.ll
@@ -12,6 +12,7 @@
 ; CHECK-NEXT: <FUNCTION op0=17 op1=4
 ; CHECK:       <GLOBALVAL_SUMMARY_BLOCK
 ; CHECK-NEXT:    <VERSION
+; CHECK-NEXT:    <FLAGS
 ; See if the call to func is registered.
 ; CHECK-NEXT:    <PERMODULE_RELBF {{.*}} op4=1 {{.*}} op8=256
 ; CHECK-NEXT:  </GLOBALVAL_SUMMARY_BLOCK>
diff --git a/test/Bitcode/thinlto-function-summary-callgraph-sample-profile-summary.ll b/test/Bitcode/thinlto-function-summary-callgraph-sample-profile-summary.ll
index d1f980a..8bf65ab 100644
--- a/test/Bitcode/thinlto-function-summary-callgraph-sample-profile-summary.ll
+++ b/test/Bitcode/thinlto-function-summary-callgraph-sample-profile-summary.ll
@@ -29,6 +29,7 @@
 
 ; CHECK-LABEL:       <GLOBALVAL_SUMMARY_BLOCK
 ; CHECK-NEXT:    <VERSION
+; CHECK-NEXT:    <FLAGS
 ; CHECK-NEXT:    <VALUE_GUID op0=26 op1=123/>
 ; op4=none1 op6=hot1 op8=cold1 op10=none2 op12=hot2 op14=cold2 op16=none3 op18=hot3 op20=cold3 op22=123
 ; CHECK-NEXT:    <PERMODULE_PROFILE {{.*}} op6=7 op7=0 op8=1 op9=3 op10=4 op11=1 op12=8 op13=0 op14=2 op15=3 op16=5 op17=1 op18=9 op19=0 op20=3 op21=3 op22=6 op23=1 op24=26 op25=4/>
diff --git a/test/Bitcode/thinlto-function-summary-callgraph.ll b/test/Bitcode/thinlto-function-summary-callgraph.ll
index a605b7e..0969b84 100644
--- a/test/Bitcode/thinlto-function-summary-callgraph.ll
+++ b/test/Bitcode/thinlto-function-summary-callgraph.ll
@@ -17,6 +17,7 @@
 ; CHECK-NEXT: <FUNCTION op0=17 op1=4
 ; CHECK:       <GLOBALVAL_SUMMARY_BLOCK
 ; CHECK-NEXT:    <VERSION
+; CHECK-NEXT:    <FLAGS
 ; See if the call to func is registered
 ; CHECK-NEXT:    <PERMODULE {{.*}} op4=1
 ; CHECK-NEXT:  </GLOBALVAL_SUMMARY_BLOCK>
diff --git a/test/Bitcode/thinlto-function-summary.ll b/test/Bitcode/thinlto-function-summary.ll
index be7e974..67c5037 100644
--- a/test/Bitcode/thinlto-function-summary.ll
+++ b/test/Bitcode/thinlto-function-summary.ll
@@ -19,6 +19,7 @@
 ; BC-NEXT: <ALIAS op0=67 op1=1
 ; BC: <GLOBALVAL_SUMMARY_BLOCK
 ; BC-NEXT: <VERSION
+; BC-NEXT: <FLAGS
 ; BC-NEXT: <PERMODULE {{.*}} op0=1 op1=0
 ; BC-NEXT: <PERMODULE {{.*}} op0=2 op1=0
 ; BC-NEXT: <PERMODULE {{.*}} op0=3 op1=7
diff --git a/test/LTO/Resolution/X86/export-jumptable.ll b/test/LTO/Resolution/X86/export-jumptable.ll
index d6cce85..8ced9d9 100644
--- a/test/LTO/Resolution/X86/export-jumptable.ll
+++ b/test/LTO/Resolution/X86/export-jumptable.ll
@@ -2,7 +2,7 @@
 ; the full LTO object file; any such functions will be referenced by the jump
 ; table.
 
-; RUN: opt -thinlto-bc -o %t %s
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t %s
 ; RUN: llvm-lto2 run -o %t2 -r %t,f1,p -r %t,f2,p -r %t,_start,px %t -save-temps
 ; RUN: llvm-dis %t2.1.2.internalize.bc -o - | FileCheck %s
 
diff --git a/test/LTO/Resolution/X86/local-def-dllimport.ll b/test/LTO/Resolution/X86/local-def-dllimport.ll
index ddb78fb..c97e4b7 100644
--- a/test/LTO/Resolution/X86/local-def-dllimport.ll
+++ b/test/LTO/Resolution/X86/local-def-dllimport.ll
@@ -1,4 +1,4 @@
-; RUN: opt -thinlto-bc -o %t0.bc %s
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t0.bc %s
 ; RUN: llvm-lto2 run -r %t0.bc,__imp_f,l \
 ; RUN:               -r %t0.bc,g,p \
 ; RUN:               -r %t0.bc,g,l \
diff --git a/test/LTO/Resolution/X86/lowertypetests.ll b/test/LTO/Resolution/X86/lowertypetests.ll
index b87452c..e5be4b9 100644
--- a/test/LTO/Resolution/X86/lowertypetests.ll
+++ b/test/LTO/Resolution/X86/lowertypetests.ll
@@ -1,4 +1,4 @@
-; RUN: opt -thinlto-bc -o %t %s
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t %s
 ; RUN: llvm-lto2 run -r %t,f,plx -r %t,g_alias,plx -r %t,foo,lx -r %t,foo,plx -r %t,bar,lx -r %t,bar,plx -o %t1 %t
 ; RUN: llvm-nm %t1.0 | FileCheck --check-prefix=MERGED %s
 ; RUN: llvm-nm %t1.1 | FileCheck %s
diff --git a/test/LTO/Resolution/X86/lto-unit-check.ll b/test/LTO/Resolution/X86/lto-unit-check.ll
new file mode 100644
index 0000000..1736a5b
--- /dev/null
+++ b/test/LTO/Resolution/X86/lto-unit-check.ll
@@ -0,0 +1,55 @@
+; Test to ensure that the Enable Split LTO Unit flag is set properly in the
+; summary, and that we correctly silently handle linking bitcode files with
+; different values of this flag.
+
+; Linking bitcode both with EnableSplitLTOUnit set should work
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t1 %s
+; RUN: llvm-bcanalyzer -dump %t1 | FileCheck %s --check-prefix=SPLITLTOUNIT
+; RUN: llvm-dis -o - %t1 | FileCheck %s --check-prefix=ENABLESPLITFLAG
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t2 %s
+; RUN: llvm-bcanalyzer -dump %t2 | FileCheck %s --check-prefix=SPLITLTOUNIT
+; RUN: llvm-dis -o - %t2 | FileCheck %s --check-prefix=ENABLESPLITFLAG
+; RUN: llvm-lto2 run -o %t3 %t1 %t2
+
+; Linking bitcode both without EnableSplitLTOUnit set should work
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit=false -o %t1 %s
+; RUN: llvm-bcanalyzer -dump %t1 | FileCheck %s --check-prefix=NOSPLITLTOUNIT
+; RUN: llvm-dis -o - %t1 | FileCheck %s --check-prefix=NOENABLESPLITFLAG
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit=false -o %t2 %s
+; RUN: llvm-bcanalyzer -dump %t2 | FileCheck %s --check-prefix=NOSPLITLTOUNIT
+; RUN: llvm-dis -o - %t2 | FileCheck %s --check-prefix=NOENABLESPLITFLAG
+; RUN: llvm-lto2 run -o %t3 %t1 %t2
+
+; Linking bitcode with different values of EnableSplitLTOUnit should succeed
+; (silently skipping any optimizations like whole program devirt that rely
+; on all modules being split).
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t1 %s
+; RUN: llvm-bcanalyzer -dump %t1 | FileCheck %s --check-prefix=SPLITLTOUNIT
+; RUN: llvm-dis -o - %t1 | FileCheck %s --check-prefix=ENABLESPLITFLAG
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit=false -o %t2 %s
+; RUN: llvm-bcanalyzer -dump %t2 | FileCheck %s --check-prefix=NOSPLITLTOUNIT
+; RUN: llvm-dis -o - %t2 | FileCheck %s --check-prefix=NOENABLESPLITFLAG
+; RUN: llvm-lto2 run -o %t3 %t1 %t2
+
+; Linking bitcode with different values of EnableSplitLTOUnit (reverse order)
+; should succeed (silently skipping any optimizations like whole program devirt
+; that rely on all modules being split).
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit=false -o %t1 %s
+; RUN: llvm-bcanalyzer -dump %t1 | FileCheck %s --check-prefix=NOSPLITLTOUNIT
+; RUN: llvm-dis -o - %t1 | FileCheck %s --check-prefix=NOENABLESPLITFLAG
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t2 %s
+; RUN: llvm-bcanalyzer -dump %t2 | FileCheck %s --check-prefix=SPLITLTOUNIT
+; RUN: llvm-dis -o - %t2 | FileCheck %s --check-prefix=ENABLESPLITFLAG
+; RUN: llvm-lto2 run -o %t3 %t1 %t2
+
+; The flag should be set when splitting is enabled (older bitcode without a
+; FLAGS record is treated as enabled, for backwards compatibility).
+; SPLITLTOUNIT: <FLAGS op0=8/>
+; NOSPLITLTOUNIT: <FLAGS op0=0/>
+
+; Check that the corresponding module flag is set when expected.
+; ENABLESPLITFLAG: !{i32 1, !"EnableSplitLTOUnit", i32 1}
+; NOENABLESPLITFLAG-NOT: !{i32 1, !"EnableSplitLTOUnit", i32 1}
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/ThinLTO/X86/cache-icall.ll b/test/ThinLTO/X86/cache-icall.ll
index 5e64a71..034ff97 100644
--- a/test/ThinLTO/X86/cache-icall.ll
+++ b/test/ThinLTO/X86/cache-icall.ll
@@ -4,8 +4,8 @@
 ; This affects code generated for any users of f(). Make sure that we don't pull a stale object
 ; file for %t.o from the cache.
 
-; RUN: opt -module-hash -module-summary -thinlto-bc %s -o %t.bc
-; RUN: opt -module-hash -module-summary -thinlto-bc %p/Inputs/cache-icall.ll -o %t2.bc
+; RUN: opt -module-hash -module-summary -thinlto-bc -thinlto-split-lto-unit %s -o %t.bc
+; RUN: opt -module-hash -module-summary -thinlto-bc -thinlto-split-lto-unit %p/Inputs/cache-icall.ll -o %t2.bc
 
 ; RUN: rm -Rf %t.cache && mkdir %t.cache
 
diff --git a/test/ThinLTO/X86/cfi-devirt.ll b/test/ThinLTO/X86/cfi-devirt.ll
index 7ade794..45d6960 100644
--- a/test/ThinLTO/X86/cfi-devirt.ll
+++ b/test/ThinLTO/X86/cfi-devirt.ll
@@ -2,7 +2,7 @@
 
 ; Test CFI devirtualization through the thin link and backend.
 
-; RUN: opt -thinlto-bc -o %t.o %s
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t.o %s
 
 ; Legacy PM
 ; FIXME: Fix machine verifier issues and remove -verify-machineinstrs=0. PR39436.
@@ -44,6 +44,27 @@
 
 ; REMARK: single-impl: devirtualized a call to _ZN1A1nEi
 
+; Next check that we emit an error when trying to LTO link this module
+; containing an llvm.type.checked.load (with a split LTO Unit) with one
+; that does not have a split LTO Unit.
+; RUN: opt -thinlto-bc -o %t2.o %S/Inputs/empty.ll
+; RUN: not llvm-lto2 run %t.o %t2.o -save-temps -pass-remarks=. \
+; RUN:   -verify-machineinstrs=0 \
+; RUN:   -o %t3 \
+; RUN:   -r=%t.o,test,px \
+; RUN:   -r=%t.o,_ZN1A1nEi,p \
+; RUN:   -r=%t.o,_ZN1B1fEi,p \
+; RUN:   -r=%t.o,_ZN1C1fEi,p \
+; RUN:   -r=%t.o,empty,p \
+; RUN:   -r=%t.o,_ZTV1B, \
+; RUN:   -r=%t.o,_ZTV1C, \
+; RUN:   -r=%t.o,_ZN1A1nEi, \
+; RUN:   -r=%t.o,_ZN1B1fEi, \
+; RUN:   -r=%t.o,_ZN1C1fEi, \
+; RUN:   -r=%t.o,_ZTV1B,px \
+; RUN:   -r=%t.o,_ZTV1C,px 2>&1 | FileCheck %s --check-prefix=ERROR
+; ERROR: LLVM ERROR: inconsistent LTO Unit splitting with llvm.type.test or llvm.type.checked.load
+
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-grtev4-linux-gnu"
 
diff --git a/test/ThinLTO/X86/cfi-distributed.ll b/test/ThinLTO/X86/cfi-distributed.ll
index 94f3f95..5339228 100644
--- a/test/ThinLTO/X86/cfi-distributed.ll
+++ b/test/ThinLTO/X86/cfi-distributed.ll
@@ -3,8 +3,8 @@
 ; Test to ensure that only referenced type ID records are emitted into
 ; each distributed index file.
 
-; RUN: opt -thinlto-bc -o %t1.o %s
-; RUN: opt -thinlto-bc -o %t2.o %p/Inputs/cfi-distributed.ll
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t1.o %s
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t2.o %p/Inputs/cfi-distributed.ll
 
 ; RUN: llvm-lto2 run -thinlto-distributed-indexes %t1.o %t2.o \
 ; RUN:   -o %t3 \
diff --git a/test/ThinLTO/X86/cfi-icall.ll b/test/ThinLTO/X86/cfi-icall.ll
index 1ab184f..42c26f1 100644
--- a/test/ThinLTO/X86/cfi-icall.ll
+++ b/test/ThinLTO/X86/cfi-icall.ll
@@ -1,4 +1,4 @@
-; RUN: opt -thinlto-bc %s -o %t1.bc
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit %s -o %t1.bc
 ; RUN: llvm-lto2 run  -thinlto-distributed-indexes %t1.bc -o %t.out -save-temps \
 ; RUN:   -r %t1.bc,foo,plx \
 ; RUN:   -r %t1.bc,bar,x \
diff --git a/test/ThinLTO/X86/cfi.ll b/test/ThinLTO/X86/cfi.ll
index 9b1bde3..0edddb8 100644
--- a/test/ThinLTO/X86/cfi.ll
+++ b/test/ThinLTO/X86/cfi.ll
@@ -2,7 +2,7 @@
 
 ; Test CFI through the thin link and backend.
 
-; RUN: opt -thinlto-bc -o %t.o %s
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t.o %s
 
 ; Legacy PM
 ; RUN: llvm-lto2 run -save-temps %t.o \
diff --git a/test/ThinLTO/X86/devirt-after-icp.ll b/test/ThinLTO/X86/devirt-after-icp.ll
index 9872217..fd5dcb7 100644
--- a/test/ThinLTO/X86/devirt-after-icp.ll
+++ b/test/ThinLTO/X86/devirt-after-icp.ll
@@ -42,7 +42,7 @@
 ; will use the same vtable pointer. Without a dominance check, we could
 ; incorrectly devirtualize a->foo() to B::foo();
 
-; RUN: opt -thinlto-bc -o %t.o %s
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t.o %s
 
 ; Legacy PM
 ; FIXME: Fix machine verifier issues and remove -verify-machineinstrs=0. PR39436.
diff --git a/test/Transforms/ThinLTOBitcodeWriter/circular-reference.ll b/test/Transforms/ThinLTOBitcodeWriter/circular-reference.ll
index eeda793..fb239b0 100644
--- a/test/Transforms/ThinLTOBitcodeWriter/circular-reference.ll
+++ b/test/Transforms/ThinLTOBitcodeWriter/circular-reference.ll
@@ -1,4 +1,4 @@
-; RUN: opt -thinlto-bc -o %t %s
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t %s
 ; RUN: llvm-modextract -b -n 0 -o - %t | llvm-dis | FileCheck --check-prefix=M0 %s
 ; RUN: llvm-modextract -b -n 1 -o - %t | llvm-dis | FileCheck --check-prefix=M1 %s
 
diff --git a/test/Transforms/ThinLTOBitcodeWriter/comdat.ll b/test/Transforms/ThinLTOBitcodeWriter/comdat.ll
index caea48e..a43fa1c 100644
--- a/test/Transforms/ThinLTOBitcodeWriter/comdat.ll
+++ b/test/Transforms/ThinLTOBitcodeWriter/comdat.ll
@@ -1,4 +1,4 @@
-; RUN: opt -thinlto-bc -o %t %s
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t %s
 ; RUN: llvm-modextract -n 0 -o - %t | llvm-dis | FileCheck --check-prefix=THIN %s
 ; RUN: llvm-modextract -n 1 -o - %t | llvm-dis | FileCheck --check-prefix=MERGED %s
 
diff --git a/test/Transforms/ThinLTOBitcodeWriter/filter-alias.ll b/test/Transforms/ThinLTOBitcodeWriter/filter-alias.ll
index eb0cbe7..200d494 100644
--- a/test/Transforms/ThinLTOBitcodeWriter/filter-alias.ll
+++ b/test/Transforms/ThinLTOBitcodeWriter/filter-alias.ll
@@ -1,4 +1,4 @@
-; RUN: opt -thinlto-bc -o %t %s
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t %s
 ; RUN: llvm-modextract -n 0 -o - %t | llvm-dis | FileCheck --check-prefix=CHECK0 %s
 ; RUN: llvm-modextract -n 1 -o - %t | llvm-dis | FileCheck --check-prefix=CHECK1 %s
 ; CHECK0-NOT: @{{.*}}anon{{.*}}=
diff --git a/test/Transforms/ThinLTOBitcodeWriter/function-alias.ll b/test/Transforms/ThinLTOBitcodeWriter/function-alias.ll
index 119b821..a1dbd96 100644
--- a/test/Transforms/ThinLTOBitcodeWriter/function-alias.ll
+++ b/test/Transforms/ThinLTOBitcodeWriter/function-alias.ll
@@ -1,4 +1,4 @@
-; RUN: opt -thinlto-bc -o %t %s
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t %s
 ; RUN: llvm-modextract -n 1 -o - %t | llvm-dis | FileCheck --check-prefix=CHECK1 %s
 
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/ThinLTOBitcodeWriter/pr33536.ll b/test/Transforms/ThinLTOBitcodeWriter/pr33536.ll
index 661d073..c405c36 100644
--- a/test/Transforms/ThinLTOBitcodeWriter/pr33536.ll
+++ b/test/Transforms/ThinLTOBitcodeWriter/pr33536.ll
@@ -1,7 +1,7 @@
 ; Test for a bug specific to the new pass manager where we may build a domtree
 ; to make more precise AA queries for functions.
 ;
-; RUN: opt -aa-pipeline=default -passes='no-op-module' -debug-pass-manager -thinlto-bc -o %t %s
+; RUN: opt -aa-pipeline=default -passes='no-op-module' -debug-pass-manager -thinlto-bc -thinlto-split-lto-unit -o %t %s
 ; RUN: llvm-modextract -b -n 0 -o - %t | llvm-dis | FileCheck --check-prefix=M0 %s
 ; RUN: llvm-modextract -b -n 1 -o - %t | llvm-dis | FileCheck --check-prefix=M1 %s
 
diff --git a/test/Transforms/ThinLTOBitcodeWriter/split-internal-typeid.ll b/test/Transforms/ThinLTOBitcodeWriter/split-internal-typeid.ll
index a43db9a..290df00 100644
--- a/test/Transforms/ThinLTOBitcodeWriter/split-internal-typeid.ll
+++ b/test/Transforms/ThinLTOBitcodeWriter/split-internal-typeid.ll
@@ -1,4 +1,4 @@
-; RUN: opt -thinlto-bc -o %t %s
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t %s
 ; RUN: llvm-modextract -b -n 0 -o %t0 %t
 ; RUN: llvm-modextract -b -n 1 -o %t1 %t
 ; RUN: not llvm-modextract -b -n 2 -o - %t 2>&1 | FileCheck --check-prefix=ERROR %s
diff --git a/test/Transforms/ThinLTOBitcodeWriter/split-internal1.ll b/test/Transforms/ThinLTOBitcodeWriter/split-internal1.ll
index 6d18c4f..42a06bd 100644
--- a/test/Transforms/ThinLTOBitcodeWriter/split-internal1.ll
+++ b/test/Transforms/ThinLTOBitcodeWriter/split-internal1.ll
@@ -1,4 +1,4 @@
-; RUN: opt -thinlto-bc -o %t %s
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t %s
 ; RUN: llvm-modextract -b -n 0 -o %t0 %t
 ; RUN: llvm-modextract -b -n 1 -o %t1 %t
 ; RUN: not llvm-modextract -b -n 2 -o - %t 2>&1 | FileCheck --check-prefix=ERROR %s
diff --git a/test/Transforms/ThinLTOBitcodeWriter/split-internal2.ll b/test/Transforms/ThinLTOBitcodeWriter/split-internal2.ll
index fbe618f..02fc3d1 100644
--- a/test/Transforms/ThinLTOBitcodeWriter/split-internal2.ll
+++ b/test/Transforms/ThinLTOBitcodeWriter/split-internal2.ll
@@ -1,4 +1,4 @@
-; RUN: opt -thinlto-bc -o %t %s
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t %s
 ; RUN: llvm-modextract -b -n 0 -o %t0 %t
 ; RUN: llvm-modextract -b -n 1 -o %t1 %t
 ; RUN: not llvm-modextract -b -n 2 -o - %t 2>&1 | FileCheck --check-prefix=ERROR %s
diff --git a/test/Transforms/ThinLTOBitcodeWriter/split-vfunc-internal.ll b/test/Transforms/ThinLTOBitcodeWriter/split-vfunc-internal.ll
index 087796b..7ebb30a 100644
--- a/test/Transforms/ThinLTOBitcodeWriter/split-vfunc-internal.ll
+++ b/test/Transforms/ThinLTOBitcodeWriter/split-vfunc-internal.ll
@@ -1,4 +1,4 @@
-; RUN: opt -thinlto-bc -o %t %s
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t %s
 ; RUN: llvm-modextract -b -n 0 -o - %t | llvm-dis | FileCheck --check-prefix=M0 %s
 ; RUN: llvm-modextract -b -n 1 -o - %t | llvm-dis | FileCheck --check-prefix=M1 %s
 
diff --git a/test/Transforms/ThinLTOBitcodeWriter/split-vfunc.ll b/test/Transforms/ThinLTOBitcodeWriter/split-vfunc.ll
index 66d37d5..fcf5751 100644
--- a/test/Transforms/ThinLTOBitcodeWriter/split-vfunc.ll
+++ b/test/Transforms/ThinLTOBitcodeWriter/split-vfunc.ll
@@ -1,4 +1,4 @@
-; RUN: opt -thinlto-bc -o %t %s
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t %s
 ; RUN: llvm-modextract -b -n 0 -o - %t | llvm-dis | FileCheck --check-prefix=M0 %s
 ; RUN: llvm-modextract -b -n 1 -o - %t | llvm-dis | FileCheck --check-prefix=M1 %s
 
diff --git a/test/Transforms/ThinLTOBitcodeWriter/split.ll b/test/Transforms/ThinLTOBitcodeWriter/split.ll
index 08ed92e..5502f7a 100644
--- a/test/Transforms/ThinLTOBitcodeWriter/split.ll
+++ b/test/Transforms/ThinLTOBitcodeWriter/split.ll
@@ -1,6 +1,6 @@
 ; Generate bitcode files with summary, as well as minimized bitcode without
 ; the debug metadata for the thin link.
-; RUN: opt -thinlto-bc -thin-link-bitcode-file=%t2 -o %t %s
+; RUN: opt -thinlto-bc -thin-link-bitcode-file=%t2 -thinlto-split-lto-unit -o %t %s
 ; RUN: llvm-modextract -b -n 0 -o %t0.bc %t
 ; RUN: llvm-modextract -b -n 1 -o %t1.bc %t
 ; RUN: llvm-modextract -b -n 0 -o %t0.thinlink.bc %t2
diff --git a/test/Transforms/ThinLTOBitcodeWriter/symver.ll b/test/Transforms/ThinLTOBitcodeWriter/symver.ll
index 078825c..8acdd0c 100644
--- a/test/Transforms/ThinLTOBitcodeWriter/symver.ll
+++ b/test/Transforms/ThinLTOBitcodeWriter/symver.ll
@@ -1,4 +1,4 @@
-; RUN: opt -thinlto-bc -o %t %s
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t %s
 ; RUN: llvm-modextract -n 1 -o - %t | llvm-dis | FileCheck %s
 
 ; The target assembly parser is required to parse the symver directives
diff --git a/test/Transforms/ThinLTOBitcodeWriter/unsplittable.ll b/test/Transforms/ThinLTOBitcodeWriter/unsplittable.ll
index 5413e0f..46c87bc 100644
--- a/test/Transforms/ThinLTOBitcodeWriter/unsplittable.ll
+++ b/test/Transforms/ThinLTOBitcodeWriter/unsplittable.ll
@@ -1,4 +1,4 @@
-; RUN: opt -thinlto-bc -thin-link-bitcode-file=%t2 -o %t %s
+; RUN: opt -thinlto-bc -thin-link-bitcode-file=%t2 -thinlto-split-lto-unit -o %t %s
 ; RUN: llvm-dis -o - %t | FileCheck %s
 ; RUN: llvm-bcanalyzer -dump %t | FileCheck --check-prefix=BCA %s
 ; When not splitting the module, the thin link bitcode file should simply be a
@@ -28,7 +28,8 @@
   ret void
 }
 
-; CHECK: !llvm.module.flags = !{![[FLAG:[0-9]+]]}
-; CHECK: ![[FLAG]] = !{i32 1, !"ThinLTO", i32 0}
+; CHECK: !llvm.module.flags = !{![[FLAG1:[0-9]+]], ![[FLAG2:[0-9]+]]}
+; CHECK: ![[FLAG1]] = !{i32 1, !"EnableSplitLTOUnit", i32 1}
+; CHECK: ![[FLAG2]] = !{i32 1, !"ThinLTO", i32 0}
 
 !0 = !{i32 0, !"typeid"}
diff --git a/test/Transforms/ThinLTOBitcodeWriter/x86/module-asm.ll b/test/Transforms/ThinLTOBitcodeWriter/x86/module-asm.ll
index 15e4778..587ab3f 100644
--- a/test/Transforms/ThinLTOBitcodeWriter/x86/module-asm.ll
+++ b/test/Transforms/ThinLTOBitcodeWriter/x86/module-asm.ll
@@ -1,4 +1,4 @@
-; RUN: opt -thinlto-bc -o %t %s
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t %s
 ; RUN: llvm-modextract -b -n 0 -o - %t | llvm-dis | FileCheck --check-prefix=M0 %s
 ; RUN: llvm-modextract -b -n 1 -o - %t | llvm-dis | FileCheck --check-prefix=M1 %s
 
diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp
index 2666039..a4967a2 100644
--- a/tools/opt/opt.cpp
+++ b/tools/opt/opt.cpp
@@ -103,6 +103,10 @@
     OutputThinLTOBC("thinlto-bc",
                     cl::desc("Write output as ThinLTO-ready bitcode"));
 
+static cl::opt<bool>
+    SplitLTOUnit("thinlto-split-lto-unit",
+                 cl::desc("Enable splitting of a ThinLTO LTOUnit"));
+
 static cl::opt<std::string> ThinLinkBitcodeFile(
     "thin-link-bitcode-file", cl::value_desc("filename"),
     cl::desc(
@@ -596,6 +600,9 @@
     if (CheckBitcodeOutputToConsole(Out->os(), !Quiet))
       NoOutput = true;
 
+  if (OutputThinLTOBC)
+    M->addModuleFlag(Module::Error, "EnableSplitLTOUnit", SplitLTOUnit);
+
   if (PassPipeline.getNumOccurrences() > 0) {
     OutputKind OK = OK_NoOutput;
     if (!NoOutput)