[Clang][OpenMP Offload] Add new tool for wrapping offload device binaries

This patch removes the remaining part of the OpenMP offload linker scripts which was used for inserting device binaries into the output linked binary. Device binaries are now inserted into the host binary with a help of the wrapper bit-code file which contains device binaries as data. Wrapper bit-code file is dynamically created by the clang driver with a help of new tool clang-offload-wrapper which takes device binaries as input and produces bit-code file with required contents. Wrapper bit-code is then compiled to an object and resulting object is appended to the host linking by the clang driver.

This is the second part of the patch for eliminating OpenMP linker script (please see https://reviews.llvm.org/D64943).

Differential Revision: https://reviews.llvm.org/D68166

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@374219 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/include/clang/Driver/Action.h b/include/clang/Driver/Action.h
index c6e90b2..8ccbb6c 100644
--- a/include/clang/Driver/Action.h
+++ b/include/clang/Driver/Action.h
@@ -72,9 +72,10 @@
     VerifyPCHJobClass,
     OffloadBundlingJobClass,
     OffloadUnbundlingJobClass,
+    OffloadWrapperJobClass,
 
     JobClassFirst = PreprocessJobClass,
-    JobClassLast = OffloadUnbundlingJobClass
+    JobClassLast = OffloadWrapperJobClass
   };
 
   // The offloading kind determines if this action is binded to a particular
@@ -625,6 +626,17 @@
   }
 };
 
+class OffloadWrapperJobAction : public JobAction {
+  void anchor() override;
+
+public:
+  OffloadWrapperJobAction(ActionList &Inputs, types::ID Type);
+
+  static bool classof(const Action *A) {
+    return A->getKind() == OffloadWrapperJobClass;
+  }
+};
+
 } // namespace driver
 } // namespace clang
 
diff --git a/include/clang/Driver/Options.td b/include/clang/Driver/Options.td
index cbcf2ab..c6e06f2 100644
--- a/include/clang/Driver/Options.td
+++ b/include/clang/Driver/Options.td
@@ -1604,8 +1604,6 @@
   Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
 def fopenmp_targets_EQ : CommaJoined<["-"], "fopenmp-targets=">, Flags<[DriverOption, CC1Option]>,
   HelpText<"Specify comma-separated list of triples OpenMP offloading targets to be supported">;
-def fopenmp_dump_offload_linker_script : Flag<["-"], "fopenmp-dump-offload-linker-script">,
-  Group<f_Group>, Flags<[NoArgumentUnused, HelpHidden]>;
 def fopenmp_relocatable_target : Flag<["-"], "fopenmp-relocatable-target">,
   Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
 def fnoopenmp_relocatable_target : Flag<["-"], "fnoopenmp-relocatable-target">,
diff --git a/include/clang/Driver/ToolChain.h b/include/clang/Driver/ToolChain.h
index 2ba16ab..f0676ee 100644
--- a/include/clang/Driver/ToolChain.h
+++ b/include/clang/Driver/ToolChain.h
@@ -138,6 +138,7 @@
   mutable std::unique_ptr<Tool> Link;
   mutable std::unique_ptr<Tool> IfsMerge;
   mutable std::unique_ptr<Tool> OffloadBundler;
+  mutable std::unique_ptr<Tool> OffloadWrapper;
 
   Tool *getClang() const;
   Tool *getAssemble() const;
@@ -145,6 +146,7 @@
   Tool *getIfsMerge() const;
   Tool *getClangAs() const;
   Tool *getOffloadBundler() const;
+  Tool *getOffloadWrapper() const;
 
   mutable std::unique_ptr<SanitizerArgs> SanitizerArguments;
   mutable std::unique_ptr<XRayArgs> XRayArguments;
diff --git a/lib/Driver/Action.cpp b/lib/Driver/Action.cpp
index 0187cf9..0eb4c72 100644
--- a/lib/Driver/Action.cpp
+++ b/lib/Driver/Action.cpp
@@ -41,6 +41,8 @@
     return "clang-offload-bundler";
   case OffloadUnbundlingJobClass:
     return "clang-offload-unbundler";
+  case OffloadWrapperJobClass:
+    return "clang-offload-wrapper";
   }
 
   llvm_unreachable("invalid class");
@@ -407,3 +409,9 @@
 
 OffloadUnbundlingJobAction::OffloadUnbundlingJobAction(Action *Input)
     : JobAction(OffloadUnbundlingJobClass, Input, Input->getType()) {}
+
+void OffloadWrapperJobAction::anchor() {}
+
+OffloadWrapperJobAction::OffloadWrapperJobAction(ActionList &Inputs,
+                                                 types::ID Type)
+  : JobAction(OffloadWrapperJobClass, Inputs, Type) {}
diff --git a/lib/Driver/Driver.cpp b/lib/Driver/Driver.cpp
index a095274..51c262a 100644
--- a/lib/Driver/Driver.cpp
+++ b/lib/Driver/Driver.cpp
@@ -2288,6 +2288,9 @@
     virtual void appendTopLevelActions(ActionList &AL) {}
 
     /// Append linker actions generated by the builder.
+    virtual void appendLinkActions(ActionList &AL) {}
+
+    /// Append linker actions generated by the builder.
     virtual void appendLinkDependences(OffloadAction::DeviceDependences &DA) {}
 
     /// Initialize the builder. Return true if any initialization errors are
@@ -2890,7 +2893,7 @@
       OpenMPDeviceActions.clear();
     }
 
-    void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {
+    void appendLinkActions(ActionList &AL) override {
       assert(ToolChains.size() == DeviceLinkerInputs.size() &&
              "Toolchains and linker inputs sizes do not match.");
 
@@ -2899,12 +2902,18 @@
       for (auto &LI : DeviceLinkerInputs) {
         auto *DeviceLinkAction =
             C.MakeAction<LinkJobAction>(LI, types::TY_Image);
-        DA.add(*DeviceLinkAction, **TC, /*BoundArch=*/nullptr,
-               Action::OFK_OpenMP);
+        OffloadAction::DeviceDependences DeviceLinkDeps;
+        DeviceLinkDeps.add(*DeviceLinkAction, **TC, /*BoundArch=*/nullptr,
+		        Action::OFK_OpenMP);
+        AL.push_back(C.MakeAction<OffloadAction>(DeviceLinkDeps,
+            DeviceLinkAction->getType()));
         ++TC;
       }
+      DeviceLinkerInputs.clear();
     }
 
+    void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {}
+
     bool initialize() override {
       // Get the OpenMP toolchains. If we don't get any, the action builder will
       // know there is nothing to do related to OpenMP offloading.
@@ -3129,6 +3138,25 @@
     return false;
   }
 
+  Action* makeHostLinkAction() {
+    // Build a list of device linking actions.
+    ActionList DeviceAL;
+    for (DeviceActionBuilder *SB : SpecializedBuilders) {
+      if (!SB->isValid())
+        continue;
+      SB->appendLinkActions(DeviceAL);
+    }
+
+    if (DeviceAL.empty())
+      return nullptr;
+
+    // Create wrapper bitcode from the result of device link actions and compile
+    // it to an object which will be added to the host link command.
+    auto *BC = C.MakeAction<OffloadWrapperJobAction>(DeviceAL, types::TY_LLVM_BC);
+    auto *ASM = C.MakeAction<BackendJobAction>(BC, types::TY_PP_Asm);
+    return C.MakeAction<AssembleJobAction>(ASM, types::TY_Object);
+  }
+
   /// Processes the host linker action. This currently consists of replacing it
   /// with an offload action if there are device link objects and propagate to
   /// the host action all the offload kinds used in the current compilation. The
@@ -3434,6 +3462,8 @@
 
   // Add a link action if necessary.
   if (!LinkerInputs.empty()) {
+    if (Action *Wrapper = OffloadBuilder.makeHostLinkAction())
+      LinkerInputs.push_back(Wrapper);
     Action *LA = C.MakeAction<LinkJobAction>(LinkerInputs, types::TY_Image);
     LA = OffloadBuilder.processHostLinkAction(LA);
     Actions.push_back(LA);
@@ -3820,18 +3850,8 @@
     if (!AJ || !BJ)
       return nullptr;
 
-    // Retrieve the compile job, backend action must always be preceded by one.
-    ActionList CompileJobOffloadActions;
-    auto *CJ = getPrevDependentAction(BJ->getInputs(), CompileJobOffloadActions,
-                                      /*CanBeCollapsed=*/false);
-    if (!AJ || !BJ || !CJ)
-      return nullptr;
-
-    assert(isa<CompileJobAction>(CJ) &&
-           "Expecting compile job preceding backend job.");
-
-    // Get compiler tool.
-    const Tool *T = TC.SelectTool(*CJ);
+    // Get backend tool.
+    const Tool *T = TC.SelectTool(*BJ);
     if (!T)
       return nullptr;
 
@@ -4236,6 +4256,13 @@
         A->getOffloadingDeviceKind(), TC->getTriple().normalize(),
         /*CreatePrefixForHost=*/!!A->getOffloadingHostActiveKinds() &&
             !AtTopLevel);
+    if (isa<OffloadWrapperJobAction>(JA)) {
+      OffloadingPrefix += "-wrapper";
+      if (Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o))
+        BaseInput = FinalOutput->getValue();
+      else
+        BaseInput = getDefaultImageName();
+    }
     Result = InputInfo(A, GetNamedOutputPath(C, *JA, BaseInput, BoundArch,
                                              AtTopLevel, MultipleArchs,
                                              OffloadingPrefix),
diff --git a/lib/Driver/ToolChain.cpp b/lib/Driver/ToolChain.cpp
index db2497a..357a510 100644
--- a/lib/Driver/ToolChain.cpp
+++ b/lib/Driver/ToolChain.cpp
@@ -292,6 +292,12 @@
   return OffloadBundler.get();
 }
 
+Tool *ToolChain::getOffloadWrapper() const {
+  if (!OffloadWrapper)
+    OffloadWrapper.reset(new tools::OffloadWrapper(*this));
+  return OffloadWrapper.get();
+}
+
 Tool *ToolChain::getTool(Action::ActionClass AC) const {
   switch (AC) {
   case Action::AssembleJobClass:
@@ -324,6 +330,9 @@
   case Action::OffloadBundlingJobClass:
   case Action::OffloadUnbundlingJobClass:
     return getOffloadBundler();
+
+  case Action::OffloadWrapperJobClass:
+    return getOffloadWrapper();
   }
 
   llvm_unreachable("Invalid tool kind.");
diff --git a/lib/Driver/ToolChains/Clang.cpp b/lib/Driver/ToolChains/Clang.cpp
index 98e8c57..22b830e 100644
--- a/lib/Driver/ToolChains/Clang.cpp
+++ b/lib/Driver/ToolChains/Clang.cpp
@@ -6463,3 +6463,57 @@
       TCArgs.MakeArgString(getToolChain().GetProgramPath(getShortName())),
       CmdArgs, None));
 }
+
+void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA,
+                                  const InputInfo &Output,
+                                  const InputInfoList &Inputs,
+                                  const ArgList &Args,
+                                  const char *LinkingOutput) const {
+  ArgStringList CmdArgs;
+
+  const llvm::Triple &Triple = getToolChain().getEffectiveTriple();
+
+  // Add the "effective" target triple.
+  CmdArgs.push_back("-target");
+  CmdArgs.push_back(Args.MakeArgString(Triple.getTriple()));
+
+  assert(JA.getInputs().size() == Inputs.size() &&
+         "Not have inputs for all dependence actions??");
+
+  // Add offload targets. It is a comma-separated list of offload target
+  // triples.
+  SmallString<128> Targets;
+  Targets += "-offload-targets=";
+  for (unsigned I = 0; I < Inputs.size(); ++I) {
+    if (I)
+      Targets += ',';
+
+    // Get input's Offload Kind and ToolChain.
+    const auto *OA = cast<OffloadAction>(JA.getInputs()[I]);
+    assert(OA->hasSingleDeviceDependence(/*DoNotConsiderHostActions=*/true) &&
+           "Expected one device dependence!");
+    const ToolChain *DeviceTC = nullptr;
+    OA->doOnEachDependence([&DeviceTC](Action *, const ToolChain *TC,
+                                       const char *) { DeviceTC = TC; });
+
+    // And add it to the offload targets.
+    Targets += DeviceTC->getTriple().normalize();
+  }
+  CmdArgs.push_back(Args.MakeArgString(Targets));
+
+  // Add the output file name.
+  assert(Output.isFilename() && "Invalid output.");
+  CmdArgs.push_back("-o");
+  CmdArgs.push_back(Output.getFilename());
+
+  // Add inputs.
+  for (const InputInfo &I : Inputs) {
+    assert(I.isFilename() && "Invalid input.");
+    CmdArgs.push_back(I.getFilename());
+  }
+
+  C.addCommand(std::make_unique<Command>(
+    JA, *this,
+    Args.MakeArgString(getToolChain().GetProgramPath(getShortName())),
+    CmdArgs, Inputs));
+}
diff --git a/lib/Driver/ToolChains/Clang.h b/lib/Driver/ToolChains/Clang.h
index 8b6ac43..b345c02 100644
--- a/lib/Driver/ToolChains/Clang.h
+++ b/lib/Driver/ToolChains/Clang.h
@@ -152,6 +152,20 @@
                                    const llvm::opt::ArgList &TCArgs,
                                    const char *LinkingOutput) const override;
 };
+
+/// Offload wrapper tool.
+class LLVM_LIBRARY_VISIBILITY OffloadWrapper final : public Tool {
+public:
+  OffloadWrapper(const ToolChain &TC)
+      : Tool("offload wrapper", "clang-offload-wrapper", TC) {}
+
+  bool hasIntegratedCPP() const override { return false; }
+  void ConstructJob(Compilation &C, const JobAction &JA,
+                    const InputInfo &Output, const InputInfoList &Inputs,
+                    const llvm::opt::ArgList &TCArgs,
+                    const char *LinkingOutput) const override;
+};
+
 } // end namespace tools
 
 } // end namespace driver
diff --git a/lib/Driver/ToolChains/CommonArgs.cpp b/lib/Driver/ToolChains/CommonArgs.cpp
index 4796409..159b42a 100644
--- a/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/lib/Driver/ToolChains/CommonArgs.cpp
@@ -1244,120 +1244,6 @@
   }
 }
 
-/// Add OpenMP linker script arguments at the end of the argument list so that
-/// the fat binary is built by embedding each of the device images into the
-/// host. The linker script also defines a few symbols required by the code
-/// generation so that the images can be easily retrieved at runtime by the
-/// offloading library. This should be used only in tool chains that support
-/// linker scripts.
-void tools::AddOpenMPLinkerScript(const ToolChain &TC, Compilation &C,
-                                  const InputInfo &Output,
-                                  const InputInfoList &Inputs,
-                                  const ArgList &Args, ArgStringList &CmdArgs,
-                                  const JobAction &JA) {
-
-  // If this is not an OpenMP host toolchain, we don't need to do anything.
-  if (!JA.isHostOffloading(Action::OFK_OpenMP))
-    return;
-
-  // Create temporary linker script. Keep it if save-temps is enabled.
-  const char *LKS;
-  SmallString<256> Name = llvm::sys::path::filename(Output.getFilename());
-  if (C.getDriver().isSaveTempsEnabled()) {
-    llvm::sys::path::replace_extension(Name, "lk");
-    LKS = C.getArgs().MakeArgString(Name.c_str());
-  } else {
-    llvm::sys::path::replace_extension(Name, "");
-    Name = C.getDriver().GetTemporaryPath(Name, "lk");
-    LKS = C.addTempFile(C.getArgs().MakeArgString(Name.c_str()));
-  }
-
-  // Add linker script option to the command.
-  CmdArgs.push_back("-T");
-  CmdArgs.push_back(LKS);
-
-  // Create a buffer to write the contents of the linker script.
-  std::string LksBuffer;
-  llvm::raw_string_ostream LksStream(LksBuffer);
-
-  // Get the OpenMP offload tool chains so that we can extract the triple
-  // associated with each device input.
-  auto OpenMPToolChains = C.getOffloadToolChains<Action::OFK_OpenMP>();
-  assert(OpenMPToolChains.first != OpenMPToolChains.second &&
-         "No OpenMP toolchains??");
-
-  // Track the input file name and device triple in order to build the script,
-  // inserting binaries in the designated sections.
-  SmallVector<std::pair<std::string, const char *>, 8> InputBinaryInfo;
-
-  // Add commands to embed target binaries. We ensure that each section and
-  // image is 16-byte aligned. This is not mandatory, but increases the
-  // likelihood of data to be aligned with a cache block in several main host
-  // machines.
-  LksStream << "/*\n";
-  LksStream << "       OpenMP Offload Linker Script\n";
-  LksStream << " *** Automatically generated by Clang ***\n";
-  LksStream << "*/\n";
-  LksStream << "TARGET(binary)\n";
-  auto DTC = OpenMPToolChains.first;
-  for (auto &II : Inputs) {
-    const Action *A = II.getAction();
-    // Is this a device linking action?
-    if (A && isa<LinkJobAction>(A) &&
-        A->isDeviceOffloading(Action::OFK_OpenMP)) {
-      assert(DTC != OpenMPToolChains.second &&
-             "More device inputs than device toolchains??");
-      InputBinaryInfo.push_back(std::make_pair(
-          DTC->second->getTriple().normalize(), II.getFilename()));
-      ++DTC;
-      LksStream << "INPUT(" << II.getFilename() << ")\n";
-    }
-  }
-
-  assert(DTC == OpenMPToolChains.second &&
-         "Less device inputs than device toolchains??");
-
-  LksStream << "SECTIONS\n";
-  LksStream << "{\n";
-
-  // Put each target binary into a separate section.
-  for (const auto &BI : InputBinaryInfo) {
-    LksStream << "  .omp_offloading." << BI.first << " :\n";
-    LksStream << "  ALIGN(0x10)\n";
-    LksStream << "  {\n";
-    LksStream << "    PROVIDE_HIDDEN(.omp_offloading.img_start." << BI.first
-              << " = .);\n";
-    LksStream << "    " << BI.second << "\n";
-    LksStream << "    PROVIDE_HIDDEN(.omp_offloading.img_end." << BI.first
-              << " = .);\n";
-    LksStream << "  }\n";
-  }
-
-  LksStream << "}\n";
-  LksStream << "INSERT BEFORE .data\n";
-  LksStream.flush();
-
-  // Dump the contents of the linker script if the user requested that. We
-  // support this option to enable testing of behavior with -###.
-  if (C.getArgs().hasArg(options::OPT_fopenmp_dump_offload_linker_script))
-    llvm::errs() << LksBuffer;
-
-  // If this is a dry run, do not create the linker script file.
-  if (C.getArgs().hasArg(options::OPT__HASH_HASH_HASH))
-    return;
-
-  // Open script file and write the contents.
-  std::error_code EC;
-  llvm::raw_fd_ostream Lksf(LKS, EC, llvm::sys::fs::OF_None);
-
-  if (EC) {
-    C.getDriver().Diag(clang::diag::err_unable_to_make_temp) << EC.message();
-    return;
-  }
-
-  Lksf << LksBuffer;
-}
-
 /// Add HIP linker script arguments at the end of the argument list so that
 /// the fat binary is built by embedding the device images into the host. The
 /// linker script also defines a symbol required by the code generation so that
diff --git a/lib/Driver/ToolChains/CommonArgs.h b/lib/Driver/ToolChains/CommonArgs.h
index 1aff07a..63359d7 100644
--- a/lib/Driver/ToolChains/CommonArgs.h
+++ b/lib/Driver/ToolChains/CommonArgs.h
@@ -45,13 +45,6 @@
                     llvm::opt::ArgStringList &CmdArgs,
                     const llvm::opt::ArgList &Args);
 
-void AddOpenMPLinkerScript(const ToolChain &TC, Compilation &C,
-                           const InputInfo &Output,
-                           const InputInfoList &Inputs,
-                           const llvm::opt::ArgList &Args,
-                           llvm::opt::ArgStringList &CmdArgs,
-                           const JobAction &JA);
-
 void AddHIPLinkerScript(const ToolChain &TC, Compilation &C,
                         const InputInfo &Output, const InputInfoList &Inputs,
                         const llvm::opt::ArgList &Args,
diff --git a/lib/Driver/ToolChains/Cuda.cpp b/lib/Driver/ToolChains/Cuda.cpp
index 4cada74..8c704a3 100644
--- a/lib/Driver/ToolChains/Cuda.cpp
+++ b/lib/Driver/ToolChains/Cuda.cpp
@@ -563,8 +563,6 @@
     CmdArgs.push_back(CubinF);
   }
 
-  AddOpenMPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA);
-
   const char *Exec =
       Args.MakeArgString(getToolChain().GetProgramPath("nvlink"));
   C.addCommand(std::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
diff --git a/lib/Driver/ToolChains/Gnu.cpp b/lib/Driver/ToolChains/Gnu.cpp
index c7abe5c..c302a31 100644
--- a/lib/Driver/ToolChains/Gnu.cpp
+++ b/lib/Driver/ToolChains/Gnu.cpp
@@ -499,7 +499,7 @@
         P = ToolChain.GetFilePath(crtbegin);
       }
       CmdArgs.push_back(Args.MakeArgString(P));
-	  }
+    }
 
     // Add crtfastmath.o if available and fast math is enabled.
     ToolChain.AddFastMathRuntimeIfAvailable(Args, CmdArgs);
@@ -623,9 +623,6 @@
     }
   }
 
-  // Add OpenMP offloading linker script args if required.
-  AddOpenMPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA);
-
   // Add HIP offloading linker script args if required.
   AddHIPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA,
                      *this);
diff --git a/test/Driver/clang-offload-wrapper.c b/test/Driver/clang-offload-wrapper.c
new file mode 100644
index 0000000..542930b
--- /dev/null
+++ b/test/Driver/clang-offload-wrapper.c
@@ -0,0 +1,29 @@
+// REQUIRES: x86-registered-target
+
+//
+// Check help message.
+//
+// RUN: clang-offload-wrapper --help | FileCheck %s --check-prefix CHECK-HELP
+// CHECK-HELP: {{.*}}OVERVIEW: A tool to create a wrapper bitcode for offload target binaries. Takes offload
+// CHECK-HELP: {{.*}}target binaries as input and produces bitcode file containing target binaries packaged
+// CHECK-HELP: {{.*}}as data.
+// CHECK-HELP: {{.*}}USAGE: clang-offload-wrapper [options] <input files>
+// CHECK-HELP: {{.*}}  -o=<filename>               - Output filename
+// CHECK-HELP: {{.*}}  --offload-targets=<triples> - Comma-separated list of device target triples
+// CHECK-HELP: {{.*}}  --target=<triple>           - Target triple for the output module
+
+//
+// Generate a file to wrap.
+//
+// RUN: echo 'Content of device file' > %t.tgt
+
+//
+// Check bitcode produced by the wrapper tool.
+//
+// RUN: clang-offload-wrapper -target=x86_64-pc-linux-gnu -offload-targets=powerpc64le-ibm-linux-gnu -o %t.wrapper.bc %t.tgt
+// RUN: llvm-dis %t.wrapper.bc -o - | FileCheck %s --check-prefix CHECK-IR
+
+// CHECK-IR: target triple = "x86_64-pc-linux-gnu"
+
+// CHECK-IR: @.omp_offloading.img_start.powerpc64le-ibm-linux-gnu = hidden unnamed_addr constant [{{[0-9]+}} x i8] c"Content of device file{{.+}}", section ".omp_offloading.powerpc64le-ibm-linux-gnu"
+// CHECK-IR: @.omp_offloading.img_end.powerpc64le-ibm-linux-gnu = hidden unnamed_addr constant [0 x i8] zeroinitializer, section ".omp_offloading.powerpc64le-ibm-linux-gnu"
diff --git a/test/Driver/openmp-offload-gpu.c b/test/Driver/openmp-offload-gpu.c
index 3d2ac45..dc4dbd1 100644
--- a/test/Driver/openmp-offload-gpu.c
+++ b/test/Driver/openmp-offload-gpu.c
@@ -55,7 +55,7 @@
 // RUN:          -fopenmp-targets=nvptx64-nvidia-cuda %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-CUBIN-NVLINK %s
 
-// CHK-CUBIN-NVLINK: clang{{.*}}" "-o" "[[PTX:.*\.s]]"
+// CHK-CUBIN-NVLINK: clang{{.*}}" {{.*}}"-fopenmp-is-device" {{.*}}"-o" "[[PTX:.*\.s]]"
 // CHK-CUBIN-NVLINK-NEXT: ptxas{{.*}}" "--output-file" "[[CUBIN:.*\.cubin]]" {{.*}}"[[PTX]]"
 // CHK-CUBIN-NVLINK-NEXT: nvlink{{.*}}" {{.*}}"[[CUBIN]]"
 
diff --git a/test/Driver/openmp-offload.c b/test/Driver/openmp-offload.c
index 0ba3c43..292818b 100644
--- a/test/Driver/openmp-offload.c
+++ b/test/Driver/openmp-offload.c
@@ -106,15 +106,18 @@
 // CHK-PHASES: 2: compiler, {1}, ir, (host-openmp)
 // CHK-PHASES: 3: backend, {2}, assembler, (host-openmp)
 // CHK-PHASES: 4: assembler, {3}, object, (host-openmp)
-// CHK-PHASES: 5: linker, {4}, image, (host-openmp)
-// CHK-PHASES: 6: input, "[[INPUT]]", c, (device-openmp)
-// CHK-PHASES: 7: preprocessor, {6}, cpp-output, (device-openmp)
-// CHK-PHASES: 8: compiler, {7}, ir, (device-openmp)
-// CHK-PHASES: 9: offload, "host-openmp (powerpc64le-ibm-linux-gnu)" {2}, "device-openmp (x86_64-pc-linux-gnu)" {8}, ir
-// CHK-PHASES: 10: backend, {9}, assembler, (device-openmp)
-// CHK-PHASES: 11: assembler, {10}, object, (device-openmp)
-// CHK-PHASES: 12: linker, {11}, image, (device-openmp)
-// CHK-PHASES: 13: offload, "host-openmp (powerpc64le-ibm-linux-gnu)" {5}, "device-openmp (x86_64-pc-linux-gnu)" {12}, image
+// CHK-PHASES: 5: input, "[[INPUT]]", c, (device-openmp)
+// CHK-PHASES: 6: preprocessor, {5}, cpp-output, (device-openmp)
+// CHK-PHASES: 7: compiler, {6}, ir, (device-openmp)
+// CHK-PHASES: 8: offload, "host-openmp (powerpc64le-ibm-linux-gnu)" {2}, "device-openmp (x86_64-pc-linux-gnu)" {7}, ir
+// CHK-PHASES: 9: backend, {8}, assembler, (device-openmp)
+// CHK-PHASES: 10: assembler, {9}, object, (device-openmp)
+// CHK-PHASES: 11: linker, {10}, image, (device-openmp)
+// CHK-PHASES: 12: offload, "device-openmp (x86_64-pc-linux-gnu)" {11}, image
+// CHK-PHASES: 13: clang-offload-wrapper, {12}, ir, (host-openmp)
+// CHK-PHASES: 14: backend, {13}, assembler, (host-openmp)
+// CHK-PHASES: 15: assembler, {14}, object, (host-openmp)
+// CHK-PHASES: 16: linker, {4, 15}, image, (host-openmp)
 
 /// ###########################################################################
 
@@ -128,15 +131,15 @@
 // CHK-PHASES-LIB: 3: compiler, {2}, ir, (host-openmp)
 // CHK-PHASES-LIB: 4: backend, {3}, assembler, (host-openmp)
 // CHK-PHASES-LIB: 5: assembler, {4}, object, (host-openmp)
-// CHK-PHASES-LIB: 6: linker, {0, 5}, image, (host-openmp)
-// CHK-PHASES-LIB: 7: input, "somelib", object, (device-openmp)
-// CHK-PHASES-LIB: 8: input, "[[INPUT]]", c, (device-openmp)
-// CHK-PHASES-LIB: 9: preprocessor, {8}, cpp-output, (device-openmp)
-// CHK-PHASES-LIB: 10: compiler, {9}, ir, (device-openmp)
-// CHK-PHASES-LIB: 11: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {3}, "device-openmp (x86_64-pc-linux-gnu)" {10}, ir
-// CHK-PHASES-LIB: 12: backend, {11}, assembler, (device-openmp)
-// CHK-PHASES-LIB: 13: assembler, {12}, object, (device-openmp)
-// CHK-PHASES-LIB: 14: linker, {7, 13}, image, (device-openmp)
+// CHK-PHASES-LIB: 6: input, "somelib", object, (device-openmp)
+// CHK-PHASES-LIB: 7: input, "[[INPUT]]", c, (device-openmp)
+// CHK-PHASES-LIB: 8: preprocessor, {7}, cpp-output, (device-openmp)
+// CHK-PHASES-LIB: 9: compiler, {8}, ir, (device-openmp)
+// CHK-PHASES-LIB: 10: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {3}, "device-openmp (x86_64-pc-linux-gnu)" {9}, ir
+// CHK-PHASES-LIB: 11: backend, {10}, assembler, (device-openmp)
+// CHK-PHASES-LIB: 12: assembler, {11}, object, (device-openmp)
+// CHK-PHASES-LIB: 13: linker, {6, 12}, image, (device-openmp)
+// CHK-PHASES-LIB: 14: offload, "device-openmp (x86_64-pc-linux-gnu)" {13}, image
 // CHK-PHASES-LIB: 15: input, "somelib", object, (device-openmp)
 // CHK-PHASES-LIB: 16: input, "[[INPUT]]", c, (device-openmp)
 // CHK-PHASES-LIB: 17: preprocessor, {16}, cpp-output, (device-openmp)
@@ -145,8 +148,11 @@
 // CHK-PHASES-LIB: 20: backend, {19}, assembler, (device-openmp)
 // CHK-PHASES-LIB: 21: assembler, {20}, object, (device-openmp)
 // CHK-PHASES-LIB: 22: linker, {15, 21}, image, (device-openmp)
-// CHK-PHASES-LIB: 23: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {6}, "device-openmp (x86_64-pc-linux-gnu)" {14}, "device-openmp (powerpc64-ibm-linux-gnu)" {22}, image
-
+// CHK-PHASES-LIB: 23: offload, "device-openmp (powerpc64-ibm-linux-gnu)" {22}, image
+// CHK-PHASES-LIB: 24: clang-offload-wrapper, {14, 23}, ir, (host-openmp)
+// CHK-PHASES-LIB: 25: backend, {24}, assembler, (host-openmp)
+// CHK-PHASES-LIB: 26: assembler, {25}, object, (host-openmp)
+// CHK-PHASES-LIB: 27: linker, {0, 5, 26}, image, (host-openmp)
 
 /// ###########################################################################
 
@@ -165,21 +171,21 @@
 // CHK-PHASES-FILES: 8: compiler, {7}, ir, (host-openmp)
 // CHK-PHASES-FILES: 9: backend, {8}, assembler, (host-openmp)
 // CHK-PHASES-FILES: 10: assembler, {9}, object, (host-openmp)
-// CHK-PHASES-FILES: 11: linker, {0, 5, 10}, image, (host-openmp)
-// CHK-PHASES-FILES: 12: input, "somelib", object, (device-openmp)
-// CHK-PHASES-FILES: 13: input, "[[INPUT1]]", c, (device-openmp)
-// CHK-PHASES-FILES: 14: preprocessor, {13}, cpp-output, (device-openmp)
-// CHK-PHASES-FILES: 15: compiler, {14}, ir, (device-openmp)
-// CHK-PHASES-FILES: 16: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {3}, "device-openmp (x86_64-pc-linux-gnu)" {15}, ir
-// CHK-PHASES-FILES: 17: backend, {16}, assembler, (device-openmp)
-// CHK-PHASES-FILES: 18: assembler, {17}, object, (device-openmp)
-// CHK-PHASES-FILES: 19: input, "[[INPUT2]]", c, (device-openmp)
-// CHK-PHASES-FILES: 20: preprocessor, {19}, cpp-output, (device-openmp)
-// CHK-PHASES-FILES: 21: compiler, {20}, ir, (device-openmp)
-// CHK-PHASES-FILES: 22: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {8}, "device-openmp (x86_64-pc-linux-gnu)" {21}, ir
-// CHK-PHASES-FILES: 23: backend, {22}, assembler, (device-openmp)
-// CHK-PHASES-FILES: 24: assembler, {23}, object, (device-openmp)
-// CHK-PHASES-FILES: 25: linker, {12, 18, 24}, image, (device-openmp)
+// CHK-PHASES-FILES: 11: input, "somelib", object, (device-openmp)
+// CHK-PHASES-FILES: 12: input, "[[INPUT1]]", c, (device-openmp)
+// CHK-PHASES-FILES: 13: preprocessor, {12}, cpp-output, (device-openmp)
+// CHK-PHASES-FILES: 14: compiler, {13}, ir, (device-openmp)
+// CHK-PHASES-FILES: 15: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {3}, "device-openmp (x86_64-pc-linux-gnu)" {14}, ir
+// CHK-PHASES-FILES: 16: backend, {15}, assembler, (device-openmp)
+// CHK-PHASES-FILES: 17: assembler, {16}, object, (device-openmp)
+// CHK-PHASES-FILES: 18: input, "[[INPUT2]]", c, (device-openmp)
+// CHK-PHASES-FILES: 19: preprocessor, {18}, cpp-output, (device-openmp)
+// CHK-PHASES-FILES: 20: compiler, {19}, ir, (device-openmp)
+// CHK-PHASES-FILES: 21: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {8}, "device-openmp (x86_64-pc-linux-gnu)" {20}, ir
+// CHK-PHASES-FILES: 22: backend, {21}, assembler, (device-openmp)
+// CHK-PHASES-FILES: 23: assembler, {22}, object, (device-openmp)
+// CHK-PHASES-FILES: 24: linker, {11, 17, 23}, image, (device-openmp)
+// CHK-PHASES-FILES: 25: offload, "device-openmp (x86_64-pc-linux-gnu)" {24}, image
 // CHK-PHASES-FILES: 26: input, "somelib", object, (device-openmp)
 // CHK-PHASES-FILES: 27: input, "[[INPUT1]]", c, (device-openmp)
 // CHK-PHASES-FILES: 28: preprocessor, {27}, cpp-output, (device-openmp)
@@ -194,7 +200,11 @@
 // CHK-PHASES-FILES: 37: backend, {36}, assembler, (device-openmp)
 // CHK-PHASES-FILES: 38: assembler, {37}, object, (device-openmp)
 // CHK-PHASES-FILES: 39: linker, {26, 32, 38}, image, (device-openmp)
-// CHK-PHASES-FILES: 40: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {11}, "device-openmp (x86_64-pc-linux-gnu)" {25}, "device-openmp (powerpc64-ibm-linux-gnu)" {39}, image
+// CHK-PHASES-FILES: 40: offload, "device-openmp (powerpc64-ibm-linux-gnu)" {39}, image
+// CHK-PHASES-FILES: 41: clang-offload-wrapper, {25, 40}, ir, (host-openmp)
+// CHK-PHASES-FILES: 42: backend, {41}, assembler, (host-openmp)
+// CHK-PHASES-FILES: 43: assembler, {42}, object, (host-openmp)
+// CHK-PHASES-FILES: 44: linker, {0, 5, 10, 43}, image, (host-openmp)
 
 /// ###########################################################################
 
@@ -216,15 +226,18 @@
 // CHK-PHASES-WITH-CUDA: 11: offload, "host-cuda-openmp (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {10}, ir
 // CHK-PHASES-WITH-CUDA: 12: backend, {11}, assembler, (host-cuda-openmp)
 // CHK-PHASES-WITH-CUDA: 13: assembler, {12}, object, (host-cuda-openmp)
-// CHK-PHASES-WITH-CUDA: 14: linker, {13}, image, (host-cuda-openmp)
-// CHK-PHASES-WITH-CUDA: 15: input, "[[INPUT]]", cuda, (device-openmp)
-// CHK-PHASES-WITH-CUDA: 16: preprocessor, {15}, cuda-cpp-output, (device-openmp)
-// CHK-PHASES-WITH-CUDA: 17: compiler, {16}, ir, (device-openmp)
-// CHK-PHASES-WITH-CUDA: 18: offload, "host-cuda-openmp (powerpc64le-ibm-linux-gnu)" {2}, "device-openmp (nvptx64-nvidia-cuda)" {17}, ir
-// CHK-PHASES-WITH-CUDA: 19: backend, {18}, assembler, (device-openmp)
-// CHK-PHASES-WITH-CUDA: 20: assembler, {19}, object, (device-openmp)
-// CHK-PHASES-WITH-CUDA: 21: linker, {20}, image, (device-openmp)
-// CHK-PHASES-WITH-CUDA: 22: offload, "host-cuda-openmp (powerpc64le-ibm-linux-gnu)" {14}, "device-openmp (nvptx64-nvidia-cuda)" {21}, image
+// CHK-PHASES-WITH-CUDA: 14: input, "[[INPUT]]", cuda, (device-openmp)
+// CHK-PHASES-WITH-CUDA: 15: preprocessor, {14}, cuda-cpp-output, (device-openmp)
+// CHK-PHASES-WITH-CUDA: 16: compiler, {15}, ir, (device-openmp)
+// CHK-PHASES-WITH-CUDA: 17: offload, "host-cuda-openmp (powerpc64le-ibm-linux-gnu)" {2}, "device-openmp (nvptx64-nvidia-cuda)" {16}, ir
+// CHK-PHASES-WITH-CUDA: 18: backend, {17}, assembler, (device-openmp)
+// CHK-PHASES-WITH-CUDA: 19: assembler, {18}, object, (device-openmp)
+// CHK-PHASES-WITH-CUDA: 20: linker, {19}, image, (device-openmp)
+// CHK-PHASES-WITH-CUDA: 21: offload, "device-openmp (nvptx64-nvidia-cuda)" {20}, image
+// CHK-PHASES-WITH-CUDA: 22: clang-offload-wrapper, {21}, ir, (host-cuda-openmp)
+// CHK-PHASES-WITH-CUDA: 23: backend, {22}, assembler, (host-cuda-openmp)
+// CHK-PHASES-WITH-CUDA: 24: assembler, {23}, object, (host-cuda-openmp)
+// CHK-PHASES-WITH-CUDA: 25: linker, {13, 24}, image, (host-cuda-openmp)
 
 /// ###########################################################################
 
@@ -237,65 +250,31 @@
 /// -fopenmp-host-ir-file-path: specifies the host IR file that can be loaded by
 /// the target code generation to gather information about which declaration
 /// really need to be emitted.
-/// We use -fopenmp-dump-offload-linker-script to dump the linker script and
-/// check its contents.
 ///
-// RUN:   %clang -### -fopenmp=libomp -o %t.out -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s -fopenmp-dump-offload-linker-script -no-canonical-prefixes 2>&1 \
-// RUN:   | FileCheck -check-prefix=CHK-COMMANDS -check-prefix=CHK-LKS -check-prefix=CHK-LKS-REG %s
-// RUN:   %clang -### -fopenmp=libomp -o %t.out -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s -save-temps -fopenmp-dump-offload-linker-script -no-canonical-prefixes 2>&1 \
-// RUN:   | FileCheck -check-prefix=CHK-COMMANDS-ST -check-prefix=CHK-LKS -check-prefix=CHK-LKS-ST %s
-
-// Make sure we are not dumping the script unless the user requested it.
 // RUN:   %clang -### -fopenmp=libomp -o %t.out -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s -no-canonical-prefixes 2>&1 \
-// RUN:   | FileCheck -check-prefix=CHK-LKS-NODUMP %s
+// RUN:   | FileCheck -check-prefix=CHK-COMMANDS %s
 // RUN:   %clang -### -fopenmp=libomp -o %t.out -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s -save-temps -no-canonical-prefixes 2>&1 \
-// RUN:   | FileCheck -check-prefix=CHK-LKS-NODUMP %s
+// RUN:   | FileCheck -check-prefix=CHK-COMMANDS-ST %s
 
 //
-// Check the linker script contains what we expect.
-//
-// CHK-LKS: /*
-// CHK-LKS:                   OpenMP Offload Linker Script
-// CHK-LKS:             *** Automatically generated by Clang ***
-// CHK-LKS-NODUMP-NOT:  OpenMP Offload Linker Script.
-// CHK-LKS: */
-// CHK-LKS: TARGET(binary)
-// CHK-LKS-REG: INPUT([[T1BIN:.+\.out]])
-// CHK-LKS-REG: INPUT([[T2BIN:.+\.out]])
-// CHK-LKS-ST: INPUT([[T1BIN:.+\.out-openmp-powerpc64le-ibm-linux-gnu]])
-// CHK-LKS-ST: INPUT([[T2BIN:.+\.out-openmp-x86_64-pc-linux-gnu]])
-// CHK-LKS: SECTIONS
-// CHK-LKS: {
-// CHK-LKS:   .omp_offloading.powerpc64le-ibm-linux-gnu :
-// CHK-LKS:   ALIGN(0x10)
-// CHK-LKS:   {
-// CHK-LKS:     PROVIDE_HIDDEN(.omp_offloading.img_start.powerpc64le-ibm-linux-gnu = .);
-// CHK-LKS:     [[T1BIN]]
-// CHK-LKS:     PROVIDE_HIDDEN(.omp_offloading.img_end.powerpc64le-ibm-linux-gnu = .);
-// CHK-LKS:   }
-// CHK-LKS:   .omp_offloading.x86_64-pc-linux-gnu :
-// CHK-LKS:   ALIGN(0x10)
-// CHK-LKS:   {
-// CHK-LKS:     PROVIDE_HIDDEN(.omp_offloading.img_start.x86_64-pc-linux-gnu = .);
-// CHK-LKS:     [[T2BIN]]
-// CHK-LKS:     PROVIDE_HIDDEN(.omp_offloading.img_end.x86_64-pc-linux-gnu = .);
-// CHK-LKS:   }
-// CHK-LKS: }
-// CHK-LKS: INSERT BEFORE .data
-
-//
-// Generate host BC file.
+// Generate host BC file and host object.
 //
 // CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc"
 // CHK-COMMANDS-SAME: "-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu"
 // CHK-COMMANDS-SAME: "-o" "
 // CHK-COMMANDS-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "c" "
 // CHK-COMMANDS-SAME: [[INPUT:[^\\/]+\.c]]"
+// CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "
+// CHK-COMMANDS-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "-x" "ir" "{{.*}}[[HOSTBC]]"
 // CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-E" {{.*}}"-fopenmp" {{.*}}"-o" "
 // CHK-COMMANDS-ST-SAME: [[HOSTPP:[^\\/]+\.i]]" "-x" "c" "
 // CHK-COMMANDS-ST-SAME: [[INPUT:[^\\/]+\.c]]"
 // CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" {{.*}}"-o" "
 // CHK-COMMANDS-ST-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[HOSTPP]]"
+// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-S" {{.*}}"-fopenmp" {{.*}}"-o" "
+// CHK-COMMANDS-ST-SAME: [[HOSTASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[HOSTBC]]"
+// CHK-COMMANDS-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le-unknown-linux" "-filetype" "obj" {{.*}}"-o" "
+// CHK-COMMANDS-ST-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "{{.*}}[[HOSTASM]]"
 
 //
 // Compile for the powerpc device.
@@ -335,21 +314,26 @@
 // CHK-COMMANDS-ST-SAME: [[T2BIN:[^\\/]+\.out-openmp-x86_64-pc-linux-gnu]]" {{.*}}"{{.*}}[[T2OBJ]]"
 
 //
-// Generate host object from the BC file and link using the linker script.
+// Create wrapper BC file and wrapper object.
 //
+// CHK-COMMANDS: clang-offload-wrapper{{(\.exe)?}}" "-target" "powerpc64le-unknown-linux" {{.*}}"-o" "
+// CHK-COMMANDS-SAME: [[WRAPPERBC:[^\\/]+\.bc]]" "{{.*}}[[T1BIN]]" "{{.*}}[[T2BIN]]"
 // CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "
-// CHK-COMMANDS-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "-x" "ir" "{{.*}}[[HOSTBC]]"
-// CHK-COMMANDS: ld{{(\.exe)?}}" {{.*}}"-o" "
-// CHK-COMMANDS-SAME: [[HOSTBIN:[^\\/]+\.out]]"  {{.*}}"-lomptarget" {{.*}}"-T" "
-// CHK-COMMANDS-SAME: [[HOSTLK:[^\\/]+\.lk]]"
+// CHK-COMMANDS-SAME: [[WRAPPEROBJ:[^\\/]+\.o]]" "-x" "ir" "{{.*}}[[WRAPPERBC]]"
+// CHK-COMMANDS-ST: clang-offload-wrapper{{(\.exe)?}}" "-target" "powerpc64le-unknown-linux" {{.*}}"-o" "
+// CHK-COMMANDS-ST-SAME: [[WRAPPERBC:[^\\/]+\.bc]]" "{{.*}}[[T1BIN]]" "{{.*}}[[T2BIN]]"
 // CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-S" {{.*}}"-fopenmp" {{.*}}"-o" "
-// CHK-COMMANDS-ST-SAME: [[HOSTASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[HOSTBC]]"
+// CHK-COMMANDS-ST-SAME: [[WRAPPERASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[WRAPPERBC]]"
 // CHK-COMMANDS-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le-unknown-linux" "-filetype" "obj" {{.*}}"-o" "
-// CHK-COMMANDS-ST-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "{{.*}}[[HOSTASM]]"
-// CHK-COMMANDS-ST: ld{{(\.exe)?}}" {{.*}}"-o" "
-// CHK-COMMANDS-ST-SAME: [[HOSTBIN:[^\\/]+\.out]]"  {{.*}}"-lomptarget" {{.*}}"-T" "
-// CHK-COMMANDS-ST-SAME: [[HOSTLK:[^\\/]+\.lk]]"
+// CHK-COMMANDS-ST-SAME: [[WRAPPEROBJ:[^\\/]+\.o]]" "{{.*}}[[WRAPPERASM]]"
 
+//
+// Link host binary.
+//
+// CHK-COMMANDS: ld{{(\.exe)?}}" {{.*}}"-o" "
+// CHK-COMMANDS-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" "{{.*}}[[WRAPPEROBJ]]" {{.*}}"-lomptarget"
+// CHK-COMMANDS-ST: ld{{(\.exe)?}}" {{.*}}"-o" "
+// CHK-COMMANDS-ST-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" "{{.*}}[[WRAPPEROBJ]]" {{.*}}"-lomptarget"
 
 /// ###########################################################################
 
@@ -391,20 +375,24 @@
 // CHK-UBACTIONS: 3: compiler, {2}, ir, (host-openmp)
 // CHK-UBACTIONS: 4: backend, {3}, assembler, (host-openmp)
 // CHK-UBACTIONS: 5: assembler, {4}, object, (host-openmp)
-// CHK-UBACTIONS: 6: linker, {0, 5}, image, (host-openmp)
-// CHK-UBACTIONS: 7: input, "somelib", object, (device-openmp)
-// CHK-UBACTIONS: 8: compiler, {2}, ir, (device-openmp)
-// CHK-UBACTIONS: 9: offload, "host-openmp (powerpc64le-unknown-linux)" {3}, "device-openmp (powerpc64le-ibm-linux-gnu)" {8}, ir
-// CHK-UBACTIONS: 10: backend, {9}, assembler, (device-openmp)
-// CHK-UBACTIONS: 11: assembler, {10}, object, (device-openmp)
-// CHK-UBACTIONS: 12: linker, {7, 11}, image, (device-openmp)
+// CHK-UBACTIONS: 6: input, "somelib", object, (device-openmp)
+// CHK-UBACTIONS: 7: compiler, {2}, ir, (device-openmp)
+// CHK-UBACTIONS: 8: offload, "host-openmp (powerpc64le-unknown-linux)" {3}, "device-openmp (powerpc64le-ibm-linux-gnu)" {7}, ir
+// CHK-UBACTIONS: 9: backend, {8}, assembler, (device-openmp)
+// CHK-UBACTIONS: 10: assembler, {9}, object, (device-openmp)
+// CHK-UBACTIONS: 11: linker, {6, 10}, image, (device-openmp)
+// CHK-UBACTIONS: 12: offload, "device-openmp (powerpc64le-ibm-linux-gnu)" {11}, image
 // CHK-UBACTIONS: 13: input, "somelib", object, (device-openmp)
 // CHK-UBACTIONS: 14: compiler, {2}, ir, (device-openmp)
 // CHK-UBACTIONS: 15: offload, "host-openmp (powerpc64le-unknown-linux)" {3}, "device-openmp (x86_64-pc-linux-gnu)" {14}, ir
 // CHK-UBACTIONS: 16: backend, {15}, assembler, (device-openmp)
 // CHK-UBACTIONS: 17: assembler, {16}, object, (device-openmp)
 // CHK-UBACTIONS: 18: linker, {13, 17}, image, (device-openmp)
-// CHK-UBACTIONS: 19: offload, "host-openmp (powerpc64le-unknown-linux)" {6}, "device-openmp (powerpc64le-ibm-linux-gnu)" {12}, "device-openmp (x86_64-pc-linux-gnu)" {18}, image
+// CHK-UBACTIONS: 19: offload, "device-openmp (x86_64-pc-linux-gnu)" {18}, image
+// CHK-UBACTIONS: 20: clang-offload-wrapper, {12, 19}, ir, (host-openmp)
+// CHK-UBACTIONS: 21: backend, {20}, assembler, (host-openmp)
+// CHK-UBACTIONS: 22: assembler, {21}, object, (host-openmp)
+// CHK-UBACTIONS: 23: linker, {0, 5, 22}, image, (host-openmp)
 
 /// ###########################################################################
 
@@ -507,6 +495,8 @@
 // CHK-UBJOBS-SAME: [[T2PP:[^\\/]+\.i]]" "-unbundle"
 // CHK-UBJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc"  {{.*}}"-fopenmp" {{.*}}"-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" {{.*}}"-o" "
 // CHK-UBJOBS-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[HOSTPP]]"
+// CHK-UBJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "
+// CHK-UBJOBS-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "-x" "ir" "{{.*}}[[HOSTBC]]"
 // CHK-UBJOBS-ST: clang-offload-bundler{{.*}}" "-type=i" "-targets=host-powerpc64le-unknown-linux,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu" "-inputs=
 // CHK-UBJOBS-ST-SAME: [[INPUT:[^\\/]+\.i]]" "-outputs=
 // CHK-UBJOBS-ST-SAME: [[HOSTPP:[^\\/,]+\.i]],
@@ -514,6 +504,10 @@
 // CHK-UBJOBS-ST-SAME: [[T2PP:[^\\/,]+\.i]]" "-unbundle"
 // CHK-UBJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc"  {{.*}}"-fopenmp" {{.*}}"-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" {{.*}}"-o" "
 // CHK-UBJOBS-ST-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[HOSTPP]]"
+// CHK-UBJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-S" {{.*}}"-fopenmp" {{.*}}"-o" "
+// CHK-UBJOBS-ST-SAME: [[HOSTASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[HOSTBC]]"
+// CHK-UBJOBS-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le-unknown-linux" "-filetype" "obj" {{.*}}"-o" "
+// CHK-UBJOBS-ST-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "{{.*}}[[HOSTASM]]"
 
 // Create target 1 object.
 // CHK-UBJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]" {{.*}}"-o" "
@@ -543,19 +537,23 @@
 // CHK-UBJOBS-ST: ld{{(\.exe)?}}" {{.*}}"-o" "
 // CHK-UBJOBS-ST-SAME: [[T2BIN:[^\\/]+\.out-openmp-x86_64-pc-linux-gnu]]" {{.*}}"{{.*}}[[T2OBJ]]"
 
-// Create binary.
+// Create wrapper BC file and wrapper object.
+// CHK-UBJOBS: clang-offload-wrapper{{(\.exe)?}}" "-target" "powerpc64le-unknown-linux" {{.*}}"-o" "
+// CHK-UBJOBS-SAME: [[WRAPPERBC:[^\\/]+\.bc]]" "{{.*}}[[T1BIN]]" "{{.*}}[[T2BIN]]"
 // CHK-UBJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "
-// CHK-UBJOBS-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "-x" "ir" "{{.*}}[[HOSTBC]]"
-// CHK-UBJOBS: ld{{(\.exe)?}}" {{.*}}"-o" "
-// CHK-UBJOBS-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" {{.*}}"-T" "
-// CHK-UBJOBS-SAME: [[LKS:[^\\/]+\.lk]]"
+// CHK-UBJOBS-SAME: [[WRAPPEROBJ:[^\\/]+\.o]]" "-x" "ir" "{{.*}}[[WRAPPERBC]]"
+// CHK-UBJOBS-ST: clang-offload-wrapper{{(\.exe)?}}" "-target" "powerpc64le-unknown-linux" {{.*}}"-o" "
+// CHK-UBJOBS-ST-SAME: [[WRAPPERBC:[^\\/]+\.bc]]" "{{.*}}[[T1BIN]]" "{{.*}}[[T2BIN]]"
 // CHK-UBJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-S" {{.*}}"-fopenmp" {{.*}}"-o" "
-// CHK-UBJOBS-ST-SAME: [[HOSTASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[HOSTBC]]"
+// CHK-UBJOBS-ST-SAME: [[WRAPPERASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[WRAPPERBC]]"
 // CHK-UBJOBS-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le-unknown-linux" "-filetype" "obj" {{.*}}"-o" "
-// CHK-UBJOBS-ST-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "{{.*}}[[HOSTASM]]"
+// CHK-UBJOBS-ST-SAME: [[WRAPPEROBJ:[^\\/]+\.o]]" "{{.*}}[[WRAPPERASM]]"
+
+// Create binary.
+// CHK-UBJOBS: ld{{(\.exe)?}}" {{.*}}"-o" "
+// CHK-UBJOBS-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" "{{.*}}[[WRAPPEROBJ]]"
 // CHK-UBJOBS-ST: ld{{(\.exe)?}}" {{.*}}"-o" "
-// CHK-UBJOBS-ST-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" {{.*}}"-T" "
-// CHK-UBJOBS-ST-SAME: [[LKS:[^\\/]+\.lk]]"
+// CHK-UBJOBS-ST-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" "{{.*}}[[WRAPPEROBJ]]"
 
 // Unbundle object file.
 // CHK-UBJOBS2: clang-offload-bundler{{.*}}" "-type=o" "-targets=host-powerpc64le-unknown-linux,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu" "-inputs=
@@ -567,9 +565,12 @@
 // CHK-UBJOBS2-SAME: [[T1BIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[T1OBJ]]"
 // CHK-UBJOBS2: ld{{(\.exe)?}}" {{.*}}"-o" "
 // CHK-UBJOBS2-SAME: [[T2BIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[T2OBJ]]"
+// CHK-UBJOBS2: clang-offload-wrapper{{(\.exe)?}}" "-target" "powerpc64le-unknown-linux" {{.*}}"-o" "
+// CHK-UBJOBS2-SAME: [[WRAPPERBC:[^\\/]+\.bc]]" "{{.*}}[[T1BIN]]" "{{.*}}[[T2BIN]]"
+// CHK-UBJOBS2: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" "-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "
+// CHK-UBJOBS2-SAME: [[WRAPPEROBJ:[^\\/]+\.o]]" "-x" "ir" "{{.*}}[[WRAPPERBC]]"
 // CHK-UBJOBS2: ld{{(\.exe)?}}" {{.*}}"-o" "
-// CHK-UBJOBS2-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" {{.*}}"-T" "
-// CHK-UBJOBS2-SAME: [[LKS:[^\\/]+\.lk]]"
+// CHK-UBJOBS2-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" "{{.*}}[[WRAPPEROBJ]]"
 // CHK-UBJOBS2-ST-NOT: clang-offload-bundler{{.*}}in.so
 // CHK-UBJOBS2-ST: clang-offload-bundler{{.*}}" "-type=o" "-targets=host-powerpc64le-unknown-linux,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu" "-inputs=
 // CHK-UBJOBS2-ST-SAME: [[INPUT:[^\\/]+\.o]]" "-outputs=
@@ -581,9 +582,14 @@
 // CHK-UBJOBS2-ST-SAME: [[T1BIN:[^\\/]+\.out-openmp-powerpc64le-ibm-linux-gnu]]" {{.*}}"{{.*}}[[T1OBJ]]"
 // CHK-UBJOBS2-ST: ld{{(\.exe)?}}" {{.*}}"-o" "
 // CHK-UBJOBS2-ST-SAME: [[T2BIN:[^\\/]+\.out-openmp-x86_64-pc-linux-gnu]]" {{.*}}"{{.*}}[[T2OBJ]]"
+// CHK-UBJOBS2-ST: clang-offload-wrapper{{(\.exe)?}}" "-target" "powerpc64le-unknown-linux" {{.*}}"-o" "
+// CHK-UBJOBS2-ST-SAME: [[WRAPPERBC:[^\\/]+\.bc]]" "{{.*}}[[T1BIN]]" "{{.*}}[[T2BIN]]"
+// CHK-UBJOBS2-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" "-S" {{.*}}"-fopenmp" {{.*}}"-o" "
+// CHK-UBJOBS2-ST-SAME: [[WRAPPERASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[WRAPPERBC]]"
+// CHK-UBJOBS2-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le-unknown-linux" "-filetype" "obj" {{.*}}"-o" "
+// CHK-UBJOBS2-ST-SAME: [[WRAPPEROBJ:[^\\/]+\.o]]" "{{.*}}[[WRAPPERASM]]"
 // CHK-UBJOBS2-ST: ld{{(\.exe)?}}" {{.*}}"-o" "
-// CHK-UBJOBS2-ST-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" {{.*}}"-T" "
-// CHK-UBJOBS2-ST-SAME: [[LKS:[^\\/]+\.lk]]"
+// CHK-UBJOBS2-ST-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" "{{.*}}[[WRAPPEROBJ]]"
 
 /// ###########################################################################
 
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index 223f1f7..e46c366 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -8,6 +8,7 @@
 add_clang_subdirectory(clang-fuzzer)
 add_clang_subdirectory(clang-import-test)
 add_clang_subdirectory(clang-offload-bundler)
+add_clang_subdirectory(clang-offload-wrapper)
 add_clang_subdirectory(clang-scan-deps)
 
 add_clang_subdirectory(c-index-test)
diff --git a/tools/clang-offload-wrapper/CMakeLists.txt b/tools/clang-offload-wrapper/CMakeLists.txt
new file mode 100644
index 0000000..6f8940f
--- /dev/null
+++ b/tools/clang-offload-wrapper/CMakeLists.txt
@@ -0,0 +1,23 @@
+set(LLVM_LINK_COMPONENTS BitWriter Core Support TransformUtils)
+
+if(NOT CLANG_BUILT_STANDALONE)
+  set(tablegen_deps intrinsics_gen)
+endif()
+
+add_clang_tool(clang-offload-wrapper
+  ClangOffloadWrapper.cpp
+
+  DEPENDS
+  ${tablegen_deps}
+  )
+
+set(CLANG_OFFLOAD_WRAPPER_LIB_DEPS
+  clangBasic
+  )
+
+add_dependencies(clang clang-offload-wrapper)
+
+clang_target_link_libraries(clang-offload-wrapper
+  PRIVATE
+  ${CLANG_OFFLOAD_WRAPPER_LIB_DEPS}
+  )
diff --git a/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp b/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp
new file mode 100644
index 0000000..e18da35
--- /dev/null
+++ b/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp
@@ -0,0 +1,196 @@
+//===-- clang-offload-wrapper/ClangOffloadWrapper.cpp -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Implementation of the offload wrapper tool. It takes offload target binaries
+/// as input and creates wrapper bitcode file containing target binaries
+/// packaged as data.
+///
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/Version.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Bitcode/BitcodeWriter.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/StringSaver.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/WithColor.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
+
+using namespace llvm;
+
+static cl::opt<bool> Help("h", cl::desc("Alias for -help"), cl::Hidden);
+
+// Mark all our options with this category, everything else (except for -version
+// and -help) will be hidden.
+static cl::OptionCategory
+    ClangOffloadWrapperCategory("clang-offload-wrapper options");
+
+static cl::opt<std::string> Output("o", cl::Required,
+                                   cl::desc("Output filename"),
+                                   cl::value_desc("filename"),
+                                   cl::cat(ClangOffloadWrapperCategory));
+
+static cl::list<std::string> Inputs(cl::Positional, cl::OneOrMore,
+                                    cl::desc("<input files>"),
+                                    cl::cat(ClangOffloadWrapperCategory));
+
+static cl::opt<std::string>
+    Target("target", cl::Required,
+           cl::desc("Target triple for the output module"),
+           cl::value_desc("triple"), cl::cat(ClangOffloadWrapperCategory));
+
+static cl::list<std::string>
+    OffloadTargets("offload-targets", cl::CommaSeparated, cl::OneOrMore,
+                   cl::desc("Comma-separated list of device target triples"),
+                   cl::value_desc("triples"),
+                   cl::cat(ClangOffloadWrapperCategory));
+
+namespace {
+
+class BinaryWrapper {
+public:
+  // Binary descriptor. The first field is the a reference to the binary bits,
+  // and the second is the target triple the binary was built for.
+  using BinaryDesc = std::pair<ArrayRef<char>, StringRef>;
+
+private:
+  LLVMContext C;
+  Module M;
+
+  // Saver for generated strings.
+  BumpPtrAllocator Alloc;
+  UniqueStringSaver SS;
+
+private:
+  void createImages(ArrayRef<BinaryDesc> Binaries) {
+    for (const BinaryDesc &Bin : Binaries) {
+      StringRef SectionName = SS.save(".omp_offloading." + Bin.second);
+
+      auto *DataC = ConstantDataArray::get(C, Bin.first);
+      auto *ImageB =
+          new GlobalVariable(M, DataC->getType(), /*isConstant=*/true,
+                             GlobalVariable::ExternalLinkage, DataC,
+                             ".omp_offloading.img_start." + Bin.second);
+      ImageB->setSection(SectionName);
+      ImageB->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+      ImageB->setVisibility(llvm::GlobalValue::HiddenVisibility);
+
+      auto *EmptyC =
+          ConstantAggregateZero::get(ArrayType::get(Type::getInt8Ty(C), 0u));
+      auto *ImageE =
+          new GlobalVariable(M, EmptyC->getType(), /*isConstant=*/true,
+                             GlobalVariable::ExternalLinkage, EmptyC,
+                             ".omp_offloading.img_end." + Bin.second);
+      ImageE->setSection(SectionName);
+      ImageE->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+      ImageE->setVisibility(GlobalValue::HiddenVisibility);
+    }
+  }
+
+public:
+  BinaryWrapper(StringRef Target) : M("offload.wrapper.object", C), SS(Alloc) {
+    M.setTargetTriple(Target);
+  }
+
+  const Module &wrapBinaries(ArrayRef<BinaryDesc> Binaries) {
+    createImages(Binaries);
+    return M;
+  }
+};
+
+} // anonymous namespace
+
+int main(int argc, const char **argv) {
+  sys::PrintStackTraceOnErrorSignal(argv[0]);
+
+  cl::HideUnrelatedOptions(ClangOffloadWrapperCategory);
+  cl::SetVersionPrinter([](raw_ostream &OS) {
+    OS << clang::getClangToolFullVersion("clang-offload-wrapper") << '\n';
+  });
+  cl::ParseCommandLineOptions(
+      argc, argv,
+      "A tool to create a wrapper bitcode for offload target binaries. Takes "
+      "offload\ntarget binaries as input and produces bitcode file containing "
+      "target binaries packaged\nas data.\n");
+
+  if (Help) {
+    cl::PrintHelpMessage();
+    return 0;
+  }
+
+  auto reportError = [argv](Error E) {
+    logAllUnhandledErrors(std::move(E), WithColor::error(errs(), argv[0]));
+  };
+
+  if (Triple(Target).getArch() == Triple::UnknownArch) {
+    reportError(createStringError(
+        errc::invalid_argument, "'" + Target + "': unsupported target triple"));
+    return 1;
+  }
+
+  if (Inputs.size() != OffloadTargets.size()) {
+    reportError(createStringError(
+        errc::invalid_argument,
+        "number of input files and offload targets should match"));
+    return 1;
+  }
+
+  // Read device binaries.
+  SmallVector<std::unique_ptr<MemoryBuffer>, 4u> Buffers;
+  SmallVector<BinaryWrapper::BinaryDesc, 4u> Images;
+  Buffers.reserve(Inputs.size());
+  Images.reserve(Inputs.size());
+  for (unsigned I = 0; I < Inputs.size(); ++I) {
+    const std::string &File = Inputs[I];
+    ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
+        MemoryBuffer::getFileOrSTDIN(File);
+    if (!BufOrErr) {
+      reportError(createFileError(File, BufOrErr.getError()));
+      return 1;
+    }
+    const std::unique_ptr<MemoryBuffer> &Buf =
+        Buffers.emplace_back(std::move(*BufOrErr));
+    Images.emplace_back(
+        makeArrayRef(Buf->getBufferStart(), Buf->getBufferSize()),
+        OffloadTargets[I]);
+  }
+
+  // Create the output file to write the resulting bitcode to.
+  std::error_code EC;
+  ToolOutputFile Out(Output, EC, sys::fs::OF_None);
+  if (EC) {
+    reportError(createFileError(Output, EC));
+    return 1;
+  }
+
+  // Create a wrapper for device binaries and write its bitcode to the file.
+  WriteBitcodeToFile(BinaryWrapper(Target).wrapBinaries(
+                         makeArrayRef(Images.data(), Images.size())),
+                     Out.os());
+  if (Out.os().has_error()) {
+    reportError(createFileError(Output, Out.os().error()));
+    return 1;
+  }
+
+  // Success.
+  Out.keep();
+  return 0;
+}