| //===- AMDGPUOpenMP.cpp - AMDGPUOpenMP ToolChain Implementation -*- C++ -*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "AMDGPUOpenMP.h" |
| #include "AMDGPU.h" |
| #include "CommonArgs.h" |
| #include "ToolChains/ROCm.h" |
| #include "clang/Basic/DiagnosticDriver.h" |
| #include "clang/Driver/Compilation.h" |
| #include "clang/Driver/Driver.h" |
| #include "clang/Driver/DriverDiagnostic.h" |
| #include "clang/Driver/InputInfo.h" |
| #include "clang/Driver/Options.h" |
| #include "llvm/ADT/STLExtras.h" |
| #include "llvm/Support/FileSystem.h" |
| #include "llvm/Support/FormatAdapters.h" |
| #include "llvm/Support/FormatVariadic.h" |
| #include "llvm/Support/Path.h" |
| |
| using namespace clang::driver; |
| using namespace clang::driver::toolchains; |
| using namespace clang::driver::tools; |
| using namespace clang; |
| using namespace llvm::opt; |
| |
| namespace { |
| |
| static const char *getOutputFileName(Compilation &C, StringRef Base, |
| const char *Postfix, |
| const char *Extension) { |
| const char *OutputFileName; |
| if (C.getDriver().isSaveTempsEnabled()) { |
| OutputFileName = |
| C.getArgs().MakeArgString(Base.str() + Postfix + "." + Extension); |
| } else { |
| std::string TmpName = |
| C.getDriver().GetTemporaryPath(Base.str() + Postfix, Extension); |
| OutputFileName = C.addTempFile(C.getArgs().MakeArgString(TmpName)); |
| } |
| return OutputFileName; |
| } |
| |
| static void addLLCOptArg(const llvm::opt::ArgList &Args, |
| llvm::opt::ArgStringList &CmdArgs) { |
| if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { |
| StringRef OOpt = "0"; |
| if (A->getOption().matches(options::OPT_O4) || |
| A->getOption().matches(options::OPT_Ofast)) |
| OOpt = "3"; |
| else if (A->getOption().matches(options::OPT_O0)) |
| OOpt = "0"; |
| else if (A->getOption().matches(options::OPT_O)) { |
| // Clang and opt support -Os/-Oz; llc only supports -O0, -O1, -O2 and -O3 |
| // so we map -Os/-Oz to -O2. |
| // Only clang supports -Og, and maps it to -O1. |
| // We map anything else to -O2. |
| OOpt = llvm::StringSwitch<const char *>(A->getValue()) |
| .Case("1", "1") |
| .Case("2", "2") |
| .Case("3", "3") |
| .Case("s", "2") |
| .Case("z", "2") |
| .Case("g", "1") |
| .Default("0"); |
| } |
| CmdArgs.push_back(Args.MakeArgString("-O" + OOpt)); |
| } |
| } |
| |
| static bool checkSystemForAMDGPU(const ArgList &Args, const AMDGPUToolChain &TC, |
| std::string &GPUArch) { |
| if (auto Err = TC.getSystemGPUArch(Args, GPUArch)) { |
| std::string ErrMsg = |
| llvm::formatv("{0}", llvm::fmt_consume(std::move(Err))); |
| TC.getDriver().Diag(diag::err_drv_undetermined_amdgpu_arch) << ErrMsg; |
| return false; |
| } |
| |
| return true; |
| } |
| } // namespace |
| |
| const char *AMDGCN::OpenMPLinker::constructLLVMLinkCommand( |
| const toolchains::AMDGPUOpenMPToolChain &AMDGPUOpenMPTC, Compilation &C, |
| const JobAction &JA, const InputInfoList &Inputs, const ArgList &Args, |
| StringRef SubArchName, StringRef OutputFilePrefix) const { |
| ArgStringList CmdArgs; |
| |
| for (const auto &II : Inputs) |
| if (II.isFilename()) |
| CmdArgs.push_back(II.getFilename()); |
| |
| if (Args.hasArg(options::OPT_l)) { |
| auto Lm = Args.getAllArgValues(options::OPT_l); |
| bool HasLibm = false; |
| for (auto &Lib : Lm) { |
| if (Lib == "m") { |
| HasLibm = true; |
| break; |
| } |
| } |
| |
| if (HasLibm) { |
| // This is not certain to work. The device libs added here, and passed to |
| // llvm-link, are missing attributes that they expect to be inserted when |
| // passed to mlink-builtin-bitcode. The amdgpu backend does not generate |
| // conservatively correct code when attributes are missing, so this may |
| // be the root cause of miscompilations. Passing via mlink-builtin-bitcode |
| // ultimately hits CodeGenModule::addDefaultFunctionDefinitionAttributes |
| // on each function, see D28538 for context. |
| // Potential workarounds: |
| // - unconditionally link all of the device libs to every translation |
| // unit in clang via mlink-builtin-bitcode |
| // - build a libm bitcode file as part of the DeviceRTL and explictly |
| // mlink-builtin-bitcode the rocm device libs components at build time |
| // - drop this llvm-link fork in favour or some calls into LLVM, chosen |
| // to do basically the same work as llvm-link but with that call first |
| // - write an opt pass that sets that on every function it sees and pipe |
| // the device-libs bitcode through that on the way to this llvm-link |
| SmallVector<std::string, 12> BCLibs = |
| AMDGPUOpenMPTC.getCommonDeviceLibNames(Args, SubArchName.str()); |
| llvm::for_each(BCLibs, [&](StringRef BCFile) { |
| CmdArgs.push_back(Args.MakeArgString(BCFile)); |
| }); |
| } |
| } |
| |
| AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, CmdArgs, "amdgcn", |
| SubArchName, |
| /* bitcode SDL?*/ true, |
| /* PostClang Link? */ false); |
| // Add an intermediate output file. |
| CmdArgs.push_back("-o"); |
| const char *OutputFileName = |
| getOutputFileName(C, OutputFilePrefix, "-linked", "bc"); |
| CmdArgs.push_back(OutputFileName); |
| const char *Exec = |
| Args.MakeArgString(getToolChain().GetProgramPath("llvm-link")); |
| C.addCommand(std::make_unique<Command>( |
| JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs, |
| InputInfo(&JA, Args.MakeArgString(OutputFileName)))); |
| return OutputFileName; |
| } |
| |
| const char *AMDGCN::OpenMPLinker::constructLlcCommand( |
| Compilation &C, const JobAction &JA, const InputInfoList &Inputs, |
| const llvm::opt::ArgList &Args, llvm::StringRef SubArchName, |
| llvm::StringRef OutputFilePrefix, const char *InputFileName, |
| bool OutputIsAsm) const { |
| // Construct llc command. |
| ArgStringList LlcArgs; |
| // The input to llc is the output from opt. |
| LlcArgs.push_back(InputFileName); |
| // Pass optimization arg to llc. |
| addLLCOptArg(Args, LlcArgs); |
| LlcArgs.push_back("-mtriple=amdgcn-amd-amdhsa"); |
| LlcArgs.push_back(Args.MakeArgString("-mcpu=" + SubArchName)); |
| LlcArgs.push_back( |
| Args.MakeArgString(Twine("-filetype=") + (OutputIsAsm ? "asm" : "obj"))); |
| |
| for (const Arg *A : Args.filtered(options::OPT_mllvm)) { |
| LlcArgs.push_back(A->getValue(0)); |
| } |
| |
| // Add output filename |
| LlcArgs.push_back("-o"); |
| const char *LlcOutputFile = |
| getOutputFileName(C, OutputFilePrefix, "", OutputIsAsm ? "s" : "o"); |
| LlcArgs.push_back(LlcOutputFile); |
| const char *Llc = Args.MakeArgString(getToolChain().GetProgramPath("llc")); |
| C.addCommand(std::make_unique<Command>( |
| JA, *this, ResponseFileSupport::AtFileCurCP(), Llc, LlcArgs, Inputs, |
| InputInfo(&JA, Args.MakeArgString(LlcOutputFile)))); |
| return LlcOutputFile; |
| } |
| |
| void AMDGCN::OpenMPLinker::constructLldCommand( |
| Compilation &C, const JobAction &JA, const InputInfoList &Inputs, |
| const InputInfo &Output, const llvm::opt::ArgList &Args, |
| const char *InputFileName) const { |
| // Construct lld command. |
| // The output from ld.lld is an HSA code object file. |
| ArgStringList LldArgs{"-flavor", "gnu", "--no-undefined", |
| "-shared", "-o", Output.getFilename(), |
| InputFileName}; |
| |
| const char *Lld = Args.MakeArgString(getToolChain().GetProgramPath("lld")); |
| C.addCommand(std::make_unique<Command>( |
| JA, *this, ResponseFileSupport::AtFileCurCP(), Lld, LldArgs, Inputs, |
| InputInfo(&JA, Args.MakeArgString(Output.getFilename())))); |
| } |
| |
| // For amdgcn the inputs of the linker job are device bitcode and output is |
| // object file. It calls llvm-link, opt, llc, then lld steps. |
| void AMDGCN::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA, |
| const InputInfo &Output, |
| const InputInfoList &Inputs, |
| const ArgList &Args, |
| const char *LinkingOutput) const { |
| const ToolChain &TC = getToolChain(); |
| assert(getToolChain().getTriple().isAMDGCN() && "Unsupported target"); |
| |
| const toolchains::AMDGPUOpenMPToolChain &AMDGPUOpenMPTC = |
| static_cast<const toolchains::AMDGPUOpenMPToolChain &>(TC); |
| |
| std::string GPUArch = Args.getLastArgValue(options::OPT_march_EQ).str(); |
| if (GPUArch.empty()) { |
| if (!checkSystemForAMDGPU(Args, AMDGPUOpenMPTC, GPUArch)) |
| return; |
| } |
| |
| // Prefix for temporary file name. |
| std::string Prefix; |
| for (const auto &II : Inputs) |
| if (II.isFilename()) |
| Prefix = llvm::sys::path::stem(II.getFilename()).str() + "-" + GPUArch; |
| assert(Prefix.length() && "no linker inputs are files "); |
| |
| // Each command outputs different files. |
| const char *LLVMLinkCommand = constructLLVMLinkCommand( |
| AMDGPUOpenMPTC, C, JA, Inputs, Args, GPUArch, Prefix); |
| |
| // Produce readable assembly if save-temps is enabled. |
| if (C.getDriver().isSaveTempsEnabled()) |
| constructLlcCommand(C, JA, Inputs, Args, GPUArch, Prefix, LLVMLinkCommand, |
| /*OutputIsAsm=*/true); |
| const char *LlcCommand = constructLlcCommand(C, JA, Inputs, Args, GPUArch, |
| Prefix, LLVMLinkCommand); |
| constructLldCommand(C, JA, Inputs, Output, Args, LlcCommand); |
| } |
| |
| AMDGPUOpenMPToolChain::AMDGPUOpenMPToolChain(const Driver &D, |
| const llvm::Triple &Triple, |
| const ToolChain &HostTC, |
| const ArgList &Args) |
| : ROCMToolChain(D, Triple, Args), HostTC(HostTC) { |
| // Lookup binaries into the driver directory, this is used to |
| // discover the clang-offload-bundler executable. |
| getProgramPaths().push_back(getDriver().Dir); |
| } |
| |
| void AMDGPUOpenMPToolChain::addClangTargetOptions( |
| const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, |
| Action::OffloadKind DeviceOffloadingKind) const { |
| HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind); |
| |
| std::string GPUArch = DriverArgs.getLastArgValue(options::OPT_march_EQ).str(); |
| if (GPUArch.empty()) { |
| if (!checkSystemForAMDGPU(DriverArgs, *this, GPUArch)) |
| return; |
| } |
| |
| assert(DeviceOffloadingKind == Action::OFK_OpenMP && |
| "Only OpenMP offloading kinds are supported."); |
| |
| CC1Args.push_back("-target-cpu"); |
| CC1Args.push_back(DriverArgs.MakeArgStringRef(GPUArch)); |
| CC1Args.push_back("-fcuda-is-device"); |
| |
| if (DriverArgs.hasArg(options::OPT_nogpulib)) |
| return; |
| |
| std::string BitcodeSuffix; |
| if (DriverArgs.hasFlag(options::OPT_fopenmp_target_new_runtime, |
| options::OPT_fno_openmp_target_new_runtime, false)) |
| BitcodeSuffix = "new-amdgpu-" + GPUArch; |
| else |
| BitcodeSuffix = "amdgcn-" + GPUArch; |
| |
| addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, BitcodeSuffix, |
| getTriple()); |
| } |
| |
| llvm::opt::DerivedArgList *AMDGPUOpenMPToolChain::TranslateArgs( |
| const llvm::opt::DerivedArgList &Args, StringRef BoundArch, |
| Action::OffloadKind DeviceOffloadKind) const { |
| DerivedArgList *DAL = |
| HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind); |
| if (!DAL) |
| DAL = new DerivedArgList(Args.getBaseArgs()); |
| |
| const OptTable &Opts = getDriver().getOpts(); |
| |
| if (DeviceOffloadKind != Action::OFK_OpenMP) { |
| for (Arg *A : Args) { |
| DAL->append(A); |
| } |
| } |
| |
| if (!BoundArch.empty()) { |
| DAL->eraseArg(options::OPT_march_EQ); |
| DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), |
| BoundArch); |
| } |
| |
| return DAL; |
| } |
| |
| Tool *AMDGPUOpenMPToolChain::buildLinker() const { |
| assert(getTriple().isAMDGCN()); |
| return new tools::AMDGCN::OpenMPLinker(*this); |
| } |
| |
| void AMDGPUOpenMPToolChain::addClangWarningOptions( |
| ArgStringList &CC1Args) const { |
| HostTC.addClangWarningOptions(CC1Args); |
| } |
| |
| ToolChain::CXXStdlibType |
| AMDGPUOpenMPToolChain::GetCXXStdlibType(const ArgList &Args) const { |
| return HostTC.GetCXXStdlibType(Args); |
| } |
| |
| void AMDGPUOpenMPToolChain::AddClangSystemIncludeArgs( |
| const ArgList &DriverArgs, ArgStringList &CC1Args) const { |
| HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args); |
| } |
| |
| void AMDGPUOpenMPToolChain::AddIAMCUIncludeArgs(const ArgList &Args, |
| ArgStringList &CC1Args) const { |
| HostTC.AddIAMCUIncludeArgs(Args, CC1Args); |
| } |
| |
| SanitizerMask AMDGPUOpenMPToolChain::getSupportedSanitizers() const { |
| // The AMDGPUOpenMPToolChain only supports sanitizers in the sense that it |
| // allows sanitizer arguments on the command line if they are supported by the |
| // host toolchain. The AMDGPUOpenMPToolChain will actually ignore any command |
| // line arguments for any of these "supported" sanitizers. That means that no |
| // sanitization of device code is actually supported at this time. |
| // |
| // This behavior is necessary because the host and device toolchains |
| // invocations often share the command line, so the device toolchain must |
| // tolerate flags meant only for the host toolchain. |
| return HostTC.getSupportedSanitizers(); |
| } |
| |
| VersionTuple |
| AMDGPUOpenMPToolChain::computeMSVCVersion(const Driver *D, |
| const ArgList &Args) const { |
| return HostTC.computeMSVCVersion(D, Args); |
| } |