| //===- Target.cpp - MLIR LLVM ROCDL target compilation ----------*- C++ -*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This files defines ROCDL target related functions including registration |
| // calls for the `#rocdl.target` compilation attribute. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "mlir/Target/LLVM/ROCDL/Target.h" |
| |
| #include "mlir/Dialect/GPU/IR/GPUDialect.h" |
| #include "mlir/Dialect/LLVMIR/ROCDLDialect.h" |
| #include "mlir/Support/FileUtilities.h" |
| #include "mlir/Target/LLVM/ROCDL/Utils.h" |
| #include "mlir/Target/LLVMIR/Export.h" |
| |
| #include "llvm/IR/Constants.h" |
| #include "llvm/MC/MCAsmBackend.h" |
| #include "llvm/MC/MCAsmInfo.h" |
| #include "llvm/MC/MCCodeEmitter.h" |
| #include "llvm/MC/MCContext.h" |
| #include "llvm/MC/MCInstrInfo.h" |
| #include "llvm/MC/MCObjectFileInfo.h" |
| #include "llvm/MC/MCObjectWriter.h" |
| #include "llvm/MC/MCParser/MCTargetAsmParser.h" |
| #include "llvm/MC/MCRegisterInfo.h" |
| #include "llvm/MC/MCStreamer.h" |
| #include "llvm/MC/MCSubtargetInfo.h" |
| #include "llvm/MC/TargetRegistry.h" |
| #include "llvm/Support/FileSystem.h" |
| #include "llvm/Support/FileUtilities.h" |
| #include "llvm/Support/Path.h" |
| #include "llvm/Support/Program.h" |
| #include "llvm/Support/SourceMgr.h" |
| #include "llvm/Support/TargetSelect.h" |
| #include "llvm/TargetParser/TargetParser.h" |
| |
| #include <cstdlib> |
| #include <optional> |
| |
| using namespace mlir; |
| using namespace mlir::ROCDL; |
| |
| #ifndef __DEFAULT_ROCM_PATH__ |
| #define __DEFAULT_ROCM_PATH__ "" |
| #endif |
| |
| namespace { |
| // Implementation of the `TargetAttrInterface` model. |
| class ROCDLTargetAttrImpl |
| : public gpu::TargetAttrInterface::FallbackModel<ROCDLTargetAttrImpl> { |
| public: |
| std::optional<SmallVector<char, 0>> |
| serializeToObject(Attribute attribute, Operation *module, |
| const gpu::TargetOptions &options) const; |
| |
| Attribute createObject(Attribute attribute, Operation *module, |
| const SmallVector<char, 0> &object, |
| const gpu::TargetOptions &options) const; |
| }; |
| } // namespace |
| |
| // Register the ROCDL dialect, the ROCDL translation and the target interface. |
| void mlir::ROCDL::registerROCDLTargetInterfaceExternalModels( |
| DialectRegistry ®istry) { |
| registry.addExtension(+[](MLIRContext *ctx, ROCDL::ROCDLDialect *dialect) { |
| ROCDLTargetAttr::attachInterface<ROCDLTargetAttrImpl>(*ctx); |
| }); |
| } |
| |
| void mlir::ROCDL::registerROCDLTargetInterfaceExternalModels( |
| MLIRContext &context) { |
| DialectRegistry registry; |
| registerROCDLTargetInterfaceExternalModels(registry); |
| context.appendDialectRegistry(registry); |
| } |
| |
| // Search for the ROCM path. |
| StringRef mlir::ROCDL::getROCMPath() { |
| if (const char *var = std::getenv("ROCM_PATH")) |
| return var; |
| if (const char *var = std::getenv("ROCM_ROOT")) |
| return var; |
| if (const char *var = std::getenv("ROCM_HOME")) |
| return var; |
| return __DEFAULT_ROCM_PATH__; |
| } |
| |
| SerializeGPUModuleBase::SerializeGPUModuleBase( |
| Operation &module, ROCDLTargetAttr target, |
| const gpu::TargetOptions &targetOptions) |
| : ModuleToObject(module, target.getTriple(), target.getChip(), |
| target.getFeatures(), target.getO()), |
| target(target), toolkitPath(targetOptions.getToolkitPath()), |
| librariesToLink(targetOptions.getLibrariesToLink()) { |
| |
| // If `targetOptions` has an empty toolkitPath use `getROCMPath` |
| if (toolkitPath.empty()) |
| toolkitPath = getROCMPath(); |
| |
| // Append the files in the target attribute. |
| if (target.getLink()) |
| librariesToLink.append(target.getLink().begin(), target.getLink().end()); |
| } |
| |
| void SerializeGPUModuleBase::init() { |
| static llvm::once_flag initializeBackendOnce; |
| llvm::call_once(initializeBackendOnce, []() { |
| // If the `AMDGPU` LLVM target was built, initialize it. |
| #if MLIR_ENABLE_ROCM_CONVERSIONS |
| LLVMInitializeAMDGPUTarget(); |
| LLVMInitializeAMDGPUTargetInfo(); |
| LLVMInitializeAMDGPUTargetMC(); |
| LLVMInitializeAMDGPUAsmParser(); |
| LLVMInitializeAMDGPUAsmPrinter(); |
| #endif |
| }); |
| } |
| |
| ROCDLTargetAttr SerializeGPUModuleBase::getTarget() const { return target; } |
| |
| StringRef SerializeGPUModuleBase::getToolkitPath() const { return toolkitPath; } |
| |
| ArrayRef<Attribute> SerializeGPUModuleBase::getLibrariesToLink() const { |
| return librariesToLink; |
| } |
| |
| LogicalResult SerializeGPUModuleBase::appendStandardLibs(AMDGCNLibraries libs) { |
| if (libs == AMDGCNLibraries::None) |
| return success(); |
| StringRef pathRef = getToolkitPath(); |
| |
| // Get the path for the device libraries |
| SmallString<256> path; |
| path.insert(path.begin(), pathRef.begin(), pathRef.end()); |
| llvm::sys::path::append(path, "amdgcn", "bitcode"); |
| pathRef = StringRef(path.data(), path.size()); |
| |
| // Fail if the path is invalid. |
| if (!llvm::sys::fs::is_directory(pathRef)) { |
| getOperation().emitError() << "ROCm amdgcn bitcode path: " << pathRef |
| << " does not exist or is not a directory"; |
| return failure(); |
| } |
| |
| // Helper function for adding a library. |
| auto addLib = [&](const Twine &lib) -> bool { |
| auto baseSize = path.size(); |
| llvm::sys::path::append(path, lib); |
| StringRef pathRef(path.data(), path.size()); |
| if (!llvm::sys::fs::is_regular_file(pathRef)) { |
| getOperation().emitRemark() << "bitcode library path: " << pathRef |
| << " does not exist or is not a file"; |
| return true; |
| } |
| librariesToLink.push_back(StringAttr::get(target.getContext(), pathRef)); |
| path.truncate(baseSize); |
| return false; |
| }; |
| |
| // Add ROCm device libraries. Fail if any of the libraries is not found, ie. |
| // if any of the `addLib` failed. |
| if ((any(libs & AMDGCNLibraries::Ocml) && addLib("ocml.bc")) || |
| (any(libs & AMDGCNLibraries::Ockl) && addLib("ockl.bc")) || |
| (any(libs & AMDGCNLibraries::Hip) && addLib("hip.bc")) || |
| (any(libs & AMDGCNLibraries::OpenCL) && addLib("opencl.bc"))) |
| return failure(); |
| return success(); |
| } |
| |
| std::optional<SmallVector<std::unique_ptr<llvm::Module>>> |
| SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module) { |
| // Return if there are no libs to load. |
| if (deviceLibs == AMDGCNLibraries::None && librariesToLink.empty()) |
| return SmallVector<std::unique_ptr<llvm::Module>>(); |
| if (failed(appendStandardLibs(deviceLibs))) |
| return std::nullopt; |
| SmallVector<std::unique_ptr<llvm::Module>> bcFiles; |
| if (failed(loadBitcodeFilesFromList(module.getContext(), librariesToLink, |
| bcFiles, true))) |
| return std::nullopt; |
| return std::move(bcFiles); |
| } |
| |
| LogicalResult SerializeGPUModuleBase::handleBitcodeFile(llvm::Module &module) { |
| // Some ROCM builds don't strip this like they should |
| if (auto *openclVersion = module.getNamedMetadata("opencl.ocl.version")) |
| module.eraseNamedMetadata(openclVersion); |
| // Stop spamming us with clang version numbers |
| if (auto *ident = module.getNamedMetadata("llvm.ident")) |
| module.eraseNamedMetadata(ident); |
| // Override the libModules datalayout and target triple with the compiler's |
| // data layout should there be a discrepency. |
| setDataLayoutAndTriple(module); |
| return success(); |
| } |
| |
| void SerializeGPUModuleBase::handleModulePreLink(llvm::Module &module) { |
| // If all libraries are not set, traverse the module to determine which |
| // libraries are required. |
| if (deviceLibs != AMDGCNLibraries::All) { |
| for (llvm::Function &f : module.functions()) { |
| if (f.hasExternalLinkage() && f.hasName() && !f.hasExactDefinition()) { |
| StringRef funcName = f.getName(); |
| if ("printf" == funcName) |
| deviceLibs |= AMDGCNLibraries::OpenCL | AMDGCNLibraries::Ockl | |
| AMDGCNLibraries::Ocml; |
| if (funcName.starts_with("__ockl_")) |
| deviceLibs |= AMDGCNLibraries::Ockl; |
| if (funcName.starts_with("__ocml_")) |
| deviceLibs |= AMDGCNLibraries::Ocml; |
| if (funcName == "__atomic_work_item_fence") |
| deviceLibs |= AMDGCNLibraries::Hip; |
| } |
| } |
| } |
| addControlVariables(module, deviceLibs, target.hasWave64(), target.hasDaz(), |
| target.hasFiniteOnly(), target.hasUnsafeMath(), |
| target.hasFastMath(), target.hasCorrectSqrt(), |
| target.getAbi()); |
| } |
| |
| void SerializeGPUModuleBase::addControlVariables( |
| llvm::Module &module, AMDGCNLibraries libs, bool wave64, bool daz, |
| bool finiteOnly, bool unsafeMath, bool fastMath, bool correctSqrt, |
| StringRef abiVer) { |
| // Helper function for adding control variables. |
| auto addControlVariable = [&module](StringRef name, uint32_t value, |
| uint32_t bitwidth) { |
| if (module.getNamedGlobal(name)) |
| return; |
| llvm::IntegerType *type = |
| llvm::IntegerType::getIntNTy(module.getContext(), bitwidth); |
| llvm::GlobalVariable *controlVariable = new llvm::GlobalVariable( |
| module, /*isConstant=*/type, true, |
| llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage, |
| llvm::ConstantInt::get(type, value), name, /*before=*/nullptr, |
| /*threadLocalMode=*/llvm::GlobalValue::ThreadLocalMode::NotThreadLocal, |
| /*addressSpace=*/4); |
| controlVariable->setVisibility( |
| llvm::GlobalValue::VisibilityTypes::ProtectedVisibility); |
| controlVariable->setAlignment(llvm::MaybeAlign(bitwidth / 8)); |
| controlVariable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local); |
| }; |
| |
| // Note that COV6 requires ROCm 6.3+. |
| int abi = 600; |
| abiVer.getAsInteger(0, abi); |
| module.addModuleFlag(llvm::Module::Error, "amdhsa_code_object_version", abi); |
| // Return if no device libraries are required. |
| if (libs == AMDGCNLibraries::None) |
| return; |
| // Add ocml related control variables. |
| if (any(libs & AMDGCNLibraries::Ocml)) { |
| addControlVariable("__oclc_finite_only_opt", finiteOnly || fastMath, 8); |
| addControlVariable("__oclc_daz_opt", daz || fastMath, 8); |
| addControlVariable("__oclc_correctly_rounded_sqrt32", |
| correctSqrt && !fastMath, 8); |
| addControlVariable("__oclc_unsafe_math_opt", unsafeMath || fastMath, 8); |
| } |
| // Add ocml or ockl related control variables. |
| if (any(libs & (AMDGCNLibraries::Ocml | AMDGCNLibraries::Ockl))) { |
| addControlVariable("__oclc_wavefrontsize64", wave64, 8); |
| // Get the ISA version. |
| llvm::AMDGPU::IsaVersion isaVersion = llvm::AMDGPU::getIsaVersion(chip); |
| // Add the ISA control variable. |
| addControlVariable("__oclc_ISA_version", |
| isaVersion.Minor + 100 * isaVersion.Stepping + |
| 1000 * isaVersion.Major, |
| 32); |
| addControlVariable("__oclc_ABI_version", abi, 32); |
| } |
| } |
| |
| std::optional<SmallVector<char, 0>> |
| SerializeGPUModuleBase::assembleIsa(StringRef isa) { |
| auto loc = getOperation().getLoc(); |
| |
| StringRef targetTriple = this->triple; |
| |
| SmallVector<char, 0> result; |
| llvm::raw_svector_ostream os(result); |
| |
| llvm::Triple triple(llvm::Triple::normalize(targetTriple)); |
| std::string error; |
| const llvm::Target *target = |
| llvm::TargetRegistry::lookupTarget(triple.normalize(), error); |
| if (!target) { |
| emitError(loc, Twine("failed to lookup target: ") + error); |
| return std::nullopt; |
| } |
| |
| llvm::SourceMgr srcMgr; |
| srcMgr.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(isa), SMLoc()); |
| |
| const llvm::MCTargetOptions mcOptions; |
| std::unique_ptr<llvm::MCRegisterInfo> mri( |
| target->createMCRegInfo(targetTriple)); |
| std::unique_ptr<llvm::MCAsmInfo> mai( |
| target->createMCAsmInfo(*mri, targetTriple, mcOptions)); |
| std::unique_ptr<llvm::MCSubtargetInfo> sti( |
| target->createMCSubtargetInfo(targetTriple, chip, features)); |
| |
| llvm::MCContext ctx(triple, mai.get(), mri.get(), sti.get(), &srcMgr, |
| &mcOptions); |
| std::unique_ptr<llvm::MCObjectFileInfo> mofi(target->createMCObjectFileInfo( |
| ctx, /*PIC=*/false, /*LargeCodeModel=*/false)); |
| ctx.setObjectFileInfo(mofi.get()); |
| |
| SmallString<128> cwd; |
| if (!llvm::sys::fs::current_path(cwd)) |
| ctx.setCompilationDir(cwd); |
| |
| std::unique_ptr<llvm::MCStreamer> mcStreamer; |
| std::unique_ptr<llvm::MCInstrInfo> mcii(target->createMCInstrInfo()); |
| |
| llvm::MCCodeEmitter *ce = target->createMCCodeEmitter(*mcii, ctx); |
| llvm::MCAsmBackend *mab = target->createMCAsmBackend(*sti, *mri, mcOptions); |
| mcStreamer.reset(target->createMCObjectStreamer( |
| triple, ctx, std::unique_ptr<llvm::MCAsmBackend>(mab), |
| mab->createObjectWriter(os), std::unique_ptr<llvm::MCCodeEmitter>(ce), |
| *sti)); |
| |
| std::unique_ptr<llvm::MCAsmParser> parser( |
| createMCAsmParser(srcMgr, ctx, *mcStreamer, *mai)); |
| std::unique_ptr<llvm::MCTargetAsmParser> tap( |
| target->createMCAsmParser(*sti, *parser, *mcii, mcOptions)); |
| |
| if (!tap) { |
| emitError(loc, "assembler initialization error"); |
| return std::nullopt; |
| } |
| |
| parser->setTargetParser(*tap); |
| parser->Run(false); |
| return std::move(result); |
| } |
| |
| std::optional<SmallVector<char, 0>> |
| SerializeGPUModuleBase::compileToBinary(const std::string &serializedISA) { |
| // Assemble the ISA. |
| std::optional<SmallVector<char, 0>> isaBinary = assembleIsa(serializedISA); |
| |
| if (!isaBinary) { |
| getOperation().emitError() << "failed during ISA assembling"; |
| return std::nullopt; |
| } |
| |
| // Save the ISA binary to a temp file. |
| int tempIsaBinaryFd = -1; |
| SmallString<128> tempIsaBinaryFilename; |
| if (llvm::sys::fs::createTemporaryFile("kernel%%", "o", tempIsaBinaryFd, |
| tempIsaBinaryFilename)) { |
| getOperation().emitError() |
| << "failed to create a temporary file for dumping the ISA binary"; |
| return std::nullopt; |
| } |
| llvm::FileRemover cleanupIsaBinary(tempIsaBinaryFilename); |
| { |
| llvm::raw_fd_ostream tempIsaBinaryOs(tempIsaBinaryFd, true); |
| tempIsaBinaryOs << StringRef(isaBinary->data(), isaBinary->size()); |
| tempIsaBinaryOs.flush(); |
| } |
| |
| // Create a temp file for HSA code object. |
| SmallString<128> tempHsacoFilename; |
| if (llvm::sys::fs::createTemporaryFile("kernel", "hsaco", |
| tempHsacoFilename)) { |
| getOperation().emitError() |
| << "failed to create a temporary file for the HSA code object"; |
| return std::nullopt; |
| } |
| llvm::FileRemover cleanupHsaco(tempHsacoFilename); |
| |
| llvm::SmallString<128> lldPath(toolkitPath); |
| llvm::sys::path::append(lldPath, "llvm", "bin", "ld.lld"); |
| int lldResult = llvm::sys::ExecuteAndWait( |
| lldPath, |
| {"ld.lld", "-shared", tempIsaBinaryFilename, "-o", tempHsacoFilename}); |
| if (lldResult != 0) { |
| getOperation().emitError() << "lld invocation failed"; |
| return std::nullopt; |
| } |
| |
| // Load the HSA code object. |
| auto hsacoFile = |
| llvm::MemoryBuffer::getFile(tempHsacoFilename, /*IsText=*/false); |
| if (!hsacoFile) { |
| getOperation().emitError() |
| << "failed to read the HSA code object from the temp file"; |
| return std::nullopt; |
| } |
| |
| StringRef buffer = (*hsacoFile)->getBuffer(); |
| |
| return SmallVector<char, 0>(buffer.begin(), buffer.end()); |
| } |
| |
| std::optional<SmallVector<char, 0>> SerializeGPUModuleBase::moduleToObjectImpl( |
| const gpu::TargetOptions &targetOptions, llvm::Module &llvmModule) { |
| // Return LLVM IR if the compilation target is offload. |
| #define DEBUG_TYPE "serialize-to-llvm" |
| LLVM_DEBUG({ |
| llvm::dbgs() << "LLVM IR for module: " |
| << cast<gpu::GPUModuleOp>(getOperation()).getNameAttr() << "\n" |
| << llvmModule << "\n"; |
| }); |
| #undef DEBUG_TYPE |
| if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Offload) |
| return SerializeGPUModuleBase::moduleToObject(llvmModule); |
| |
| std::optional<llvm::TargetMachine *> targetMachine = |
| getOrCreateTargetMachine(); |
| if (!targetMachine) { |
| getOperation().emitError() << "target Machine unavailable for triple " |
| << triple << ", can't compile with LLVM"; |
| return std::nullopt; |
| } |
| |
| // Translate the Module to ISA. |
| std::optional<std::string> serializedISA = |
| translateToISA(llvmModule, **targetMachine); |
| if (!serializedISA) { |
| getOperation().emitError() << "failed translating the module to ISA"; |
| return std::nullopt; |
| } |
| #define DEBUG_TYPE "serialize-to-isa" |
| LLVM_DEBUG({ |
| llvm::dbgs() << "ISA for module: " |
| << cast<gpu::GPUModuleOp>(getOperation()).getNameAttr() << "\n" |
| << *serializedISA << "\n"; |
| }); |
| #undef DEBUG_TYPE |
| // Return ISA assembly code if the compilation target is assembly. |
| if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Assembly) |
| return SmallVector<char, 0>(serializedISA->begin(), serializedISA->end()); |
| |
| // Compiling to binary requires a valid ROCm path, fail if it's not found. |
| if (getToolkitPath().empty()) { |
| getOperation().emitError() << "invalid ROCm path, please set a valid path"; |
| return std::nullopt; |
| } |
| |
| // Compile to binary. |
| return compileToBinary(*serializedISA); |
| } |
| |
| #if MLIR_ENABLE_ROCM_CONVERSIONS |
| namespace { |
| class AMDGPUSerializer : public SerializeGPUModuleBase { |
| public: |
| AMDGPUSerializer(Operation &module, ROCDLTargetAttr target, |
| const gpu::TargetOptions &targetOptions); |
| |
| std::optional<SmallVector<char, 0>> |
| moduleToObject(llvm::Module &llvmModule) override; |
| |
| private: |
| // Target options. |
| gpu::TargetOptions targetOptions; |
| }; |
| } // namespace |
| |
| AMDGPUSerializer::AMDGPUSerializer(Operation &module, ROCDLTargetAttr target, |
| const gpu::TargetOptions &targetOptions) |
| : SerializeGPUModuleBase(module, target, targetOptions), |
| targetOptions(targetOptions) {} |
| |
| std::optional<SmallVector<char, 0>> |
| AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) { |
| return moduleToObjectImpl(targetOptions, llvmModule); |
| } |
| #endif // MLIR_ENABLE_ROCM_CONVERSIONS |
| |
| std::optional<SmallVector<char, 0>> ROCDLTargetAttrImpl::serializeToObject( |
| Attribute attribute, Operation *module, |
| const gpu::TargetOptions &options) const { |
| assert(module && "The module must be non null."); |
| if (!module) |
| return std::nullopt; |
| if (!mlir::isa<gpu::GPUModuleOp>(module)) { |
| module->emitError("module must be a GPU module"); |
| return std::nullopt; |
| } |
| #if MLIR_ENABLE_ROCM_CONVERSIONS |
| AMDGPUSerializer serializer(*module, cast<ROCDLTargetAttr>(attribute), |
| options); |
| serializer.init(); |
| return serializer.run(); |
| #else |
| module->emitError("the `AMDGPU` target was not built. Please enable it when " |
| "building LLVM"); |
| return std::nullopt; |
| #endif // MLIR_ENABLE_ROCM_CONVERSIONS |
| } |
| |
| Attribute |
| ROCDLTargetAttrImpl::createObject(Attribute attribute, Operation *module, |
| const SmallVector<char, 0> &object, |
| const gpu::TargetOptions &options) const { |
| gpu::CompilationTarget format = options.getCompilationTarget(); |
| // If format is `fatbin` transform it to binary as `fatbin` is not yet |
| // supported. |
| gpu::KernelTableAttr kernels; |
| if (format > gpu::CompilationTarget::Binary) { |
| format = gpu::CompilationTarget::Binary; |
| kernels = ROCDL::getKernelMetadata(module, object); |
| } |
| DictionaryAttr properties{}; |
| Builder builder(attribute.getContext()); |
| StringAttr objectStr = |
| builder.getStringAttr(StringRef(object.data(), object.size())); |
| return builder.getAttr<gpu::ObjectAttr>(attribute, format, objectStr, |
| properties, kernels); |
| } |