blob: 04b1b22279e5db3c1aee75f4d6374f222f33a17c [file] [log] [blame]
//===- Utils.cpp - MLIR ROCDL target utils ----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This files defines ROCDL target related utility classes and functions.
//
//===----------------------------------------------------------------------===//
#include "mlir/Target/LLVM/ROCDL/Utils.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Frontend/Offloading/Utility.h"
using namespace mlir;
using namespace mlir::ROCDL;
std::optional<DenseMap<StringAttr, NamedAttrList>>
mlir::ROCDL::getAMDHSAKernelsELFMetadata(Builder &builder,
ArrayRef<char> elfData) {
uint16_t elfABIVersion;
llvm::StringMap<llvm::offloading::amdgpu::AMDGPUKernelMetaData> kernels;
llvm::MemoryBufferRef buffer(StringRef(elfData.data(), elfData.size()),
"buffer");
// Get the metadata.
llvm::Error error = llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage(
buffer, kernels, elfABIVersion);
// Return `nullopt` if the metadata couldn't be retrieved.
if (error) {
llvm::consumeError(std::move(error));
return std::nullopt;
}
// Helper lambda for converting values.
auto getI32Array = [&builder](const uint32_t *array) {
return builder.getDenseI32ArrayAttr({static_cast<int32_t>(array[0]),
static_cast<int32_t>(array[1]),
static_cast<int32_t>(array[2])});
};
DenseMap<StringAttr, NamedAttrList> kernelMD;
for (const auto &[name, kernel] : kernels) {
NamedAttrList attrs;
// Add kernel metadata.
attrs.append("agpr_count", builder.getI64IntegerAttr(kernel.AGPRCount));
attrs.append("sgpr_count", builder.getI64IntegerAttr(kernel.SGPRCount));
attrs.append("vgpr_count", builder.getI64IntegerAttr(kernel.VGPRCount));
attrs.append("sgpr_spill_count",
builder.getI64IntegerAttr(kernel.SGPRSpillCount));
attrs.append("vgpr_spill_count",
builder.getI64IntegerAttr(kernel.VGPRSpillCount));
attrs.append("wavefront_size",
builder.getI64IntegerAttr(kernel.WavefrontSize));
attrs.append("max_flat_workgroup_size",
builder.getI64IntegerAttr(kernel.MaxFlatWorkgroupSize));
attrs.append("group_segment_fixed_size",
builder.getI64IntegerAttr(kernel.GroupSegmentList));
attrs.append("private_segment_fixed_size",
builder.getI64IntegerAttr(kernel.PrivateSegmentSize));
attrs.append("reqd_workgroup_size",
getI32Array(kernel.RequestedWorkgroupSize));
attrs.append("workgroup_size_hint", getI32Array(kernel.WorkgroupSizeHint));
kernelMD[builder.getStringAttr(name)] = std::move(attrs);
}
return std::move(kernelMD);
}
gpu::KernelTableAttr mlir::ROCDL::getKernelMetadata(Operation *gpuModule,
ArrayRef<char> elfData) {
auto module = cast<gpu::GPUModuleOp>(gpuModule);
Builder builder(module.getContext());
SmallVector<gpu::KernelMetadataAttr> kernels;
std::optional<DenseMap<StringAttr, NamedAttrList>> mdMapOrNull =
getAMDHSAKernelsELFMetadata(builder, elfData);
for (auto funcOp : module.getBody()->getOps<LLVM::LLVMFuncOp>()) {
if (!funcOp->getDiscardableAttr("rocdl.kernel"))
continue;
kernels.push_back(gpu::KernelMetadataAttr::get(
funcOp, mdMapOrNull ? builder.getDictionaryAttr(
mdMapOrNull->lookup(funcOp.getNameAttr()))
: nullptr));
}
return gpu::KernelTableAttr::get(gpuModule->getContext(), kernels);
}