[mlir][gpu] Add metadata attributes for storing kernel metadata in GPU objects (#95292)
This patch adds the `#gpu.kernel_metadata` and `#gpu.kernel_table`
attributes. The `#gpu.kernel_metadata` attribute allows storing metadata
related to a compiled kernel, for example, the number of scalar
registers used by the kernel. The attribute only has 2 required
parameters, the name and function type. It also has 2 optional
parameters, the arguments attributes and generic dictionary for storing
all other metadata.
The `#gpu.kernel_table` stores a table of `#gpu.kernel_metadata`,
mapping the name of the kernel to the metadata.
Finally, the function `ROCDL::getAMDHSAKernelsELFMetadata` was added to
collect ELF metadata from a binary, and to test the class methods in
both attributes.
Example:
```mlir
gpu.binary @binary [#gpu.object<#rocdl.target<chip = "gfx900">, kernels = #gpu.kernel_table<[
#gpu.kernel_metadata<"kernel0", (i32) -> (), metadata = {sgpr_count = 255}>,
#gpu.kernel_metadata<"kernel1", (i32, f32) -> (), arg_attrs = [{llvm.read_only}, {}]>
]> , bin = "BLOB">]
```
The motivation behind these attributes is to provide useful information
for things like tunning.
---------
Co-authored-by: Mehdi Amini <joker.eph@gmail.com>diff --git a/mlir/include/mlir-c/Dialect/GPU.h b/mlir/include/mlir-c/Dialect/GPU.h
index c42ff61..321c112 100644
--- a/mlir/include/mlir-c/Dialect/GPU.h
+++ b/mlir/include/mlir-c/Dialect/GPU.h
@@ -37,6 +37,11 @@
mlirGPUObjectAttrGet(MlirContext mlirCtx, MlirAttribute target, uint32_t format,
MlirStringRef objectStrRef, MlirAttribute mlirObjectProps);
+MLIR_CAPI_EXPORTED MlirAttribute mlirGPUObjectAttrGetWithKernels(
+ MlirContext mlirCtx, MlirAttribute target, uint32_t format,
+ MlirStringRef objectStrRef, MlirAttribute mlirObjectProps,
+ MlirAttribute mlirKernelsAttr);
+
MLIR_CAPI_EXPORTED MlirAttribute
mlirGPUObjectAttrGetTarget(MlirAttribute mlirObjectAttr);
@@ -52,6 +57,12 @@
MLIR_CAPI_EXPORTED MlirAttribute
mlirGPUObjectAttrGetProperties(MlirAttribute mlirObjectAttr);
+MLIR_CAPI_EXPORTED bool
+mlirGPUObjectAttrHasKernels(MlirAttribute mlirObjectAttr);
+
+MLIR_CAPI_EXPORTED MlirAttribute
+mlirGPUObjectAttrGetKernels(MlirAttribute mlirObjectAttr);
+
#ifdef __cplusplus
}
#endif
diff --git a/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td b/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td
index 6659f4a..07879a0 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/CompilationAttrs.td
@@ -17,6 +17,155 @@
include "mlir/Dialect/GPU/IR/CompilationAttrInterfaces.td"
//===----------------------------------------------------------------------===//
+// GPU kernel metadata attribute
+//===----------------------------------------------------------------------===//
+
+def GPU_KernelMetadataAttr : GPU_Attr<"KernelMetadata", "kernel_metadata"> {
+ let description = [{
+ GPU attribute for storing metadata related to a compiled kernel. The
+ attribute contains the name and arguments type of the kernel.
+
+ The attribute also contains optional parameters for storing the arguments
+ attributes as well as a dictionary for additional metadata, like occupancy
+ information or other function attributes.
+
+ Note: The `arg_attrs` parameter is expected to follow all the constraints
+ imposed by the `mlir::FunctionOpInterface` interface.
+
+ Examples:
+ ```mlir
+ #gpu.kernel_metadata<@kernel1, (i32) -> (), arg_attrs = [...], metadata = {reg_count = 255, ...}>
+ #gpu.kernel_metadata<@kernel2, (i32, f64) -> ()>
+ ```
+ }];
+ let parameters = (ins
+ "StringAttr":$name,
+ "Type":$function_type,
+ OptionalParameter<"ArrayAttr", "arguments attributes">:$arg_attrs,
+ OptionalParameter<"DictionaryAttr", "metadata dictionary">:$metadata
+ );
+ let assemblyFormat = [{
+ `<` $name `,` $function_type (`,` struct($arg_attrs, $metadata)^)? `>`
+ }];
+ let builders = [
+ AttrBuilderWithInferredContext<(ins "StringAttr":$name,
+ "Type":$functionType,
+ CArg<"ArrayAttr", "nullptr">:$argAttrs,
+ CArg<"DictionaryAttr",
+ "nullptr">:$metadata), [{
+ assert(name && "invalid name");
+ return $_get(name.getContext(), name, functionType, argAttrs, metadata);
+ }]>,
+ AttrBuilderWithInferredContext<(ins "FunctionOpInterface":$kernel,
+ CArg<"DictionaryAttr",
+ "nullptr">:$metadata)>
+ ];
+ let genVerifyDecl = 1;
+ let extraClassDeclaration = [{
+ /// Compare two kernels based on the name.
+ bool operator<(const KernelMetadataAttr& other) const {
+ return getName().getValue() < other.getName().getValue();
+ }
+
+ /// Returns the metadata attribute corresponding to `key` or `nullptr`
+ /// if missing.
+ Attribute getAttr(StringRef key) const {
+ DictionaryAttr attrs = getMetadata();
+ return attrs ? attrs.get(key) : nullptr;
+ }
+ template <typename ConcreteAttr>
+ ConcreteAttr getAttr(StringRef key) const {
+ return llvm::dyn_cast_or_null<ConcreteAttr>(getAttr(key));
+ }
+ Attribute getAttr(StringAttr key) const {
+ DictionaryAttr attrs = getMetadata();
+ return attrs ? attrs.get(key) : nullptr;
+ }
+ template <typename ConcreteAttr>
+ ConcreteAttr getAttr(StringAttr key) const {
+ return llvm::dyn_cast_or_null<ConcreteAttr>(getAttr(key));
+ }
+
+ /// Returns the attribute dictionary at position `index`.
+ DictionaryAttr getArgAttrDict(unsigned index) {
+ ArrayAttr argArray = getArgAttrs();
+ return argArray ? llvm::cast<DictionaryAttr>(argArray[index]) : nullptr;
+ }
+
+ /// Return the specified attribute, if present, for the argument at 'index',
+ /// null otherwise.
+ Attribute getArgAttr(unsigned index, StringAttr name) {
+ DictionaryAttr argDict = getArgAttrDict(index);
+ return argDict ? argDict.get(name) : nullptr;
+ }
+ Attribute getArgAttr(unsigned index, StringRef name) {
+ DictionaryAttr argDict = getArgAttrDict(index);
+ return argDict ? argDict.get(name) : nullptr;
+ }
+
+ /// Returns a new KernelMetadataAttr that contains `attrs` in the metadata dictionary.
+ KernelMetadataAttr appendMetadata(ArrayRef<NamedAttribute> attrs) const;
+ }];
+}
+
+//===----------------------------------------------------------------------===//
+// GPU kernel table attribute
+//===----------------------------------------------------------------------===//
+
+def GPU_KernelTableAttr : GPU_Attr<"KernelTable", "kernel_table"> {
+ let description = [{
+ GPU attribute representing a list of `#gpu.kernel_metadata` attributes. This
+ attribute supports searching kernels by name. All kernels in the table must
+ have an unique name.
+
+ Examples:
+ ```mlir
+ // Empty table.
+ #gpu.kernel_table<>
+
+ // Table with a single kernel.
+ #gpu.kernel_table<[#gpu.kernel_metadata<kernel0, () -> () >]>
+
+ // Table with multiple kernels.
+ #gpu.kernel_table<[
+ #gpu.kernel_metadata<"kernel0", (i32, f32) -> (), metadata = {sgpr_count = 255}>,
+ #gpu.kernel_metadata<"kernel1", (i32) -> ()>
+ ]>
+ ```
+ }];
+ let parameters = (ins
+ OptionalArrayRefParameter<"KernelMetadataAttr", "array of kernels">:$kernel_table
+ );
+ let assemblyFormat = [{
+ `<` (`[` qualified($kernel_table)^ `]`)? `>`
+ }];
+ let builders = [
+ AttrBuilder<(ins "ArrayRef<KernelMetadataAttr>":$kernels,
+ CArg<"bool", "false">:$isSorted)>
+ ];
+ let skipDefaultBuilders = 1;
+ let genVerifyDecl = 1;
+ let extraClassDeclaration = [{
+ llvm::ArrayRef<KernelMetadataAttr>::iterator begin() const {
+ return getKernelTable().begin();
+ }
+ llvm::ArrayRef<KernelMetadataAttr>::iterator end() const {
+ return getKernelTable().end();
+ }
+ size_t size() const {
+ return getKernelTable().size();
+ }
+ bool empty() const {
+ return getKernelTable().empty();
+ }
+
+ /// Returns the kernel with name `key` or `nullptr` if not present.
+ KernelMetadataAttr lookup(StringRef key) const;
+ KernelMetadataAttr lookup(StringAttr key) const;
+ }];
+}
+
+//===----------------------------------------------------------------------===//
// GPU object attribute.
//===----------------------------------------------------------------------===//
@@ -36,8 +185,9 @@
def GPU_ObjectAttr : GPU_Attr<"Object", "object"> {
let description = [{
A GPU object attribute glues together a GPU target, the object kind, a
- binary string with the object, and the object properties, encapsulating how
- the object was generated and its properties with the object itself.
+ binary string with the object, the object properties, and kernel metadata,
+ encapsulating how the object was generated and its properties with the
+ object itself.
There are four object formats:
1. `Offload`: represents generic objects not described by the other three
@@ -55,6 +205,10 @@
Object properties are specified through the `properties` dictionary
attribute and can be used to define additional information.
+
+ Kernel metadata is specified through the `kernels` parameter, and can be
+ used to specify additional information on a kernel by kernel basis.
+
The target attribute must implement or promise the `TargetAttrInterface`
interface.
@@ -63,16 +217,29 @@
#gpu.object<#nvvm.target, properties = {O = 3 : i32}, assembly = "..."> // An assembly object with additional properties.
#gpu.object<#rocdl.target, bin = "..."> // A binary object.
#gpu.object<#nvvm.target, "..."> // A fatbin object.
+ #gpu.object<#nvvm.target, kernels = #gpu.kernel_table<...>, "..."> // An object with a kernel table.
```
}];
let parameters = (ins
"Attribute":$target,
DefaultValuedParameter<"CompilationTarget", "CompilationTarget::Fatbin">:$format,
"StringAttr":$object,
- OptionalParameter<"DictionaryAttr">:$properties
+ OptionalParameter<"DictionaryAttr">:$properties,
+ OptionalParameter<"KernelTableAttr">:$kernels
);
+ let builders = [
+ AttrBuilderWithInferredContext<(ins "Attribute":$target,
+ "CompilationTarget":$format,
+ "StringAttr":$object,
+ CArg<"DictionaryAttr", "nullptr">:$properties,
+ CArg<"KernelTableAttr", "nullptr">:$kernels), [{
+ assert(target && "invalid target");
+ return $_get(target.getContext(), target, format, object, properties, kernels);
+ }]>
+ ];
let assemblyFormat = [{ `<`
- $target `,` (`properties` `=` $properties ^ `,`)?
+ $target `,` (`properties` `=` $properties^ `,`)?
+ (`kernels` `=` $kernels^ `,`)?
custom<Object>($format, $object)
`>`
}];
diff --git a/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h b/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h
index 3c637a0..3d2174c 100644
--- a/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h
+++ b/mlir/include/mlir/Target/LLVM/ROCDL/Utils.h
@@ -14,6 +14,7 @@
#define MLIR_TARGET_LLVM_ROCDL_UTILS_H
#include "mlir/Dialect/GPU/IR/CompilationInterfaces.h"
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
#include "mlir/Support/LLVM.h"
#include "mlir/Target/LLVM/ModuleToObject.h"
@@ -107,6 +108,20 @@
/// AMD GCN libraries to use when linking, the default is using none.
AMDGCNLibraries deviceLibs = AMDGCNLibraries::None;
};
+
+/// Returns a map containing the `amdhsa.kernels` ELF metadata for each of the
+/// kernels in the binary, or `std::nullopt` if the metadata couldn't be
+/// retrieved. The map associates the name of the kernel with the list of named
+/// attributes found in `amdhsa.kernels`. For more information on the ELF
+/// metadata see: https://llvm.org/docs/AMDGPUUsage.html#amdhsa
+std::optional<DenseMap<StringAttr, NamedAttrList>>
+getAMDHSAKernelsELFMetadata(Builder &builder, ArrayRef<char> elfData);
+
+/// Returns a `#gpu.kernel_table` containing kernel metadata for each of the
+/// kernels in `gpuModule`. If `elfData` is valid, then the `amdhsa.kernels` ELF
+/// metadata will be added to the `#gpu.kernel_table`.
+gpu::KernelTableAttr getKernelMetadata(Operation *gpuModule,
+ ArrayRef<char> elfData = {});
} // namespace ROCDL
} // namespace mlir
diff --git a/mlir/lib/Bindings/Python/DialectGPU.cpp b/mlir/lib/Bindings/Python/DialectGPU.cpp
index a9e339b..560a54b 100644
--- a/mlir/lib/Bindings/Python/DialectGPU.cpp
+++ b/mlir/lib/Bindings/Python/DialectGPU.cpp
@@ -48,17 +48,21 @@
.def_classmethod(
"get",
[](py::object cls, MlirAttribute target, uint32_t format,
- py::bytes object, std::optional<MlirAttribute> mlirObjectProps) {
+ py::bytes object, std::optional<MlirAttribute> mlirObjectProps,
+ std::optional<MlirAttribute> mlirKernelsAttr) {
py::buffer_info info(py::buffer(object).request());
MlirStringRef objectStrRef =
mlirStringRefCreate(static_cast<char *>(info.ptr), info.size);
- return cls(mlirGPUObjectAttrGet(
+ return cls(mlirGPUObjectAttrGetWithKernels(
mlirAttributeGetContext(target), target, format, objectStrRef,
mlirObjectProps.has_value() ? *mlirObjectProps
+ : MlirAttribute{nullptr},
+ mlirKernelsAttr.has_value() ? *mlirKernelsAttr
: MlirAttribute{nullptr}));
},
"cls"_a, "target"_a, "format"_a, "object"_a,
- "properties"_a = py::none(), "Gets a gpu.object from parameters.")
+ "properties"_a = py::none(), "kernels"_a = py::none(),
+ "Gets a gpu.object from parameters.")
.def_property_readonly(
"target",
[](MlirAttribute self) { return mlirGPUObjectAttrGetTarget(self); })
@@ -71,9 +75,16 @@
MlirStringRef stringRef = mlirGPUObjectAttrGetObject(self);
return py::bytes(stringRef.data, stringRef.length);
})
- .def_property_readonly("properties", [](MlirAttribute self) {
- if (mlirGPUObjectAttrHasProperties(self))
- return py::cast(mlirGPUObjectAttrGetProperties(self));
+ .def_property_readonly("properties",
+ [](MlirAttribute self) {
+ if (mlirGPUObjectAttrHasProperties(self))
+ return py::cast(
+ mlirGPUObjectAttrGetProperties(self));
+ return py::none().cast<py::object>();
+ })
+ .def_property_readonly("kernels", [](MlirAttribute self) {
+ if (mlirGPUObjectAttrHasKernels(self))
+ return py::cast(mlirGPUObjectAttrGetKernels(self));
return py::none().cast<py::object>();
});
}
diff --git a/mlir/lib/CAPI/Dialect/GPU.cpp b/mlir/lib/CAPI/Dialect/GPU.cpp
index 0acebb2..e4796ed 100644
--- a/mlir/lib/CAPI/Dialect/GPU.cpp
+++ b/mlir/lib/CAPI/Dialect/GPU.cpp
@@ -43,9 +43,28 @@
DictionaryAttr objectProps;
if (mlirObjectProps.ptr != nullptr)
objectProps = llvm::cast<DictionaryAttr>(unwrap(mlirObjectProps));
- return wrap(gpu::ObjectAttr::get(ctx, unwrap(target),
- static_cast<gpu::CompilationTarget>(format),
- StringAttr::get(ctx, object), objectProps));
+ return wrap(gpu::ObjectAttr::get(
+ ctx, unwrap(target), static_cast<gpu::CompilationTarget>(format),
+ StringAttr::get(ctx, object), objectProps, nullptr));
+}
+
+MlirAttribute mlirGPUObjectAttrGetWithKernels(MlirContext mlirCtx,
+ MlirAttribute target,
+ uint32_t format,
+ MlirStringRef objectStrRef,
+ MlirAttribute mlirObjectProps,
+ MlirAttribute mlirKernelsAttr) {
+ MLIRContext *ctx = unwrap(mlirCtx);
+ llvm::StringRef object = unwrap(objectStrRef);
+ DictionaryAttr objectProps;
+ if (mlirObjectProps.ptr != nullptr)
+ objectProps = llvm::cast<DictionaryAttr>(unwrap(mlirObjectProps));
+ gpu::KernelTableAttr kernels;
+ if (mlirKernelsAttr.ptr != nullptr)
+ kernels = llvm::cast<gpu::KernelTableAttr>(unwrap(mlirKernelsAttr));
+ return wrap(gpu::ObjectAttr::get(
+ ctx, unwrap(target), static_cast<gpu::CompilationTarget>(format),
+ StringAttr::get(ctx, object), objectProps, kernels));
}
MlirAttribute mlirGPUObjectAttrGetTarget(MlirAttribute mlirObjectAttr) {
@@ -78,3 +97,15 @@
llvm::cast<gpu::ObjectAttr>(unwrap(mlirObjectAttr));
return wrap(objectAttr.getProperties());
}
+
+bool mlirGPUObjectAttrHasKernels(MlirAttribute mlirObjectAttr) {
+ gpu::ObjectAttr objectAttr =
+ llvm::cast<gpu::ObjectAttr>(unwrap(mlirObjectAttr));
+ return objectAttr.getKernels() != nullptr;
+}
+
+MlirAttribute mlirGPUObjectAttrGetKernels(MlirAttribute mlirObjectAttr) {
+ gpu::ObjectAttr objectAttr =
+ llvm::cast<gpu::ObjectAttr>(unwrap(mlirObjectAttr));
+ return wrap(objectAttr.getKernels());
+}
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index a599522..e45ba78 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -2091,7 +2091,8 @@
LogicalResult ObjectAttr::verify(function_ref<InFlightDiagnostic()> emitError,
Attribute target, CompilationTarget format,
- StringAttr object, DictionaryAttr properties) {
+ StringAttr object, DictionaryAttr properties,
+ KernelTableAttr kernels) {
if (!target)
return emitError() << "the target attribute cannot be null";
if (target.hasPromiseOrImplementsInterface<TargetAttrInterface>())
@@ -2178,6 +2179,113 @@
}
//===----------------------------------------------------------------------===//
+// GPU KernelMetadataAttr
+//===----------------------------------------------------------------------===//
+
+KernelMetadataAttr KernelMetadataAttr::get(FunctionOpInterface kernel,
+ DictionaryAttr metadata) {
+ assert(kernel && "invalid kernel");
+ return get(kernel.getNameAttr(), kernel.getFunctionType(),
+ kernel.getAllArgAttrs(), metadata);
+}
+
+KernelMetadataAttr
+KernelMetadataAttr::getChecked(function_ref<InFlightDiagnostic()> emitError,
+ FunctionOpInterface kernel,
+ DictionaryAttr metadata) {
+ assert(kernel && "invalid kernel");
+ return getChecked(emitError, kernel.getNameAttr(), kernel.getFunctionType(),
+ kernel.getAllArgAttrs(), metadata);
+}
+
+KernelMetadataAttr
+KernelMetadataAttr::appendMetadata(ArrayRef<NamedAttribute> attrs) const {
+ if (attrs.empty())
+ return *this;
+ NamedAttrList attrList;
+ if (DictionaryAttr dict = getMetadata())
+ attrList.append(dict);
+ attrList.append(attrs);
+ return KernelMetadataAttr::get(getName(), getFunctionType(), getArgAttrs(),
+ attrList.getDictionary(getContext()));
+}
+
+LogicalResult
+KernelMetadataAttr::verify(function_ref<InFlightDiagnostic()> emitError,
+ StringAttr name, Type functionType,
+ ArrayAttr argAttrs, DictionaryAttr metadata) {
+ if (name.empty())
+ return emitError() << "the kernel name can't be empty";
+ if (argAttrs) {
+ if (llvm::any_of(argAttrs, [](Attribute attr) {
+ return !llvm::isa<DictionaryAttr>(attr);
+ }))
+ return emitError()
+ << "all attributes in the array must be a dictionary attribute";
+ }
+ return success();
+}
+
+//===----------------------------------------------------------------------===//
+// GPU KernelTableAttr
+//===----------------------------------------------------------------------===//
+
+KernelTableAttr KernelTableAttr::get(MLIRContext *context,
+ ArrayRef<KernelMetadataAttr> kernels,
+ bool isSorted) {
+ // Note that `is_sorted` is always only invoked once even with assertions ON.
+ assert((!isSorted || llvm::is_sorted(kernels)) &&
+ "expected a sorted kernel array");
+ // Immediately return the attribute if the array is sorted.
+ if (isSorted || llvm::is_sorted(kernels))
+ return Base::get(context, kernels);
+ // Sort the array.
+ SmallVector<KernelMetadataAttr> kernelsTmp(kernels);
+ llvm::array_pod_sort(kernelsTmp.begin(), kernelsTmp.end());
+ return Base::get(context, kernelsTmp);
+}
+
+KernelTableAttr KernelTableAttr::getChecked(
+ function_ref<InFlightDiagnostic()> emitError, MLIRContext *context,
+ ArrayRef<KernelMetadataAttr> kernels, bool isSorted) {
+ // Note that `is_sorted` is always only invoked once even with assertions ON.
+ assert((!isSorted || llvm::is_sorted(kernels)) &&
+ "expected a sorted kernel array");
+ // Immediately return the attribute if the array is sorted.
+ if (isSorted || llvm::is_sorted(kernels))
+ return Base::getChecked(emitError, context, kernels);
+ // Sort the array.
+ SmallVector<KernelMetadataAttr> kernelsTmp(kernels);
+ llvm::array_pod_sort(kernelsTmp.begin(), kernelsTmp.end());
+ return Base::getChecked(emitError, context, kernelsTmp);
+}
+
+LogicalResult
+KernelTableAttr::verify(function_ref<InFlightDiagnostic()> emitError,
+ ArrayRef<KernelMetadataAttr> kernels) {
+ if (kernels.size() < 2)
+ return success();
+ // Check that the kernels are uniquely named.
+ if (std::adjacent_find(kernels.begin(), kernels.end(),
+ [](KernelMetadataAttr l, KernelMetadataAttr r) {
+ return l.getName() == r.getName();
+ }) != kernels.end()) {
+ return emitError() << "expected all kernels to be uniquely named";
+ }
+ return success();
+}
+
+KernelMetadataAttr KernelTableAttr::lookup(StringRef key) const {
+ auto [iterator, found] = impl::findAttrSorted(begin(), end(), key);
+ return found ? *iterator : KernelMetadataAttr();
+}
+
+KernelMetadataAttr KernelTableAttr::lookup(StringAttr key) const {
+ auto [iterator, found] = impl::findAttrSorted(begin(), end(), key);
+ return found ? *iterator : KernelMetadataAttr();
+}
+
+//===----------------------------------------------------------------------===//
// GPU target options
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Target/LLVM/CMakeLists.txt b/mlir/lib/Target/LLVM/CMakeLists.txt
index 93dc5ff..bc14c56 100644
--- a/mlir/lib/Target/LLVM/CMakeLists.txt
+++ b/mlir/lib/Target/LLVM/CMakeLists.txt
@@ -110,10 +110,12 @@
add_mlir_dialect_library(MLIRROCDLTarget
ROCDL/Target.cpp
+ ROCDL/Utils.cpp
OBJECT
LINK_COMPONENTS
+ FrontendOffloading
MCParser
${AMDGPU_LIBS}
diff --git a/mlir/lib/Target/LLVM/NVVM/Target.cpp b/mlir/lib/Target/LLVM/NVVM/Target.cpp
index a75b7f9..806c405 100644
--- a/mlir/lib/Target/LLVM/NVVM/Target.cpp
+++ b/mlir/lib/Target/LLVM/NVVM/Target.cpp
@@ -604,5 +604,5 @@
return builder.getAttr<gpu::ObjectAttr>(
attribute, format,
builder.getStringAttr(StringRef(object.data(), object.size())),
- objectProps);
+ objectProps, /*kernels=*/nullptr);
}
diff --git a/mlir/lib/Target/LLVM/ROCDL/Target.cpp b/mlir/lib/Target/LLVM/ROCDL/Target.cpp
index e32a0c7..d8a79a7 100644
--- a/mlir/lib/Target/LLVM/ROCDL/Target.cpp
+++ b/mlir/lib/Target/LLVM/ROCDL/Target.cpp
@@ -506,13 +506,15 @@
gpu::CompilationTarget format = options.getCompilationTarget();
// If format is `fatbin` transform it to binary as `fatbin` is not yet
// supported.
- if (format > gpu::CompilationTarget::Binary)
+ gpu::KernelTableAttr kernels;
+ if (format > gpu::CompilationTarget::Binary) {
format = gpu::CompilationTarget::Binary;
-
+ kernels = ROCDL::getKernelMetadata(module, object);
+ }
DictionaryAttr properties{};
Builder builder(attribute.getContext());
- return builder.getAttr<gpu::ObjectAttr>(
- attribute, format,
- builder.getStringAttr(StringRef(object.data(), object.size())),
- properties);
+ StringAttr objectStr =
+ builder.getStringAttr(StringRef(object.data(), object.size()));
+ return builder.getAttr<gpu::ObjectAttr>(attribute, format, objectStr,
+ properties, kernels);
}
diff --git a/mlir/lib/Target/LLVM/ROCDL/Utils.cpp b/mlir/lib/Target/LLVM/ROCDL/Utils.cpp
new file mode 100644
index 0000000..04b1b22
--- /dev/null
+++ b/mlir/lib/Target/LLVM/ROCDL/Utils.cpp
@@ -0,0 +1,87 @@
+//===- Utils.cpp - MLIR ROCDL target utils ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This files defines ROCDL target related utility classes and functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Target/LLVM/ROCDL/Utils.h"
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
+#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Frontend/Offloading/Utility.h"
+
+using namespace mlir;
+using namespace mlir::ROCDL;
+
+std::optional<DenseMap<StringAttr, NamedAttrList>>
+mlir::ROCDL::getAMDHSAKernelsELFMetadata(Builder &builder,
+ ArrayRef<char> elfData) {
+ uint16_t elfABIVersion;
+ llvm::StringMap<llvm::offloading::amdgpu::AMDGPUKernelMetaData> kernels;
+ llvm::MemoryBufferRef buffer(StringRef(elfData.data(), elfData.size()),
+ "buffer");
+ // Get the metadata.
+ llvm::Error error = llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage(
+ buffer, kernels, elfABIVersion);
+ // Return `nullopt` if the metadata couldn't be retrieved.
+ if (error) {
+ llvm::consumeError(std::move(error));
+ return std::nullopt;
+ }
+ // Helper lambda for converting values.
+ auto getI32Array = [&builder](const uint32_t *array) {
+ return builder.getDenseI32ArrayAttr({static_cast<int32_t>(array[0]),
+ static_cast<int32_t>(array[1]),
+ static_cast<int32_t>(array[2])});
+ };
+ DenseMap<StringAttr, NamedAttrList> kernelMD;
+ for (const auto &[name, kernel] : kernels) {
+ NamedAttrList attrs;
+ // Add kernel metadata.
+ attrs.append("agpr_count", builder.getI64IntegerAttr(kernel.AGPRCount));
+ attrs.append("sgpr_count", builder.getI64IntegerAttr(kernel.SGPRCount));
+ attrs.append("vgpr_count", builder.getI64IntegerAttr(kernel.VGPRCount));
+ attrs.append("sgpr_spill_count",
+ builder.getI64IntegerAttr(kernel.SGPRSpillCount));
+ attrs.append("vgpr_spill_count",
+ builder.getI64IntegerAttr(kernel.VGPRSpillCount));
+ attrs.append("wavefront_size",
+ builder.getI64IntegerAttr(kernel.WavefrontSize));
+ attrs.append("max_flat_workgroup_size",
+ builder.getI64IntegerAttr(kernel.MaxFlatWorkgroupSize));
+ attrs.append("group_segment_fixed_size",
+ builder.getI64IntegerAttr(kernel.GroupSegmentList));
+ attrs.append("private_segment_fixed_size",
+ builder.getI64IntegerAttr(kernel.PrivateSegmentSize));
+ attrs.append("reqd_workgroup_size",
+ getI32Array(kernel.RequestedWorkgroupSize));
+ attrs.append("workgroup_size_hint", getI32Array(kernel.WorkgroupSizeHint));
+ kernelMD[builder.getStringAttr(name)] = std::move(attrs);
+ }
+ return std::move(kernelMD);
+}
+
+gpu::KernelTableAttr mlir::ROCDL::getKernelMetadata(Operation *gpuModule,
+ ArrayRef<char> elfData) {
+ auto module = cast<gpu::GPUModuleOp>(gpuModule);
+ Builder builder(module.getContext());
+ SmallVector<gpu::KernelMetadataAttr> kernels;
+ std::optional<DenseMap<StringAttr, NamedAttrList>> mdMapOrNull =
+ getAMDHSAKernelsELFMetadata(builder, elfData);
+ for (auto funcOp : module.getBody()->getOps<LLVM::LLVMFuncOp>()) {
+ if (!funcOp->getDiscardableAttr("rocdl.kernel"))
+ continue;
+ kernels.push_back(gpu::KernelMetadataAttr::get(
+ funcOp, mdMapOrNull ? builder.getDictionaryAttr(
+ mdMapOrNull->lookup(funcOp.getNameAttr()))
+ : nullptr));
+ }
+ return gpu::KernelTableAttr::get(gpuModule->getContext(), kernels);
+}
diff --git a/mlir/lib/Target/SPIRV/Target.cpp b/mlir/lib/Target/SPIRV/Target.cpp
index d48548b..dd128e2 100644
--- a/mlir/lib/Target/SPIRV/Target.cpp
+++ b/mlir/lib/Target/SPIRV/Target.cpp
@@ -98,5 +98,5 @@
return builder.getAttr<gpu::ObjectAttr>(
attribute, format,
builder.getStringAttr(StringRef(object.data(), object.size())),
- objectProps);
+ objectProps, /*kernels=*/nullptr);
}
diff --git a/mlir/test/Dialect/GPU/invalid.mlir b/mlir/test/Dialect/GPU/invalid.mlir
index 20c1c4c..fd76180 100644
--- a/mlir/test/Dialect/GPU/invalid.mlir
+++ b/mlir/test/Dialect/GPU/invalid.mlir
@@ -848,3 +848,15 @@
gpu.module @kernel <> {
}
}
+
+// -----
+
+gpu.binary @binary [#gpu.object<#rocdl.target<chip = "gfx900">,
+ // expected-error@+1{{expected all kernels to be uniquely named}}
+ kernels = #gpu.kernel_table<[
+ #gpu.kernel_metadata<"kernel", (i32) -> ()>,
+ #gpu.kernel_metadata<"kernel", (i32, f32) -> (), metadata = {sgpr_count = 255}>
+ // expected-error@below{{failed to parse GPU_ObjectAttr parameter 'kernels' which is to be a `KernelTableAttr`}}
+ ]>,
+ bin = "BLOB">
+ ]
diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir
index ba7897f..b9c0a0e 100644
--- a/mlir/test/Dialect/GPU/ops.mlir
+++ b/mlir/test/Dialect/GPU/ops.mlir
@@ -441,3 +441,26 @@
gpu.module @module_with_offload_handler <#gpu.select_object<0>> [#nvvm.target] {
}
+
+// Test kernel attributes
+gpu.binary @kernel_attrs_1 [
+ #gpu.object<#rocdl.target<chip = "gfx900">,
+ kernels = #gpu.kernel_table<[
+ #gpu.kernel_metadata<"kernel0", (i32, f32) -> (), metadata = {sgpr_count = 255}>,
+ #gpu.kernel_metadata<"kernel1", (i32) -> (), arg_attrs = [{llvm.read_only}]>
+ ]>,
+ bin = "BLOB">
+ ]
+
+// Verify the kernels are sorted
+// CHECK-LABEL: gpu.binary @kernel_attrs_2
+gpu.binary @kernel_attrs_2 [
+ // CHECK: [#gpu.kernel_metadata<"a_kernel", () -> ()>, #gpu.kernel_metadata<"m_kernel", () -> ()>, #gpu.kernel_metadata<"z_kernel", () -> ()>]
+ #gpu.object<#rocdl.target<chip = "gfx900">,
+ kernels = #gpu.kernel_table<[
+ #gpu.kernel_metadata<"z_kernel", () -> ()>,
+ #gpu.kernel_metadata<"m_kernel", () -> ()>,
+ #gpu.kernel_metadata<"a_kernel", () -> ()>
+ ]>,
+ bin = "BLOB">
+ ]
diff --git a/mlir/test/python/dialects/gpu/dialect.py b/mlir/test/python/dialects/gpu/dialect.py
index aded35b..26ee9f3 100644
--- a/mlir/test/python/dialects/gpu/dialect.py
+++ b/mlir/test/python/dialects/gpu/dialect.py
@@ -55,3 +55,12 @@
# CHECK: #gpu.object<#nvvm.target, "//\0A// Generated by LLVM NVPTX Back-End\0A//\0A\0A.version 6.0\0A.target sm_50">
print(o)
assert o.object == object
+
+ object = b"BC\xc0\xde5\x14\x00\x00\x05\x00\x00\x00b\x0c0$MY\xbef"
+ kernelTable = Attribute.parse(
+ '#gpu.kernel_table<[#gpu.kernel_metadata<"kernel", () -> ()>]>'
+ )
+ o = gpu.ObjectAttr.get(target, format, object, kernels=kernelTable)
+ # CHECK: #gpu.object<#nvvm.target, kernels = <[#gpu.kernel_metadata<"kernel", () -> ()>]>, "BC\C0\DE5\14\00\00\05\00\00\00b\0C0$MY\BEf">
+ print(o)
+ assert o.kernels == kernelTable
diff --git a/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp b/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp
index 33291bc..43fa3d8 100644
--- a/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp
+++ b/mlir/unittests/Target/LLVM/SerializeROCDLTarget.cpp
@@ -158,3 +158,69 @@
ASSERT_FALSE(object->empty());
}
}
+
+// Test ROCDL metadata.
+TEST_F(MLIRTargetLLVMROCDL, SKIP_WITHOUT_AMDGPU(GetELFMetadata)) {
+ if (!hasROCMTools())
+ GTEST_SKIP() << "ROCm installation not found, skipping test.";
+
+ MLIRContext context(registry);
+
+ // MLIR module used for the tests.
+ const std::string moduleStr = R"mlir(
+ gpu.module @rocdl_test {
+ llvm.func @rocdl_kernel_1(%arg0: f32) attributes {gpu.kernel, rocdl.kernel} {
+ llvm.return
+ }
+ llvm.func @rocdl_kernel_0(%arg0: f32) attributes {gpu.kernel, rocdl.kernel} {
+ llvm.return
+ }
+ llvm.func @rocdl_kernel_2(%arg0: f32) attributes {gpu.kernel, rocdl.kernel} {
+ llvm.return
+ }
+ llvm.func @a_kernel(%arg0: f32) attributes {gpu.kernel, rocdl.kernel} {
+ llvm.return
+ }
+ })mlir";
+
+ OwningOpRef<ModuleOp> module =
+ parseSourceString<ModuleOp>(moduleStr, &context);
+ ASSERT_TRUE(!!module);
+
+ // Create a ROCDL target.
+ ROCDL::ROCDLTargetAttr target = ROCDL::ROCDLTargetAttr::get(&context);
+
+ // Serialize the module.
+ auto serializer = dyn_cast<gpu::TargetAttrInterface>(target);
+ ASSERT_TRUE(!!serializer);
+ gpu::TargetOptions options("", {}, "", gpu::CompilationTarget::Binary);
+ for (auto gpuModule : (*module).getBody()->getOps<gpu::GPUModuleOp>()) {
+ std::optional<SmallVector<char, 0>> object =
+ serializer.serializeToObject(gpuModule, options);
+ // Check that the serializer was successful.
+ ASSERT_TRUE(object != std::nullopt);
+ ASSERT_FALSE(object->empty());
+ if (!object)
+ continue;
+ // Get the metadata.
+ gpu::KernelTableAttr metadata =
+ ROCDL::getKernelMetadata(gpuModule, *object);
+ ASSERT_TRUE(metadata != nullptr);
+ // There should be 4 kernels.
+ ASSERT_TRUE(metadata.size() == 4);
+ // Check that the lookup method returns finds the kernel.
+ ASSERT_TRUE(metadata.lookup("a_kernel") != nullptr);
+ ASSERT_TRUE(metadata.lookup("rocdl_kernel_0") != nullptr);
+ // Check that the kernel doesn't exist.
+ ASSERT_TRUE(metadata.lookup("not_existent_kernel") == nullptr);
+ // Test the `KernelMetadataAttr` iterators.
+ for (gpu::KernelMetadataAttr kernel : metadata) {
+ // Check that the ELF metadata is present.
+ ASSERT_TRUE(kernel.getMetadata() != nullptr);
+ // Verify that `sgpr_count` is present and it is an integer attribute.
+ ASSERT_TRUE(kernel.getAttr<IntegerAttr>("sgpr_count") != nullptr);
+ // Verify that `vgpr_count` is present and it is an integer attribute.
+ ASSERT_TRUE(kernel.getAttr<IntegerAttr>("vgpr_count") != nullptr);
+ }
+ }
+}
diff --git a/mlir/unittests/Target/LLVM/SerializeToLLVMBitcode.cpp b/mlir/unittests/Target/LLVM/SerializeToLLVMBitcode.cpp
index 37dbfe6..aaa281e 100644
--- a/mlir/unittests/Target/LLVM/SerializeToLLVMBitcode.cpp
+++ b/mlir/unittests/Target/LLVM/SerializeToLLVMBitcode.cpp
@@ -116,7 +116,7 @@
module->getContext(), attribute, gpu::CompilationTarget::Offload,
StringAttr::get(module->getContext(),
StringRef(object.data(), object.size())),
- module->getAttrDictionary());
+ module->getAttrDictionary(), /*kernels=*/nullptr);
}
TEST_F(MLIRTargetLLVM, SKIP_WITHOUT_NATIVE(TargetAttrAPI)) {