[AMDGPU] Add the support for `.cluster_dims` code object metadata (#158721)
Co-authored-by: Ivan Kosarev <ivan.kosarev@amd.com>
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 259c02c..1265ec4 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -4839,7 +4839,24 @@
====================== ============== ========= ================================
-..
+.. _amdgpu-amdhsa-code-object-metadata-v6:
+
+Code Object V6 Metadata
++++++++++++++++++++++++
+
+Code object V6 metadata is the same as
+:ref:`amdgpu-amdhsa-code-object-metadata-v5` with the changes defined in table
+:ref:`amdgpu-amdhsa-code-object-kernel-metadata-map-table-v6`.
+
+ .. table:: AMDHSA Code Object V6 Kernel Metadata Map Additions
+ :name: amdgpu-amdhsa-code-object-kernel-metadata-map-table-v6
+
+ ============================= ============= ========== =======================================
+ String Key Value Type Required? Description
+ ============================= ============= ========== =======================================
+ ".cluster_dims" sequence of The dimension of the cluster.
+ 3 integers
+ ============================= ============= ========== =======================================
Kernel Dispatch
~~~~~~~~~~~~~~~
diff --git a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
index 8737dc0..f2ada27 100644
--- a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
+++ b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
@@ -281,7 +281,14 @@
return false;
if (!verifyIntegerEntry(KernelMap, ".uniform_work_group_size", false))
return false;
-
+ if (!verifyEntry(
+ KernelMap, ".cluster_dims", false, [this](msgpack::DocNode &Node) {
+ return verifyArray(
+ Node,
+ [this](msgpack::DocNode &Node) { return verifyInteger(Node); },
+ 3);
+ }))
+ return false;
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index 19b8757..3c88d1b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -254,9 +254,9 @@
}
void MetadataStreamerMsgPackV4::emitKernelAttrs(const AMDGPUTargetMachine &TM,
- const Function &Func,
+ const MachineFunction &MF,
msgpack::MapDocNode Kern) {
-
+ const Function &Func = MF.getFunction();
if (auto *Node = Func.getMetadata("reqd_work_group_size"))
Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
if (auto *Node = Func.getMetadata("work_group_size_hint"))
@@ -599,7 +599,7 @@
Kern[".symbol"] = Kern.getDocument()->getNode(
(Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
emitKernelLanguage(Func, Kern);
- emitKernelAttrs(TM, Func, Kern);
+ emitKernelAttrs(TM, MF, Kern);
emitKernelArgs(MF, Kern);
}
@@ -726,10 +726,11 @@
}
void MetadataStreamerMsgPackV5::emitKernelAttrs(const AMDGPUTargetMachine &TM,
- const Function &Func,
+ const MachineFunction &MF,
msgpack::MapDocNode Kern) {
- MetadataStreamerMsgPackV4::emitKernelAttrs(TM, Func, Kern);
+ MetadataStreamerMsgPackV4::emitKernelAttrs(TM, MF, Kern);
+ const Function &Func = MF.getFunction();
if (Func.getFnAttribute("uniform-work-group-size").getValueAsBool())
Kern[".uniform_work_group_size"] = Kern.getDocument()->getNode(1);
}
@@ -745,5 +746,21 @@
getRootMetadata("amdhsa.version") = Version;
}
+void MetadataStreamerMsgPackV6::emitKernelAttrs(const AMDGPUTargetMachine &TM,
+ const MachineFunction &MF,
+ msgpack::MapDocNode Kern) {
+ MetadataStreamerMsgPackV5::emitKernelAttrs(TM, MF, Kern);
+
+ const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
+ ClusterDimsAttr Attr = MFI.getClusterDims();
+ if (Attr.isFixedDims()) {
+ msgpack::ArrayDocNode ClusterDimsNode = HSAMetadataDoc->getArrayNode();
+ ClusterDimsNode.push_back(HSAMetadataDoc->getNode(Attr.getDims()[0]));
+ ClusterDimsNode.push_back(HSAMetadataDoc->getNode(Attr.getDims()[1]));
+ ClusterDimsNode.push_back(HSAMetadataDoc->getNode(Attr.getDims()[2]));
+ Kern[".cluster_dims"] = ClusterDimsNode;
+ }
+}
+
} // end namespace AMDGPU::HSAMD
} // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
index 22dfcb4..1b4b113 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
@@ -61,7 +61,7 @@
virtual void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset,
msgpack::ArrayDocNode Args) = 0;
virtual void emitKernelAttrs(const AMDGPUTargetMachine &TM,
- const Function &Func,
+ const MachineFunction &MF,
msgpack::MapDocNode Kern) = 0;
};
@@ -102,7 +102,7 @@
void emitKernelLanguage(const Function &Func, msgpack::MapDocNode Kern);
- void emitKernelAttrs(const AMDGPUTargetMachine &TM, const Function &Func,
+ void emitKernelAttrs(const AMDGPUTargetMachine &TM, const MachineFunction &MF,
msgpack::MapDocNode Kern) override;
void emitKernelArgs(const MachineFunction &MF, msgpack::MapDocNode Kern);
@@ -149,7 +149,7 @@
void emitVersion() override;
void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset,
msgpack::ArrayDocNode Args) override;
- void emitKernelAttrs(const AMDGPUTargetMachine &TM, const Function &Func,
+ void emitKernelAttrs(const AMDGPUTargetMachine &TM, const MachineFunction &MF,
msgpack::MapDocNode Kern) override;
public:
@@ -164,6 +164,9 @@
public:
MetadataStreamerMsgPackV6() = default;
~MetadataStreamerMsgPackV6() = default;
+
+ void emitKernelAttrs(const AMDGPUTargetMachine &TM, const MachineFunction &MF,
+ msgpack::MapDocNode Kern) override;
};
} // end namespace HSAMD
diff --git a/llvm/test/CodeGen/AMDGPU/cluster-dims.ll b/llvm/test/CodeGen/AMDGPU/cluster-dims.ll
new file mode 100644
index 0000000..62e8d9d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/cluster-dims.ll
@@ -0,0 +1,47 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 %s -o - | FileCheck %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -global-isel %s -o - | FileCheck %s
+
+; CHECK: .cluster_dims:
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 2
+define dso_local amdgpu_kernel void @_Z15test_literal_3dv() #0 {
+entry:
+ ret void
+}
+
+; CHECK: .cluster_dims:
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 1
+define dso_local amdgpu_kernel void @_Z15test_literal_2dv() #1 {
+entry:
+ ret void
+}
+
+; CHECK: .cluster_dims:
+; CHECK-NEXT: - 4
+; CHECK-NEXT: - 1
+; CHECK-NEXT: - 1
+define dso_local amdgpu_kernel void @_Z15test_literal_1dv() #2 {
+entry:
+ ret void
+}
+
+; CHECK: .cluster_dims:
+; CHECK-NEXT: - 4
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 1
+define dso_local amdgpu_kernel void @_Z13test_constantv() #3 {
+entry:
+ ret void
+}
+
+attributes #0 = { convergent mustprogress noinline norecurse nounwind "amdgpu-cluster-dims"="2,2,2" }
+attributes #1 = { convergent mustprogress noinline norecurse nounwind "amdgpu-cluster-dims"="2,2,1" }
+attributes #2 = { convergent mustprogress noinline norecurse nounwind "amdgpu-cluster-dims"="4,1,1" }
+attributes #3 = { convergent mustprogress noinline norecurse nounwind "amdgpu-cluster-dims"="4,2,1" }
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 1, !"amdhsa_code_object_version", i32 600}
diff --git a/llvm/test/MC/AMDGPU/hsa-metadata-kernel-attrs-v6.s b/llvm/test/MC/AMDGPU/hsa-metadata-kernel-attrs-v6.s
new file mode 100644
index 0000000..b91888d
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/hsa-metadata-kernel-attrs-v6.s
@@ -0,0 +1,35 @@
+// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1250 --amdhsa-code-object-version=6 -show-encoding %s | FileCheck %s
+
+// CHECK: .amdgpu_metadata
+// CHECK: amdhsa.kernels:
+// CHECK: - .cluster_dims:
+// CHECK-NEXT: - 4
+// CHECK-NEXT: - 2
+// CHECK-NEXT: - 1
+.amdgpu_metadata
+ amdhsa.version:
+ - 1
+ - 0
+ amdhsa.printf:
+ - '1:1:4:%d\n'
+ - '2:1:8:%g\n'
+ amdhsa.kernels:
+ - .name: test_kernel
+ .symbol: test_kernel@kd
+ .language: OpenCL C
+ .language_version:
+ - 2
+ - 0
+ .kernarg_segment_size: 8
+ .group_segment_fixed_size: 16
+ .private_segment_fixed_size: 32
+ .kernarg_segment_align: 64
+ .wavefront_size: 128
+ .sgpr_count: 14
+ .vgpr_count: 40
+ .max_flat_workgroup_size: 256
+ .cluster_dims:
+ - 4
+ - 2
+ - 1
+.end_amdgpu_metadata