[AMDGPU] Add the support for `.cluster_dims` code object metadata (#158721)

Co-authored-by: Ivan Kosarev <ivan.kosarev@amd.com>
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 259c02c..1265ec4 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -4839,7 +4839,24 @@
 
      ====================== ============== ========= ================================
 
-..
+.. _amdgpu-amdhsa-code-object-metadata-v6:
+
+Code Object V6 Metadata
++++++++++++++++++++++++
+
+Code object V6 metadata is the same as
+:ref:`amdgpu-amdhsa-code-object-metadata-v5` with the changes defined in table
+:ref:`amdgpu-amdhsa-code-object-kernel-metadata-map-table-v6`.
+
+    .. table:: AMDHSA Code Object V6 Kernel Metadata Map Additions
+     :name: amdgpu-amdhsa-code-object-kernel-metadata-map-table-v6
+
+     ============================= ============= ========== =======================================
+     String Key                    Value Type     Required? Description
+     ============================= ============= ========== =======================================
+     ".cluster_dims"               sequence of              The dimension of the cluster.
+                                   3 integers
+     ============================= ============= ========== =======================================
 
 Kernel Dispatch
 ~~~~~~~~~~~~~~~
diff --git a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
index 8737dc0..f2ada27 100644
--- a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
+++ b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
@@ -281,7 +281,14 @@
     return false;
   if (!verifyIntegerEntry(KernelMap, ".uniform_work_group_size", false))
     return false;
-
+  if (!verifyEntry(
+          KernelMap, ".cluster_dims", false, [this](msgpack::DocNode &Node) {
+            return verifyArray(
+                Node,
+                [this](msgpack::DocNode &Node) { return verifyInteger(Node); },
+                3);
+          }))
+    return false;
 
   return true;
 }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index 19b8757..3c88d1b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -254,9 +254,9 @@
 }
 
 void MetadataStreamerMsgPackV4::emitKernelAttrs(const AMDGPUTargetMachine &TM,
-                                                const Function &Func,
+                                                const MachineFunction &MF,
                                                 msgpack::MapDocNode Kern) {
-
+  const Function &Func = MF.getFunction();
   if (auto *Node = Func.getMetadata("reqd_work_group_size"))
     Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
   if (auto *Node = Func.getMetadata("work_group_size_hint"))
@@ -599,7 +599,7 @@
     Kern[".symbol"] = Kern.getDocument()->getNode(
         (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
     emitKernelLanguage(Func, Kern);
-    emitKernelAttrs(TM, Func, Kern);
+    emitKernelAttrs(TM, MF, Kern);
     emitKernelArgs(MF, Kern);
   }
 
@@ -726,10 +726,11 @@
 }
 
 void MetadataStreamerMsgPackV5::emitKernelAttrs(const AMDGPUTargetMachine &TM,
-                                                const Function &Func,
+                                                const MachineFunction &MF,
                                                 msgpack::MapDocNode Kern) {
-  MetadataStreamerMsgPackV4::emitKernelAttrs(TM, Func, Kern);
+  MetadataStreamerMsgPackV4::emitKernelAttrs(TM, MF, Kern);
 
+  const Function &Func = MF.getFunction();
   if (Func.getFnAttribute("uniform-work-group-size").getValueAsBool())
     Kern[".uniform_work_group_size"] = Kern.getDocument()->getNode(1);
 }
@@ -745,5 +746,21 @@
   getRootMetadata("amdhsa.version") = Version;
 }
 
+void MetadataStreamerMsgPackV6::emitKernelAttrs(const AMDGPUTargetMachine &TM,
+                                                const MachineFunction &MF,
+                                                msgpack::MapDocNode Kern) {
+  MetadataStreamerMsgPackV5::emitKernelAttrs(TM, MF, Kern);
+
+  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
+  ClusterDimsAttr Attr = MFI.getClusterDims();
+  if (Attr.isFixedDims()) {
+    msgpack::ArrayDocNode ClusterDimsNode = HSAMetadataDoc->getArrayNode();
+    ClusterDimsNode.push_back(HSAMetadataDoc->getNode(Attr.getDims()[0]));
+    ClusterDimsNode.push_back(HSAMetadataDoc->getNode(Attr.getDims()[1]));
+    ClusterDimsNode.push_back(HSAMetadataDoc->getNode(Attr.getDims()[2]));
+    Kern[".cluster_dims"] = ClusterDimsNode;
+  }
+}
+
 } // end namespace AMDGPU::HSAMD
 } // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
index 22dfcb4..1b4b113 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
@@ -61,7 +61,7 @@
   virtual void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset,
                                     msgpack::ArrayDocNode Args) = 0;
   virtual void emitKernelAttrs(const AMDGPUTargetMachine &TM,
-                               const Function &Func,
+                               const MachineFunction &MF,
                                msgpack::MapDocNode Kern) = 0;
 };
 
@@ -102,7 +102,7 @@
 
   void emitKernelLanguage(const Function &Func, msgpack::MapDocNode Kern);
 
-  void emitKernelAttrs(const AMDGPUTargetMachine &TM, const Function &Func,
+  void emitKernelAttrs(const AMDGPUTargetMachine &TM, const MachineFunction &MF,
                        msgpack::MapDocNode Kern) override;
 
   void emitKernelArgs(const MachineFunction &MF, msgpack::MapDocNode Kern);
@@ -149,7 +149,7 @@
   void emitVersion() override;
   void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset,
                             msgpack::ArrayDocNode Args) override;
-  void emitKernelAttrs(const AMDGPUTargetMachine &TM, const Function &Func,
+  void emitKernelAttrs(const AMDGPUTargetMachine &TM, const MachineFunction &MF,
                        msgpack::MapDocNode Kern) override;
 
 public:
@@ -164,6 +164,9 @@
 public:
   MetadataStreamerMsgPackV6() = default;
   ~MetadataStreamerMsgPackV6() = default;
+
+  void emitKernelAttrs(const AMDGPUTargetMachine &TM, const MachineFunction &MF,
+                       msgpack::MapDocNode Kern) override;
 };
 
 } // end namespace HSAMD
diff --git a/llvm/test/CodeGen/AMDGPU/cluster-dims.ll b/llvm/test/CodeGen/AMDGPU/cluster-dims.ll
new file mode 100644
index 0000000..62e8d9d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/cluster-dims.ll
@@ -0,0 +1,47 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 %s -o - | FileCheck %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -global-isel %s -o - | FileCheck %s
+
+; CHECK: .cluster_dims:
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 2
+define dso_local amdgpu_kernel void @_Z15test_literal_3dv() #0 {
+entry:
+  ret void
+}
+
+; CHECK: .cluster_dims:
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 1
+define dso_local amdgpu_kernel void @_Z15test_literal_2dv() #1 {
+entry:
+  ret void
+}
+
+; CHECK: .cluster_dims:
+; CHECK-NEXT: - 4
+; CHECK-NEXT: - 1
+; CHECK-NEXT: - 1
+define dso_local amdgpu_kernel void @_Z15test_literal_1dv() #2 {
+entry:
+  ret void
+}
+
+; CHECK: .cluster_dims:
+; CHECK-NEXT: - 4
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 1
+define dso_local amdgpu_kernel void @_Z13test_constantv() #3 {
+entry:
+  ret void
+}
+
+attributes #0 = { convergent mustprogress noinline norecurse nounwind "amdgpu-cluster-dims"="2,2,2" }
+attributes #1 = { convergent mustprogress noinline norecurse nounwind "amdgpu-cluster-dims"="2,2,1" }
+attributes #2 = { convergent mustprogress noinline norecurse nounwind "amdgpu-cluster-dims"="4,1,1" }
+attributes #3 = { convergent mustprogress noinline norecurse nounwind "amdgpu-cluster-dims"="4,2,1" }
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 1, !"amdhsa_code_object_version", i32 600}
diff --git a/llvm/test/MC/AMDGPU/hsa-metadata-kernel-attrs-v6.s b/llvm/test/MC/AMDGPU/hsa-metadata-kernel-attrs-v6.s
new file mode 100644
index 0000000..b91888d
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/hsa-metadata-kernel-attrs-v6.s
@@ -0,0 +1,35 @@
+// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1250 --amdhsa-code-object-version=6 -show-encoding %s | FileCheck %s
+
+// CHECK: .amdgpu_metadata
+// CHECK: amdhsa.kernels:
+// CHECK: - .cluster_dims:
+// CHECK-NEXT: - 4
+// CHECK-NEXT: - 2
+// CHECK-NEXT: - 1
+.amdgpu_metadata
+  amdhsa.version:
+    - 1
+    - 0
+  amdhsa.printf:
+    - '1:1:4:%d\n'
+    - '2:1:8:%g\n'
+  amdhsa.kernels:
+    - .name:            test_kernel
+      .symbol:      test_kernel@kd
+      .language:        OpenCL C
+      .language_version:
+        - 2
+        - 0
+      .kernarg_segment_size: 8
+      .group_segment_fixed_size: 16
+      .private_segment_fixed_size: 32
+      .kernarg_segment_align: 64
+      .wavefront_size: 128
+      .sgpr_count: 14
+      .vgpr_count: 40
+      .max_flat_workgroup_size: 256
+      .cluster_dims:
+        - 4
+        - 2
+        - 1
+.end_amdgpu_metadata