[NFC][NVPTX] Fix tcgen05.mma PTX instruction encoding (#186602)

.ashift should be before .collector::a::* according to PTX ISA.

ptxas accepts both orderings, but the spec-correct order is used now.
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 26f5f3f..a942989 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -6164,8 +6164,8 @@
 
   let AsmString = Prefix
                   # SpCtaKindStr
-                  # ".collector::a::" # CollectorUsage
                   # !if(IsAShift, ".ashift", "")
+                  # ".collector::a::" # CollectorUsage
                   # BaseOperandsStr
                   # InputDStr
                   # ScaleInpStr
diff --git a/llvm/test/CodeGen/NVPTX/tcgen05-mma-i8.ll b/llvm/test/CodeGen/NVPTX/tcgen05-mma-i8.ll
index 327f7f7..d8c0860 100644
--- a/llvm/test/CodeGen/NVPTX/tcgen05-mma-i8.ll
+++ b/llvm/test/CodeGen/NVPTX/tcgen05-mma-i8.ll
@@ -26,7 +26,7 @@
 ; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::i8.collector::a::discard [%r1], %rd1, %rd2, %r2, %p1;
 ; CHECK-NEXT:    ld.param.b32 %r3, [tcgen05_mma_i8_cta1_param_1];
 ; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::i8.collector::a::discard [%r1], [%r3], %rd2, %r2, %p1;
-; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::i8.collector::a::discard.ashift [%r1], [%r3], %rd2, %r2, %p1;
+; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::i8.ashift.collector::a::discard [%r1], [%r3], %rd2, %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::i8.collector::a::lastuse [%r1], %rd1, %rd2, %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::i8.collector::a::lastuse [%r1], [%r3], %rd2, %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::i8.collector::a::fill [%r1], %rd1, %rd2, %r2, %p1;
@@ -66,7 +66,7 @@
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::i8.collector::a::discard [%r1], %rd1, %rd2, [%r3], %r2, %p1;
 ; CHECK-NEXT:    ld.param.b32 %r4, [tcgen05_mma_sp_i8_cta1_param_1];
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::i8.collector::a::discard [%r1], [%r4], %rd2, [%r3], %r2, %p1;
-; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::i8.collector::a::discard.ashift [%r1], [%r4], %rd2, [%r3], %r2, %p1;
+; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::i8.ashift.collector::a::discard [%r1], [%r4], %rd2, [%r3], %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::i8.collector::a::lastuse [%r1], %rd1, %rd2, [%r3], %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::i8.collector::a::lastuse [%r1], [%r4], %rd2, [%r3], %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::i8.collector::a::fill [%r1], %rd1, %rd2, [%r3], %r2, %p1;
@@ -105,7 +105,7 @@
 ; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::i8.collector::a::discard [%r1], %rd1, %rd2, %r2, %p1;
 ; CHECK-NEXT:    ld.param.b32 %r3, [tcgen05_mma_i8_cta2_param_1];
 ; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::i8.collector::a::discard [%r1], [%r3], %rd2, %r2, %p1;
-; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::i8.collector::a::discard.ashift [%r1], [%r3], %rd2, %r2, %p1;
+; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::i8.ashift.collector::a::discard [%r1], [%r3], %rd2, %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::i8.collector::a::lastuse [%r1], %rd1, %rd2, %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::i8.collector::a::lastuse [%r1], [%r3], %rd2, %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::i8.collector::a::fill [%r1], %rd1, %rd2, %r2, %p1;
@@ -145,7 +145,7 @@
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::i8.collector::a::discard [%r1], %rd1, %rd2, [%r3], %r2, %p1;
 ; CHECK-NEXT:    ld.param.b32 %r4, [tcgen05_mma_sp_i8_cta2_param_1];
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::i8.collector::a::discard [%r1], [%r4], %rd2, [%r3], %r2, %p1;
-; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::i8.collector::a::discard.ashift [%r1], [%r4], %rd2, [%r3], %r2, %p1;
+; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::i8.ashift.collector::a::discard [%r1], [%r4], %rd2, [%r3], %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::i8.collector::a::lastuse [%r1], %rd1, %rd2, [%r3], %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::i8.collector::a::lastuse [%r1], [%r4], %rd2, [%r3], %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::i8.collector::a::fill [%r1], %rd1, %rd2, [%r3], %r2, %p1;
diff --git a/llvm/test/CodeGen/NVPTX/tcgen05-mma-scale-d.ll b/llvm/test/CodeGen/NVPTX/tcgen05-mma-scale-d.ll
index c22b718..cc30a26 100644
--- a/llvm/test/CodeGen/NVPTX/tcgen05-mma-scale-d.ll
+++ b/llvm/test/CodeGen/NVPTX/tcgen05-mma-scale-d.ll
@@ -23,8 +23,8 @@
 ; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::f16.collector::a::discard [%r1], %rd1, %rd2, %r2, %p1, 0;
 ; CHECK-NEXT:    ld.param.b32 %r3, [tcgen05_mma_fp16_cg1_param_1];
 ; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::f16.collector::a::discard [%r1], [%r3], %rd2, %r2, %p1, 0;
-; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::f16.collector::a::discard.ashift [%r1], [%r3], %rd2, %r2, %p1, 0;
-; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::f16.collector::a::lastuse.ashift [%r1], [%r3], %rd2, %r2, %p1, 0;
+; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::f16.ashift.collector::a::discard [%r1], [%r3], %rd2, %r2, %p1, 0;
+; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::f16.ashift.collector::a::lastuse [%r1], [%r3], %rd2, %r2, %p1, 0;
 ; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::f16.collector::a::lastuse [%r1], %rd1, %rd2, %r2, %p1, 0;
 ; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::f16.collector::a::lastuse [%r1], [%r3], %rd2, %r2, %p1, 0;
 ; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::f16.collector::a::fill [%r1], %rd1, %rd2, %r2, %p1, 0;
@@ -73,8 +73,8 @@
 ; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::f16.collector::a::discard [%r1], %rd1, %rd2, %r2, %p1, 0;
 ; CHECK-NEXT:    ld.param.b32 %r3, [tcgen05_mma_fp16_cg2_param_1];
 ; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::f16.collector::a::discard [%r1], [%r3], %rd2, %r2, %p1, 0;
-; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::f16.collector::a::discard.ashift [%r1], [%r3], %rd2, %r2, %p1, 0;
-; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::f16.collector::a::lastuse.ashift [%r1], [%r3], %rd2, %r2, %p1, 0;
+; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::f16.ashift.collector::a::discard [%r1], [%r3], %rd2, %r2, %p1, 0;
+; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::f16.ashift.collector::a::lastuse [%r1], [%r3], %rd2, %r2, %p1, 0;
 ; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::f16.collector::a::lastuse [%r1], %rd1, %rd2, %r2, %p1, 0;
 ; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::f16.collector::a::lastuse [%r1], [%r3], %rd2, %r2, %p1, 0;
 ; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::f16.collector::a::fill [%r1], %rd1, %rd2, %r2, %p1, 0;
@@ -125,8 +125,8 @@
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::f16.collector::a::discard [%r1], %rd1, %rd2, [%r3], %r2, %p1, 0;
 ; CHECK-NEXT:    ld.param.b32 %r4, [tcgen05_mma_sp_fp16_cg1_param_1];
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::f16.collector::a::discard [%r1], [%r4], %rd2, [%r3], %r2, %p1, 0;
-; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::f16.collector::a::discard.ashift [%r1], [%r4], %rd2, [%r3], %r2, %p1, 0;
-; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::f16.collector::a::lastuse.ashift [%r1], [%r4], %rd2, [%r3], %r2, %p1, 0;
+; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::f16.ashift.collector::a::discard [%r1], [%r4], %rd2, [%r3], %r2, %p1, 0;
+; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::f16.ashift.collector::a::lastuse [%r1], [%r4], %rd2, [%r3], %r2, %p1, 0;
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::f16.collector::a::lastuse [%r1], %rd1, %rd2, [%r3], %r2, %p1, 0;
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::f16.collector::a::lastuse [%r1], [%r4], %rd2, [%r3], %r2, %p1, 0;
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::f16.collector::a::fill [%r1], %rd1, %rd2, [%r3], %r2, %p1, 0;
@@ -176,8 +176,8 @@
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::f16.collector::a::discard [%r1], %rd1, %rd2, [%r3], %r2, %p1, 0;
 ; CHECK-NEXT:    ld.param.b32 %r4, [tcgen05_mma_sp_fp16_cg2_param_1];
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::f16.collector::a::discard [%r1], [%r4], %rd2, [%r3], %r2, %p1, 0;
-; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::f16.collector::a::discard.ashift [%r1], [%r4], %rd2, [%r3], %r2, %p1, 0;
-; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::f16.collector::a::lastuse.ashift [%r1], [%r4], %rd2, [%r3], %r2, %p1, 0;
+; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::f16.ashift.collector::a::discard [%r1], [%r4], %rd2, [%r3], %r2, %p1, 0;
+; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::f16.ashift.collector::a::lastuse [%r1], [%r4], %rd2, [%r3], %r2, %p1, 0;
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::f16.collector::a::lastuse [%r1], %rd1, %rd2, [%r3], %r2, %p1, 0;
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::f16.collector::a::lastuse [%r1], [%r4], %rd2, [%r3], %r2, %p1, 0;
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::f16.collector::a::fill [%r1], %rd1, %rd2, [%r3], %r2, %p1, 0;
@@ -227,8 +227,8 @@
 ; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::tf32.collector::a::discard [%r1], %rd1, %rd2, %r2, %p1, 0;
 ; CHECK-NEXT:    ld.param.b32 %r3, [tcgen05_mma_tf32_cg1_param_1];
 ; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::tf32.collector::a::discard [%r1], [%r3], %rd2, %r2, %p1, 0;
-; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::tf32.collector::a::discard.ashift [%r1], [%r3], %rd2, %r2, %p1, 0;
-; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::tf32.collector::a::lastuse.ashift [%r1], [%r3], %rd2, %r2, %p1, 0;
+; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::tf32.ashift.collector::a::discard [%r1], [%r3], %rd2, %r2, %p1, 0;
+; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::tf32.ashift.collector::a::lastuse [%r1], [%r3], %rd2, %r2, %p1, 0;
 ; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::tf32.collector::a::lastuse [%r1], %rd1, %rd2, %r2, %p1, 0;
 ; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::tf32.collector::a::lastuse [%r1], [%r3], %rd2, %r2, %p1, 0;
 ; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::tf32.collector::a::fill [%r1], %rd1, %rd2, %r2, %p1, 0;
@@ -277,8 +277,8 @@
 ; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::tf32.collector::a::discard [%r1], %rd1, %rd2, %r2, %p1, 0;
 ; CHECK-NEXT:    ld.param.b32 %r3, [tcgen05_mma_tf32_cg2_param_1];
 ; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::tf32.collector::a::discard [%r1], [%r3], %rd2, %r2, %p1, 0;
-; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::tf32.collector::a::discard.ashift [%r1], [%r3], %rd2, %r2, %p1, 0;
-; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::tf32.collector::a::lastuse.ashift [%r1], [%r3], %rd2, %r2, %p1, 0;
+; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::tf32.ashift.collector::a::discard [%r1], [%r3], %rd2, %r2, %p1, 0;
+; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::tf32.ashift.collector::a::lastuse [%r1], [%r3], %rd2, %r2, %p1, 0;
 ; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::tf32.collector::a::lastuse [%r1], %rd1, %rd2, %r2, %p1, 0;
 ; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::tf32.collector::a::lastuse [%r1], [%r3], %rd2, %r2, %p1, 0;
 ; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::tf32.collector::a::fill [%r1], %rd1, %rd2, %r2, %p1, 0;
@@ -329,8 +329,8 @@
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::tf32.collector::a::discard [%r1], %rd1, %rd2, [%r3], %r2, %p1, 0;
 ; CHECK-NEXT:    ld.param.b32 %r4, [tcgen05_mma_sp_tf32_cg1_param_1];
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::tf32.collector::a::discard [%r1], [%r4], %rd2, [%r3], %r2, %p1, 0;
-; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::tf32.collector::a::discard.ashift [%r1], [%r4], %rd2, [%r3], %r2, %p1, 0;
-; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::tf32.collector::a::lastuse.ashift [%r1], [%r4], %rd2, [%r3], %r2, %p1, 0;
+; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::tf32.ashift.collector::a::discard [%r1], [%r4], %rd2, [%r3], %r2, %p1, 0;
+; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::tf32.ashift.collector::a::lastuse [%r1], [%r4], %rd2, [%r3], %r2, %p1, 0;
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::tf32.collector::a::lastuse [%r1], %rd1, %rd2, [%r3], %r2, %p1, 0;
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::tf32.collector::a::lastuse [%r1], [%r4], %rd2, [%r3], %r2, %p1, 0;
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::tf32.collector::a::fill [%r1], %rd1, %rd2, [%r3], %r2, %p1, 0;
@@ -380,8 +380,8 @@
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::tf32.collector::a::discard [%r1], %rd1, %rd2, [%r3], %r2, %p1, 0;
 ; CHECK-NEXT:    ld.param.b32 %r4, [tcgen05_mma_sp_tf32_cg2_param_1];
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::tf32.collector::a::discard [%r1], [%r4], %rd2, [%r3], %r2, %p1, 0;
-; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::tf32.collector::a::discard.ashift [%r1], [%r4], %rd2, [%r3], %r2, %p1, 0;
-; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::tf32.collector::a::lastuse.ashift [%r1], [%r4], %rd2, [%r3], %r2, %p1, 0;
+; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::tf32.ashift.collector::a::discard [%r1], [%r4], %rd2, [%r3], %r2, %p1, 0;
+; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::tf32.ashift.collector::a::lastuse [%r1], [%r4], %rd2, [%r3], %r2, %p1, 0;
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::tf32.collector::a::lastuse [%r1], %rd1, %rd2, [%r3], %r2, %p1, 0;
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::tf32.collector::a::lastuse [%r1], [%r4], %rd2, [%r3], %r2, %p1, 0;
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::tf32.collector::a::fill [%r1], %rd1, %rd2, [%r3], %r2, %p1, 0;
diff --git a/llvm/test/CodeGen/NVPTX/tcgen05-mma.ll b/llvm/test/CodeGen/NVPTX/tcgen05-mma.ll
index fcde161..25bda65 100644
--- a/llvm/test/CodeGen/NVPTX/tcgen05-mma.ll
+++ b/llvm/test/CodeGen/NVPTX/tcgen05-mma.ll
@@ -29,7 +29,7 @@
 ; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::f16.collector::a::discard [%r1], %rd1, %rd2, %r2, %p1;
 ; CHECK-NEXT:    ld.param.b32 %r3, [tcgen05_mma_fp16_cta1_param_1];
 ; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::f16.collector::a::discard [%r1], [%r3], %rd2, %r2, %p1;
-; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::f16.collector::a::discard.ashift [%r1], [%r3], %rd2, %r2, %p1;
+; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::f16.ashift.collector::a::discard [%r1], [%r3], %rd2, %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::f16.collector::a::lastuse [%r1], %rd1, %rd2, %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::f16.collector::a::lastuse [%r1], [%r3], %rd2, %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::f16.collector::a::fill [%r1], %rd1, %rd2, %r2, %p1;
@@ -69,7 +69,7 @@
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::f16.collector::a::discard [%r1], %rd1, %rd2, [%r3], %r2, %p1;
 ; CHECK-NEXT:    ld.param.b32 %r4, [tcgen05_mma_sp_fp16_cta1_param_1];
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::f16.collector::a::discard [%r1], [%r4], %rd2, [%r3], %r2, %p1;
-; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::f16.collector::a::discard.ashift [%r1], [%r4], %rd2, [%r3], %r2, %p1;
+; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::f16.ashift.collector::a::discard [%r1], [%r4], %rd2, [%r3], %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::f16.collector::a::lastuse [%r1], %rd1, %rd2, [%r3], %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::f16.collector::a::lastuse [%r1], [%r4], %rd2, [%r3], %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::f16.collector::a::fill [%r1], %rd1, %rd2, [%r3], %r2, %p1;
@@ -108,7 +108,7 @@
 ; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::tf32.collector::a::discard [%r1], %rd1, %rd2, %r2, %p1;
 ; CHECK-NEXT:    ld.param.b32 %r3, [tcgen05_mma_tf32_cta1_param_1];
 ; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::tf32.collector::a::discard [%r1], [%r3], %rd2, %r2, %p1;
-; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::tf32.collector::a::discard.ashift [%r1], [%r3], %rd2, %r2, %p1;
+; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::tf32.ashift.collector::a::discard [%r1], [%r3], %rd2, %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::tf32.collector::a::lastuse [%r1], %rd1, %rd2, %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::tf32.collector::a::lastuse [%r1], [%r3], %rd2, %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::tf32.collector::a::fill [%r1], %rd1, %rd2, %r2, %p1;
@@ -148,7 +148,7 @@
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::tf32.collector::a::discard [%r1], %rd1, %rd2, [%r3], %r2, %p1;
 ; CHECK-NEXT:    ld.param.b32 %r4, [tcgen05_mma_sp_tf32_cta1_param_1];
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::tf32.collector::a::discard [%r1], [%r4], %rd2, [%r3], %r2, %p1;
-; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::tf32.collector::a::discard.ashift [%r1], [%r4], %rd2, [%r3], %r2, %p1;
+; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::tf32.ashift.collector::a::discard [%r1], [%r4], %rd2, [%r3], %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::tf32.collector::a::lastuse [%r1], %rd1, %rd2, [%r3], %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::tf32.collector::a::lastuse [%r1], [%r4], %rd2, [%r3], %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::tf32.collector::a::fill [%r1], %rd1, %rd2, [%r3], %r2, %p1;
@@ -187,7 +187,7 @@
 ; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::f8f6f4.collector::a::discard [%r1], %rd1, %rd2, %r2, %p1;
 ; CHECK-NEXT:    ld.param.b32 %r3, [tcgen05_mma_f8f6f4_cta1_param_1];
 ; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::f8f6f4.collector::a::discard [%r1], [%r3], %rd2, %r2, %p1;
-; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::f8f6f4.collector::a::discard.ashift [%r1], [%r3], %rd2, %r2, %p1;
+; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::f8f6f4.ashift.collector::a::discard [%r1], [%r3], %rd2, %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::f8f6f4.collector::a::lastuse [%r1], %rd1, %rd2, %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::f8f6f4.collector::a::lastuse [%r1], [%r3], %rd2, %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.cta_group::1.kind::f8f6f4.collector::a::fill [%r1], %rd1, %rd2, %r2, %p1;
@@ -227,7 +227,7 @@
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::f8f6f4.collector::a::discard [%r1], %rd1, %rd2, [%r3], %r2, %p1;
 ; CHECK-NEXT:    ld.param.b32 %r4, [tcgen05_mma_sp_f8f6fr_cta1_param_1];
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::f8f6f4.collector::a::discard [%r1], [%r4], %rd2, [%r3], %r2, %p1;
-; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::f8f6f4.collector::a::discard.ashift [%r1], [%r4], %rd2, [%r3], %r2, %p1;
+; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::f8f6f4.ashift.collector::a::discard [%r1], [%r4], %rd2, [%r3], %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::f8f6f4.collector::a::lastuse [%r1], %rd1, %rd2, [%r3], %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::f8f6f4.collector::a::lastuse [%r1], [%r4], %rd2, [%r3], %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::1.kind::f8f6f4.collector::a::fill [%r1], %rd1, %rd2, [%r3], %r2, %p1;
@@ -266,7 +266,7 @@
 ; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::f16.collector::a::discard [%r1], %rd1, %rd2, %r2, %p1;
 ; CHECK-NEXT:    ld.param.b32 %r3, [tcgen05_mma_fp16_cta2_param_1];
 ; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::f16.collector::a::discard [%r1], [%r3], %rd2, %r2, %p1;
-; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::f16.collector::a::discard.ashift [%r1], [%r3], %rd2, %r2, %p1;
+; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::f16.ashift.collector::a::discard [%r1], [%r3], %rd2, %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::f16.collector::a::lastuse [%r1], %rd1, %rd2, %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::f16.collector::a::lastuse [%r1], [%r3], %rd2, %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::f16.collector::a::fill [%r1], %rd1, %rd2, %r2, %p1;
@@ -306,7 +306,7 @@
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::f16.collector::a::discard [%r1], %rd1, %rd2, [%r3], %r2, %p1;
 ; CHECK-NEXT:    ld.param.b32 %r4, [tcgen05_mma_sp_fp16_cta2_param_1];
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::f16.collector::a::discard [%r1], [%r4], %rd2, [%r3], %r2, %p1;
-; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::f16.collector::a::discard.ashift [%r1], [%r4], %rd2, [%r3], %r2, %p1;
+; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::f16.ashift.collector::a::discard [%r1], [%r4], %rd2, [%r3], %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::f16.collector::a::lastuse [%r1], %rd1, %rd2, [%r3], %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::f16.collector::a::lastuse [%r1], [%r4], %rd2, [%r3], %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::f16.collector::a::fill [%r1], %rd1, %rd2, [%r3], %r2, %p1;
@@ -345,7 +345,7 @@
 ; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::tf32.collector::a::discard [%r1], %rd1, %rd2, %r2, %p1;
 ; CHECK-NEXT:    ld.param.b32 %r3, [tcgen05_mma_tf32_cta2_param_1];
 ; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::tf32.collector::a::discard [%r1], [%r3], %rd2, %r2, %p1;
-; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::tf32.collector::a::discard.ashift [%r1], [%r3], %rd2, %r2, %p1;
+; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::tf32.ashift.collector::a::discard [%r1], [%r3], %rd2, %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::tf32.collector::a::lastuse [%r1], %rd1, %rd2, %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::tf32.collector::a::lastuse [%r1], [%r3], %rd2, %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::tf32.collector::a::fill [%r1], %rd1, %rd2, %r2, %p1;
@@ -385,7 +385,7 @@
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::tf32.collector::a::discard [%r1], %rd1, %rd2, [%r3], %r2, %p1;
 ; CHECK-NEXT:    ld.param.b32 %r4, [tcgen05_mma_sp_tf32_cta2_param_1];
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::tf32.collector::a::discard [%r1], [%r4], %rd2, [%r3], %r2, %p1;
-; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::tf32.collector::a::discard.ashift [%r1], [%r4], %rd2, [%r3], %r2, %p1;
+; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::tf32.ashift.collector::a::discard [%r1], [%r4], %rd2, [%r3], %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::tf32.collector::a::lastuse [%r1], %rd1, %rd2, [%r3], %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::tf32.collector::a::lastuse [%r1], [%r4], %rd2, [%r3], %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::tf32.collector::a::fill [%r1], %rd1, %rd2, [%r3], %r2, %p1;
@@ -424,7 +424,7 @@
 ; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::f8f6f4.collector::a::discard [%r1], %rd1, %rd2, %r2, %p1;
 ; CHECK-NEXT:    ld.param.b32 %r3, [tcgen05_mma_f8f6f4_cta2_param_1];
 ; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::f8f6f4.collector::a::discard [%r1], [%r3], %rd2, %r2, %p1;
-; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::f8f6f4.collector::a::discard.ashift [%r1], [%r3], %rd2, %r2, %p1;
+; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::f8f6f4.ashift.collector::a::discard [%r1], [%r3], %rd2, %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::f8f6f4.collector::a::lastuse [%r1], %rd1, %rd2, %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::f8f6f4.collector::a::lastuse [%r1], [%r3], %rd2, %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.cta_group::2.kind::f8f6f4.collector::a::fill [%r1], %rd1, %rd2, %r2, %p1;
@@ -464,7 +464,7 @@
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::f8f6f4.collector::a::discard [%r1], %rd1, %rd2, [%r3], %r2, %p1;
 ; CHECK-NEXT:    ld.param.b32 %r4, [tcgen05_mma_sp_f8f6fr_cta2_param_1];
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::f8f6f4.collector::a::discard [%r1], [%r4], %rd2, [%r3], %r2, %p1;
-; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::f8f6f4.collector::a::discard.ashift [%r1], [%r4], %rd2, [%r3], %r2, %p1;
+; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::f8f6f4.ashift.collector::a::discard [%r1], [%r4], %rd2, [%r3], %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::f8f6f4.collector::a::lastuse [%r1], %rd1, %rd2, [%r3], %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::f8f6f4.collector::a::lastuse [%r1], [%r4], %rd2, [%r3], %r2, %p1;
 ; CHECK-NEXT:    tcgen05.mma.sp.cta_group::2.kind::f8f6f4.collector::a::fill [%r1], %rd1, %rd2, [%r3], %r2, %p1;