| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc < %s -o - -mcpu=sm_100a -march=nvptx64 -mattr=+ptx86 | FileCheck %s |
| ; RUN: llc < %s -o - -mcpu=sm_101a -march=nvptx64 -mattr=+ptx86 | FileCheck %s |
| ; RUN: llc < %s -o - -mcpu=sm_110a -march=nvptx64 -mattr=+ptx90 | FileCheck %s |
| ; RUN: %if ptxas-sm_100a && ptxas-isa-8.6 %{ llc < %s -march=nvptx64 -mattr=+ptx86 -mcpu=sm_100a | %ptxas-verify -arch=sm_100a %} |
| ; RUN: %if ptxas-sm_101a && ptxas-isa-8.6 %{ llc < %s -march=nvptx64 -mattr=+ptx86 -mcpu=sm_101a | %ptxas-verify -arch=sm_101a %} |
| ; RUN: %if ptxas-sm_110a && ptxas-isa-9.0 %{ llc < %s -march=nvptx64 -mattr=+ptx90 -mcpu=sm_110a | %ptxas-verify -arch=sm_110a %} |
| |
| define void @tcgen05_mma_fp16_shared_disable_output_lane_cta1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, <8 x i32> %disable_output_lanev8) { |
| ; CHECK-LABEL: tcgen05_mma_fp16_shared_disable_output_lane_cta1( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<2>; |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b32 %r<8>; |
| ; CHECK-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b8 %rs1, [tcgen05_mma_fp16_shared_disable_output_lane_cta1_param_5]; |
| ; CHECK-NEXT: and.b16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: setp.ne.b16 %p1, %rs2, 0; |
| ; CHECK-NEXT: ld.param.b32 %r1, [tcgen05_mma_fp16_shared_disable_output_lane_cta1_param_0]; |
| ; CHECK-NEXT: ld.param.b64 %rd1, [tcgen05_mma_fp16_shared_disable_output_lane_cta1_param_2]; |
| ; CHECK-NEXT: ld.param.b64 %rd2, [tcgen05_mma_fp16_shared_disable_output_lane_cta1_param_3]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [tcgen05_mma_fp16_shared_disable_output_lane_cta1_param_4]; |
| ; CHECK-NEXT: ld.param.v4.b32 {%r3, %r4, %r5, %r6}, [tcgen05_mma_fp16_shared_disable_output_lane_cta1_param_6]; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::f16.collector::a::discard [%r1], %rd1, %rd2, %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: ld.param.b32 %r7, [tcgen05_mma_fp16_shared_disable_output_lane_cta1_param_1]; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::f16.collector::a::discard [%r1], [%r7], %rd2, %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::f16.ashift.collector::a::discard [%r1], [%r7], %rd2, %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::f16.collector::a::lastuse [%r1], %rd1, %rd2, %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::f16.collector::a::lastuse [%r1], [%r7], %rd2, %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::f16.ashift.collector::a::lastuse [%r1], [%r7], %rd2, %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::f16.collector::a::fill [%r1], %rd1, %rd2, %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::f16.collector::a::fill [%r1], [%r7], %rd2, %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::f16.collector::a::use [%r1], %rd1, %rd2, %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::f16.collector::a::use [%r1], [%r7], %rd2, %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: ret; |
| call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, i32 0, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, i32 0, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, i32 0, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, i32 0, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, i32 0, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, i32 0, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, i32 0, i32 2) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, i32 0, i32 2) |
| |
| call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, i32 0, i32 3) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, i32 0, i32 3) |
| |
| ret void |
| } |
| |
| define void @tcgen05_mma_fp16_shared_disable_output_lane_cta2(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, <8 x i32> %disable_output_lanev8) { |
| ; CHECK-LABEL: tcgen05_mma_fp16_shared_disable_output_lane_cta2( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<2>; |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b32 %r<12>; |
| ; CHECK-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b8 %rs1, [tcgen05_mma_fp16_shared_disable_output_lane_cta2_param_5]; |
| ; CHECK-NEXT: and.b16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: setp.ne.b16 %p1, %rs2, 0; |
| ; CHECK-NEXT: ld.param.b32 %r1, [tcgen05_mma_fp16_shared_disable_output_lane_cta2_param_0]; |
| ; CHECK-NEXT: ld.param.b64 %rd1, [tcgen05_mma_fp16_shared_disable_output_lane_cta2_param_2]; |
| ; CHECK-NEXT: ld.param.b64 %rd2, [tcgen05_mma_fp16_shared_disable_output_lane_cta2_param_3]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [tcgen05_mma_fp16_shared_disable_output_lane_cta2_param_4]; |
| ; CHECK-NEXT: ld.param.v4.b32 {%r3, %r4, %r5, %r6}, [tcgen05_mma_fp16_shared_disable_output_lane_cta2_param_7+16]; |
| ; CHECK-NEXT: ld.param.v4.b32 {%r7, %r8, %r9, %r10}, [tcgen05_mma_fp16_shared_disable_output_lane_cta2_param_7]; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::2.kind::f16.collector::a::discard [%r1], %rd1, %rd2, %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: ld.param.b32 %r11, [tcgen05_mma_fp16_shared_disable_output_lane_cta2_param_1]; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::2.kind::f16.collector::a::discard [%r1], [%r11], %rd2, %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::2.kind::f16.ashift.collector::a::discard [%r1], [%r11], %rd2, %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::2.kind::f16.collector::a::lastuse [%r1], %rd1, %rd2, %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::2.kind::f16.collector::a::lastuse [%r1], [%r11], %rd2, %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::2.kind::f16.ashift.collector::a::lastuse [%r1], [%r11], %rd2, %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::2.kind::f16.collector::a::fill [%r1], %rd1, %rd2, %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::2.kind::f16.collector::a::fill [%r1], [%r11], %rd2, %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::2.kind::f16.collector::a::use [%r1], %rd1, %rd2, %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::2.kind::f16.collector::a::use [%r1], [%r11], %rd2, %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: ret; |
| call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <8 x i32> %disable_output_lanev8, i32 0, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <8 x i32> %disable_output_lanev8, i32 0, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <8 x i32> %disable_output_lanev8, i32 0, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <8 x i32> %disable_output_lanev8, i32 0, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <8 x i32> %disable_output_lanev8, i32 0, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <8 x i32> %disable_output_lanev8, i32 0, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <8 x i32> %disable_output_lanev8, i32 0, i32 2) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <8 x i32> %disable_output_lanev8, i32 0, i32 2) |
| |
| call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <8 x i32> %disable_output_lanev8, i32 0, i32 3) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <8 x i32> %disable_output_lanev8, i32 0, i32 3) |
| |
| ret void |
| } |
| |
| define void @tcgen05_mma_sp_fp16_shared_disable_output_lane_cta1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, <8 x i32> %disable_output_lanev8, ptr addrspace(6) %spmetadata) { |
| ; CHECK-LABEL: tcgen05_mma_sp_fp16_shared_disable_output_lane_cta1( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<2>; |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b32 %r<9>; |
| ; CHECK-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b8 %rs1, [tcgen05_mma_sp_fp16_shared_disable_output_lane_cta1_param_5]; |
| ; CHECK-NEXT: and.b16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: setp.ne.b16 %p1, %rs2, 0; |
| ; CHECK-NEXT: ld.param.b32 %r1, [tcgen05_mma_sp_fp16_shared_disable_output_lane_cta1_param_0]; |
| ; CHECK-NEXT: ld.param.b64 %rd1, [tcgen05_mma_sp_fp16_shared_disable_output_lane_cta1_param_2]; |
| ; CHECK-NEXT: ld.param.b64 %rd2, [tcgen05_mma_sp_fp16_shared_disable_output_lane_cta1_param_3]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [tcgen05_mma_sp_fp16_shared_disable_output_lane_cta1_param_4]; |
| ; CHECK-NEXT: ld.param.v4.b32 {%r3, %r4, %r5, %r6}, [tcgen05_mma_sp_fp16_shared_disable_output_lane_cta1_param_6]; |
| ; CHECK-NEXT: ld.param.b32 %r7, [tcgen05_mma_sp_fp16_shared_disable_output_lane_cta1_param_8]; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::1.kind::f16.collector::a::discard [%r1], %rd1, %rd2, [%r7], %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: ld.param.b32 %r8, [tcgen05_mma_sp_fp16_shared_disable_output_lane_cta1_param_1]; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::1.kind::f16.collector::a::discard [%r1], [%r8], %rd2, [%r7], %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::1.kind::f16.ashift.collector::a::discard [%r1], [%r8], %rd2, [%r7], %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::1.kind::f16.collector::a::lastuse [%r1], %rd1, %rd2, [%r7], %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::1.kind::f16.collector::a::lastuse [%r1], [%r8], %rd2, [%r7], %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::1.kind::f16.ashift.collector::a::lastuse [%r1], [%r8], %rd2, [%r7], %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::1.kind::f16.collector::a::fill [%r1], %rd1, %rd2, [%r7], %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::1.kind::f16.collector::a::fill [%r1], [%r8], %rd2, [%r7], %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::1.kind::f16.collector::a::use [%r1], %rd1, %rd2, [%r7], %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::1.kind::f16.collector::a::use [%r1], [%r8], %rd2, [%r7], %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: ret; |
| call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <4 x i32> %disable_output_lanev4, i32 0, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <4 x i32> %disable_output_lanev4, i32 0, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <4 x i32> %disable_output_lanev4, i32 0, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <4 x i32> %disable_output_lanev4, i32 0, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <4 x i32> %disable_output_lanev4, i32 0, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <4 x i32> %disable_output_lanev4, i32 0, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <4 x i32> %disable_output_lanev4, i32 0, i32 2) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <4 x i32> %disable_output_lanev4, i32 0, i32 2) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <4 x i32> %disable_output_lanev4, i32 0, i32 3) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <4 x i32> %disable_output_lanev4, i32 0, i32 3) |
| |
| ret void |
| } |
| |
| define void @tcgen05_mma_sp_fp16_shared_disable_output_lane_cta2(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, <8 x i32> %disable_output_lanev8, ptr addrspace(6) %spmetadata) { |
| ; CHECK-LABEL: tcgen05_mma_sp_fp16_shared_disable_output_lane_cta2( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<2>; |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b32 %r<13>; |
| ; CHECK-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b8 %rs1, [tcgen05_mma_sp_fp16_shared_disable_output_lane_cta2_param_5]; |
| ; CHECK-NEXT: and.b16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: setp.ne.b16 %p1, %rs2, 0; |
| ; CHECK-NEXT: ld.param.b32 %r1, [tcgen05_mma_sp_fp16_shared_disable_output_lane_cta2_param_0]; |
| ; CHECK-NEXT: ld.param.b64 %rd1, [tcgen05_mma_sp_fp16_shared_disable_output_lane_cta2_param_2]; |
| ; CHECK-NEXT: ld.param.b64 %rd2, [tcgen05_mma_sp_fp16_shared_disable_output_lane_cta2_param_3]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [tcgen05_mma_sp_fp16_shared_disable_output_lane_cta2_param_4]; |
| ; CHECK-NEXT: ld.param.v4.b32 {%r3, %r4, %r5, %r6}, [tcgen05_mma_sp_fp16_shared_disable_output_lane_cta2_param_7+16]; |
| ; CHECK-NEXT: ld.param.v4.b32 {%r7, %r8, %r9, %r10}, [tcgen05_mma_sp_fp16_shared_disable_output_lane_cta2_param_7]; |
| ; CHECK-NEXT: ld.param.b32 %r11, [tcgen05_mma_sp_fp16_shared_disable_output_lane_cta2_param_8]; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::2.kind::f16.collector::a::discard [%r1], %rd1, %rd2, [%r11], %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: ld.param.b32 %r12, [tcgen05_mma_sp_fp16_shared_disable_output_lane_cta2_param_1]; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::2.kind::f16.collector::a::discard [%r1], [%r12], %rd2, [%r11], %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::2.kind::f16.ashift.collector::a::discard [%r1], [%r12], %rd2, [%r11], %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::2.kind::f16.collector::a::lastuse [%r1], %rd1, %rd2, [%r11], %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::2.kind::f16.collector::a::lastuse [%r1], [%r12], %rd2, [%r11], %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::2.kind::f16.ashift.collector::a::lastuse [%r1], [%r12], %rd2, [%r11], %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::2.kind::f16.collector::a::fill [%r1], %rd1, %rd2, [%r11], %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::2.kind::f16.collector::a::fill [%r1], [%r12], %rd2, [%r11], %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::2.kind::f16.collector::a::use [%r1], %rd1, %rd2, [%r11], %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::2.kind::f16.collector::a::use [%r1], [%r12], %rd2, [%r11], %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: ret; |
| call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <8 x i32> %disable_output_lanev8, i32 0, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <8 x i32> %disable_output_lanev8, i32 0, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <8 x i32> %disable_output_lanev8, i32 0, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <8 x i32> %disable_output_lanev8, i32 0, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <8 x i32> %disable_output_lanev8, i32 0, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <8 x i32> %disable_output_lanev8, i32 0, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <8 x i32> %disable_output_lanev8, i32 0, i32 2) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <8 x i32> %disable_output_lanev8, i32 0, i32 2) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <8 x i32> %disable_output_lanev8, i32 0, i32 3) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <8 x i32> %disable_output_lanev8, i32 0, i32 3) |
| |
| ret void |
| } |
| |
| define void @tcgen05_mma_tf32_shared_disable_output_lane_cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, <8 x i32> %disable_output_lanev8) { |
| ; CHECK-LABEL: tcgen05_mma_tf32_shared_disable_output_lane_cg1( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<2>; |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b32 %r<8>; |
| ; CHECK-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b8 %rs1, [tcgen05_mma_tf32_shared_disable_output_lane_cg1_param_5]; |
| ; CHECK-NEXT: and.b16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: setp.ne.b16 %p1, %rs2, 0; |
| ; CHECK-NEXT: ld.param.b32 %r1, [tcgen05_mma_tf32_shared_disable_output_lane_cg1_param_0]; |
| ; CHECK-NEXT: ld.param.b64 %rd1, [tcgen05_mma_tf32_shared_disable_output_lane_cg1_param_2]; |
| ; CHECK-NEXT: ld.param.b64 %rd2, [tcgen05_mma_tf32_shared_disable_output_lane_cg1_param_3]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [tcgen05_mma_tf32_shared_disable_output_lane_cg1_param_4]; |
| ; CHECK-NEXT: ld.param.v4.b32 {%r3, %r4, %r5, %r6}, [tcgen05_mma_tf32_shared_disable_output_lane_cg1_param_6]; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::tf32.collector::a::discard [%r1], %rd1, %rd2, %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: ld.param.b32 %r7, [tcgen05_mma_tf32_shared_disable_output_lane_cg1_param_1]; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::tf32.collector::a::discard [%r1], [%r7], %rd2, %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::tf32.ashift.collector::a::discard [%r1], [%r7], %rd2, %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::tf32.collector::a::lastuse [%r1], %rd1, %rd2, %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::tf32.collector::a::lastuse [%r1], [%r7], %rd2, %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::tf32.ashift.collector::a::lastuse [%r1], [%r7], %rd2, %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::tf32.collector::a::fill [%r1], %rd1, %rd2, %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::tf32.collector::a::fill [%r1], [%r7], %rd2, %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::tf32.collector::a::use [%r1], %rd1, %rd2, %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::tf32.collector::a::use [%r1], [%r7], %rd2, %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: ret; |
| call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, i32 1, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, i32 1, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, i32 1, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, i32 1, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, i32 1, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, i32 1, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, i32 1, i32 2) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, i32 1, i32 2) |
| |
| call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, i32 1, i32 3) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, i32 1, i32 3) |
| |
| ret void |
| } |
| |
| define void @tcgen05_mma_tf32_shared_disable_output_lane_cg2(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, <8 x i32> %disable_output_lanev8) { |
| ; CHECK-LABEL: tcgen05_mma_tf32_shared_disable_output_lane_cg2( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<2>; |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b32 %r<12>; |
| ; CHECK-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b8 %rs1, [tcgen05_mma_tf32_shared_disable_output_lane_cg2_param_5]; |
| ; CHECK-NEXT: and.b16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: setp.ne.b16 %p1, %rs2, 0; |
| ; CHECK-NEXT: ld.param.b32 %r1, [tcgen05_mma_tf32_shared_disable_output_lane_cg2_param_0]; |
| ; CHECK-NEXT: ld.param.b64 %rd1, [tcgen05_mma_tf32_shared_disable_output_lane_cg2_param_2]; |
| ; CHECK-NEXT: ld.param.b64 %rd2, [tcgen05_mma_tf32_shared_disable_output_lane_cg2_param_3]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [tcgen05_mma_tf32_shared_disable_output_lane_cg2_param_4]; |
| ; CHECK-NEXT: ld.param.v4.b32 {%r3, %r4, %r5, %r6}, [tcgen05_mma_tf32_shared_disable_output_lane_cg2_param_7+16]; |
| ; CHECK-NEXT: ld.param.v4.b32 {%r7, %r8, %r9, %r10}, [tcgen05_mma_tf32_shared_disable_output_lane_cg2_param_7]; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::2.kind::tf32.collector::a::discard [%r1], %rd1, %rd2, %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: ld.param.b32 %r11, [tcgen05_mma_tf32_shared_disable_output_lane_cg2_param_1]; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::2.kind::tf32.collector::a::discard [%r1], [%r11], %rd2, %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::2.kind::tf32.ashift.collector::a::discard [%r1], [%r11], %rd2, %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::2.kind::tf32.collector::a::lastuse [%r1], %rd1, %rd2, %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::2.kind::tf32.collector::a::lastuse [%r1], [%r11], %rd2, %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::2.kind::tf32.ashift.collector::a::lastuse [%r1], [%r11], %rd2, %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::2.kind::tf32.collector::a::fill [%r1], %rd1, %rd2, %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::2.kind::tf32.collector::a::fill [%r1], [%r11], %rd2, %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::2.kind::tf32.collector::a::use [%r1], %rd1, %rd2, %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::2.kind::tf32.collector::a::use [%r1], [%r11], %rd2, %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: ret; |
| call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <8 x i32> %disable_output_lanev8, i32 1, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <8 x i32> %disable_output_lanev8, i32 1, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <8 x i32> %disable_output_lanev8, i32 1, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <8 x i32> %disable_output_lanev8, i32 1, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <8 x i32> %disable_output_lanev8, i32 1, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <8 x i32> %disable_output_lanev8, i32 1, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <8 x i32> %disable_output_lanev8, i32 1, i32 2) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <8 x i32> %disable_output_lanev8, i32 1, i32 2) |
| |
| call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <8 x i32> %disable_output_lanev8, i32 1, i32 3) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <8 x i32> %disable_output_lanev8, i32 1, i32 3) |
| ret void |
| } |
| |
| define void @tcgen05_mma_sp_tf32_shared_disable_output_lane_cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, <8 x i32> %disable_output_lanev8, ptr addrspace(6) %spmetadata) { |
| ; CHECK-LABEL: tcgen05_mma_sp_tf32_shared_disable_output_lane_cg1( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<2>; |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b32 %r<9>; |
| ; CHECK-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b8 %rs1, [tcgen05_mma_sp_tf32_shared_disable_output_lane_cg1_param_5]; |
| ; CHECK-NEXT: and.b16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: setp.ne.b16 %p1, %rs2, 0; |
| ; CHECK-NEXT: ld.param.b32 %r1, [tcgen05_mma_sp_tf32_shared_disable_output_lane_cg1_param_0]; |
| ; CHECK-NEXT: ld.param.b64 %rd1, [tcgen05_mma_sp_tf32_shared_disable_output_lane_cg1_param_2]; |
| ; CHECK-NEXT: ld.param.b64 %rd2, [tcgen05_mma_sp_tf32_shared_disable_output_lane_cg1_param_3]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [tcgen05_mma_sp_tf32_shared_disable_output_lane_cg1_param_4]; |
| ; CHECK-NEXT: ld.param.v4.b32 {%r3, %r4, %r5, %r6}, [tcgen05_mma_sp_tf32_shared_disable_output_lane_cg1_param_6]; |
| ; CHECK-NEXT: ld.param.b32 %r7, [tcgen05_mma_sp_tf32_shared_disable_output_lane_cg1_param_8]; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::1.kind::tf32.collector::a::discard [%r1], %rd1, %rd2, [%r7], %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: ld.param.b32 %r8, [tcgen05_mma_sp_tf32_shared_disable_output_lane_cg1_param_1]; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::1.kind::tf32.collector::a::discard [%r1], [%r8], %rd2, [%r7], %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::1.kind::tf32.ashift.collector::a::discard [%r1], [%r8], %rd2, [%r7], %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::1.kind::tf32.collector::a::lastuse [%r1], %rd1, %rd2, [%r7], %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::1.kind::tf32.collector::a::lastuse [%r1], [%r8], %rd2, [%r7], %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::1.kind::tf32.ashift.collector::a::lastuse [%r1], [%r8], %rd2, [%r7], %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::1.kind::tf32.collector::a::fill [%r1], %rd1, %rd2, [%r7], %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::1.kind::tf32.collector::a::fill [%r1], [%r8], %rd2, [%r7], %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::1.kind::tf32.collector::a::use [%r1], %rd1, %rd2, [%r7], %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::1.kind::tf32.collector::a::use [%r1], [%r8], %rd2, [%r7], %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: ret; |
| call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <4 x i32> %disable_output_lanev4, i32 1, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <4 x i32> %disable_output_lanev4, i32 1, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <4 x i32> %disable_output_lanev4, i32 1, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <4 x i32> %disable_output_lanev4, i32 1, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <4 x i32> %disable_output_lanev4, i32 1, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <4 x i32> %disable_output_lanev4, i32 1, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <4 x i32> %disable_output_lanev4, i32 1, i32 2) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <4 x i32> %disable_output_lanev4, i32 1, i32 2) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <4 x i32> %disable_output_lanev4, i32 1, i32 3) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <4 x i32> %disable_output_lanev4, i32 1, i32 3) |
| |
| ret void |
| } |
| |
| define void @tcgen05_mma_sp_tf32_shared_disable_output_lane_cg2(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, <8 x i32> %disable_output_lanev8, ptr addrspace(6) %spmetadata) { |
| ; CHECK-LABEL: tcgen05_mma_sp_tf32_shared_disable_output_lane_cg2( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<2>; |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b32 %r<13>; |
| ; CHECK-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b8 %rs1, [tcgen05_mma_sp_tf32_shared_disable_output_lane_cg2_param_5]; |
| ; CHECK-NEXT: and.b16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: setp.ne.b16 %p1, %rs2, 0; |
| ; CHECK-NEXT: ld.param.b32 %r1, [tcgen05_mma_sp_tf32_shared_disable_output_lane_cg2_param_0]; |
| ; CHECK-NEXT: ld.param.b64 %rd1, [tcgen05_mma_sp_tf32_shared_disable_output_lane_cg2_param_2]; |
| ; CHECK-NEXT: ld.param.b64 %rd2, [tcgen05_mma_sp_tf32_shared_disable_output_lane_cg2_param_3]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [tcgen05_mma_sp_tf32_shared_disable_output_lane_cg2_param_4]; |
| ; CHECK-NEXT: ld.param.v4.b32 {%r3, %r4, %r5, %r6}, [tcgen05_mma_sp_tf32_shared_disable_output_lane_cg2_param_7+16]; |
| ; CHECK-NEXT: ld.param.v4.b32 {%r7, %r8, %r9, %r10}, [tcgen05_mma_sp_tf32_shared_disable_output_lane_cg2_param_7]; |
| ; CHECK-NEXT: ld.param.b32 %r11, [tcgen05_mma_sp_tf32_shared_disable_output_lane_cg2_param_8]; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::2.kind::tf32.collector::a::discard [%r1], %rd1, %rd2, [%r11], %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: ld.param.b32 %r12, [tcgen05_mma_sp_tf32_shared_disable_output_lane_cg2_param_1]; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::2.kind::tf32.collector::a::discard [%r1], [%r12], %rd2, [%r11], %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::2.kind::tf32.ashift.collector::a::discard [%r1], [%r12], %rd2, [%r11], %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::2.kind::tf32.collector::a::lastuse [%r1], %rd1, %rd2, [%r11], %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::2.kind::tf32.collector::a::lastuse [%r1], [%r12], %rd2, [%r11], %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::2.kind::tf32.ashift.collector::a::lastuse [%r1], [%r12], %rd2, [%r11], %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::2.kind::tf32.collector::a::fill [%r1], %rd1, %rd2, [%r11], %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::2.kind::tf32.collector::a::fill [%r1], [%r12], %rd2, [%r11], %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::2.kind::tf32.collector::a::use [%r1], %rd1, %rd2, [%r11], %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::2.kind::tf32.collector::a::use [%r1], [%r12], %rd2, [%r11], %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: ret; |
| call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <8 x i32> %disable_output_lanev8, i32 1, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <8 x i32> %disable_output_lanev8, i32 1, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <8 x i32> %disable_output_lanev8, i32 1, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <8 x i32> %disable_output_lanev8, i32 1, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <8 x i32> %disable_output_lanev8, i32 1, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <8 x i32> %disable_output_lanev8, i32 1, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <8 x i32> %disable_output_lanev8, i32 1, i32 2) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <8 x i32> %disable_output_lanev8, i32 1, i32 2) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <8 x i32> %disable_output_lanev8, i32 1, i32 3) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <8 x i32> %disable_output_lanev8, i32 1, i32 3) |
| |
| ret void |
| } |
| |
| define void @tcgen05_mma_f8f6f4_shared_disable_output_lane_cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, <8 x i32> %disable_output_lanev8) { |
| ; CHECK-LABEL: tcgen05_mma_f8f6f4_shared_disable_output_lane_cg1( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<2>; |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b32 %r<8>; |
| ; CHECK-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b8 %rs1, [tcgen05_mma_f8f6f4_shared_disable_output_lane_cg1_param_5]; |
| ; CHECK-NEXT: and.b16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: setp.ne.b16 %p1, %rs2, 0; |
| ; CHECK-NEXT: ld.param.b32 %r1, [tcgen05_mma_f8f6f4_shared_disable_output_lane_cg1_param_0]; |
| ; CHECK-NEXT: ld.param.b64 %rd1, [tcgen05_mma_f8f6f4_shared_disable_output_lane_cg1_param_2]; |
| ; CHECK-NEXT: ld.param.b64 %rd2, [tcgen05_mma_f8f6f4_shared_disable_output_lane_cg1_param_3]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [tcgen05_mma_f8f6f4_shared_disable_output_lane_cg1_param_4]; |
| ; CHECK-NEXT: ld.param.v4.b32 {%r3, %r4, %r5, %r6}, [tcgen05_mma_f8f6f4_shared_disable_output_lane_cg1_param_6]; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::f8f6f4.collector::a::discard [%r1], %rd1, %rd2, %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: ld.param.b32 %r7, [tcgen05_mma_f8f6f4_shared_disable_output_lane_cg1_param_1]; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::f8f6f4.collector::a::discard [%r1], [%r7], %rd2, %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::f8f6f4.ashift.collector::a::discard [%r1], [%r7], %rd2, %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::f8f6f4.collector::a::lastuse [%r1], %rd1, %rd2, %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::f8f6f4.collector::a::lastuse [%r1], [%r7], %rd2, %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::f8f6f4.ashift.collector::a::lastuse [%r1], [%r7], %rd2, %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::f8f6f4.collector::a::fill [%r1], %rd1, %rd2, %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::f8f6f4.collector::a::fill [%r1], [%r7], %rd2, %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::f8f6f4.collector::a::use [%r1], %rd1, %rd2, %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::f8f6f4.collector::a::use [%r1], [%r7], %rd2, %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: ret; |
| call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, i32 2, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, i32 2, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, i32 2, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, i32 2, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, i32 2, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, i32 2, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, i32 2, i32 2) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, i32 2, i32 2) |
| |
| call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, i32 2, i32 3) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, i32 2, i32 3) |
| |
| ret void |
| } |
| |
| define void @tcgen05_mma_f8f6f4_shared_disable_output_lane_cg2(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, <8 x i32> %disable_output_lanev8) { |
| ; CHECK-LABEL: tcgen05_mma_f8f6f4_shared_disable_output_lane_cg2( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<2>; |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b32 %r<12>; |
| ; CHECK-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b8 %rs1, [tcgen05_mma_f8f6f4_shared_disable_output_lane_cg2_param_5]; |
| ; CHECK-NEXT: and.b16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: setp.ne.b16 %p1, %rs2, 0; |
| ; CHECK-NEXT: ld.param.b32 %r1, [tcgen05_mma_f8f6f4_shared_disable_output_lane_cg2_param_0]; |
| ; CHECK-NEXT: ld.param.b64 %rd1, [tcgen05_mma_f8f6f4_shared_disable_output_lane_cg2_param_2]; |
| ; CHECK-NEXT: ld.param.b64 %rd2, [tcgen05_mma_f8f6f4_shared_disable_output_lane_cg2_param_3]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [tcgen05_mma_f8f6f4_shared_disable_output_lane_cg2_param_4]; |
| ; CHECK-NEXT: ld.param.v4.b32 {%r3, %r4, %r5, %r6}, [tcgen05_mma_f8f6f4_shared_disable_output_lane_cg2_param_7+16]; |
| ; CHECK-NEXT: ld.param.v4.b32 {%r7, %r8, %r9, %r10}, [tcgen05_mma_f8f6f4_shared_disable_output_lane_cg2_param_7]; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::2.kind::f8f6f4.collector::a::discard [%r1], %rd1, %rd2, %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: ld.param.b32 %r11, [tcgen05_mma_f8f6f4_shared_disable_output_lane_cg2_param_1]; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::2.kind::f8f6f4.collector::a::discard [%r1], [%r11], %rd2, %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::2.kind::f8f6f4.ashift.collector::a::discard [%r1], [%r11], %rd2, %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::2.kind::f8f6f4.collector::a::lastuse [%r1], %rd1, %rd2, %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::2.kind::f8f6f4.collector::a::lastuse [%r1], [%r11], %rd2, %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::2.kind::f8f6f4.ashift.collector::a::lastuse [%r1], [%r11], %rd2, %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::2.kind::f8f6f4.collector::a::fill [%r1], %rd1, %rd2, %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::2.kind::f8f6f4.collector::a::fill [%r1], [%r11], %rd2, %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::2.kind::f8f6f4.collector::a::use [%r1], %rd1, %rd2, %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::2.kind::f8f6f4.collector::a::use [%r1], [%r11], %rd2, %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: ret; |
| call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <8 x i32> %disable_output_lanev8, i32 2, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <8 x i32> %disable_output_lanev8, i32 2, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <8 x i32> %disable_output_lanev8, i32 2, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <8 x i32> %disable_output_lanev8, i32 2, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <8 x i32> %disable_output_lanev8, i32 2, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <8 x i32> %disable_output_lanev8, i32 2, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <8 x i32> %disable_output_lanev8, i32 2, i32 2) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <8 x i32> %disable_output_lanev8, i32 2, i32 2) |
| |
| call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <8 x i32> %disable_output_lanev8, i32 2, i32 3) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <8 x i32> %disable_output_lanev8, i32 2, i32 3) |
| |
| ret void |
| } |
| |
| define void @tcgen05_mma_sp_f8f6f4_shared_disable_output_lane_cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, <8 x i32> %disable_output_lanev8, ptr addrspace(6) %spmetadata) { |
| ; CHECK-LABEL: tcgen05_mma_sp_f8f6f4_shared_disable_output_lane_cg1( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<2>; |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b32 %r<9>; |
| ; CHECK-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b8 %rs1, [tcgen05_mma_sp_f8f6f4_shared_disable_output_lane_cg1_param_5]; |
| ; CHECK-NEXT: and.b16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: setp.ne.b16 %p1, %rs2, 0; |
| ; CHECK-NEXT: ld.param.b32 %r1, [tcgen05_mma_sp_f8f6f4_shared_disable_output_lane_cg1_param_0]; |
| ; CHECK-NEXT: ld.param.b64 %rd1, [tcgen05_mma_sp_f8f6f4_shared_disable_output_lane_cg1_param_2]; |
| ; CHECK-NEXT: ld.param.b64 %rd2, [tcgen05_mma_sp_f8f6f4_shared_disable_output_lane_cg1_param_3]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [tcgen05_mma_sp_f8f6f4_shared_disable_output_lane_cg1_param_4]; |
| ; CHECK-NEXT: ld.param.v4.b32 {%r3, %r4, %r5, %r6}, [tcgen05_mma_sp_f8f6f4_shared_disable_output_lane_cg1_param_6]; |
| ; CHECK-NEXT: ld.param.b32 %r7, [tcgen05_mma_sp_f8f6f4_shared_disable_output_lane_cg1_param_8]; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::1.kind::f8f6f4.collector::a::discard [%r1], %rd1, %rd2, [%r7], %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: ld.param.b32 %r8, [tcgen05_mma_sp_f8f6f4_shared_disable_output_lane_cg1_param_1]; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::1.kind::f8f6f4.collector::a::discard [%r1], [%r8], %rd2, [%r7], %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::1.kind::f8f6f4.ashift.collector::a::discard [%r1], [%r8], %rd2, [%r7], %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::1.kind::f8f6f4.collector::a::lastuse [%r1], %rd1, %rd2, [%r7], %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::1.kind::f8f6f4.collector::a::lastuse [%r1], [%r8], %rd2, [%r7], %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::1.kind::f8f6f4.ashift.collector::a::lastuse [%r1], [%r8], %rd2, [%r7], %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::1.kind::f8f6f4.collector::a::fill [%r1], %rd1, %rd2, [%r7], %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::1.kind::f8f6f4.collector::a::fill [%r1], [%r8], %rd2, [%r7], %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::1.kind::f8f6f4.collector::a::use [%r1], %rd1, %rd2, [%r7], %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::1.kind::f8f6f4.collector::a::use [%r1], [%r8], %rd2, [%r7], %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: ret; |
| call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <4 x i32> %disable_output_lanev4, i32 2, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <4 x i32> %disable_output_lanev4, i32 2, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <4 x i32> %disable_output_lanev4, i32 2, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <4 x i32> %disable_output_lanev4, i32 2, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <4 x i32> %disable_output_lanev4, i32 2, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <4 x i32> %disable_output_lanev4, i32 2, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <4 x i32> %disable_output_lanev4, i32 2, i32 2) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <4 x i32> %disable_output_lanev4, i32 2, i32 2) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <4 x i32> %disable_output_lanev4, i32 2, i32 3) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <4 x i32> %disable_output_lanev4, i32 2, i32 3) |
| |
| ret void |
| } |
| |
| define void @tcgen05_mma_sp_f8f6f4_shared_disable_output_lane_cg2(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, <8 x i32> %disable_output_lanev8, ptr addrspace(6) %spmetadata) { |
| ; CHECK-LABEL: tcgen05_mma_sp_f8f6f4_shared_disable_output_lane_cg2( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<2>; |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b32 %r<13>; |
| ; CHECK-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b8 %rs1, [tcgen05_mma_sp_f8f6f4_shared_disable_output_lane_cg2_param_5]; |
| ; CHECK-NEXT: and.b16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: setp.ne.b16 %p1, %rs2, 0; |
| ; CHECK-NEXT: ld.param.b32 %r1, [tcgen05_mma_sp_f8f6f4_shared_disable_output_lane_cg2_param_0]; |
| ; CHECK-NEXT: ld.param.b64 %rd1, [tcgen05_mma_sp_f8f6f4_shared_disable_output_lane_cg2_param_2]; |
| ; CHECK-NEXT: ld.param.b64 %rd2, [tcgen05_mma_sp_f8f6f4_shared_disable_output_lane_cg2_param_3]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [tcgen05_mma_sp_f8f6f4_shared_disable_output_lane_cg2_param_4]; |
| ; CHECK-NEXT: ld.param.v4.b32 {%r3, %r4, %r5, %r6}, [tcgen05_mma_sp_f8f6f4_shared_disable_output_lane_cg2_param_7+16]; |
| ; CHECK-NEXT: ld.param.v4.b32 {%r7, %r8, %r9, %r10}, [tcgen05_mma_sp_f8f6f4_shared_disable_output_lane_cg2_param_7]; |
| ; CHECK-NEXT: ld.param.b32 %r11, [tcgen05_mma_sp_f8f6f4_shared_disable_output_lane_cg2_param_8]; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::2.kind::f8f6f4.collector::a::discard [%r1], %rd1, %rd2, [%r11], %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: ld.param.b32 %r12, [tcgen05_mma_sp_f8f6f4_shared_disable_output_lane_cg2_param_1]; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::2.kind::f8f6f4.collector::a::discard [%r1], [%r12], %rd2, [%r11], %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::2.kind::f8f6f4.ashift.collector::a::discard [%r1], [%r12], %rd2, [%r11], %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::2.kind::f8f6f4.collector::a::lastuse [%r1], %rd1, %rd2, [%r11], %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::2.kind::f8f6f4.collector::a::lastuse [%r1], [%r12], %rd2, [%r11], %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::2.kind::f8f6f4.ashift.collector::a::lastuse [%r1], [%r12], %rd2, [%r11], %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::2.kind::f8f6f4.collector::a::fill [%r1], %rd1, %rd2, [%r11], %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::2.kind::f8f6f4.collector::a::fill [%r1], [%r12], %rd2, [%r11], %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::2.kind::f8f6f4.collector::a::use [%r1], %rd1, %rd2, [%r11], %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::2.kind::f8f6f4.collector::a::use [%r1], [%r12], %rd2, [%r11], %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: ret; |
| call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <8 x i32> %disable_output_lanev8, i32 2, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <8 x i32> %disable_output_lanev8, i32 2, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <8 x i32> %disable_output_lanev8, i32 2, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <8 x i32> %disable_output_lanev8, i32 2, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <8 x i32> %disable_output_lanev8, i32 2, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <8 x i32> %disable_output_lanev8, i32 2, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <8 x i32> %disable_output_lanev8, i32 2, i32 2) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <8 x i32> %disable_output_lanev8, i32 2, i32 2) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <8 x i32> %disable_output_lanev8, i32 2, i32 3) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <8 x i32> %disable_output_lanev8, i32 2, i32 3) |
| |
| ret void |
| |
| } |
| |
| define void @tcgen05_mma_i8_shared_disable_output_lane_cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, <8 x i32> %disable_output_lanev8) { |
| ; CHECK-LABEL: tcgen05_mma_i8_shared_disable_output_lane_cg1( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<2>; |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b32 %r<8>; |
| ; CHECK-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b8 %rs1, [tcgen05_mma_i8_shared_disable_output_lane_cg1_param_5]; |
| ; CHECK-NEXT: and.b16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: setp.ne.b16 %p1, %rs2, 0; |
| ; CHECK-NEXT: ld.param.b32 %r1, [tcgen05_mma_i8_shared_disable_output_lane_cg1_param_0]; |
| ; CHECK-NEXT: ld.param.b64 %rd1, [tcgen05_mma_i8_shared_disable_output_lane_cg1_param_2]; |
| ; CHECK-NEXT: ld.param.b64 %rd2, [tcgen05_mma_i8_shared_disable_output_lane_cg1_param_3]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [tcgen05_mma_i8_shared_disable_output_lane_cg1_param_4]; |
| ; CHECK-NEXT: ld.param.v4.b32 {%r3, %r4, %r5, %r6}, [tcgen05_mma_i8_shared_disable_output_lane_cg1_param_6]; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::i8.collector::a::discard [%r1], %rd1, %rd2, %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: ld.param.b32 %r7, [tcgen05_mma_i8_shared_disable_output_lane_cg1_param_1]; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::i8.collector::a::discard [%r1], [%r7], %rd2, %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::i8.ashift.collector::a::discard [%r1], [%r7], %rd2, %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::i8.collector::a::lastuse [%r1], %rd1, %rd2, %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::i8.collector::a::lastuse [%r1], [%r7], %rd2, %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::i8.ashift.collector::a::lastuse [%r1], [%r7], %rd2, %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::i8.collector::a::fill [%r1], %rd1, %rd2, %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::i8.collector::a::fill [%r1], [%r7], %rd2, %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::i8.collector::a::use [%r1], %rd1, %rd2, %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::i8.collector::a::use [%r1], [%r7], %rd2, %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: ret; |
| call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, i32 3, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, i32 3, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, i32 3, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, i32 3, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, i32 3, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, i32 3, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, i32 3, i32 2) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, i32 3, i32 2) |
| |
| call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg1(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, i32 3, i32 3) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, i32 3, i32 3) |
| |
| ret void |
| } |
| |
| define void @tcgen05_mma_i8_shared_disable_output_lane_cg2(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, <8 x i32> %disable_output_lanev8) { |
| ; CHECK-LABEL: tcgen05_mma_i8_shared_disable_output_lane_cg2( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<2>; |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b32 %r<12>; |
| ; CHECK-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b8 %rs1, [tcgen05_mma_i8_shared_disable_output_lane_cg2_param_5]; |
| ; CHECK-NEXT: and.b16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: setp.ne.b16 %p1, %rs2, 0; |
| ; CHECK-NEXT: ld.param.b32 %r1, [tcgen05_mma_i8_shared_disable_output_lane_cg2_param_0]; |
| ; CHECK-NEXT: ld.param.b64 %rd1, [tcgen05_mma_i8_shared_disable_output_lane_cg2_param_2]; |
| ; CHECK-NEXT: ld.param.b64 %rd2, [tcgen05_mma_i8_shared_disable_output_lane_cg2_param_3]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [tcgen05_mma_i8_shared_disable_output_lane_cg2_param_4]; |
| ; CHECK-NEXT: ld.param.v4.b32 {%r3, %r4, %r5, %r6}, [tcgen05_mma_i8_shared_disable_output_lane_cg2_param_7+16]; |
| ; CHECK-NEXT: ld.param.v4.b32 {%r7, %r8, %r9, %r10}, [tcgen05_mma_i8_shared_disable_output_lane_cg2_param_7]; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::2.kind::i8.collector::a::discard [%r1], %rd1, %rd2, %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: ld.param.b32 %r11, [tcgen05_mma_i8_shared_disable_output_lane_cg2_param_1]; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::2.kind::i8.collector::a::discard [%r1], [%r11], %rd2, %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::2.kind::i8.ashift.collector::a::discard [%r1], [%r11], %rd2, %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::2.kind::i8.collector::a::lastuse [%r1], %rd1, %rd2, %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::2.kind::i8.collector::a::lastuse [%r1], [%r11], %rd2, %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::2.kind::i8.ashift.collector::a::lastuse [%r1], [%r11], %rd2, %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::2.kind::i8.collector::a::fill [%r1], %rd1, %rd2, %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::2.kind::i8.collector::a::fill [%r1], [%r11], %rd2, %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::2.kind::i8.collector::a::use [%r1], %rd1, %rd2, %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.cta_group::2.kind::i8.collector::a::use [%r1], [%r11], %rd2, %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: ret; |
| call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <8 x i32> %disable_output_lanev8, i32 3, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <8 x i32> %disable_output_lanev8, i32 3, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <8 x i32> %disable_output_lanev8, i32 3, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <8 x i32> %disable_output_lanev8, i32 3, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <8 x i32> %disable_output_lanev8, i32 3, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <8 x i32> %disable_output_lanev8, i32 3, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <8 x i32> %disable_output_lanev8, i32 3, i32 2) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <8 x i32> %disable_output_lanev8, i32 3, i32 2) |
| |
| call void @llvm.nvvm.tcgen05.mma.shared.disable_output_lane.cg2(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <8 x i32> %disable_output_lanev8, i32 3, i32 3) |
| |
| call void @llvm.nvvm.tcgen05.mma.tensor.disable_output_lane.cg2(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, <8 x i32> %disable_output_lanev8, i32 3, i32 3) |
| |
| ret void |
| } |
| |
| define void @tcgen05_mma_sp_i8_shared_disable_output_lane_cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, <8 x i32> %disable_output_lanev8, ptr addrspace(6) %spmetadata) { |
| ; CHECK-LABEL: tcgen05_mma_sp_i8_shared_disable_output_lane_cg1( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<2>; |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b32 %r<9>; |
| ; CHECK-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b8 %rs1, [tcgen05_mma_sp_i8_shared_disable_output_lane_cg1_param_5]; |
| ; CHECK-NEXT: and.b16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: setp.ne.b16 %p1, %rs2, 0; |
| ; CHECK-NEXT: ld.param.b32 %r1, [tcgen05_mma_sp_i8_shared_disable_output_lane_cg1_param_0]; |
| ; CHECK-NEXT: ld.param.b64 %rd1, [tcgen05_mma_sp_i8_shared_disable_output_lane_cg1_param_2]; |
| ; CHECK-NEXT: ld.param.b64 %rd2, [tcgen05_mma_sp_i8_shared_disable_output_lane_cg1_param_3]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [tcgen05_mma_sp_i8_shared_disable_output_lane_cg1_param_4]; |
| ; CHECK-NEXT: ld.param.v4.b32 {%r3, %r4, %r5, %r6}, [tcgen05_mma_sp_i8_shared_disable_output_lane_cg1_param_6]; |
| ; CHECK-NEXT: ld.param.b32 %r7, [tcgen05_mma_sp_i8_shared_disable_output_lane_cg1_param_8]; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::1.kind::i8.collector::a::discard [%r1], %rd1, %rd2, [%r7], %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: ld.param.b32 %r8, [tcgen05_mma_sp_i8_shared_disable_output_lane_cg1_param_1]; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::1.kind::i8.collector::a::discard [%r1], [%r8], %rd2, [%r7], %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::1.kind::i8.ashift.collector::a::discard [%r1], [%r8], %rd2, [%r7], %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::1.kind::i8.collector::a::lastuse [%r1], %rd1, %rd2, [%r7], %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::1.kind::i8.collector::a::lastuse [%r1], [%r8], %rd2, [%r7], %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::1.kind::i8.ashift.collector::a::lastuse [%r1], [%r8], %rd2, [%r7], %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::1.kind::i8.collector::a::fill [%r1], %rd1, %rd2, [%r7], %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::1.kind::i8.collector::a::fill [%r1], [%r8], %rd2, [%r7], %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::1.kind::i8.collector::a::use [%r1], %rd1, %rd2, [%r7], %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::1.kind::i8.collector::a::use [%r1], [%r8], %rd2, [%r7], %r2, {%r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: ret; |
| call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <4 x i32> %disable_output_lanev4, i32 3, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <4 x i32> %disable_output_lanev4, i32 3, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <4 x i32> %disable_output_lanev4, i32 3, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <4 x i32> %disable_output_lanev4, i32 3, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <4 x i32> %disable_output_lanev4, i32 3, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1.ashift(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <4 x i32> %disable_output_lanev4, i32 3, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <4 x i32> %disable_output_lanev4, i32 3, i32 2) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <4 x i32> %disable_output_lanev4, i32 3, i32 2) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg1(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <4 x i32> %disable_output_lanev4, i32 3, i32 3) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg1(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <4 x i32> %disable_output_lanev4, i32 3, i32 3) |
| |
| ret void |
| } |
| |
| define void @tcgen05_mma_sp_i8_shared_disable_output_lane_cg2(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, <4 x i32> %disable_output_lanev4, <8 x i32> %disable_output_lanev8, ptr addrspace(6) %spmetadata) { |
| ; CHECK-LABEL: tcgen05_mma_sp_i8_shared_disable_output_lane_cg2( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<2>; |
| ; CHECK-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NEXT: .reg .b32 %r<13>; |
| ; CHECK-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b8 %rs1, [tcgen05_mma_sp_i8_shared_disable_output_lane_cg2_param_5]; |
| ; CHECK-NEXT: and.b16 %rs2, %rs1, 1; |
| ; CHECK-NEXT: setp.ne.b16 %p1, %rs2, 0; |
| ; CHECK-NEXT: ld.param.b32 %r1, [tcgen05_mma_sp_i8_shared_disable_output_lane_cg2_param_0]; |
| ; CHECK-NEXT: ld.param.b64 %rd1, [tcgen05_mma_sp_i8_shared_disable_output_lane_cg2_param_2]; |
| ; CHECK-NEXT: ld.param.b64 %rd2, [tcgen05_mma_sp_i8_shared_disable_output_lane_cg2_param_3]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [tcgen05_mma_sp_i8_shared_disable_output_lane_cg2_param_4]; |
| ; CHECK-NEXT: ld.param.v4.b32 {%r3, %r4, %r5, %r6}, [tcgen05_mma_sp_i8_shared_disable_output_lane_cg2_param_7+16]; |
| ; CHECK-NEXT: ld.param.v4.b32 {%r7, %r8, %r9, %r10}, [tcgen05_mma_sp_i8_shared_disable_output_lane_cg2_param_7]; |
| ; CHECK-NEXT: ld.param.b32 %r11, [tcgen05_mma_sp_i8_shared_disable_output_lane_cg2_param_8]; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::2.kind::i8.collector::a::discard [%r1], %rd1, %rd2, [%r11], %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: ld.param.b32 %r12, [tcgen05_mma_sp_i8_shared_disable_output_lane_cg2_param_1]; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::2.kind::i8.collector::a::discard [%r1], [%r12], %rd2, [%r11], %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::2.kind::i8.ashift.collector::a::discard [%r1], [%r12], %rd2, [%r11], %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::2.kind::i8.collector::a::lastuse [%r1], %rd1, %rd2, [%r11], %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::2.kind::i8.collector::a::lastuse [%r1], [%r12], %rd2, [%r11], %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::2.kind::i8.ashift.collector::a::lastuse [%r1], [%r12], %rd2, [%r11], %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::2.kind::i8.collector::a::fill [%r1], %rd1, %rd2, [%r11], %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::2.kind::i8.collector::a::fill [%r1], [%r12], %rd2, [%r11], %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::2.kind::i8.collector::a::use [%r1], %rd1, %rd2, [%r11], %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: tcgen05.mma.sp.cta_group::2.kind::i8.collector::a::use [%r1], [%r12], %rd2, [%r11], %r2, {%r7, %r8, %r9, %r10, %r3, %r4, %r5, %r6}, %p1; |
| ; CHECK-NEXT: ret; |
| call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <8 x i32> %disable_output_lanev8, i32 3, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <8 x i32> %disable_output_lanev8, i32 3, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <8 x i32> %disable_output_lanev8, i32 3, i32 0) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <8 x i32> %disable_output_lanev8, i32 3, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <8 x i32> %disable_output_lanev8, i32 3, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2.ashift(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <8 x i32> %disable_output_lanev8, i32 3, i32 1) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <8 x i32> %disable_output_lanev8, i32 3, i32 2) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <8 x i32> %disable_output_lanev8, i32 3, i32 2) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.shared.disable_output_lane.cg2(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <8 x i32> %disable_output_lanev8, i32 3, i32 3) |
| |
| call void @llvm.nvvm.tcgen05.mma.sp.tensor.disable_output_lane.cg2(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, ptr addrspace(6) %spmetadata, <8 x i32> %disable_output_lanev8, i32 3, i32 3) |
| |
| ret void |
| } |