| # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 |
| # RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -amdgpu-expert-scheduling-mode -run-pass=si-insert-waitcnts %s -o - | FileCheck %s |
| |
| --- |
| # Three back-to-back WMMA instructions (a V_WMMA_SCALE between two plain |
| # V_WMMA ops) define $vgpr8..15, $vgpr16..23 and $vgpr40..47. Three |
| # GLOBAL_LOAD_DWORDs then use the low register pair of each result |
| # ($vgpr8_vgpr9, $vgpr16_vgpr17, $vgpr40_vgpr41) as their address operand. |
| # The checks expect S_WAITCNT_DEPCTR .VaVdst_3, .VaVdst_1 and .VaVdst_0 |
| # before the first, second and third load respectively, plus |
| # S_WAIT_LOADCNT 0 before the second and third load. |
| # NOTE(review): the S_WAIT_LOADCNT 0 is presumably due to every load |
| # writing the same $vgpr64 - confirm against the pass. |
| name: wmma_scale |
| body: | |
| bb.0: |
| ; CHECK-LABEL: name: wmma_scale |
| ; CHECK: S_SETREG_IMM32_B32 2, 2074, implicit-def $mode, implicit $mode |
| ; CHECK-NEXT: S_WAIT_LOADCNT_DSCNT .Loadcnt_0_Dscnt_0 |
| ; CHECK-NEXT: S_WAIT_KMCNT 0 |
| ; CHECK-NEXT: early-clobber $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_WMMA_F32_16X16X64_FP8_FP8_w32_twoaddr $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, 0, implicit $exec |
| ; CHECK-NEXT: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr16, $vgpr16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec |
| ; CHECK-NEXT: early-clobber $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 = V_WMMA_F32_16X16X64_FP8_FP8_w32_twoaddr $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, 8, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 0, 0, 0, 0, implicit $exec |
| ; CHECK-NEXT: S_WAITCNT_DEPCTR .VaVdst_3 |
| ; CHECK-NEXT: $vgpr64 = GLOBAL_LOAD_DWORD $vgpr8_vgpr9, 0, 0, implicit $exec |
| ; CHECK-NEXT: S_WAIT_LOADCNT 0 |
| ; CHECK-NEXT: S_WAITCNT_DEPCTR .VaVdst_1 |
| ; CHECK-NEXT: $vgpr64 = GLOBAL_LOAD_DWORD $vgpr16_vgpr17, 0, 0, implicit $exec |
| ; CHECK-NEXT: S_WAIT_LOADCNT 0 |
| ; CHECK-NEXT: S_WAITCNT_DEPCTR .VaVdst_0 |
| ; CHECK-NEXT: $vgpr64 = GLOBAL_LOAD_DWORD $vgpr40_vgpr41, 0, 0, implicit $exec |
| $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_WMMA_F32_16X16X64_FP8_FP8_w32_twoaddr $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, 0, implicit $exec |
| $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr16, $vgpr16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec |
| $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 = V_WMMA_F32_16X16X64_FP8_FP8_w32_twoaddr $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, 8, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 0, 0, 0, 0, implicit $exec |
| $vgpr64 = GLOBAL_LOAD_DWORD $vgpr8_vgpr9, 0, 0, implicit $exec |
| $vgpr64 = GLOBAL_LOAD_DWORD $vgpr16_vgpr17, 0, 0, implicit $exec |
| $vgpr64 = GLOBAL_LOAD_DWORD $vgpr40_vgpr41, 0, 0, implicit $exec |
| ... |
| |
| --- |
| # Read-after-write case: V_MOV_B32 defines $vgpr0, and the following |
| # GLOBAL_LOAD_ASYNC_TO_LDS_B32 reads it as part of its $vgpr0_vgpr1 operand. |
| # The checks expect S_WAITCNT_DEPCTR .VaVdst_0 between the two instructions. |
| name: async_global_to_lds_vaddr_raw |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr1, $vgpr2 |
| ; CHECK-LABEL: name: async_global_to_lds_vaddr_raw |
| ; CHECK: liveins: $vgpr1, $vgpr2 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: S_SETREG_IMM32_B32 2, 2074, implicit-def $mode, implicit $mode |
| ; CHECK-NEXT: S_WAIT_LOADCNT_DSCNT .Loadcnt_0_Dscnt_0 |
| ; CHECK-NEXT: S_WAIT_KMCNT 0 |
| ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 1, implicit $exec |
| ; CHECK-NEXT: S_WAITCNT_DEPCTR .VaVdst_0 |
| ; CHECK-NEXT: GLOBAL_LOAD_ASYNC_TO_LDS_B32 $vgpr2, $vgpr0_vgpr1, 0, 0, implicit-def $asynccnt, implicit $exec, implicit $asynccnt :: (load (s32), addrspace 1), (store (s32), addrspace 3) |
| $vgpr0 = V_MOV_B32_e32 1, implicit $exec |
| GLOBAL_LOAD_ASYNC_TO_LDS_B32 $vgpr2, $vgpr0_vgpr1, 0, 0, implicit-def $asynccnt, implicit $exec, implicit $asynccnt :: (load (s32), addrspace 1), (store (s32), addrspace 3) |
| ... |
| |
| --- |
| # Write-after-read case on the $vgpr0_vgpr1 operand of the async load: |
| # GLOBAL_LOAD_ASYNC_TO_LDS_B32 reads $vgpr0_vgpr1, V_MOV_B32 then overwrites |
| # $vgpr0, and GLOBAL_STORE_DWORD reads the new $vgpr0. |
| # The checks expect S_WAITCNT_DEPCTR .VmVsrc_0 before the V_MOV and |
| # S_WAITCNT_DEPCTR .VaVdst_0 before the store. |
| name: async_global_to_lds_vaddr_vsrc_war |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr5 |
| ; CHECK-LABEL: name: async_global_to_lds_vaddr_vsrc_war |
| ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: S_SETREG_IMM32_B32 2, 2074, implicit-def $mode, implicit $mode |
| ; CHECK-NEXT: S_WAIT_LOADCNT_DSCNT .Loadcnt_0_Dscnt_0 |
| ; CHECK-NEXT: S_WAIT_KMCNT 0 |
| ; CHECK-NEXT: GLOBAL_LOAD_ASYNC_TO_LDS_B32 $vgpr2, $vgpr0_vgpr1, 0, 0, implicit-def $asynccnt, implicit $exec, implicit $asynccnt :: (load (s32), addrspace 1), (store (s32), addrspace 3) |
| ; CHECK-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 |
| ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 1, implicit $exec |
| ; CHECK-NEXT: S_WAITCNT_DEPCTR .VaVdst_0 |
| ; CHECK-NEXT: GLOBAL_STORE_DWORD $vgpr4_vgpr5, $vgpr0, 0, 0, implicit $exec :: (store (s32), addrspace 1) |
| GLOBAL_LOAD_ASYNC_TO_LDS_B32 $vgpr2, $vgpr0_vgpr1, 0, 0, implicit-def $asynccnt, implicit $exec, implicit $asynccnt :: (load (s32), addrspace 1), (store (s32), addrspace 3) |
| $vgpr0 = V_MOV_B32_e32 1, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr4_vgpr5, $vgpr0, 0, 0, implicit $exec :: (store (s32), addrspace 1) |
| ... |
| |
| --- |
| # Write-after-read case on the $vgpr2 operand of the async load: |
| # GLOBAL_LOAD_ASYNC_TO_LDS_B32 reads $vgpr2, V_MOV_B32 then overwrites |
| # $vgpr2, and GLOBAL_STORE_DWORD reads the new $vgpr2. |
| # The checks expect S_WAITCNT_DEPCTR .VmVsrc_0 before the V_MOV and |
| # S_WAITCNT_DEPCTR .VaVdst_0 before the store. |
| name: async_global_to_lds_vdst_vsrc_war |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr5 |
| ; CHECK-LABEL: name: async_global_to_lds_vdst_vsrc_war |
| ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr5 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: S_SETREG_IMM32_B32 2, 2074, implicit-def $mode, implicit $mode |
| ; CHECK-NEXT: S_WAIT_LOADCNT_DSCNT .Loadcnt_0_Dscnt_0 |
| ; CHECK-NEXT: S_WAIT_KMCNT 0 |
| ; CHECK-NEXT: GLOBAL_LOAD_ASYNC_TO_LDS_B32 $vgpr2, $vgpr0_vgpr1, 0, 0, implicit-def $asynccnt, implicit $exec, implicit $asynccnt :: (load (s32), addrspace 1), (store (s32), addrspace 3) |
| ; CHECK-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 |
| ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 1, implicit $exec |
| ; CHECK-NEXT: S_WAITCNT_DEPCTR .VaVdst_0 |
| ; CHECK-NEXT: GLOBAL_STORE_DWORD $vgpr4_vgpr5, $vgpr2, 0, 0, implicit $exec :: (store (s32), addrspace 1) |
| GLOBAL_LOAD_ASYNC_TO_LDS_B32 $vgpr2, $vgpr0_vgpr1, 0, 0, implicit-def $asynccnt, implicit $exec, implicit $asynccnt :: (load (s32), addrspace 1), (store (s32), addrspace 3) |
| $vgpr2 = V_MOV_B32_e32 1, implicit $exec |
| GLOBAL_STORE_DWORD $vgpr4_vgpr5, $vgpr2, 0, 0, implicit $exec :: (store (s32), addrspace 1) |
| ... |
| |
| --- |
| # Write-after-read case for a plain global load: GLOBAL_LOAD_DWORD reads |
| # $vgpr0_vgpr1 and V_MOV_B32 then overwrites $vgpr0. |
| # The checks expect S_WAIT_XCNT 0 followed by S_WAITCNT_DEPCTR .VmVsrc_0 |
| # between the load and the V_MOV. |
| name: global_load_vsrc_war |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1 |
| ; CHECK-LABEL: name: global_load_vsrc_war |
| ; CHECK: liveins: $vgpr0, $vgpr1 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: S_SETREG_IMM32_B32 2, 2074, implicit-def $mode, implicit $mode |
| ; CHECK-NEXT: S_WAIT_LOADCNT_DSCNT .Loadcnt_0_Dscnt_0 |
| ; CHECK-NEXT: S_WAIT_KMCNT 0 |
| ; CHECK-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s32), addrspace 1) |
| ; CHECK-NEXT: S_WAIT_XCNT 0 |
| ; CHECK-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 |
| ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 1, implicit $exec |
| $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s32), addrspace 1) |
| $vgpr0 = V_MOV_B32_e32 1, implicit $exec |
| ... |
| |
| --- |
| # Write-after-read case for a buffer load: BUFFER_LOAD_DWORD_ADDR64 reads |
| # $vgpr0_vgpr1 (and $sgpr0_sgpr1_sgpr2_sgpr3) and V_MOV_B32 then overwrites |
| # $vgpr0. |
| # The checks expect S_WAIT_XCNT 0 followed by S_WAITCNT_DEPCTR .VmVsrc_0 |
| # between the load and the V_MOV. |
| name: buffer_load_vsrc_war |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; CHECK-LABEL: name: buffer_load_vsrc_war |
| ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: S_SETREG_IMM32_B32 2, 2074, implicit-def $mode, implicit $mode |
| ; CHECK-NEXT: S_WAIT_LOADCNT_DSCNT .Loadcnt_0_Dscnt_0 |
| ; CHECK-NEXT: S_WAIT_KMCNT 0 |
| ; CHECK-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_ADDR64 $vgpr0_vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec |
| ; CHECK-NEXT: S_WAIT_XCNT 0 |
| ; CHECK-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0 |
| ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 1, implicit $exec |
| $vgpr2 = BUFFER_LOAD_DWORD_ADDR64 $vgpr0_vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec |
| $vgpr0 = V_MOV_B32_e32 1, implicit $exec |
| ... |