blob: 56075905231bd712dafee45aff1e0892c16536f8 [file] [edit]
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -amdgpu-expert-scheduling-mode -run-pass=si-insert-waitcnts %s -o - | FileCheck %s
---
# Three back-to-back WMMA instructions (a twoaddr FP8 WMMA, a threeaddr
# WMMA_SCALE, and another twoaddr FP8 WMMA) define $vgpr8-15, $vgpr16-23 and
# $vgpr40-47. Three GLOBAL_LOAD_DWORDs then read $vgpr8_vgpr9, $vgpr16_vgpr17
# and $vgpr40_vgpr41 respectively, i.e. registers written by the first, second
# and third WMMA.
#
# The checks expect an S_WAITCNT_DEPCTR before each load with a decreasing
# VaVdst value: .VaVdst_3 before the first, .VaVdst_1 before the second and
# .VaVdst_0 before the third. All three loads define $vgpr64, and the checks
# also expect an S_WAIT_LOADCNT 0 before the second and third load.
#
# As in every function in this file, the expected output starts with
# S_SETREG_IMM32_B32 2, 2074 followed by S_WAIT_LOADCNT_DSCNT and S_WAIT_KMCNT
# (presumably the -amdgpu-expert-scheduling-mode prologue; confirm against the
# pass).
name: wmma_scale
body: |
bb.0:
; CHECK-LABEL: name: wmma_scale
; CHECK: S_SETREG_IMM32_B32 2, 2074, implicit-def $mode, implicit $mode
; CHECK-NEXT: S_WAIT_LOADCNT_DSCNT .Loadcnt_0_Dscnt_0
; CHECK-NEXT: S_WAIT_KMCNT 0
; CHECK-NEXT: early-clobber $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_WMMA_F32_16X16X64_FP8_FP8_w32_twoaddr $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, 0, implicit $exec
; CHECK-NEXT: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr16, $vgpr16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
; CHECK-NEXT: early-clobber $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 = V_WMMA_F32_16X16X64_FP8_FP8_w32_twoaddr $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, 8, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 0, 0, 0, 0, implicit $exec
; CHECK-NEXT: S_WAITCNT_DEPCTR .VaVdst_3
; CHECK-NEXT: $vgpr64 = GLOBAL_LOAD_DWORD $vgpr8_vgpr9, 0, 0, implicit $exec
; CHECK-NEXT: S_WAIT_LOADCNT 0
; CHECK-NEXT: S_WAITCNT_DEPCTR .VaVdst_1
; CHECK-NEXT: $vgpr64 = GLOBAL_LOAD_DWORD $vgpr16_vgpr17, 0, 0, implicit $exec
; CHECK-NEXT: S_WAIT_LOADCNT 0
; CHECK-NEXT: S_WAITCNT_DEPCTR .VaVdst_0
; CHECK-NEXT: $vgpr64 = GLOBAL_LOAD_DWORD $vgpr40_vgpr41, 0, 0, implicit $exec
$vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_WMMA_F32_16X16X64_FP8_FP8_w32_twoaddr $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, 0, implicit $exec
$vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_threeaddr $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr16, $vgpr16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
$vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 = V_WMMA_F32_16X16X64_FP8_FP8_w32_twoaddr $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, 8, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 0, 0, 0, 0, implicit $exec
$vgpr64 = GLOBAL_LOAD_DWORD $vgpr8_vgpr9, 0, 0, implicit $exec
$vgpr64 = GLOBAL_LOAD_DWORD $vgpr16_vgpr17, 0, 0, implicit $exec
$vgpr64 = GLOBAL_LOAD_DWORD $vgpr40_vgpr41, 0, 0, implicit $exec
...
---
# Read-after-write on a register used by an async global-to-LDS load:
# V_MOV_B32 defines $vgpr0, and the following GLOBAL_LOAD_ASYNC_TO_LDS_B32
# reads it as part of $vgpr0_vgpr1 (presumably the address operand, per the
# "vaddr" in the test name; confirm against the instruction definition).
#
# The checks expect an S_WAITCNT_DEPCTR .VaVdst_0 between the V_MOV and the
# async load.
name: async_global_to_lds_vaddr_raw
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr1, $vgpr2
; CHECK-LABEL: name: async_global_to_lds_vaddr_raw
; CHECK: liveins: $vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_SETREG_IMM32_B32 2, 2074, implicit-def $mode, implicit $mode
; CHECK-NEXT: S_WAIT_LOADCNT_DSCNT .Loadcnt_0_Dscnt_0
; CHECK-NEXT: S_WAIT_KMCNT 0
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 1, implicit $exec
; CHECK-NEXT: S_WAITCNT_DEPCTR .VaVdst_0
; CHECK-NEXT: GLOBAL_LOAD_ASYNC_TO_LDS_B32 $vgpr2, $vgpr0_vgpr1, 0, 0, implicit-def $asynccnt, implicit $exec, implicit $asynccnt :: (load (s32), addrspace 1), (store (s32), addrspace 3)
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
GLOBAL_LOAD_ASYNC_TO_LDS_B32 $vgpr2, $vgpr0_vgpr1, 0, 0, implicit-def $asynccnt, implicit $exec, implicit $asynccnt :: (load (s32), addrspace 1), (store (s32), addrspace 3)
...
---
# Write-after-read on a register read by an async global-to-LDS load:
# GLOBAL_LOAD_ASYNC_TO_LDS_B32 reads $vgpr0_vgpr1 (its second operand), and
# the following V_MOV_B32 overwrites $vgpr0. A GLOBAL_STORE_DWORD then reads
# the $vgpr0 value written by the V_MOV.
#
# The checks expect:
#   - S_WAITCNT_DEPCTR .VmVsrc_0 between the async load and the V_MOV, and
#   - S_WAITCNT_DEPCTR .VaVdst_0 between the V_MOV and the store.
name: async_global_to_lds_vaddr_vsrc_war
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr5
; CHECK-LABEL: name: async_global_to_lds_vaddr_vsrc_war
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_SETREG_IMM32_B32 2, 2074, implicit-def $mode, implicit $mode
; CHECK-NEXT: S_WAIT_LOADCNT_DSCNT .Loadcnt_0_Dscnt_0
; CHECK-NEXT: S_WAIT_KMCNT 0
; CHECK-NEXT: GLOBAL_LOAD_ASYNC_TO_LDS_B32 $vgpr2, $vgpr0_vgpr1, 0, 0, implicit-def $asynccnt, implicit $exec, implicit $asynccnt :: (load (s32), addrspace 1), (store (s32), addrspace 3)
; CHECK-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 1, implicit $exec
; CHECK-NEXT: S_WAITCNT_DEPCTR .VaVdst_0
; CHECK-NEXT: GLOBAL_STORE_DWORD $vgpr4_vgpr5, $vgpr0, 0, 0, implicit $exec :: (store (s32), addrspace 1)
GLOBAL_LOAD_ASYNC_TO_LDS_B32 $vgpr2, $vgpr0_vgpr1, 0, 0, implicit-def $asynccnt, implicit $exec, implicit $asynccnt :: (load (s32), addrspace 1), (store (s32), addrspace 3)
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
GLOBAL_STORE_DWORD $vgpr4_vgpr5, $vgpr0, 0, 0, implicit $exec :: (store (s32), addrspace 1)
...
---
# Same shape as async_global_to_lds_vaddr_vsrc_war, but the register that is
# overwritten is $vgpr2, the first operand of GLOBAL_LOAD_ASYNC_TO_LDS_B32
# (presumably the LDS destination address, per the "vdst" in the test name;
# confirm against the instruction definition). The V_MOV_B32 redefines $vgpr2
# after the async load has read it, and a GLOBAL_STORE_DWORD then reads the
# new $vgpr2 value.
#
# The checks expect:
#   - S_WAITCNT_DEPCTR .VmVsrc_0 between the async load and the V_MOV, and
#   - S_WAITCNT_DEPCTR .VaVdst_0 between the V_MOV and the store.
name: async_global_to_lds_vdst_vsrc_war
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr5
; CHECK-LABEL: name: async_global_to_lds_vdst_vsrc_war
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr4, $vgpr5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_SETREG_IMM32_B32 2, 2074, implicit-def $mode, implicit $mode
; CHECK-NEXT: S_WAIT_LOADCNT_DSCNT .Loadcnt_0_Dscnt_0
; CHECK-NEXT: S_WAIT_KMCNT 0
; CHECK-NEXT: GLOBAL_LOAD_ASYNC_TO_LDS_B32 $vgpr2, $vgpr0_vgpr1, 0, 0, implicit-def $asynccnt, implicit $exec, implicit $asynccnt :: (load (s32), addrspace 1), (store (s32), addrspace 3)
; CHECK-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0
; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 1, implicit $exec
; CHECK-NEXT: S_WAITCNT_DEPCTR .VaVdst_0
; CHECK-NEXT: GLOBAL_STORE_DWORD $vgpr4_vgpr5, $vgpr2, 0, 0, implicit $exec :: (store (s32), addrspace 1)
GLOBAL_LOAD_ASYNC_TO_LDS_B32 $vgpr2, $vgpr0_vgpr1, 0, 0, implicit-def $asynccnt, implicit $exec, implicit $asynccnt :: (load (s32), addrspace 1), (store (s32), addrspace 3)
$vgpr2 = V_MOV_B32_e32 1, implicit $exec
GLOBAL_STORE_DWORD $vgpr4_vgpr5, $vgpr2, 0, 0, implicit $exec :: (store (s32), addrspace 1)
...
---
# Write-after-read on a register read by a regular global load:
# GLOBAL_LOAD_DWORD reads $vgpr0_vgpr1 and the following V_MOV_B32 overwrites
# $vgpr0.
#
# The checks expect both an S_WAIT_XCNT 0 and an S_WAITCNT_DEPCTR .VmVsrc_0
# between the load and the V_MOV.
name: global_load_vsrc_war
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1
; CHECK-LABEL: name: global_load_vsrc_war
; CHECK: liveins: $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_SETREG_IMM32_B32 2, 2074, implicit-def $mode, implicit $mode
; CHECK-NEXT: S_WAIT_LOADCNT_DSCNT .Loadcnt_0_Dscnt_0
; CHECK-NEXT: S_WAIT_KMCNT 0
; CHECK-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s32), addrspace 1)
; CHECK-NEXT: S_WAIT_XCNT 0
; CHECK-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 1, implicit $exec
$vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s32), addrspace 1)
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
...
---
# Write-after-read on a register read by a buffer load:
# BUFFER_LOAD_DWORD_ADDR64 reads $vgpr0_vgpr1 (plus the $sgpr0-3 operand) and
# the following V_MOV_B32 overwrites $vgpr0. This is the buffer-instruction
# counterpart of global_load_vsrc_war.
#
# The checks expect both an S_WAIT_XCNT 0 and an S_WAITCNT_DEPCTR .VmVsrc_0
# between the load and the V_MOV.
name: buffer_load_vsrc_war
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-LABEL: name: buffer_load_vsrc_war
; CHECK: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_SETREG_IMM32_B32 2, 2074, implicit-def $mode, implicit $mode
; CHECK-NEXT: S_WAIT_LOADCNT_DSCNT .Loadcnt_0_Dscnt_0
; CHECK-NEXT: S_WAIT_KMCNT 0
; CHECK-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_ADDR64 $vgpr0_vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
; CHECK-NEXT: S_WAIT_XCNT 0
; CHECK-NEXT: S_WAITCNT_DEPCTR .VmVsrc_0
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 1, implicit $exec
$vgpr2 = BUFFER_LOAD_DWORD_ADDR64 $vgpr0_vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
...