| # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 | 
 | # RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass si-insert-waitcnts %s -o - | FileCheck -check-prefix=GFX9 %s | 
 | # RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass si-insert-waitcnts %s -o - | FileCheck -check-prefix=GFX12 %s | 
 |  | 
 | # The two stores below write out $vgpr64 and $vgpr65 and additionally carry | 
 | # implicit (def/use) operands for the whole $vgpr64_vgpr65 tuple. Those | 
 | # implicit tuple operands must not trigger a wait: | 
 | # there shall be no S_WAITCNT between the two stores. | 
 |  | 
 | --- | 
 | name: spill_vgpr_tuple | 
 |  | 
 | body: | | 
 |   bb.0: | 
 |     liveins: $vgpr0_vgpr1, $sgpr76_sgpr77_sgpr78_sgpr79 | 
 |  | 
 |     ; GFX9-LABEL: name: spill_vgpr_tuple | 
 |     ; GFX9: liveins: $vgpr0_vgpr1, $sgpr76_sgpr77_sgpr78_sgpr79 | 
 |     ; GFX9-NEXT: {{  $}} | 
 |     ; GFX9-NEXT: S_WAITCNT 0 | 
 |     ; GFX9-NEXT: $vgpr64_vgpr65 = V_MOV_B64_e32 $vgpr0_vgpr1, implicit $exec | 
 |     ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr64, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 672, 0, 0, implicit $exec, implicit-def $vgpr64_vgpr65, implicit $vgpr64_vgpr65 | 
 |     ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr65, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 676, 0, 0, implicit $exec, implicit $vgpr64_vgpr65 | 
 |     ; GFX9-NEXT: S_ENDPGM 0 | 
 |     ; | 
 |     ; GFX12-LABEL: name: spill_vgpr_tuple | 
 |     ; GFX12: liveins: $vgpr0_vgpr1, $sgpr76_sgpr77_sgpr78_sgpr79 | 
 |     ; GFX12-NEXT: {{  $}} | 
 |     ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0 | 
 |     ; GFX12-NEXT: S_WAIT_EXPCNT 0 | 
 |     ; GFX12-NEXT: S_WAIT_SAMPLECNT 0 | 
 |     ; GFX12-NEXT: S_WAIT_BVHCNT 0 | 
 |     ; GFX12-NEXT: S_WAIT_KMCNT 0 | 
 |     ; GFX12-NEXT: $vgpr64_vgpr65 = V_MOV_B64_e32 $vgpr0_vgpr1, implicit $exec | 
 |     ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr64, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 672, 0, 0, implicit $exec, implicit-def $vgpr64_vgpr65, implicit $vgpr64_vgpr65 | 
 |     ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr65, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 676, 0, 0, implicit $exec, implicit $vgpr64_vgpr65 | 
 |     ; GFX12-NEXT: S_ENDPGM 0 | 
 |     $vgpr64_vgpr65 = V_MOV_B64_e32 $vgpr0_vgpr1, implicit $exec | 
 |     BUFFER_STORE_DWORD_OFFSET killed $vgpr64, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 672, 0, 0, implicit $exec, implicit-def $vgpr64_vgpr65, implicit $vgpr64_vgpr65 | 
 |     BUFFER_STORE_DWORD_OFFSET $vgpr65, $sgpr76_sgpr77_sgpr78_sgpr79, 0, 676, 0, 0, implicit $exec, implicit $vgpr64_vgpr65 | 
 |     S_ENDPGM 0 | 
 | ... | 
 |  | 
 | # Make sure that while ignoring implicit operands we will not ignore implicit $vcc on VALU. | 
 | # Here S_LOAD_DWORD_IMM writes $vcc_lo and V_ADDC_U32_e32 reads $vcc only as an | 
 | # implicit operand, so a wait is expected between the two instructions | 
 | # (S_WAITCNT 49279 on GFX9, S_WAIT_KMCNT 0 on GFX12). | 
 |  | 
 | --- | 
 | name: load_vcc_wait | 
 |  | 
 | body: | | 
 |   bb.0: | 
 |     liveins: $vgpr0, $sgpr10_sgpr11 | 
 |  | 
 |     ; GFX9-LABEL: name: load_vcc_wait | 
 |     ; GFX9: liveins: $vgpr0, $sgpr10_sgpr11 | 
 |     ; GFX9-NEXT: {{  $}} | 
 |     ; GFX9-NEXT: S_WAITCNT 0 | 
 |     ; GFX9-NEXT: $vcc_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 | 
 |     ; GFX9-NEXT: S_WAITCNT 49279 | 
 |     ; GFX9-NEXT: $vgpr1 = V_ADDC_U32_e32 0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec | 
 |     ; GFX9-NEXT: S_ENDPGM 0 | 
 |     ; | 
 |     ; GFX12-LABEL: name: load_vcc_wait | 
 |     ; GFX12: liveins: $vgpr0, $sgpr10_sgpr11 | 
 |     ; GFX12-NEXT: {{  $}} | 
 |     ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0 | 
 |     ; GFX12-NEXT: S_WAIT_EXPCNT 0 | 
 |     ; GFX12-NEXT: S_WAIT_SAMPLECNT 0 | 
 |     ; GFX12-NEXT: S_WAIT_BVHCNT 0 | 
 |     ; GFX12-NEXT: S_WAIT_KMCNT 0 | 
 |     ; GFX12-NEXT: $vcc_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 | 
 |     ; GFX12-NEXT: S_WAIT_KMCNT 0 | 
 |     ; GFX12-NEXT: $vgpr1 = V_ADDC_U32_e32 0, $vgpr0, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec | 
 |     ; GFX12-NEXT: S_ENDPGM 0 | 
 |     $vcc_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 | 
 |     $vgpr1 = V_ADDC_U32_e32 0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec | 
 |     S_ENDPGM 0 | 
 | ... | 
 |  | 
 | # Make sure that while ignoring implicit operands we will not ignore implicit $flat_scr on FLAT. | 
 | # Here S_LOAD_DWORD_IMM writes $flat_scr_lo and FLAT_LOAD_DWORD reads $flat_scr only | 
 | # as an implicit operand, so a wait is expected between the two instructions | 
 | # (S_WAITCNT 49279 on GFX9, S_WAIT_KMCNT 0 on GFX12). | 
 |  | 
 | --- | 
 | name: load_flat_scr_lo_flat_load_wait | 
 |  | 
 | body: | | 
 |   bb.0: | 
 |     liveins: $sgpr10_sgpr11, $vgpr0_vgpr1 | 
 |  | 
 |     ; GFX9-LABEL: name: load_flat_scr_lo_flat_load_wait | 
 |     ; GFX9: liveins: $sgpr10_sgpr11, $vgpr0_vgpr1 | 
 |     ; GFX9-NEXT: {{  $}} | 
 |     ; GFX9-NEXT: S_WAITCNT 0 | 
 |     ; GFX9-NEXT: $flat_scr_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 | 
 |     ; GFX9-NEXT: S_WAITCNT 49279 | 
 |     ; GFX9-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr | 
 |     ; GFX9-NEXT: S_ENDPGM 0 | 
 |     ; | 
 |     ; GFX12-LABEL: name: load_flat_scr_lo_flat_load_wait | 
 |     ; GFX12: liveins: $sgpr10_sgpr11, $vgpr0_vgpr1 | 
 |     ; GFX12-NEXT: {{  $}} | 
 |     ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0 | 
 |     ; GFX12-NEXT: S_WAIT_EXPCNT 0 | 
 |     ; GFX12-NEXT: S_WAIT_SAMPLECNT 0 | 
 |     ; GFX12-NEXT: S_WAIT_BVHCNT 0 | 
 |     ; GFX12-NEXT: S_WAIT_KMCNT 0 | 
 |     ; GFX12-NEXT: $flat_scr_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 | 
 |     ; GFX12-NEXT: S_WAIT_KMCNT 0 | 
 |     ; GFX12-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr | 
 |     ; GFX12-NEXT: S_ENDPGM 0 | 
 |     $flat_scr_lo = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 | 
 |     $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr | 
 |     S_ENDPGM 0 | 
 | ... | 
 |  | 
 | # Make sure that while ignoring implicit operands we will not ignore implicit $flat_scr on SCRATCH. | 
 | # Here S_LOAD_DWORD_IMM writes $flat_scr_hi and SCRATCH_STORE_DWORD_SADDR reads $flat_scr | 
 | # only as an implicit operand, so a wait is expected between the two instructions. | 
 | # NOTE(review): the test name says `_lo` but the body loads $flat_scr_hi; the name is | 
 | # kept as is because the autogenerated -LABEL checks refer to it. | 
 |  | 
 | --- | 
 | name: load_flat_scr_lo_scratch_store_wait | 
 |  | 
 | body: | | 
 |   bb.0: | 
 |     liveins: $sgpr10_sgpr11, $vgpr0, $sgpr32 | 
 |  | 
 |     ; GFX9-LABEL: name: load_flat_scr_lo_scratch_store_wait | 
 |     ; GFX9: liveins: $sgpr10_sgpr11, $vgpr0, $sgpr32 | 
 |     ; GFX9-NEXT: {{  $}} | 
 |     ; GFX9-NEXT: S_WAITCNT 0 | 
 |     ; GFX9-NEXT: $flat_scr_hi = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 | 
 |     ; GFX9-NEXT: S_WAITCNT 49279 | 
 |     ; GFX9-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr | 
 |     ; GFX9-NEXT: S_ENDPGM 0 | 
 |     ; | 
 |     ; GFX12-LABEL: name: load_flat_scr_lo_scratch_store_wait | 
 |     ; GFX12: liveins: $sgpr10_sgpr11, $vgpr0, $sgpr32 | 
 |     ; GFX12-NEXT: {{  $}} | 
 |     ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0 | 
 |     ; GFX12-NEXT: S_WAIT_EXPCNT 0 | 
 |     ; GFX12-NEXT: S_WAIT_SAMPLECNT 0 | 
 |     ; GFX12-NEXT: S_WAIT_BVHCNT 0 | 
 |     ; GFX12-NEXT: S_WAIT_KMCNT 0 | 
 |     ; GFX12-NEXT: $flat_scr_hi = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 | 
 |     ; GFX12-NEXT: S_WAIT_KMCNT 0 | 
 |     ; GFX12-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr | 
 |     ; GFX12-NEXT: S_ENDPGM 0 | 
 |     $flat_scr_hi = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0 | 
 |     SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr | 
 |     S_ENDPGM 0 | 
 | ... | 
 |  | 
 | # Check that implicit spill defs do not force wait to zero on the first store. | 
 | # Four loads fill $vgpr0..$vgpr3 and four stores write them back; the first and last | 
 | # instruction of each group also carry implicit $vgpr0_vgpr1_vgpr2_vgpr3 operands. | 
 | # The first store must not wait for all four loads because of those implicit tuple | 
 | # operands (expected: S_WAITCNT 3955 on GFX9, S_WAIT_LOADCNT 3 on GFX12). | 
 |  | 
 | --- | 
 | name: spill_load_store | 
 |  | 
 | body: | | 
 |   bb.0: | 
 |     liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32 | 
 |  | 
 |     ; GFX9-LABEL: name: spill_load_store | 
 |     ; GFX9: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32 | 
 |     ; GFX9-NEXT: {{  $}} | 
 |     ; GFX9-NEXT: S_WAITCNT 0 | 
 |     ; GFX9-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 | 
 |     ; GFX9-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec | 
 |     ; GFX9-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec | 
 |     ; GFX9-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 | 
 |     ; GFX9-NEXT: S_WAITCNT 3955 | 
 |     ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 | 
 |     ; GFX9-NEXT: S_WAITCNT 3955 | 
 |     ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec | 
 |     ; GFX9-NEXT: S_WAITCNT 3955 | 
 |     ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec | 
 |     ; GFX9-NEXT: S_WAITCNT 3955 | 
 |     ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 | 
 |     ; GFX9-NEXT: S_ENDPGM 0 | 
 |     ; | 
 |     ; GFX12-LABEL: name: spill_load_store | 
 |     ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32 | 
 |     ; GFX12-NEXT: {{  $}} | 
 |     ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0 | 
 |     ; GFX12-NEXT: S_WAIT_EXPCNT 0 | 
 |     ; GFX12-NEXT: S_WAIT_SAMPLECNT 0 | 
 |     ; GFX12-NEXT: S_WAIT_BVHCNT 0 | 
 |     ; GFX12-NEXT: S_WAIT_KMCNT 0 | 
 |     ; GFX12-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 | 
 |     ; GFX12-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec | 
 |     ; GFX12-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec | 
 |     ; GFX12-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 | 
 |     ; GFX12-NEXT: S_WAIT_LOADCNT 3 | 
 |     ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 | 
 |     ; GFX12-NEXT: S_WAIT_LOADCNT 2 | 
 |     ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec | 
 |     ; GFX12-NEXT: S_WAIT_LOADCNT 1 | 
 |     ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec | 
 |     ; GFX12-NEXT: S_WAIT_LOADCNT 0 | 
 |     ; GFX12-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 | 
 |     ; GFX12-NEXT: S_ENDPGM 0 | 
 |     $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 | 
 |     $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec | 
 |     $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec | 
 |     $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 | 
 |     BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 | 
 |     BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec | 
 |     BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec | 
 |     BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 | 
 |     S_ENDPGM 0 | 
 | ... | 
 |  | 
 | # Make sure we have a wait to mitigate the write-after-write (WAW) on gfx12. | 
 | # Both scratch loads below write $vgpr2. On GFX12 an S_WAIT_LOADCNT 0 is expected | 
 | # between them; on GFX9 no wait is expected between the two loads. | 
 |  | 
 | --- | 
 | name: scratch_load_waw | 
 | body:             | | 
 |   bb.0.entry: | 
 |     liveins: $vgpr0, $sgpr0 | 
 |  | 
 |     ; GFX9-LABEL: name: scratch_load_waw | 
 |     ; GFX9: liveins: $vgpr0, $sgpr0 | 
 |     ; GFX9-NEXT: {{  $}} | 
 |     ; GFX9-NEXT: S_WAITCNT 0 | 
 |     ; GFX9-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr | 
 |     ; GFX9-NEXT: $vgpr2 = SCRATCH_LOAD_SHORT_D16_HI_SADDR $sgpr0, 0, 0, $vgpr2, implicit $exec, implicit $flat_scr | 
 |     ; GFX9-NEXT: S_ENDPGM 0 | 
 |     ; | 
 |     ; GFX12-LABEL: name: scratch_load_waw | 
 |     ; GFX12: liveins: $vgpr0, $sgpr0 | 
 |     ; GFX12-NEXT: {{  $}} | 
 |     ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0 | 
 |     ; GFX12-NEXT: S_WAIT_EXPCNT 0 | 
 |     ; GFX12-NEXT: S_WAIT_SAMPLECNT 0 | 
 |     ; GFX12-NEXT: S_WAIT_BVHCNT 0 | 
 |     ; GFX12-NEXT: S_WAIT_KMCNT 0 | 
 |     ; GFX12-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr | 
 |     ; GFX12-NEXT: S_WAIT_LOADCNT 0 | 
 |     ; GFX12-NEXT: $vgpr2 = SCRATCH_LOAD_SHORT_D16_HI_SADDR $sgpr0, 0, 0, $vgpr2, implicit $exec, implicit $flat_scr | 
 |     ; GFX12-NEXT: S_ENDPGM 0 | 
 |     $vgpr2 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr | 
 |     $vgpr2 = SCRATCH_LOAD_SHORT_D16_HI_SADDR $sgpr0, 0, 0, $vgpr2, implicit $exec, implicit $flat_scr | 
 |     S_ENDPGM 0 | 
 | ... | 