# blob: dea8bbed4869dac6b75b829227f0b86f6b059513 [file] [edit]
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass=machine-scheduler -amdgpu-sched-strategy=coexec -verify-misched %s -o - | FileCheck -check-prefix=DEFAULT %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass=machine-scheduler -verify-misched %s -o - | FileCheck -check-prefix=MAXOCC %s
# Embedded LLVM IR module: one empty stub definition (body is just `ret void`)
# for each machine function named in this module block.
# Every stub references attribute group #0, which sets
# "amdgpu-waves-per-eu"="1,1".
--- |
define void @tensor_load_tensor_load() #0 { ret void }
define void @tensor_wait_tensor_load() #0 { ret void }
define void @tensor_wait_tensor_wait() #0 { ret void }
define void @tensor_load_tensor_wait() #0 { ret void }
define void @tensor_load_tensor_wait1() #0 { ret void }
define void @tensor_load_tensor_wait2() #0 { ret void }
define void @async_load_async_load() #0 { ret void }
define void @async_wait_async_load() #0 { ret void }
define void @async_wait_async_wait() #0 { ret void }
define void @async_load_async_wait() #0 { ret void }
define void @async_load_async_wait1() #0 { ret void }
define void @async_load_async_wait2() #0 { ret void }
attributes #0 = { "amdgpu-waves-per-eu"="1,1" }
...
# tensor_load_tensor_load: two back-to-back TENSOR_LOAD_TO_LDS_d2 followed by
# six V_ADD_U32_e32 that read only %2 and %3.
# Prefixes (per the run lines at the top of the file): DEFAULT checks the run
# with -amdgpu-sched-strategy=coexec, MAXOCC checks the run without that flag.
# Expected order is identical for both prefixes: the SGPR IMPLICIT_DEFs, the
# two loads kept adjacent, the VGPR IMPLICIT_DEFs, the six adds, S_ENDPGM.
# NOTE(review): %4 is the destination of two consecutive V_ADD_U32_e32 in the
# input -- confirm the repeated destination is intentional.
---
name: tensor_load_tensor_load
tracksRegLiveness: true
body: |
bb.0:
; DEFAULT-LABEL: name: tensor_load_tensor_load
; DEFAULT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF1:%[0-9]+]]:sgpr_256 = IMPLICIT_DEF
; DEFAULT-NEXT: TENSOR_LOAD_TO_LDS_d2 [[DEF]], [[DEF1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
; DEFAULT-NEXT: TENSOR_LOAD_TO_LDS_d2 [[DEF]], [[DEF1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
; DEFAULT-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: S_ENDPGM 0
;
; MAXOCC-LABEL: name: tensor_load_tensor_load
; MAXOCC: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
; MAXOCC-NEXT: [[DEF1:%[0-9]+]]:sgpr_256 = IMPLICIT_DEF
; MAXOCC-NEXT: TENSOR_LOAD_TO_LDS_d2 [[DEF]], [[DEF1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
; MAXOCC-NEXT: TENSOR_LOAD_TO_LDS_d2 [[DEF]], [[DEF1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
; MAXOCC-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; MAXOCC-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; MAXOCC-NEXT: S_ENDPGM 0
%0:sgpr_128 = IMPLICIT_DEF
%1:sgpr_256 = IMPLICIT_DEF
%2:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = IMPLICIT_DEF
TENSOR_LOAD_TO_LDS_d2 %0:sgpr_128, %1:sgpr_256, 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
TENSOR_LOAD_TO_LDS_d2 %0:sgpr_128, %1:sgpr_256, 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
%4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%5:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%6:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%7:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%8:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
S_ENDPGM 0
...
# tensor_wait_tensor_load: S_WAIT_TENSORCNT 3, then one TENSOR_LOAD_TO_LDS_d2,
# then six V_ADD_U32_e32 that read only %2 and %3.
# Prefixes (per the run lines at the top of the file): DEFAULT checks the run
# with -amdgpu-sched-strategy=coexec, MAXOCC checks the run without that flag.
# Expected with the DEFAULT prefix: the wait is the first instruction, then
# the SGPR IMPLICIT_DEFs, the load, the VGPR IMPLICIT_DEFs and the adds.
# Expected with the MAXOCC prefix: the SGPR IMPLICIT_DEFs come first and the
# wait sits directly in front of the load.
# In both expectations the wait stays ahead of the load.
# NOTE(review): %4 is the destination of two consecutive V_ADD_U32_e32 in the
# input -- confirm the repeated destination is intentional.
---
name: tensor_wait_tensor_load
tracksRegLiveness: true
body: |
bb.0:
; DEFAULT-LABEL: name: tensor_wait_tensor_load
; DEFAULT: S_WAIT_TENSORCNT 3, implicit-def dead $tensorcnt, implicit $tensorcnt
; DEFAULT-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF1:%[0-9]+]]:sgpr_256 = IMPLICIT_DEF
; DEFAULT-NEXT: TENSOR_LOAD_TO_LDS_d2 [[DEF]], [[DEF1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
; DEFAULT-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: S_ENDPGM 0
;
; MAXOCC-LABEL: name: tensor_wait_tensor_load
; MAXOCC: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
; MAXOCC-NEXT: [[DEF1:%[0-9]+]]:sgpr_256 = IMPLICIT_DEF
; MAXOCC-NEXT: S_WAIT_TENSORCNT 3, implicit-def dead $tensorcnt, implicit $tensorcnt
; MAXOCC-NEXT: TENSOR_LOAD_TO_LDS_d2 [[DEF]], [[DEF1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
; MAXOCC-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; MAXOCC-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; MAXOCC-NEXT: S_ENDPGM 0
%0:sgpr_128 = IMPLICIT_DEF
%1:sgpr_256 = IMPLICIT_DEF
%2:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = IMPLICIT_DEF
S_WAIT_TENSORCNT 3, implicit-def dead $tensorcnt, implicit $tensorcnt
TENSOR_LOAD_TO_LDS_d2 %0:sgpr_128, %1:sgpr_256, 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
%4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%5:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%6:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%7:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%8:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
S_ENDPGM 0
...
# tensor_wait_tensor_wait: two consecutive S_WAIT_TENSORCNT 3 with no tensor
# load, followed by six V_ADD_U32_e32 that read only %2 and %3. The SGPR
# IMPLICIT_DEFs %0 and %1 have no users here and are expected to be marked dead.
# Prefixes (per the run lines at the top of the file): DEFAULT checks the run
# with -amdgpu-sched-strategy=coexec, MAXOCC checks the run without that flag.
# Expected with the DEFAULT prefix: both waits first, then the IMPLICIT_DEFs
# and the adds.
# Expected with the MAXOCC prefix: the VGPR IMPLICIT_DEFs and the adds first,
# then both waits and the dead SGPR IMPLICIT_DEFs right before S_ENDPGM.
# NOTE(review): %4 is the destination of two consecutive V_ADD_U32_e32 in the
# input -- confirm the repeated destination is intentional.
---
name: tensor_wait_tensor_wait
tracksRegLiveness: true
body: |
bb.0:
; DEFAULT-LABEL: name: tensor_wait_tensor_wait
; DEFAULT: S_WAIT_TENSORCNT 3, implicit-def dead $tensorcnt, implicit $tensorcnt
; DEFAULT-NEXT: S_WAIT_TENSORCNT 3, implicit-def dead $tensorcnt, implicit $tensorcnt
; DEFAULT-NEXT: dead [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
; DEFAULT-NEXT: dead [[DEF1:%[0-9]+]]:sgpr_256 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: S_ENDPGM 0
;
; MAXOCC-LABEL: name: tensor_wait_tensor_wait
; MAXOCC: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; MAXOCC-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF]], [[DEF1]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF]], [[DEF1]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF]], [[DEF1]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF]], [[DEF1]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF]], [[DEF1]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF]], [[DEF1]], implicit $exec
; MAXOCC-NEXT: S_WAIT_TENSORCNT 3, implicit-def dead $tensorcnt, implicit $tensorcnt
; MAXOCC-NEXT: S_WAIT_TENSORCNT 3, implicit-def dead $tensorcnt, implicit $tensorcnt
; MAXOCC-NEXT: dead [[DEF2:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
; MAXOCC-NEXT: dead [[DEF3:%[0-9]+]]:sgpr_256 = IMPLICIT_DEF
; MAXOCC-NEXT: S_ENDPGM 0
%0:sgpr_128 = IMPLICIT_DEF
%1:sgpr_256 = IMPLICIT_DEF
%2:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = IMPLICIT_DEF
S_WAIT_TENSORCNT 3, implicit-def dead $tensorcnt, implicit $tensorcnt
S_WAIT_TENSORCNT 3, implicit-def dead $tensorcnt, implicit $tensorcnt
%4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%5:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%6:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%7:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%8:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
S_ENDPGM 0
...
# tensor_load_tensor_wait: one TENSOR_LOAD_TO_LDS_d2 immediately followed by
# S_WAIT_TENSORCNT 3, then six V_ADD_U32_e32 that read only %2 and %3.
# Prefixes (per the run lines at the top of the file): DEFAULT checks the run
# with -amdgpu-sched-strategy=coexec, MAXOCC checks the run without that flag.
# Expected with the DEFAULT prefix: the load right after the SGPR
# IMPLICIT_DEFs, then the adds, with the wait last before S_ENDPGM.
# Expected with the MAXOCC prefix: all IMPLICIT_DEFs and the adds first, then
# the load directly followed by the wait.
# In both expectations the load stays ahead of the wait.
# NOTE(review): %4 is the destination of two consecutive V_ADD_U32_e32 in the
# input -- confirm the repeated destination is intentional.
---
name: tensor_load_tensor_wait
tracksRegLiveness: true
body: |
bb.0:
; DEFAULT-LABEL: name: tensor_load_tensor_wait
; DEFAULT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF1:%[0-9]+]]:sgpr_256 = IMPLICIT_DEF
; DEFAULT-NEXT: TENSOR_LOAD_TO_LDS_d2 [[DEF]], [[DEF1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
; DEFAULT-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: S_WAIT_TENSORCNT 3, implicit-def dead $tensorcnt, implicit $tensorcnt
; DEFAULT-NEXT: S_ENDPGM 0
;
; MAXOCC-LABEL: name: tensor_load_tensor_wait
; MAXOCC: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
; MAXOCC-NEXT: [[DEF1:%[0-9]+]]:sgpr_256 = IMPLICIT_DEF
; MAXOCC-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; MAXOCC-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; MAXOCC-NEXT: TENSOR_LOAD_TO_LDS_d2 [[DEF]], [[DEF1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
; MAXOCC-NEXT: S_WAIT_TENSORCNT 3, implicit-def dead $tensorcnt, implicit $tensorcnt
; MAXOCC-NEXT: S_ENDPGM 0
%0:sgpr_128 = IMPLICIT_DEF
%1:sgpr_256 = IMPLICIT_DEF
%2:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = IMPLICIT_DEF
TENSOR_LOAD_TO_LDS_d2 %0:sgpr_128, %1:sgpr_256, 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
S_WAIT_TENSORCNT 3, implicit-def dead $tensorcnt, implicit $tensorcnt
%4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%5:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%6:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%7:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%8:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
S_ENDPGM 0
...
# tensor_load_tensor_wait1: same input shape as tensor_load_tensor_wait, but
# the wait operand is 0 (S_WAIT_TENSORCNT 0) instead of 3.
# Prefixes (per the run lines at the top of the file): DEFAULT checks the run
# with -amdgpu-sched-strategy=coexec, MAXOCC checks the run without that flag.
# Expected order is identical for both prefixes: the SGPR IMPLICIT_DEFs, the
# load, the VGPR IMPLICIT_DEFs, the six adds, then the wait last before
# S_ENDPGM.
# NOTE(review): %4 is the destination of two consecutive V_ADD_U32_e32 in the
# input -- confirm the repeated destination is intentional.
---
name: tensor_load_tensor_wait1
tracksRegLiveness: true
body: |
bb.0:
; DEFAULT-LABEL: name: tensor_load_tensor_wait1
; DEFAULT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF1:%[0-9]+]]:sgpr_256 = IMPLICIT_DEF
; DEFAULT-NEXT: TENSOR_LOAD_TO_LDS_d2 [[DEF]], [[DEF1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
; DEFAULT-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: S_WAIT_TENSORCNT 0, implicit-def dead $tensorcnt, implicit $tensorcnt
; DEFAULT-NEXT: S_ENDPGM 0
;
; MAXOCC-LABEL: name: tensor_load_tensor_wait1
; MAXOCC: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
; MAXOCC-NEXT: [[DEF1:%[0-9]+]]:sgpr_256 = IMPLICIT_DEF
; MAXOCC-NEXT: TENSOR_LOAD_TO_LDS_d2 [[DEF]], [[DEF1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
; MAXOCC-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; MAXOCC-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; MAXOCC-NEXT: S_WAIT_TENSORCNT 0, implicit-def dead $tensorcnt, implicit $tensorcnt
; MAXOCC-NEXT: S_ENDPGM 0
%0:sgpr_128 = IMPLICIT_DEF
%1:sgpr_256 = IMPLICIT_DEF
%2:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = IMPLICIT_DEF
TENSOR_LOAD_TO_LDS_d2 %0:sgpr_128, %1:sgpr_256, 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
S_WAIT_TENSORCNT 0, implicit-def dead $tensorcnt, implicit $tensorcnt
%4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%5:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%6:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%7:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%8:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
S_ENDPGM 0
...
# tensor_load_tensor_wait2: two back-to-back TENSOR_LOAD_TO_LDS_d2 followed by
# S_WAIT_TENSORCNT 1, then six V_ADD_U32_e32 that read only %2 and %3.
# Prefixes (per the run lines at the top of the file): DEFAULT checks the run
# with -amdgpu-sched-strategy=coexec, MAXOCC checks the run without that flag.
# Expected with the DEFAULT prefix: both loads adjacent right after the SGPR
# IMPLICIT_DEFs, then the adds, with the wait last before S_ENDPGM.
# Expected with the MAXOCC prefix: all four IMPLICIT_DEFs, the first load, the
# six adds, then the second load directly followed by the wait.
# In both expectations both loads stay ahead of the wait.
# NOTE(review): %4 is the destination of two consecutive V_ADD_U32_e32 in the
# input -- confirm the repeated destination is intentional.
---
name: tensor_load_tensor_wait2
tracksRegLiveness: true
body: |
bb.0:
; DEFAULT-LABEL: name: tensor_load_tensor_wait2
; DEFAULT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF1:%[0-9]+]]:sgpr_256 = IMPLICIT_DEF
; DEFAULT-NEXT: TENSOR_LOAD_TO_LDS_d2 [[DEF]], [[DEF1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
; DEFAULT-NEXT: TENSOR_LOAD_TO_LDS_d2 [[DEF]], [[DEF1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
; DEFAULT-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: S_WAIT_TENSORCNT 1, implicit-def dead $tensorcnt, implicit $tensorcnt
; DEFAULT-NEXT: S_ENDPGM 0
;
; MAXOCC-LABEL: name: tensor_load_tensor_wait2
; MAXOCC: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
; MAXOCC-NEXT: [[DEF1:%[0-9]+]]:sgpr_256 = IMPLICIT_DEF
; MAXOCC-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; MAXOCC-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; MAXOCC-NEXT: TENSOR_LOAD_TO_LDS_d2 [[DEF]], [[DEF1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; MAXOCC-NEXT: TENSOR_LOAD_TO_LDS_d2 [[DEF]], [[DEF1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
; MAXOCC-NEXT: S_WAIT_TENSORCNT 1, implicit-def dead $tensorcnt, implicit $tensorcnt
; MAXOCC-NEXT: S_ENDPGM 0
%0:sgpr_128 = IMPLICIT_DEF
%1:sgpr_256 = IMPLICIT_DEF
%2:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = IMPLICIT_DEF
TENSOR_LOAD_TO_LDS_d2 %0:sgpr_128, %1:sgpr_256, 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
TENSOR_LOAD_TO_LDS_d2 %0:sgpr_128, %1:sgpr_256, 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt
S_WAIT_TENSORCNT 1, implicit-def dead $tensorcnt, implicit $tensorcnt
%4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%5:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%6:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%7:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%8:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
S_ENDPGM 0
...
# async_load_async_load: two back-to-back GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR,
# six V_ADD_U32_e32 that read only %2 and %3, then two SCHED_GROUP_BARRIERs
# (operands 32, 1, 0 and 2, 6, 0) ahead of S_ENDPGM.
# Prefixes (per the run lines at the top of the file): DEFAULT checks the run
# with -amdgpu-sched-strategy=coexec, MAXOCC checks the run without that flag.
# Expected with the DEFAULT prefix: all five IMPLICIT_DEFs, the first load,
# four adds, the second load, the remaining two adds, then the barriers.
# Expected with the MAXOCC prefix: three IMPLICIT_DEFs, both loads adjacent,
# the other two IMPLICIT_DEFs, all six adds, then the barriers.
# NOTE(review): %4 is the third operand of both loads and is also the
# destination of the first two V_ADD_U32_e32 in the input -- confirm this
# register reuse is intentional.
---
name: async_load_async_load
tracksRegLiveness: true
body: |
bb.0:
; DEFAULT-LABEL: name: async_load_async_load
; DEFAULT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; DEFAULT-NEXT: GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR [[DEF]], [[DEF1]], [[DEF4]], 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR [[DEF]], [[DEF1]], [[DEF4]], 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: SCHED_GROUP_BARRIER 32, 1, 0
; DEFAULT-NEXT: SCHED_GROUP_BARRIER 2, 6, 0
; DEFAULT-NEXT: S_ENDPGM 0
;
; MAXOCC-LABEL: name: async_load_async_load
; MAXOCC: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; MAXOCC-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; MAXOCC-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; MAXOCC-NEXT: GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR [[DEF]], [[DEF1]], [[DEF2]], 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt
; MAXOCC-NEXT: GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR [[DEF]], [[DEF1]], [[DEF2]], 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt
; MAXOCC-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; MAXOCC-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec
; MAXOCC-NEXT: SCHED_GROUP_BARRIER 32, 1, 0
; MAXOCC-NEXT: SCHED_GROUP_BARRIER 2, 6, 0
; MAXOCC-NEXT: S_ENDPGM 0
%0:vgpr_32 = IMPLICIT_DEF
%1:sreg_64 = IMPLICIT_DEF
%2:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = IMPLICIT_DEF
%4:vgpr_32 = IMPLICIT_DEF
GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR %0:vgpr_32, %1:sreg_64, %4:vgpr_32, 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt
GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR %0:vgpr_32, %1:sreg_64, %4:vgpr_32, 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt
%4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%5:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%6:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%7:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%8:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
SCHED_GROUP_BARRIER 32, 1, 0
SCHED_GROUP_BARRIER 2, 6, 0
S_ENDPGM 0
...
# async_wait_async_load: S_WAIT_ASYNCCNT 2, then one
# GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR, then six V_ADD_U32_e32 that read only %2
# and %3.
# Prefixes (per the run lines at the top of the file): DEFAULT checks the run
# with -amdgpu-sched-strategy=coexec, MAXOCC checks the run without that flag.
# Expected with the DEFAULT prefix: the wait is the first instruction, then
# four IMPLICIT_DEFs and the six adds, with the fifth IMPLICIT_DEF and the
# load last before S_ENDPGM.
# Expected with the MAXOCC prefix: three IMPLICIT_DEFs, the wait directly
# followed by the load, the other two IMPLICIT_DEFs, then the adds.
# In both expectations the wait stays ahead of the load.
# NOTE(review): %4 is the third operand of the load and is also the
# destination of the first two V_ADD_U32_e32 in the input -- confirm this
# register reuse is intentional.
---
name: async_wait_async_load
tracksRegLiveness: true
body: |
bb.0:
; DEFAULT-LABEL: name: async_wait_async_load
; DEFAULT: S_WAIT_ASYNCCNT 2, implicit-def dead $asynccnt, implicit $asynccnt
; DEFAULT-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; DEFAULT-NEXT: GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR [[DEF]], [[DEF1]], [[DEF4]], 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt
; DEFAULT-NEXT: S_ENDPGM 0
;
; MAXOCC-LABEL: name: async_wait_async_load
; MAXOCC: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; MAXOCC-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; MAXOCC-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; MAXOCC-NEXT: S_WAIT_ASYNCCNT 2, implicit-def dead $asynccnt, implicit $asynccnt
; MAXOCC-NEXT: GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR [[DEF]], [[DEF1]], [[DEF2]], 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt
; MAXOCC-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; MAXOCC-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec
; MAXOCC-NEXT: S_ENDPGM 0
%0:vgpr_32 = IMPLICIT_DEF
%1:sreg_64 = IMPLICIT_DEF
%2:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = IMPLICIT_DEF
%4:vgpr_32 = IMPLICIT_DEF
S_WAIT_ASYNCCNT 2, implicit-def dead $asynccnt, implicit $asynccnt
GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR %0:vgpr_32, %1:sreg_64, %4:vgpr_32, 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt
%4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%5:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%6:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%7:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%8:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
S_ENDPGM 0
...
# async_wait_async_wait: S_WAIT_ASYNCCNT 2 followed by S_WAIT_ASYNCCNT 1 with
# no async load, then six V_ADD_U32_e32 that read only %2 and %3. The
# IMPLICIT_DEFs of %0, %1 and %4 have no users here and are expected to be
# marked dead.
# Prefixes (per the run lines at the top of the file): DEFAULT checks the run
# with -amdgpu-sched-strategy=coexec, MAXOCC checks the run without that flag.
# Expected with the DEFAULT prefix: both waits first (operand 2, then 1), then
# IMPLICIT_DEFs and the adds, with one dead IMPLICIT_DEF right before S_ENDPGM.
# Expected with the MAXOCC prefix: two VGPR IMPLICIT_DEFs and the adds first,
# then both waits (operand 2, then 1) and the three dead IMPLICIT_DEFs.
# In both expectations the two waits keep their input order.
# NOTE(review): %4 is first an IMPLICIT_DEF and then the destination of two
# consecutive V_ADD_U32_e32 in the input -- confirm this reuse is intentional.
---
name: async_wait_async_wait
tracksRegLiveness: true
body: |
bb.0:
; DEFAULT-LABEL: name: async_wait_async_wait
; DEFAULT: S_WAIT_ASYNCCNT 2, implicit-def dead $asynccnt, implicit $asynccnt
; DEFAULT-NEXT: S_WAIT_ASYNCCNT 1, implicit-def dead $asynccnt, implicit $asynccnt
; DEFAULT-NEXT: dead [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; DEFAULT-NEXT: dead [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; DEFAULT-NEXT: S_ENDPGM 0
;
; MAXOCC-LABEL: name: async_wait_async_wait
; MAXOCC: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; MAXOCC-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF]], [[DEF1]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF]], [[DEF1]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF]], [[DEF1]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF]], [[DEF1]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF]], [[DEF1]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF]], [[DEF1]], implicit $exec
; MAXOCC-NEXT: S_WAIT_ASYNCCNT 2, implicit-def dead $asynccnt, implicit $asynccnt
; MAXOCC-NEXT: S_WAIT_ASYNCCNT 1, implicit-def dead $asynccnt, implicit $asynccnt
; MAXOCC-NEXT: dead [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; MAXOCC-NEXT: dead [[DEF3:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; MAXOCC-NEXT: dead [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; MAXOCC-NEXT: S_ENDPGM 0
%0:vgpr_32 = IMPLICIT_DEF
%1:sreg_64 = IMPLICIT_DEF
%2:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = IMPLICIT_DEF
%4:vgpr_32 = IMPLICIT_DEF
S_WAIT_ASYNCCNT 2, implicit-def dead $asynccnt, implicit $asynccnt
S_WAIT_ASYNCCNT 1, implicit-def dead $asynccnt, implicit $asynccnt
%4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%5:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%6:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%7:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%8:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
S_ENDPGM 0
...
# async_load_async_wait: one GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR immediately
# followed by S_WAIT_ASYNCCNT 2, six V_ADD_U32_e32 that read only %2 and %3,
# then two SCHED_GROUP_BARRIERs (operands 32, 1, 0 and 2, 6, 0) ahead of
# S_ENDPGM.
# Prefixes (per the run lines at the top of the file): DEFAULT checks the run
# with -amdgpu-sched-strategy=coexec, MAXOCC checks the run without that flag.
# Expected with the DEFAULT prefix: all five IMPLICIT_DEFs, the load, the six
# adds, both barriers, with the wait last before S_ENDPGM.
# Expected with the MAXOCC prefix: three IMPLICIT_DEFs, the load, the other two
# IMPLICIT_DEFs, the six adds, the wait, then both barriers.
# In both expectations the load stays ahead of the wait.
# NOTE(review): %4 is the third operand of the load and is also the
# destination of the first two V_ADD_U32_e32 in the input -- confirm this
# register reuse is intentional.
---
name: async_load_async_wait
tracksRegLiveness: true
body: |
bb.0:
; DEFAULT-LABEL: name: async_load_async_wait
; DEFAULT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; DEFAULT-NEXT: GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR [[DEF]], [[DEF1]], [[DEF4]], 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: SCHED_GROUP_BARRIER 32, 1, 0
; DEFAULT-NEXT: SCHED_GROUP_BARRIER 2, 6, 0
; DEFAULT-NEXT: S_WAIT_ASYNCCNT 2, implicit-def dead $asynccnt, implicit $asynccnt
; DEFAULT-NEXT: S_ENDPGM 0
;
; MAXOCC-LABEL: name: async_load_async_wait
; MAXOCC: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; MAXOCC-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; MAXOCC-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; MAXOCC-NEXT: GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR [[DEF]], [[DEF1]], [[DEF2]], 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt
; MAXOCC-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; MAXOCC-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec
; MAXOCC-NEXT: S_WAIT_ASYNCCNT 2, implicit-def dead $asynccnt, implicit $asynccnt
; MAXOCC-NEXT: SCHED_GROUP_BARRIER 32, 1, 0
; MAXOCC-NEXT: SCHED_GROUP_BARRIER 2, 6, 0
; MAXOCC-NEXT: S_ENDPGM 0
%0:vgpr_32 = IMPLICIT_DEF
%1:sreg_64 = IMPLICIT_DEF
%2:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = IMPLICIT_DEF
%4:vgpr_32 = IMPLICIT_DEF
GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR %0:vgpr_32, %1:sreg_64, %4:vgpr_32, 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt
S_WAIT_ASYNCCNT 2, implicit-def dead $asynccnt, implicit $asynccnt
%4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%5:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%6:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%7:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%8:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
SCHED_GROUP_BARRIER 32, 1, 0
SCHED_GROUP_BARRIER 2, 6, 0
S_ENDPGM 0
...
---
# async_load_async_wait1
#
# Input: a single GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR immediately followed by
# S_WAIT_ASYNCCNT 0, then six V_ADD_U32_e32 (all reading %2 and %3), two
# SCHED_GROUP_BARRIERs (masks 32 and 2) and S_ENDPGM. Note that %4 is both the
# third operand of the load and the result register of the first two adds.
#
# Expected schedules, as recorded by the autogenerated assertions below:
#   * DEFAULT prefix (the run with -amdgpu-sched-strategy=coexec): the wait is
#     placed after all six adds and both SCHED_GROUP_BARRIERs, directly before
#     S_ENDPGM.
#   * MAXOCC prefix (the run without the strategy flag): the load is placed
#     ahead of the two IMPLICIT_DEFs that feed the adds, and the wait follows
#     the six adds but precedes both SCHED_GROUP_BARRIERs.
#
# NOTE(review): masks 32 and 2 presumably select the VMEM-read and VALU groups
# of the AMDGPU sched_group_barrier encoding - confirm against the AMDGPU docs.
# Do not hand-edit the assertions; regenerate with utils/update_mir_test_checks.py.
name: async_load_async_wait1
tracksRegLiveness: true
body: |
bb.0:
; DEFAULT-LABEL: name: async_load_async_wait1
; DEFAULT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; DEFAULT-NEXT: GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR [[DEF]], [[DEF1]], [[DEF4]], 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: SCHED_GROUP_BARRIER 32, 1, 0
; DEFAULT-NEXT: SCHED_GROUP_BARRIER 2, 6, 0
; DEFAULT-NEXT: S_WAIT_ASYNCCNT 0, implicit-def dead $asynccnt, implicit $asynccnt
; DEFAULT-NEXT: S_ENDPGM 0
;
; MAXOCC-LABEL: name: async_load_async_wait1
; MAXOCC: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; MAXOCC-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; MAXOCC-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; MAXOCC-NEXT: GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR [[DEF]], [[DEF1]], [[DEF2]], 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt
; MAXOCC-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; MAXOCC-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec
; MAXOCC-NEXT: S_WAIT_ASYNCCNT 0, implicit-def dead $asynccnt, implicit $asynccnt
; MAXOCC-NEXT: SCHED_GROUP_BARRIER 32, 1, 0
; MAXOCC-NEXT: SCHED_GROUP_BARRIER 2, 6, 0
; MAXOCC-NEXT: S_ENDPGM 0
%0:vgpr_32 = IMPLICIT_DEF
%1:sreg_64 = IMPLICIT_DEF
%2:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = IMPLICIT_DEF
%4:vgpr_32 = IMPLICIT_DEF
GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR %0:vgpr_32, %1:sreg_64, %4:vgpr_32, 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt
S_WAIT_ASYNCCNT 0, implicit-def dead $asynccnt, implicit $asynccnt
%4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%5:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%6:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%7:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%8:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
SCHED_GROUP_BARRIER 32, 1, 0
SCHED_GROUP_BARRIER 2, 6, 0
S_ENDPGM 0
...
---
# async_load_async_wait2
#
# Input: two back-to-back GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR with identical
# operands, followed by S_WAIT_ASYNCCNT 2, then six V_ADD_U32_e32 (all reading
# %2 and %3), two SCHED_GROUP_BARRIERs (masks 32 and 2) and S_ENDPGM. Note that
# %4 is both the third operand of the loads and the result register of the
# first two adds.
#
# Expected schedules, as recorded by the autogenerated assertions below:
#   * DEFAULT prefix (the run with -amdgpu-sched-strategy=coexec): the loads
#     are split apart - first load, four adds, second load, two adds - and the
#     wait is placed after both SCHED_GROUP_BARRIERs, directly before S_ENDPGM.
#   * MAXOCC prefix (the run without the strategy flag): both loads stay
#     adjacent and are placed ahead of the two IMPLICIT_DEFs that feed the
#     adds; the wait follows the six adds but precedes both
#     SCHED_GROUP_BARRIERs.
#
# NOTE(review): masks 32 and 2 presumably select the VMEM-read and VALU groups
# of the AMDGPU sched_group_barrier encoding - confirm against the AMDGPU docs.
# Do not hand-edit the assertions; regenerate with utils/update_mir_test_checks.py.
name: async_load_async_wait2
tracksRegLiveness: true
body: |
bb.0:
; DEFAULT-LABEL: name: async_load_async_wait2
; DEFAULT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; DEFAULT-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; DEFAULT-NEXT: GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR [[DEF]], [[DEF1]], [[DEF4]], 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR [[DEF]], [[DEF1]], [[DEF4]], 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
; DEFAULT-NEXT: SCHED_GROUP_BARRIER 32, 1, 0
; DEFAULT-NEXT: SCHED_GROUP_BARRIER 2, 6, 0
; DEFAULT-NEXT: S_WAIT_ASYNCCNT 2, implicit-def dead $asynccnt, implicit $asynccnt
; DEFAULT-NEXT: S_ENDPGM 0
;
; MAXOCC-LABEL: name: async_load_async_wait2
; MAXOCC: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; MAXOCC-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
; MAXOCC-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; MAXOCC-NEXT: GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR [[DEF]], [[DEF1]], [[DEF2]], 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt
; MAXOCC-NEXT: GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR [[DEF]], [[DEF1]], [[DEF2]], 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt
; MAXOCC-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; MAXOCC-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec
; MAXOCC-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec
; MAXOCC-NEXT: S_WAIT_ASYNCCNT 2, implicit-def dead $asynccnt, implicit $asynccnt
; MAXOCC-NEXT: SCHED_GROUP_BARRIER 32, 1, 0
; MAXOCC-NEXT: SCHED_GROUP_BARRIER 2, 6, 0
; MAXOCC-NEXT: S_ENDPGM 0
%0:vgpr_32 = IMPLICIT_DEF
%1:sreg_64 = IMPLICIT_DEF
%2:vgpr_32 = IMPLICIT_DEF
%3:vgpr_32 = IMPLICIT_DEF
%4:vgpr_32 = IMPLICIT_DEF
GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR %0:vgpr_32, %1:sreg_64, %4:vgpr_32, 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt
GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR %0:vgpr_32, %1:sreg_64, %4:vgpr_32, 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt
S_WAIT_ASYNCCNT 2, implicit-def dead $asynccnt, implicit $asynccnt
%4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%5:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%6:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%7:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
%8:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
SCHED_GROUP_BARRIER 32, 1, 0
SCHED_GROUP_BARRIER 2, 6, 0
S_ENDPGM 0
...