| # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 |
| # RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass=machine-scheduler -amdgpu-sched-strategy=coexec -verify-misched %s -o - | FileCheck -check-prefix=DEFAULT %s |
| # RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass=machine-scheduler -verify-misched %s -o - | FileCheck -check-prefix=MAXOCC %s |
| |
| --- | |
| ; One empty IR function per test name; each one carries attribute group #0. |
| define void @tensor_load_tensor_load() #0 { ret void } |
| define void @tensor_wait_tensor_load() #0 { ret void } |
| define void @tensor_wait_tensor_wait() #0 { ret void } |
| define void @tensor_load_tensor_wait() #0 { ret void } |
| define void @tensor_load_tensor_wait1() #0 { ret void } |
| define void @tensor_load_tensor_wait2() #0 { ret void } |
| define void @async_load_async_load() #0 { ret void } |
| define void @async_wait_async_load() #0 { ret void } |
| define void @async_wait_async_wait() #0 { ret void } |
| define void @async_load_async_wait() #0 { ret void } |
| define void @async_load_async_wait1() #0 { ret void } |
| define void @async_load_async_wait2() #0 { ret void } |
| |
| ; Attribute group #0 sets "amdgpu-waves-per-eu" to "1,1" for every function above. |
| attributes #0 = { "amdgpu-waves-per-eu"="1,1" } |
| ... |
| |
| --- |
| # Input: two consecutive TENSOR_LOAD_TO_LDS_d2 followed by six V_ADD_U32_e32 that all read %2 and %3. |
| # Both RUN configurations expect the same order: the two loads first, then the VGPR defs and the adds. |
| name: tensor_load_tensor_load |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| ; DEFAULT-LABEL: name: tensor_load_tensor_load |
| ; DEFAULT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: [[DEF1:%[0-9]+]]:sgpr_256 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: TENSOR_LOAD_TO_LDS_d2 [[DEF]], [[DEF1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt |
| ; DEFAULT-NEXT: TENSOR_LOAD_TO_LDS_d2 [[DEF]], [[DEF1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt |
| ; DEFAULT-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: S_ENDPGM 0 |
| ; |
| ; MAXOCC-LABEL: name: tensor_load_tensor_load |
| ; MAXOCC: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: [[DEF1:%[0-9]+]]:sgpr_256 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: TENSOR_LOAD_TO_LDS_d2 [[DEF]], [[DEF1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt |
| ; MAXOCC-NEXT: TENSOR_LOAD_TO_LDS_d2 [[DEF]], [[DEF1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt |
| ; MAXOCC-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; MAXOCC-NEXT: S_ENDPGM 0 |
| %0:sgpr_128 = IMPLICIT_DEF |
| %1:sgpr_256 = IMPLICIT_DEF |
| %2:vgpr_32 = IMPLICIT_DEF |
| %3:vgpr_32 = IMPLICIT_DEF |
| TENSOR_LOAD_TO_LDS_d2 %0:sgpr_128, %1:sgpr_256, 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt |
| TENSOR_LOAD_TO_LDS_d2 %0:sgpr_128, %1:sgpr_256, 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt |
| %4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %5:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %6:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %7:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %8:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| S_ENDPGM 0 |
| ... |
| |
| --- |
| # Input: S_WAIT_TENSORCNT 3 immediately followed by one TENSOR_LOAD_TO_LDS_d2, then six V_ADD_U32_e32. |
| # In both expected outputs the wait stays ahead of the load; only its position relative to the |
| # SGPR IMPLICIT_DEFs differs between the DEFAULT and MAXOCC prefixes. |
| name: tensor_wait_tensor_load |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| ; DEFAULT-LABEL: name: tensor_wait_tensor_load |
| ; DEFAULT: S_WAIT_TENSORCNT 3, implicit-def dead $tensorcnt, implicit $tensorcnt |
| ; DEFAULT-NEXT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: [[DEF1:%[0-9]+]]:sgpr_256 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: TENSOR_LOAD_TO_LDS_d2 [[DEF]], [[DEF1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt |
| ; DEFAULT-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: S_ENDPGM 0 |
| ; |
| ; MAXOCC-LABEL: name: tensor_wait_tensor_load |
| ; MAXOCC: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: [[DEF1:%[0-9]+]]:sgpr_256 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: S_WAIT_TENSORCNT 3, implicit-def dead $tensorcnt, implicit $tensorcnt |
| ; MAXOCC-NEXT: TENSOR_LOAD_TO_LDS_d2 [[DEF]], [[DEF1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt |
| ; MAXOCC-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; MAXOCC-NEXT: S_ENDPGM 0 |
| %0:sgpr_128 = IMPLICIT_DEF |
| %1:sgpr_256 = IMPLICIT_DEF |
| %2:vgpr_32 = IMPLICIT_DEF |
| %3:vgpr_32 = IMPLICIT_DEF |
| S_WAIT_TENSORCNT 3, implicit-def dead $tensorcnt, implicit $tensorcnt |
| TENSOR_LOAD_TO_LDS_d2 %0:sgpr_128, %1:sgpr_256, 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt |
| %4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %5:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %6:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %7:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %8:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| S_ENDPGM 0 |
| ... |
| |
| --- |
| # Input: two consecutive S_WAIT_TENSORCNT 3 with no tensor load in the block, then six V_ADD_U32_e32. |
| # %0 and %1 are defined but never used here. |
| # DEFAULT expects both waits at the top of the block; MAXOCC expects them after the adds. |
| name: tensor_wait_tensor_wait |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| ; DEFAULT-LABEL: name: tensor_wait_tensor_wait |
| ; DEFAULT: S_WAIT_TENSORCNT 3, implicit-def dead $tensorcnt, implicit $tensorcnt |
| ; DEFAULT-NEXT: S_WAIT_TENSORCNT 3, implicit-def dead $tensorcnt, implicit $tensorcnt |
| ; DEFAULT-NEXT: dead [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: dead [[DEF1:%[0-9]+]]:sgpr_256 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: S_ENDPGM 0 |
| ; |
| ; MAXOCC-LABEL: name: tensor_wait_tensor_wait |
| ; MAXOCC: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF]], [[DEF1]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF]], [[DEF1]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF]], [[DEF1]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF]], [[DEF1]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF]], [[DEF1]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF]], [[DEF1]], implicit $exec |
| ; MAXOCC-NEXT: S_WAIT_TENSORCNT 3, implicit-def dead $tensorcnt, implicit $tensorcnt |
| ; MAXOCC-NEXT: S_WAIT_TENSORCNT 3, implicit-def dead $tensorcnt, implicit $tensorcnt |
| ; MAXOCC-NEXT: dead [[DEF2:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: dead [[DEF3:%[0-9]+]]:sgpr_256 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: S_ENDPGM 0 |
| %0:sgpr_128 = IMPLICIT_DEF |
| %1:sgpr_256 = IMPLICIT_DEF |
| %2:vgpr_32 = IMPLICIT_DEF |
| %3:vgpr_32 = IMPLICIT_DEF |
| S_WAIT_TENSORCNT 3, implicit-def dead $tensorcnt, implicit $tensorcnt |
| S_WAIT_TENSORCNT 3, implicit-def dead $tensorcnt, implicit $tensorcnt |
| %4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %5:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %6:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %7:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %8:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| S_ENDPGM 0 |
| ... |
| |
| |
| --- |
| # Input: one TENSOR_LOAD_TO_LDS_d2 followed by S_WAIT_TENSORCNT 3, then six V_ADD_U32_e32. |
| # Both expected outputs place the wait after the adds and keep the load ahead of the wait; |
| # DEFAULT issues the load before the adds, MAXOCC issues it directly before the wait. |
| name: tensor_load_tensor_wait |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| ; DEFAULT-LABEL: name: tensor_load_tensor_wait |
| ; DEFAULT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: [[DEF1:%[0-9]+]]:sgpr_256 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: TENSOR_LOAD_TO_LDS_d2 [[DEF]], [[DEF1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt |
| ; DEFAULT-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: S_WAIT_TENSORCNT 3, implicit-def dead $tensorcnt, implicit $tensorcnt |
| ; DEFAULT-NEXT: S_ENDPGM 0 |
| ; |
| ; MAXOCC-LABEL: name: tensor_load_tensor_wait |
| ; MAXOCC: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: [[DEF1:%[0-9]+]]:sgpr_256 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; MAXOCC-NEXT: TENSOR_LOAD_TO_LDS_d2 [[DEF]], [[DEF1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt |
| ; MAXOCC-NEXT: S_WAIT_TENSORCNT 3, implicit-def dead $tensorcnt, implicit $tensorcnt |
| ; MAXOCC-NEXT: S_ENDPGM 0 |
| %0:sgpr_128 = IMPLICIT_DEF |
| %1:sgpr_256 = IMPLICIT_DEF |
| %2:vgpr_32 = IMPLICIT_DEF |
| %3:vgpr_32 = IMPLICIT_DEF |
| TENSOR_LOAD_TO_LDS_d2 %0:sgpr_128, %1:sgpr_256, 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt |
| S_WAIT_TENSORCNT 3, implicit-def dead $tensorcnt, implicit $tensorcnt |
| %4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %5:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %6:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %7:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %8:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| S_ENDPGM 0 |
| ... |
| |
| --- |
| # Input: one TENSOR_LOAD_TO_LDS_d2 followed by S_WAIT_TENSORCNT 0, then six V_ADD_U32_e32. |
| # Both RUN configurations expect the same order: the load, the adds, then the wait. |
| name: tensor_load_tensor_wait1 |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| ; DEFAULT-LABEL: name: tensor_load_tensor_wait1 |
| ; DEFAULT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: [[DEF1:%[0-9]+]]:sgpr_256 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: TENSOR_LOAD_TO_LDS_d2 [[DEF]], [[DEF1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt |
| ; DEFAULT-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: S_WAIT_TENSORCNT 0, implicit-def dead $tensorcnt, implicit $tensorcnt |
| ; DEFAULT-NEXT: S_ENDPGM 0 |
| ; |
| ; MAXOCC-LABEL: name: tensor_load_tensor_wait1 |
| ; MAXOCC: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: [[DEF1:%[0-9]+]]:sgpr_256 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: TENSOR_LOAD_TO_LDS_d2 [[DEF]], [[DEF1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt |
| ; MAXOCC-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; MAXOCC-NEXT: S_WAIT_TENSORCNT 0, implicit-def dead $tensorcnt, implicit $tensorcnt |
| ; MAXOCC-NEXT: S_ENDPGM 0 |
| %0:sgpr_128 = IMPLICIT_DEF |
| %1:sgpr_256 = IMPLICIT_DEF |
| %2:vgpr_32 = IMPLICIT_DEF |
| %3:vgpr_32 = IMPLICIT_DEF |
| TENSOR_LOAD_TO_LDS_d2 %0:sgpr_128, %1:sgpr_256, 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt |
| S_WAIT_TENSORCNT 0, implicit-def dead $tensorcnt, implicit $tensorcnt |
| %4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %5:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %6:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %7:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %8:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| S_ENDPGM 0 |
| ... |
| |
| --- |
| # Input: two TENSOR_LOAD_TO_LDS_d2 followed by S_WAIT_TENSORCNT 1, then six V_ADD_U32_e32. |
| # Both expected outputs end with the wait after both loads and all adds; |
| # DEFAULT issues both loads first, MAXOCC places the six adds between the two loads. |
| name: tensor_load_tensor_wait2 |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| ; DEFAULT-LABEL: name: tensor_load_tensor_wait2 |
| ; DEFAULT: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: [[DEF1:%[0-9]+]]:sgpr_256 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: TENSOR_LOAD_TO_LDS_d2 [[DEF]], [[DEF1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt |
| ; DEFAULT-NEXT: TENSOR_LOAD_TO_LDS_d2 [[DEF]], [[DEF1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt |
| ; DEFAULT-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: S_WAIT_TENSORCNT 1, implicit-def dead $tensorcnt, implicit $tensorcnt |
| ; DEFAULT-NEXT: S_ENDPGM 0 |
| ; |
| ; MAXOCC-LABEL: name: tensor_load_tensor_wait2 |
| ; MAXOCC: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: [[DEF1:%[0-9]+]]:sgpr_256 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: TENSOR_LOAD_TO_LDS_d2 [[DEF]], [[DEF1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; MAXOCC-NEXT: TENSOR_LOAD_TO_LDS_d2 [[DEF]], [[DEF1]], 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt |
| ; MAXOCC-NEXT: S_WAIT_TENSORCNT 1, implicit-def dead $tensorcnt, implicit $tensorcnt |
| ; MAXOCC-NEXT: S_ENDPGM 0 |
| %0:sgpr_128 = IMPLICIT_DEF |
| %1:sgpr_256 = IMPLICIT_DEF |
| %2:vgpr_32 = IMPLICIT_DEF |
| %3:vgpr_32 = IMPLICIT_DEF |
| TENSOR_LOAD_TO_LDS_d2 %0:sgpr_128, %1:sgpr_256, 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt |
| TENSOR_LOAD_TO_LDS_d2 %0:sgpr_128, %1:sgpr_256, 0, 0, implicit-def dead $tensorcnt, implicit $exec, implicit $tensorcnt |
| S_WAIT_TENSORCNT 1, implicit-def dead $tensorcnt, implicit $tensorcnt |
| %4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %5:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %6:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %7:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %8:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| S_ENDPGM 0 |
| ... |
| |
| --- |
| # Input: two GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR, six V_ADD_U32_e32, then SCHED_GROUP_BARRIER 32, 1, 0 |
| # and SCHED_GROUP_BARRIER 2, 6, 0. %4 is read by both loads and then redefined by the first two adds. |
| # DEFAULT expects four adds between the two loads; MAXOCC expects both loads ahead of all adds. |
| name: async_load_async_load |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| ; DEFAULT-LABEL: name: async_load_async_load |
| ; DEFAULT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR [[DEF]], [[DEF1]], [[DEF4]], 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR [[DEF]], [[DEF1]], [[DEF4]], 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: SCHED_GROUP_BARRIER 32, 1, 0 |
| ; DEFAULT-NEXT: SCHED_GROUP_BARRIER 2, 6, 0 |
| ; DEFAULT-NEXT: S_ENDPGM 0 |
| ; |
| ; MAXOCC-LABEL: name: async_load_async_load |
| ; MAXOCC: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR [[DEF]], [[DEF1]], [[DEF2]], 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt |
| ; MAXOCC-NEXT: GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR [[DEF]], [[DEF1]], [[DEF2]], 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt |
| ; MAXOCC-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec |
| ; MAXOCC-NEXT: SCHED_GROUP_BARRIER 32, 1, 0 |
| ; MAXOCC-NEXT: SCHED_GROUP_BARRIER 2, 6, 0 |
| ; MAXOCC-NEXT: S_ENDPGM 0 |
| %0:vgpr_32 = IMPLICIT_DEF |
| %1:sreg_64 = IMPLICIT_DEF |
| %2:vgpr_32 = IMPLICIT_DEF |
| %3:vgpr_32 = IMPLICIT_DEF |
| %4:vgpr_32 = IMPLICIT_DEF |
| GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR %0:vgpr_32, %1:sreg_64, %4:vgpr_32, 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt |
| GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR %0:vgpr_32, %1:sreg_64, %4:vgpr_32, 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt |
| %4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %5:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %6:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %7:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %8:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| SCHED_GROUP_BARRIER 32, 1, 0 |
| SCHED_GROUP_BARRIER 2, 6, 0 |
| S_ENDPGM 0 |
| ... |
| |
| --- |
| # Input: S_WAIT_ASYNCCNT 2 followed by one GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR, then six V_ADD_U32_e32. |
| # The wait precedes the load in both expected outputs; |
| # DEFAULT places the load after the adds, MAXOCC places it before them. |
| name: async_wait_async_load |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| ; DEFAULT-LABEL: name: async_wait_async_load |
| ; DEFAULT: S_WAIT_ASYNCCNT 2, implicit-def dead $asynccnt, implicit $asynccnt |
| ; DEFAULT-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR [[DEF]], [[DEF1]], [[DEF4]], 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt |
| ; DEFAULT-NEXT: S_ENDPGM 0 |
| ; |
| ; MAXOCC-LABEL: name: async_wait_async_load |
| ; MAXOCC: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: S_WAIT_ASYNCCNT 2, implicit-def dead $asynccnt, implicit $asynccnt |
| ; MAXOCC-NEXT: GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR [[DEF]], [[DEF1]], [[DEF2]], 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt |
| ; MAXOCC-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec |
| ; MAXOCC-NEXT: S_ENDPGM 0 |
| %0:vgpr_32 = IMPLICIT_DEF |
| %1:sreg_64 = IMPLICIT_DEF |
| %2:vgpr_32 = IMPLICIT_DEF |
| %3:vgpr_32 = IMPLICIT_DEF |
| %4:vgpr_32 = IMPLICIT_DEF |
| S_WAIT_ASYNCCNT 2, implicit-def dead $asynccnt, implicit $asynccnt |
| GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR %0:vgpr_32, %1:sreg_64, %4:vgpr_32, 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt |
| %4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %5:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %6:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %7:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %8:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| S_ENDPGM 0 |
| ... |
| |
| --- |
| # Input: S_WAIT_ASYNCCNT 2 then S_WAIT_ASYNCCNT 1 with no async load in the block, followed by six V_ADD_U32_e32. |
| # Both expected outputs keep the two waits adjacent and in their original order (2, then 1); |
| # DEFAULT places them at the top of the block, MAXOCC after the adds. |
| name: async_wait_async_wait |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| ; DEFAULT-LABEL: name: async_wait_async_wait |
| ; DEFAULT: S_WAIT_ASYNCCNT 2, implicit-def dead $asynccnt, implicit $asynccnt |
| ; DEFAULT-NEXT: S_WAIT_ASYNCCNT 1, implicit-def dead $asynccnt, implicit $asynccnt |
| ; DEFAULT-NEXT: dead [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: dead [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: S_ENDPGM 0 |
| ; |
| ; MAXOCC-LABEL: name: async_wait_async_wait |
| ; MAXOCC: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF]], [[DEF1]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF]], [[DEF1]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF]], [[DEF1]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF]], [[DEF1]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF]], [[DEF1]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF]], [[DEF1]], implicit $exec |
| ; MAXOCC-NEXT: S_WAIT_ASYNCCNT 2, implicit-def dead $asynccnt, implicit $asynccnt |
| ; MAXOCC-NEXT: S_WAIT_ASYNCCNT 1, implicit-def dead $asynccnt, implicit $asynccnt |
| ; MAXOCC-NEXT: dead [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: dead [[DEF3:%[0-9]+]]:sreg_64 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: dead [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: S_ENDPGM 0 |
| %0:vgpr_32 = IMPLICIT_DEF |
| %1:sreg_64 = IMPLICIT_DEF |
| %2:vgpr_32 = IMPLICIT_DEF |
| %3:vgpr_32 = IMPLICIT_DEF |
| %4:vgpr_32 = IMPLICIT_DEF |
| S_WAIT_ASYNCCNT 2, implicit-def dead $asynccnt, implicit $asynccnt |
| S_WAIT_ASYNCCNT 1, implicit-def dead $asynccnt, implicit $asynccnt |
| %4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %5:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %6:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %7:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %8:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| S_ENDPGM 0 |
| ... |
| |
| --- |
| # async_load_async_wait: one GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR immediately followed by |
| # S_WAIT_ASYNCCNT 2, then six V_ADD_U32_e32 and two SCHED_GROUP_BARRIERs. |
| # Expected schedules (CHECK lines below): |
| #   DEFAULT: load, six V_ADDs, both SCHED_GROUP_BARRIERs, then the wait just before S_ENDPGM. |
| #   MAXOCC:  load, six V_ADDs, the wait, then both SCHED_GROUP_BARRIERs. |
| # In both schedules the wait stays after the async load. |
| # NOTE(review): %4 is defined by IMPLICIT_DEF, read by the load, and then redefined by the |
| # first two V_ADDs - presumably intentional; confirm before regenerating the checks. |
| name: async_load_async_wait |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| ; DEFAULT-LABEL: name: async_load_async_wait |
| ; DEFAULT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR [[DEF]], [[DEF1]], [[DEF4]], 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: SCHED_GROUP_BARRIER 32, 1, 0 |
| ; DEFAULT-NEXT: SCHED_GROUP_BARRIER 2, 6, 0 |
| ; DEFAULT-NEXT: S_WAIT_ASYNCCNT 2, implicit-def dead $asynccnt, implicit $asynccnt |
| ; DEFAULT-NEXT: S_ENDPGM 0 |
| ; |
| ; MAXOCC-LABEL: name: async_load_async_wait |
| ; MAXOCC: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR [[DEF]], [[DEF1]], [[DEF2]], 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt |
| ; MAXOCC-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec |
| ; MAXOCC-NEXT: S_WAIT_ASYNCCNT 2, implicit-def dead $asynccnt, implicit $asynccnt |
| ; MAXOCC-NEXT: SCHED_GROUP_BARRIER 32, 1, 0 |
| ; MAXOCC-NEXT: SCHED_GROUP_BARRIER 2, 6, 0 |
| ; MAXOCC-NEXT: S_ENDPGM 0 |
| %0:vgpr_32 = IMPLICIT_DEF |
| %1:sreg_64 = IMPLICIT_DEF |
| %2:vgpr_32 = IMPLICIT_DEF |
| %3:vgpr_32 = IMPLICIT_DEF |
| %4:vgpr_32 = IMPLICIT_DEF |
| GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR %0:vgpr_32, %1:sreg_64, %4:vgpr_32, 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt |
| S_WAIT_ASYNCCNT 2, implicit-def dead $asynccnt, implicit $asynccnt |
| %4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %5:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %6:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %7:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %8:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| SCHED_GROUP_BARRIER 32, 1, 0 |
| SCHED_GROUP_BARRIER 2, 6, 0 |
| S_ENDPGM 0 |
| ... |
| |
| --- |
| # async_load_async_wait1: one GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR immediately followed by |
| # S_WAIT_ASYNCCNT 0, then six V_ADD_U32_e32 and two SCHED_GROUP_BARRIERs. |
| # Expected schedules (CHECK lines below): |
| #   DEFAULT: load, six V_ADDs, both SCHED_GROUP_BARRIERs, then the wait just before S_ENDPGM. |
| #   MAXOCC:  load, six V_ADDs, the wait, then both SCHED_GROUP_BARRIERs. |
| # In both schedules the wait stays after the async load. |
| # NOTE(review): %4 is defined by IMPLICIT_DEF, read by the load, and then redefined by the |
| # first two V_ADDs - presumably intentional; confirm before regenerating the checks. |
| name: async_load_async_wait1 |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| ; DEFAULT-LABEL: name: async_load_async_wait1 |
| ; DEFAULT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR [[DEF]], [[DEF1]], [[DEF4]], 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: SCHED_GROUP_BARRIER 32, 1, 0 |
| ; DEFAULT-NEXT: SCHED_GROUP_BARRIER 2, 6, 0 |
| ; DEFAULT-NEXT: S_WAIT_ASYNCCNT 0, implicit-def dead $asynccnt, implicit $asynccnt |
| ; DEFAULT-NEXT: S_ENDPGM 0 |
| ; |
| ; MAXOCC-LABEL: name: async_load_async_wait1 |
| ; MAXOCC: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR [[DEF]], [[DEF1]], [[DEF2]], 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt |
| ; MAXOCC-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec |
| ; MAXOCC-NEXT: S_WAIT_ASYNCCNT 0, implicit-def dead $asynccnt, implicit $asynccnt |
| ; MAXOCC-NEXT: SCHED_GROUP_BARRIER 32, 1, 0 |
| ; MAXOCC-NEXT: SCHED_GROUP_BARRIER 2, 6, 0 |
| ; MAXOCC-NEXT: S_ENDPGM 0 |
| %0:vgpr_32 = IMPLICIT_DEF |
| %1:sreg_64 = IMPLICIT_DEF |
| %2:vgpr_32 = IMPLICIT_DEF |
| %3:vgpr_32 = IMPLICIT_DEF |
| %4:vgpr_32 = IMPLICIT_DEF |
| GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR %0:vgpr_32, %1:sreg_64, %4:vgpr_32, 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt |
| S_WAIT_ASYNCCNT 0, implicit-def dead $asynccnt, implicit $asynccnt |
| %4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %5:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %6:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %7:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %8:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| SCHED_GROUP_BARRIER 32, 1, 0 |
| SCHED_GROUP_BARRIER 2, 6, 0 |
| S_ENDPGM 0 |
| ... |
| --- |
| # async_load_async_wait2: two back-to-back GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR followed by |
| # S_WAIT_ASYNCCNT 2, then six V_ADD_U32_e32 and two SCHED_GROUP_BARRIERs. |
| # Expected schedules (CHECK lines below): |
| #   DEFAULT: first load, four V_ADDs, second load, two V_ADDs, both SCHED_GROUP_BARRIERs, |
| #            then the wait just before S_ENDPGM. |
| #   MAXOCC:  both loads back-to-back, six V_ADDs, the wait, then both SCHED_GROUP_BARRIERs. |
| # In both schedules the wait stays after both async loads. |
| # NOTE(review): %4 is defined by IMPLICIT_DEF, read by both loads, and then redefined by the |
| # first two V_ADDs - presumably intentional; confirm before regenerating the checks. |
| name: async_load_async_wait2 |
| tracksRegLiveness: true |
| body: | |
| bb.0: |
| ; DEFAULT-LABEL: name: async_load_async_wait2 |
| ; DEFAULT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; DEFAULT-NEXT: GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR [[DEF]], [[DEF1]], [[DEF4]], 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR [[DEF]], [[DEF1]], [[DEF4]], 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec |
| ; DEFAULT-NEXT: SCHED_GROUP_BARRIER 32, 1, 0 |
| ; DEFAULT-NEXT: SCHED_GROUP_BARRIER 2, 6, 0 |
| ; DEFAULT-NEXT: S_WAIT_ASYNCCNT 2, implicit-def dead $asynccnt, implicit $asynccnt |
| ; DEFAULT-NEXT: S_ENDPGM 0 |
| ; |
| ; MAXOCC-LABEL: name: async_load_async_wait2 |
| ; MAXOCC: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR [[DEF]], [[DEF1]], [[DEF2]], 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt |
| ; MAXOCC-NEXT: GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR [[DEF]], [[DEF1]], [[DEF2]], 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt |
| ; MAXOCC-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec |
| ; MAXOCC-NEXT: dead [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[DEF4]], implicit $exec |
| ; MAXOCC-NEXT: S_WAIT_ASYNCCNT 2, implicit-def dead $asynccnt, implicit $asynccnt |
| ; MAXOCC-NEXT: SCHED_GROUP_BARRIER 32, 1, 0 |
| ; MAXOCC-NEXT: SCHED_GROUP_BARRIER 2, 6, 0 |
| ; MAXOCC-NEXT: S_ENDPGM 0 |
| %0:vgpr_32 = IMPLICIT_DEF |
| %1:sreg_64 = IMPLICIT_DEF |
| %2:vgpr_32 = IMPLICIT_DEF |
| %3:vgpr_32 = IMPLICIT_DEF |
| %4:vgpr_32 = IMPLICIT_DEF |
| GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR %0:vgpr_32, %1:sreg_64, %4:vgpr_32, 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt |
| GLOBAL_LOAD_ASYNC_TO_LDS_B64_SADDR %0:vgpr_32, %1:sreg_64, %4:vgpr_32, 0, 0, implicit-def dead $asynccnt, implicit $exec, implicit $asynccnt |
| S_WAIT_ASYNCCNT 2, implicit-def dead $asynccnt, implicit $asynccnt |
| %4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %5:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %6:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %7:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| %8:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec |
| SCHED_GROUP_BARRIER 32, 1, 0 |
| SCHED_GROUP_BARRIER 2, 6, 0 |
| S_ENDPGM 0 |
| ... |