| # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py |
| # RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefixes=GCN,GFX11 %s |
| # RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass post-RA-hazard-rec,amdgpu-wait-sgpr-hazards -o - %s | FileCheck -check-prefixes=GCN,GFX12 %s |
| |
| # Embedded LLVM IR module: declares the @mem constant referenced by mask_hazard_getpc2 |
| # and one empty (ret void) stub function for each MIR test body in this file. |
| --- | |
| @mem = internal unnamed_addr addrspace(4) constant [4 x <4 x i32>] [<4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 0, i32 0, i32 0, i32 0>] |
| |
| define amdgpu_gs void @mask_hazard_getpc1() { ret void } |
| define amdgpu_gs void @mask_hazard_getpc2() { ret void } |
| define amdgpu_gs void @mask_hazard_vcc1() { ret void } |
| define amdgpu_gs void @mask_hazard_vcc2() { ret void } |
| define amdgpu_gs void @mask_hazard_cndmask_dpp1() { ret void } |
| define amdgpu_gs void @mask_hazard_cndmask_dpp2() { ret void } |
| define amdgpu_gs void @mask_hazard_cndmask_dpp3() { ret void } |
| define amdgpu_gs void @mask_hazard_addc1() { ret void } |
| define amdgpu_gs void @mask_hazard_addc2() { ret void } |
| define amdgpu_gs void @mask_hazard_addc3() { ret void } |
| define amdgpu_gs void @mask_hazard_addc4() { ret void } |
| define amdgpu_gs void @mask_hazard_subb1() { ret void } |
| define amdgpu_gs void @mask_hazard_subb2() { ret void } |
| define amdgpu_gs void @mask_hazard_subb3() { ret void } |
| define amdgpu_gs void @mask_hazard_subb4() { ret void } |
| define amdgpu_gs void @mask_hazard_subbrev1() { ret void } |
| define amdgpu_gs void @mask_hazard_subbrev2() { ret void } |
| define amdgpu_gs void @mask_hazard_subbrev3() { ret void } |
| define amdgpu_gs void @mask_hazard_subbrev4() { ret void } |
| define amdgpu_gs void @mask_hazard_div_fmas_f32() { ret void } |
| define amdgpu_gs void @mask_hazard_div_fmas_f64() { ret void } |
| define amdgpu_gs void @mask_hazard_subreg1() { ret void } |
| define amdgpu_gs void @mask_hazard_subreg2() { ret void } |
| define amdgpu_gs void @mask_hazard_subreg3() { ret void } |
| define amdgpu_gs void @mask_hazard_subreg4() { ret void } |
| define amdgpu_gs void @mask_hazard_subreg5() { ret void } |
| define amdgpu_gs void @mask_hazard_waitcnt() { ret void } |
| define amdgpu_gs void @mask_hazard_gap1() { ret void } |
| define amdgpu_gs void @mask_hazard_gap2() { ret void } |
| define amdgpu_gs void @mask_hazard_gap3() { ret void } |
| define amdgpu_gs void @mask_hazard_no_hazard1() { ret void } |
| define amdgpu_gs void @mask_hazard_no_hazard2() { ret void } |
| define amdgpu_gs void @mask_hazard_no_hazard3() { ret void } |
| ... |
| |
| # V_CNDMASK_B32_e64 reads $sgpr0_sgpr1 as its mask operand; S_GETPC_B64 then overwrites the pair |
| # and S_ADD_U32 reads $sgpr0. Both run lines (GCN prefix) expect S_WAITCNT_DEPCTR 65534 |
| # directly after the S_GETPC_B64. |
| --- |
| name: mask_hazard_getpc1 |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: mask_hazard_getpc1 |
| ; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec |
| ; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64 |
| ; GCN-NEXT: S_WAITCNT_DEPCTR 65534 |
| ; GCN-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc |
| ; GCN-NEXT: S_ENDPGM 0 |
| $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec |
| $sgpr0_sgpr1 = S_GETPC_B64 |
| $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc |
| S_ENDPGM 0 |
| ... |
| |
| # The S_GETPC_B64 that overwrites the mask pair $sgpr0_sgpr1 is inside a BUNDLE together with |
| # the S_ADD_U32/S_ADDC_U32 that consume it. The checks expect S_WAITCNT_DEPCTR 65534 inside the |
| # bundle, and the @mem offsets in the output to be +8/+16 where the input has +4/+12. |
| --- |
| name: mask_hazard_getpc2 |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: mask_hazard_getpc2 |
| ; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec |
| ; GCN-NEXT: BUNDLE implicit-def $sgpr0_sgpr1 { |
| ; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64 |
| ; GCN-NEXT: S_WAITCNT_DEPCTR 65534 |
| ; GCN-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, target-flags(amdgpu-rel32-lo) @mem + 8, implicit-def $scc |
| ; GCN-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, target-flags(amdgpu-rel32-lo) @mem + 16, implicit-def $scc, implicit $scc |
| ; GCN-NEXT: } |
| ; GCN-NEXT: S_ENDPGM 0 |
| $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec |
| BUNDLE implicit-def $sgpr0_sgpr1 { |
| $sgpr0_sgpr1 = S_GETPC_B64 |
| $sgpr0 = S_ADD_U32 $sgpr0, target-flags(amdgpu-rel32-lo) @mem + 4, implicit-def $scc |
| $sgpr1 = S_ADDC_U32 $sgpr1, target-flags(amdgpu-rel32-lo) @mem + 12, implicit-def $scc, implicit $scc |
| } |
| S_ENDPGM 0 |
| ... |
| |
| # V_CNDMASK_B32_e64 reads $sgpr2_sgpr3 as its explicit mask operand; S_CSELECT_B64 then overwrites it. |
| # GFX11 checks expect S_WAITCNT_DEPCTR 65534 after the SGPR write; GFX12 checks expect no added instruction. |
| --- |
| name: mask_hazard_vcc1 |
| body: | |
| bb.0: |
| ; GFX11-LABEL: name: mask_hazard_vcc1 |
| ; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec |
| ; GFX11-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc |
| ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534 |
| ; GFX11-NEXT: S_ENDPGM 0 |
| ; |
| ; GFX12-LABEL: name: mask_hazard_vcc1 |
| ; GFX12: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec |
| ; GFX12-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc |
| ; GFX12-NEXT: S_ENDPGM 0 |
| $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec |
| $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc |
| S_ENDPGM 0 |
| ... |
| |
| # V_CNDMASK_B32_e32 reads $vcc implicitly; S_CSELECT_B64 then overwrites $vcc. |
| # GFX11 checks expect S_WAITCNT_DEPCTR 65534 after the $vcc write; GFX12 checks expect no added instruction. |
| --- |
| name: mask_hazard_vcc2 |
| body: | |
| bb.0: |
| ; GFX11-LABEL: name: mask_hazard_vcc2 |
| ; GFX11: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec |
| ; GFX11-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc |
| ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534 |
| ; GFX11-NEXT: S_ENDPGM 0 |
| ; |
| ; GFX12-LABEL: name: mask_hazard_vcc2 |
| ; GFX12: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec |
| ; GFX12-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc |
| ; GFX12-NEXT: S_ENDPGM 0 |
| $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec |
| $vcc = S_CSELECT_B64 -1, 0, implicit $scc |
| S_ENDPGM 0 |
| ... |
| |
| # DPP form: V_CNDMASK_B32_dpp reads $vcc implicitly; S_CSELECT_B64 then overwrites $vcc. |
| # GFX11 checks expect S_WAITCNT_DEPCTR 65534 after the $vcc write; GFX12 checks expect no added instruction. |
| --- |
| name: mask_hazard_cndmask_dpp1 |
| body: | |
| bb.0: |
| ; GFX11-LABEL: name: mask_hazard_cndmask_dpp1 |
| ; GFX11: $vgpr0 = V_CNDMASK_B32_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, 1, 15, 15, 1, implicit $vcc, implicit $exec |
| ; GFX11-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc |
| ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534 |
| ; GFX11-NEXT: S_ENDPGM 0 |
| ; |
| ; GFX12-LABEL: name: mask_hazard_cndmask_dpp1 |
| ; GFX12: $vgpr0 = V_CNDMASK_B32_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, 1, 15, 15, 1, implicit $vcc, implicit $exec |
| ; GFX12-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc |
| ; GFX12-NEXT: S_ENDPGM 0 |
| $vgpr0 = V_CNDMASK_B32_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, 1, 15, 15, 1, implicit $vcc, implicit $exec |
| $vcc = S_CSELECT_B64 -1, 0, implicit $scc |
| S_ENDPGM 0 |
| ... |
| |
| # e64 DPP form: V_CNDMASK_B32_e64_dpp reads $sgpr2_sgpr3 as an explicit operand; S_CSELECT_B64 then overwrites it. |
| # GFX11 checks expect S_WAITCNT_DEPCTR 65534 after the SGPR write; GFX12 checks expect no added instruction. |
| --- |
| name: mask_hazard_cndmask_dpp2 |
| body: | |
| bb.0: |
| ; GFX11-LABEL: name: mask_hazard_cndmask_dpp2 |
| ; GFX11: $vgpr0 = V_CNDMASK_B32_e64_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, 1, 15, 15, 1, implicit $exec |
| ; GFX11-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc |
| ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534 |
| ; GFX11-NEXT: S_ENDPGM 0 |
| ; |
| ; GFX12-LABEL: name: mask_hazard_cndmask_dpp2 |
| ; GFX12: $vgpr0 = V_CNDMASK_B32_e64_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, 1, 15, 15, 1, implicit $exec |
| ; GFX12-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc |
| ; GFX12-NEXT: S_ENDPGM 0 |
| $vgpr0 = V_CNDMASK_B32_e64_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, 1, 15, 15, 1, implicit $exec |
| $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc |
| S_ENDPGM 0 |
| ... |
| |
| # 16-bit e64 DPP form: V_CNDMASK_B16_fake16_e64_dpp reads $sgpr2_sgpr3 as an explicit operand; |
| # S_CSELECT_B64 then overwrites it. GFX11 checks expect S_WAITCNT_DEPCTR 65534 after the SGPR write; |
| # GFX12 checks expect no added instruction. |
| --- |
| name: mask_hazard_cndmask_dpp3 |
| body: | |
| bb.0: |
| ; GFX11-LABEL: name: mask_hazard_cndmask_dpp3 |
| ; GFX11: $vgpr0 = V_CNDMASK_B16_fake16_e64_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, 1, 15, 15, 1, implicit $exec |
| ; GFX11-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc |
| ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534 |
| ; GFX11-NEXT: S_ENDPGM 0 |
| ; |
| ; GFX12-LABEL: name: mask_hazard_cndmask_dpp3 |
| ; GFX12: $vgpr0 = V_CNDMASK_B16_fake16_e64_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, 1, 15, 15, 1, implicit $exec |
| ; GFX12-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc |
| ; GFX12-NEXT: S_ENDPGM 0 |
| $vgpr0 = V_CNDMASK_B16_fake16_e64_dpp $vgpr0, 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, 1, 15, 15, 1, implicit $exec |
| $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc |
| S_ENDPGM 0 |
| ... |
| |
| # V_ADDC_U32_e64 reads $sgpr2_sgpr3 as an explicit source operand; S_CSELECT_B64 then overwrites it. |
| # GFX11 checks expect S_WAITCNT_DEPCTR 65534 after the SGPR write; GFX12 checks expect no added instruction. |
| --- |
| name: mask_hazard_addc1 |
| body: | |
| bb.0: |
| ; GFX11-LABEL: name: mask_hazard_addc1 |
| ; GFX11: $vgpr1, $vcc = V_ADDC_U32_e64 0, $vgpr1, $sgpr2_sgpr3, 0, implicit $exec |
| ; GFX11-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc |
| ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534 |
| ; GFX11-NEXT: S_ENDPGM 0 |
| ; |
| ; GFX12-LABEL: name: mask_hazard_addc1 |
| ; GFX12: $vgpr1, $vcc = V_ADDC_U32_e64 0, $vgpr1, $sgpr2_sgpr3, 0, implicit $exec |
| ; GFX12-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc |
| ; GFX12-NEXT: S_ENDPGM 0 |
| $vgpr1, $vcc = V_ADDC_U32_e64 0, $vgpr1, $sgpr2_sgpr3, 0, implicit $exec |
| $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc |
| S_ENDPGM 0 |
| ... |
| |
| # V_ADDC_U32_e32 both reads and defines $vcc implicitly; S_CSELECT_B64 then overwrites $vcc. |
| # GFX11 checks expect S_WAITCNT_DEPCTR 65534 after the $vcc write; GFX12 checks expect no added instruction. |
| --- |
| name: mask_hazard_addc2 |
| body: | |
| bb.0: |
| ; GFX11-LABEL: name: mask_hazard_addc2 |
| ; GFX11: $vgpr1 = V_ADDC_U32_e32 0, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec |
| ; GFX11-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc |
| ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534 |
| ; GFX11-NEXT: S_ENDPGM 0 |
| ; |
| ; GFX12-LABEL: name: mask_hazard_addc2 |
| ; GFX12: $vgpr1 = V_ADDC_U32_e32 0, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec |
| ; GFX12-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc |
| ; GFX12-NEXT: S_ENDPGM 0 |
| $vgpr1 = V_ADDC_U32_e32 0, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec |
| $vcc = S_CSELECT_B64 -1, 0, implicit $scc |
| S_ENDPGM 0 |
| ... |
| |
| # DPP form: V_ADDC_U32_dpp both reads and defines $vcc implicitly; S_CSELECT_B64 then overwrites $vcc. |
| # GFX11 checks expect S_WAITCNT_DEPCTR 65534 after the $vcc write; GFX12 checks expect no added instruction. |
| --- |
| name: mask_hazard_addc3 |
| body: | |
| bb.0: |
| ; GFX11-LABEL: name: mask_hazard_addc3 |
| ; GFX11: $vgpr0 = V_ADDC_U32_dpp $vgpr0, $vgpr1, $vgpr2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec |
| ; GFX11-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc |
| ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534 |
| ; GFX11-NEXT: S_ENDPGM 0 |
| ; |
| ; GFX12-LABEL: name: mask_hazard_addc3 |
| ; GFX12: $vgpr0 = V_ADDC_U32_dpp $vgpr0, $vgpr1, $vgpr2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec |
| ; GFX12-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc |
| ; GFX12-NEXT: S_ENDPGM 0 |
| $vgpr0 = V_ADDC_U32_dpp $vgpr0, $vgpr1, $vgpr2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec |
| $vcc = S_CSELECT_B64 -1, 0, implicit $scc |
| S_ENDPGM 0 |
| ... |
| |
| # e64 DPP form: V_ADDC_U32_e64_dpp both defines and reads $sgpr2_sgpr3 explicitly; S_CSELECT_B64 then overwrites it. |
| # GFX11 checks expect S_WAITCNT_DEPCTR 65534 after the SGPR write; GFX12 checks expect no added instruction. |
| --- |
| name: mask_hazard_addc4 |
| body: | |
| bb.0: |
| ; GFX11-LABEL: name: mask_hazard_addc4 |
| ; GFX11: $vgpr0, $sgpr2_sgpr3 = V_ADDC_U32_e64_dpp $vgpr0, $vgpr1, $vgpr2, $sgpr2_sgpr3, 0, 1, 15, 15, 1, implicit $exec |
| ; GFX11-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc |
| ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534 |
| ; GFX11-NEXT: S_ENDPGM 0 |
| ; |
| ; GFX12-LABEL: name: mask_hazard_addc4 |
| ; GFX12: $vgpr0, $sgpr2_sgpr3 = V_ADDC_U32_e64_dpp $vgpr0, $vgpr1, $vgpr2, $sgpr2_sgpr3, 0, 1, 15, 15, 1, implicit $exec |
| ; GFX12-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc |
| ; GFX12-NEXT: S_ENDPGM 0 |
| $vgpr0, $sgpr2_sgpr3 = V_ADDC_U32_e64_dpp $vgpr0, $vgpr1, $vgpr2, $sgpr2_sgpr3, 0, 1, 15, 15, 1, implicit $exec |
| $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc |
| S_ENDPGM 0 |
| ... |
| |
| # V_SUBB_U32_e64 reads $sgpr2_sgpr3 as an explicit source operand; S_CSELECT_B64 then overwrites it. |
| # GFX11 checks expect S_WAITCNT_DEPCTR 65534 after the SGPR write; GFX12 checks expect no added instruction. |
| --- |
| name: mask_hazard_subb1 |
| body: | |
| bb.0: |
| ; GFX11-LABEL: name: mask_hazard_subb1 |
| ; GFX11: $vgpr1, $vcc = V_SUBB_U32_e64 0, $vgpr1, $sgpr2_sgpr3, 0, implicit $exec |
| ; GFX11-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc |
| ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534 |
| ; GFX11-NEXT: S_ENDPGM 0 |
| ; |
| ; GFX12-LABEL: name: mask_hazard_subb1 |
| ; GFX12: $vgpr1, $vcc = V_SUBB_U32_e64 0, $vgpr1, $sgpr2_sgpr3, 0, implicit $exec |
| ; GFX12-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc |
| ; GFX12-NEXT: S_ENDPGM 0 |
| $vgpr1, $vcc = V_SUBB_U32_e64 0, $vgpr1, $sgpr2_sgpr3, 0, implicit $exec |
| $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc |
| S_ENDPGM 0 |
| ... |
| |
| # V_SUBB_U32_e32 both reads and defines $vcc implicitly; S_CSELECT_B64 then overwrites $vcc. |
| # GFX11 checks expect S_WAITCNT_DEPCTR 65534 after the $vcc write; GFX12 checks expect no added instruction. |
| --- |
| name: mask_hazard_subb2 |
| body: | |
| bb.0: |
| ; GFX11-LABEL: name: mask_hazard_subb2 |
| ; GFX11: $vgpr1 = V_SUBB_U32_e32 0, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec |
| ; GFX11-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc |
| ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534 |
| ; GFX11-NEXT: S_ENDPGM 0 |
| ; |
| ; GFX12-LABEL: name: mask_hazard_subb2 |
| ; GFX12: $vgpr1 = V_SUBB_U32_e32 0, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec |
| ; GFX12-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc |
| ; GFX12-NEXT: S_ENDPGM 0 |
| $vgpr1 = V_SUBB_U32_e32 0, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec |
| $vcc = S_CSELECT_B64 -1, 0, implicit $scc |
| S_ENDPGM 0 |
| ... |
| |
| # DPP form: V_SUBB_U32_dpp both reads and defines $vcc implicitly; S_CSELECT_B64 then overwrites $vcc. |
| # GFX11 checks expect S_WAITCNT_DEPCTR 65534 after the $vcc write; GFX12 checks expect no added instruction. |
| --- |
| name: mask_hazard_subb3 |
| body: | |
| bb.0: |
| ; GFX11-LABEL: name: mask_hazard_subb3 |
| ; GFX11: $vgpr0 = V_SUBB_U32_dpp $vgpr0, $vgpr1, $vgpr2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec |
| ; GFX11-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc |
| ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534 |
| ; GFX11-NEXT: S_ENDPGM 0 |
| ; |
| ; GFX12-LABEL: name: mask_hazard_subb3 |
| ; GFX12: $vgpr0 = V_SUBB_U32_dpp $vgpr0, $vgpr1, $vgpr2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec |
| ; GFX12-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc |
| ; GFX12-NEXT: S_ENDPGM 0 |
| $vgpr0 = V_SUBB_U32_dpp $vgpr0, $vgpr1, $vgpr2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec |
| $vcc = S_CSELECT_B64 -1, 0, implicit $scc |
| S_ENDPGM 0 |
| ... |
| |
| # e64 DPP form: V_SUBB_U32_e64_dpp both defines and reads $sgpr2_sgpr3 explicitly; S_CSELECT_B64 then overwrites it. |
| # GFX11 checks expect S_WAITCNT_DEPCTR 65534 after the SGPR write; GFX12 checks expect no added instruction. |
| --- |
| name: mask_hazard_subb4 |
| body: | |
| bb.0: |
| ; GFX11-LABEL: name: mask_hazard_subb4 |
| ; GFX11: $vgpr0, $sgpr2_sgpr3 = V_SUBB_U32_e64_dpp $vgpr0, $vgpr1, $vgpr2, $sgpr2_sgpr3, 0, 1, 15, 15, 1, implicit $exec |
| ; GFX11-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc |
| ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534 |
| ; GFX11-NEXT: S_ENDPGM 0 |
| ; |
| ; GFX12-LABEL: name: mask_hazard_subb4 |
| ; GFX12: $vgpr0, $sgpr2_sgpr3 = V_SUBB_U32_e64_dpp $vgpr0, $vgpr1, $vgpr2, $sgpr2_sgpr3, 0, 1, 15, 15, 1, implicit $exec |
| ; GFX12-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc |
| ; GFX12-NEXT: S_ENDPGM 0 |
| $vgpr0, $sgpr2_sgpr3 = V_SUBB_U32_e64_dpp $vgpr0, $vgpr1, $vgpr2, $sgpr2_sgpr3, 0, 1, 15, 15, 1, implicit $exec |
| $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc |
| S_ENDPGM 0 |
| ... |
| |
| # V_SUBBREV_U32_e64 reads $sgpr2_sgpr3 as an explicit source operand; S_CSELECT_B64 then overwrites it. |
| # GFX11 checks expect S_WAITCNT_DEPCTR 65534 after the SGPR write; GFX12 checks expect no added instruction. |
| --- |
| name: mask_hazard_subbrev1 |
| body: | |
| bb.0: |
| ; GFX11-LABEL: name: mask_hazard_subbrev1 |
| ; GFX11: $vgpr1, $vcc = V_SUBBREV_U32_e64 0, $vgpr1, $sgpr2_sgpr3, 0, implicit $exec |
| ; GFX11-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc |
| ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534 |
| ; GFX11-NEXT: S_ENDPGM 0 |
| ; |
| ; GFX12-LABEL: name: mask_hazard_subbrev1 |
| ; GFX12: $vgpr1, $vcc = V_SUBBREV_U32_e64 0, $vgpr1, $sgpr2_sgpr3, 0, implicit $exec |
| ; GFX12-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc |
| ; GFX12-NEXT: S_ENDPGM 0 |
| $vgpr1, $vcc = V_SUBBREV_U32_e64 0, $vgpr1, $sgpr2_sgpr3, 0, implicit $exec |
| $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc |
| S_ENDPGM 0 |
| ... |
| |
| # V_SUBBREV_U32_e32 both reads and defines $vcc implicitly; S_CSELECT_B64 then overwrites $vcc. |
| # GFX11 checks expect S_WAITCNT_DEPCTR 65534 after the $vcc write; GFX12 checks expect no added instruction. |
| --- |
| name: mask_hazard_subbrev2 |
| body: | |
| bb.0: |
| ; GFX11-LABEL: name: mask_hazard_subbrev2 |
| ; GFX11: $vgpr1 = V_SUBBREV_U32_e32 0, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec |
| ; GFX11-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc |
| ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534 |
| ; GFX11-NEXT: S_ENDPGM 0 |
| ; |
| ; GFX12-LABEL: name: mask_hazard_subbrev2 |
| ; GFX12: $vgpr1 = V_SUBBREV_U32_e32 0, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec |
| ; GFX12-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc |
| ; GFX12-NEXT: S_ENDPGM 0 |
| $vgpr1 = V_SUBBREV_U32_e32 0, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec |
| $vcc = S_CSELECT_B64 -1, 0, implicit $scc |
| S_ENDPGM 0 |
| ... |
| |
| # DPP form: V_SUBBREV_U32_dpp both reads and defines $vcc implicitly; S_CSELECT_B64 then overwrites $vcc. |
| # GFX11 checks expect S_WAITCNT_DEPCTR 65534 after the $vcc write; GFX12 checks expect no added instruction. |
| --- |
| name: mask_hazard_subbrev3 |
| body: | |
| bb.0: |
| ; GFX11-LABEL: name: mask_hazard_subbrev3 |
| ; GFX11: $vgpr0 = V_SUBBREV_U32_dpp $vgpr0, $vgpr1, $vgpr2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec |
| ; GFX11-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc |
| ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534 |
| ; GFX11-NEXT: S_ENDPGM 0 |
| ; |
| ; GFX12-LABEL: name: mask_hazard_subbrev3 |
| ; GFX12: $vgpr0 = V_SUBBREV_U32_dpp $vgpr0, $vgpr1, $vgpr2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec |
| ; GFX12-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc |
| ; GFX12-NEXT: S_ENDPGM 0 |
| $vgpr0 = V_SUBBREV_U32_dpp $vgpr0, $vgpr1, $vgpr2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec |
| $vcc = S_CSELECT_B64 -1, 0, implicit $scc |
| S_ENDPGM 0 |
| ... |
| |
| # e64 DPP form: V_SUBBREV_U32_e64_dpp both defines and reads $sgpr2_sgpr3 explicitly; S_CSELECT_B64 then overwrites it. |
| # GFX11 checks expect S_WAITCNT_DEPCTR 65534 after the SGPR write; GFX12 checks expect no added instruction. |
| --- |
| name: mask_hazard_subbrev4 |
| body: | |
| bb.0: |
| ; GFX11-LABEL: name: mask_hazard_subbrev4 |
| ; GFX11: $vgpr0, $sgpr2_sgpr3 = V_SUBBREV_U32_e64_dpp $vgpr0, $vgpr1, $vgpr2, $sgpr2_sgpr3, 0, 1, 15, 15, 1, implicit $exec |
| ; GFX11-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc |
| ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534 |
| ; GFX11-NEXT: S_ENDPGM 0 |
| ; |
| ; GFX12-LABEL: name: mask_hazard_subbrev4 |
| ; GFX12: $vgpr0, $sgpr2_sgpr3 = V_SUBBREV_U32_e64_dpp $vgpr0, $vgpr1, $vgpr2, $sgpr2_sgpr3, 0, 1, 15, 15, 1, implicit $exec |
| ; GFX12-NEXT: $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc |
| ; GFX12-NEXT: S_ENDPGM 0 |
| $vgpr0, $sgpr2_sgpr3 = V_SUBBREV_U32_e64_dpp $vgpr0, $vgpr1, $vgpr2, $sgpr2_sgpr3, 0, 1, 15, 15, 1, implicit $exec |
| $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit $scc |
| S_ENDPGM 0 |
| ... |
| |
| # V_DIV_FMAS_F32_e64 reads $vcc implicitly; S_CSELECT_B64 then overwrites $vcc. |
| # GFX11 checks expect S_WAITCNT_DEPCTR 65534 after the $vcc write; GFX12 checks expect no added instruction. |
| --- |
| name: mask_hazard_div_fmas_f32 |
| body: | |
| bb.0: |
| ; GFX11-LABEL: name: mask_hazard_div_fmas_f32 |
| ; GFX11: $vgpr0 = V_DIV_FMAS_F32_e64 0, $vgpr1, 0, $vgpr2, 0, $vgpr3, 0, 0, implicit $mode, implicit $vcc, implicit $exec |
| ; GFX11-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc |
| ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534 |
| ; GFX11-NEXT: S_ENDPGM 0 |
| ; |
| ; GFX12-LABEL: name: mask_hazard_div_fmas_f32 |
| ; GFX12: $vgpr0 = V_DIV_FMAS_F32_e64 0, $vgpr1, 0, $vgpr2, 0, $vgpr3, 0, 0, implicit $mode, implicit $vcc, implicit $exec |
| ; GFX12-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc |
| ; GFX12-NEXT: S_ENDPGM 0 |
| $vgpr0 = V_DIV_FMAS_F32_e64 0, $vgpr1, 0, $vgpr2, 0, $vgpr3, 0, 0, implicit $mode, implicit $vcc, implicit $exec |
| $vcc = S_CSELECT_B64 -1, 0, implicit $scc |
| S_ENDPGM 0 |
| ... |
| |
| # 64-bit variant: V_DIV_FMAS_F64_e64 reads $vcc implicitly; S_CSELECT_B64 then overwrites $vcc. |
| # GFX11 checks expect S_WAITCNT_DEPCTR 65534 after the $vcc write; GFX12 checks expect no added instruction. |
| --- |
| name: mask_hazard_div_fmas_f64 |
| body: | |
| bb.0: |
| ; GFX11-LABEL: name: mask_hazard_div_fmas_f64 |
| ; GFX11: $vgpr0_vgpr1 = V_DIV_FMAS_F64_e64 0, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, $vgpr4_vgpr5, 0, 0, implicit $mode, implicit $vcc, implicit $exec |
| ; GFX11-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc |
| ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534 |
| ; GFX11-NEXT: S_ENDPGM 0 |
| ; |
| ; GFX12-LABEL: name: mask_hazard_div_fmas_f64 |
| ; GFX12: $vgpr0_vgpr1 = V_DIV_FMAS_F64_e64 0, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, $vgpr4_vgpr5, 0, 0, implicit $mode, implicit $vcc, implicit $exec |
| ; GFX12-NEXT: $vcc = S_CSELECT_B64 -1, 0, implicit $scc |
| ; GFX12-NEXT: S_ENDPGM 0 |
| $vgpr0_vgpr1 = V_DIV_FMAS_F64_e64 0, $vgpr0_vgpr1, 0, $vgpr2_vgpr3, 0, $vgpr4_vgpr5, 0, 0, implicit $mode, implicit $vcc, implicit $exec |
| $vcc = S_CSELECT_B64 -1, 0, implicit $scc |
| S_ENDPGM 0 |
| ... |
| |
| # Check low word overlap |
| # Only $sgpr2, the low half of the $sgpr2_sgpr3 mask pair, is overwritten (by S_MOV_B32). |
| # GFX11 checks expect S_WAITCNT_DEPCTR 65534 after the write; GFX12 checks expect no added instruction. |
| --- |
| name: mask_hazard_subreg1 |
| body: | |
| bb.0: |
| ; GFX11-LABEL: name: mask_hazard_subreg1 |
| ; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec |
| ; GFX11-NEXT: $sgpr2 = S_MOV_B32 0 |
| ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534 |
| ; GFX11-NEXT: S_ENDPGM 0 |
| ; |
| ; GFX12-LABEL: name: mask_hazard_subreg1 |
| ; GFX12: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec |
| ; GFX12-NEXT: $sgpr2 = S_MOV_B32 0 |
| ; GFX12-NEXT: S_ENDPGM 0 |
| $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec |
| $sgpr2 = S_MOV_B32 0 |
| S_ENDPGM 0 |
| ... |
| |
| # Check high word overlap |
| # Only $sgpr3, the high half of the $sgpr2_sgpr3 mask pair, is overwritten (by S_MOV_B32). |
| # GFX11 checks expect S_WAITCNT_DEPCTR 65534 after the write; GFX12 checks expect no added instruction. |
| --- |
| name: mask_hazard_subreg2 |
| body: | |
| bb.0: |
| ; GFX11-LABEL: name: mask_hazard_subreg2 |
| ; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec |
| ; GFX11-NEXT: $sgpr3 = S_MOV_B32 0 |
| ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534 |
| ; GFX11-NEXT: S_ENDPGM 0 |
| ; |
| ; GFX12-LABEL: name: mask_hazard_subreg2 |
| ; GFX12: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec |
| ; GFX12-NEXT: $sgpr3 = S_MOV_B32 0 |
| ; GFX12-NEXT: S_ENDPGM 0 |
| $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec |
| $sgpr3 = S_MOV_B32 0 |
| S_ENDPGM 0 |
| ... |
| |
| # Check multiple subreg overlap |
| # Both halves of the $sgpr2_sgpr3 mask pair are overwritten by separate S_MOV_B32 instructions. |
| # GFX11 checks expect a single S_WAITCNT_DEPCTR 65534, placed after the first write ($sgpr2) |
| # and before the second ($sgpr3); GFX12 checks expect no added instruction. |
| --- |
| name: mask_hazard_subreg3 |
| body: | |
| bb.0: |
| ; GFX11-LABEL: name: mask_hazard_subreg3 |
| ; GFX11: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec |
| ; GFX11-NEXT: $sgpr2 = S_MOV_B32 0 |
| ; GFX11-NEXT: S_WAITCNT_DEPCTR 65534 |
| ; GFX11-NEXT: $sgpr3 = S_MOV_B32 0 |
| ; GFX11-NEXT: S_ENDPGM 0 |
| ; |
| ; GFX12-LABEL: name: mask_hazard_subreg3 |
| ; GFX12: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec |
| ; GFX12-NEXT: $sgpr2 = S_MOV_B32 0 |
| ; GFX12-NEXT: $sgpr3 = S_MOV_B32 0 |
| ; GFX12-NEXT: S_ENDPGM 0 |
| $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec |
| $sgpr2 = S_MOV_B32 0 |
| $sgpr3 = S_MOV_B32 0 |
| S_ENDPGM 0 |
| ... |
| |
| # Check vcc_lo overlap |
| # V_CNDMASK_B32_e32 reads $vcc implicitly; $vcc_lo is then overwritten and immediately read by S_MOV_B32. |
| # Both run lines (GCN prefix) expect S_WAITCNT_DEPCTR 65534 between the $vcc_lo write and its read. |
| --- |
| name: mask_hazard_subreg4 |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: mask_hazard_subreg4 |
| ; GCN: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec |
| ; GCN-NEXT: $vcc_lo = S_MOV_B32 0 |
| ; GCN-NEXT: S_WAITCNT_DEPCTR 65534 |
| ; GCN-NEXT: $sgpr2 = S_MOV_B32 $vcc_lo |
| ; GCN-NEXT: S_ENDPGM 0 |
| $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec |
| $vcc_lo = S_MOV_B32 0 |
| $sgpr2 = S_MOV_B32 $vcc_lo |
| S_ENDPGM 0 |
| ... |
| |
| # Check vcc_hi overlap |
| # V_CNDMASK_B32_e32 reads $vcc implicitly; $vcc_hi is then overwritten and immediately read by S_MOV_B32. |
| # Both run lines (GCN prefix) expect S_WAITCNT_DEPCTR 65534 between the $vcc_hi write and its read. |
| --- |
| name: mask_hazard_subreg5 |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: mask_hazard_subreg5 |
| ; GCN: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec |
| ; GCN-NEXT: $vcc_hi = S_MOV_B32 0 |
| ; GCN-NEXT: S_WAITCNT_DEPCTR 65534 |
| ; GCN-NEXT: $sgpr2 = S_MOV_B32 $vcc_hi |
| ; GCN-NEXT: S_ENDPGM 0 |
| $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec |
| $vcc_hi = S_MOV_B32 0 |
| $sgpr2 = S_MOV_B32 $vcc_hi |
| S_ENDPGM 0 |
| ... |
| |
| # S_WAITCNT does not mitigate hazard |
| # An S_WAITCNT 0 sits between the V_CNDMASK_B32_e64 mask read and the S_GETPC_B64 write of $sgpr0_sgpr1; |
| # both run lines (GCN prefix) still expect S_WAITCNT_DEPCTR 65534 after the S_GETPC_B64. |
| --- |
| name: mask_hazard_waitcnt |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: mask_hazard_waitcnt |
| ; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec |
| ; GCN-NEXT: S_WAITCNT 0 |
| ; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64 |
| ; GCN-NEXT: S_WAITCNT_DEPCTR 65534 |
| ; GCN-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc |
| ; GCN-NEXT: S_ENDPGM 0 |
| $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec |
| S_WAITCNT 0 |
| $sgpr0_sgpr1 = S_GETPC_B64 |
| $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc |
| S_ENDPGM 0 |
| ... |
| |
| # Check implicit $exec |
| # Two V_MOV_B32_e32 instructions (each with an implicit $exec use) separate the mask read from the |
| # S_GETPC_B64 write of $sgpr0_sgpr1; both run lines (GCN prefix) still expect S_WAITCNT_DEPCTR 65534. |
| --- |
| name: mask_hazard_gap1 |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: mask_hazard_gap1 |
| ; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec |
| ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec |
| ; GCN-NEXT: $vgpr3 = V_MOV_B32_e32 0, implicit $exec |
| ; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64 |
| ; GCN-NEXT: S_WAITCNT_DEPCTR 65534 |
| ; GCN-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc |
| ; GCN-NEXT: S_ENDPGM 0 |
| $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec |
| $vgpr2 = V_MOV_B32_e32 0, implicit $exec |
| $vgpr3 = V_MOV_B32_e32 0, implicit $exec |
| $sgpr0_sgpr1 = S_GETPC_B64 |
| $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc |
| S_ENDPGM 0 |
| ... |
| |
| # Check implicit $mode |
| # A V_MOV_B32_e32 with implicit $exec and $mode uses separates the mask read from the |
| # S_GETPC_B64 write of $sgpr0_sgpr1; both run lines (GCN prefix) still expect S_WAITCNT_DEPCTR 65534. |
| --- |
| name: mask_hazard_gap2 |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: mask_hazard_gap2 |
| ; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec |
| ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec, implicit $mode |
| ; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64 |
| ; GCN-NEXT: S_WAITCNT_DEPCTR 65534 |
| ; GCN-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc |
| ; GCN-NEXT: S_ENDPGM 0 |
| $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec |
| $vgpr2 = V_MOV_B32_e32 0, implicit $exec, implicit $mode |
| $sgpr0_sgpr1 = S_GETPC_B64 |
| $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc |
| S_ENDPGM 0 |
| ... |
| |
| # Check explicit $exec |
| # A V_WRITELANE_B32 that reads $exec_lo as an explicit operand separates the mask read from the |
| # S_GETPC_B64 write of $sgpr0_sgpr1; both run lines (GCN prefix) still expect S_WAITCNT_DEPCTR 65534. |
| --- |
| name: mask_hazard_gap3 |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: mask_hazard_gap3 |
| ; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec |
| ; GCN-NEXT: $vgpr2 = V_WRITELANE_B32 $exec_lo, 0, $vgpr2 |
| ; GCN-NEXT: $sgpr0_sgpr1 = S_GETPC_B64 |
| ; GCN-NEXT: S_WAITCNT_DEPCTR 65534 |
| ; GCN-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc |
| ; GCN-NEXT: S_ENDPGM 0 |
| $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr0_sgpr1, implicit $exec |
| $vgpr2 = V_WRITELANE_B32 $exec_lo, 0, $vgpr2 |
| $sgpr0_sgpr1 = S_GETPC_B64 |
| $sgpr0 = S_ADD_U32 $sgpr0, 0, implicit-def $scc |
| S_ENDPGM 0 |
| ... |
| |
| # Different SGPR write |
| # The mask is $sgpr2_sgpr3 but the following S_MOV_B32 writes $sgpr0, which is not part of that pair. |
| # Both run lines (GCN prefix) expect the sequence unchanged, with no S_WAITCNT_DEPCTR. |
| --- |
| name: mask_hazard_no_hazard1 |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: mask_hazard_no_hazard1 |
| ; GCN: $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec |
| ; GCN-NEXT: $sgpr0 = S_MOV_B32 0 |
| ; GCN-NEXT: S_ENDPGM 0 |
| $vgpr1 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr2, $sgpr2_sgpr3, implicit $exec |
| $sgpr0 = S_MOV_B32 0 |
| S_ENDPGM 0 |
| ... |
| |
| # Different SGPR write with mask read overlap |
| # S_MOV_B64 only reads $vcc (the mask register) and writes the unrelated pair $sgpr0_sgpr1. |
| # Both run lines (GCN prefix) expect the sequence unchanged, with no S_WAITCNT_DEPCTR. |
| --- |
| name: mask_hazard_no_hazard2 |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: mask_hazard_no_hazard2 |
| ; GCN: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec |
| ; GCN-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $vcc |
| ; GCN-NEXT: S_ENDPGM 0 |
| $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec |
| $sgpr0_sgpr1 = S_MOV_B64 $vcc |
| S_ENDPGM 0 |
| ... |
| |
| # Overlapping VGPR write |
| # V_MOV_B32_e32 overwrites $vgpr2, a VGPR source of the preceding V_CNDMASK_B32_e32; no SGPR is written. |
| # Both run lines (GCN prefix) expect the sequence unchanged, with no S_WAITCNT_DEPCTR. |
| --- |
| name: mask_hazard_no_hazard3 |
| body: | |
| bb.0: |
| ; GCN-LABEL: name: mask_hazard_no_hazard3 |
| ; GCN: $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec |
| ; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec |
| ; GCN-NEXT: S_ENDPGM 0 |
| $vgpr1 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec |
| $vgpr2 = V_MOV_B32_e32 0, implicit $exec |
| S_ENDPGM 0 |
| ... |