| # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 |
| # RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass si-wqm -o - %s | FileCheck %s |
| # RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -passes=si-wqm -o - %s | FileCheck %s |
| |
| --- |
| # Test that si-wqm leaves the shader (aka bb.1) alone when it contains no |
| # whole wave mode code. |
| # |
| # The only difference between the input and the expected output is in bb.0: |
| # SI_INIT_WHOLE_WAVE disappears and an S_OR_SAVEEXEC_B32 -1 appears as the |
| # first instruction of the block, with its result feeding the S_AND_B32 that |
| # produces the exec mask used for bb.1. bb.1 and bb.2 are expected to come out |
| # unchanged. |
| # |
| name: test_no_wwm |
| alignment: 1 |
| exposesReturnsTwice: false |
| tracksRegLiveness: true |
| body: | |
| ; CHECK-LABEL: name: test_no_wwm |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) |
| ; CHECK-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[S_OR_SAVEEXEC_B32_:%[0-9]+]]:sreg_32 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def $scc, implicit $exec |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 |
| ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:ccr_sgpr_64 = COPY $sgpr1 |
| ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:ccr_sgpr_64 = COPY $sgpr2 |
| ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr8 |
| ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32_xm0_xexec = COPY $exec_lo, implicit-def $exec_lo |
| ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY3]], [[S_OR_SAVEEXEC_B32_]], implicit-def dead $scc |
| ; CHECK-NEXT: $exec_lo = S_MOV_B32_term [[S_AND_B32_]] |
| ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.1 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.2(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 5, [[COPY2]], 0, implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[COPY3]], implicit-def $scc |
| ; CHECK-NEXT: $vgpr8 = COPY [[COPY2]] |
| ; CHECK-NEXT: $sgpr0 = COPY [[COPY]] |
| ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr0 |
| ; CHECK-NEXT: SI_CS_CHAIN_TC_W32 [[COPY1]], 0, 0, [[COPY4]], amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8 |
| bb.0: |
| successors: %bb.1, %bb.2 |
| liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8 |
| %9:sreg_32 = COPY $sgpr0 |
| undef %1.sub0:ccr_sgpr_64 = COPY $sgpr1 |
| %1.sub1:ccr_sgpr_64 = COPY $sgpr2 |
| %37:vgpr_32 = COPY $vgpr8 |
| ; Pseudo that si-wqm is expected to replace with S_OR_SAVEEXEC_B32 -1 at the |
| ; top of this block. |
| %14:sreg_32_xm0_xexec = SI_INIT_WHOLE_WAVE implicit-def $exec, implicit $exec |
| ; %16 keeps the exec mask so that bb.2 can OR it back in. |
| %16:sreg_32_xm0_xexec = COPY $exec_lo, implicit-def $exec_lo |
| %38:sreg_32 = S_AND_B32 %16:sreg_32_xm0_xexec, %14:sreg_32_xm0_xexec, implicit-def dead $scc |
| $exec_lo = S_MOV_B32_term %38:sreg_32 |
| ; Skip bb.1 entirely when the resulting exec mask is zero. |
| S_CBRANCH_EXECZ %bb.2, implicit $exec |
| S_BRANCH %bb.1 |
| |
| bb.1: |
| ; Ordinary code only: nothing in this block should be rewritten by the pass. |
| %37:vgpr_32 = V_ADD_U32_e64 5, %37:vgpr_32, 0, implicit $exec |
| |
| bb.2: |
| $exec_lo = S_OR_B32 $exec_lo, %16:sreg_32_xm0_xexec, implicit-def $scc |
| $vgpr8 = COPY %37:vgpr_32 |
| $sgpr0 = COPY %9:sreg_32 |
| %2:sreg_32 = COPY $sgpr0 |
| SI_CS_CHAIN_TC_W32 %1:ccr_sgpr_64, 0, 0, %2:sreg_32, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8 |
| ... |
| |
| --- |
| # Test that we handle WWM in the shader correctly. |
| # |
| # bb.1 mixes ordinary code (the V_ADD_U32_e64 24) with a sequence that feeds |
| # STRICT_WWM. In the expected output that sequence is bracketed by |
| # ENTER_STRICT_WWM -1 / EXIT_STRICT_WWM, and STRICT_WWM itself is replaced by |
| # a V_MOV_B32_e32 placed after the exit. In bb.0, SI_INIT_WHOLE_WAVE is |
| # replaced by an S_OR_SAVEEXEC_B32 -1 at the top of the block. |
| # |
| name: test_wwm_bb1 |
| alignment: 1 |
| exposesReturnsTwice: false |
| tracksRegLiveness: true |
| body: | |
| ; CHECK-LABEL: name: test_wwm_bb1 |
| ; CHECK: bb.0: |
| ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) |
| ; CHECK-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8, $vgpr9 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[S_OR_SAVEEXEC_B32_:%[0-9]+]]:sreg_32 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def $scc, implicit $exec |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 |
| ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:ccr_sgpr_64 = COPY $sgpr1 |
| ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:ccr_sgpr_64 = COPY $sgpr2 |
| ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr9 |
| ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr8 |
| ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec = COPY $exec_lo, implicit-def $exec_lo |
| ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY4]], [[S_OR_SAVEEXEC_B32_]], implicit-def dead $scc |
| ; CHECK-NEXT: $exec_lo = S_MOV_B32_term [[S_AND_B32_]] |
| ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec |
| ; CHECK-NEXT: S_BRANCH %bb.1 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.2(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 24, [[COPY3]], 0, implicit $exec |
| ; CHECK-NEXT: [[ENTER_STRICT_WWM:%[0-9]+]]:sreg_32_xm0_xexec = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec |
| ; CHECK-NEXT: dead [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF |
| ; CHECK-NEXT: [[V_SET_INACTIVE_B32_:%[0-9]+]]:vgpr_32 = V_SET_INACTIVE_B32 0, [[COPY3]], 0, 71, undef [[ENTER_STRICT_WWM]], implicit $exec, implicit-def $scc |
| ; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 42, [[V_SET_INACTIVE_B32_]], 0, implicit $exec |
| ; CHECK-NEXT: $exec_lo = EXIT_STRICT_WWM [[ENTER_STRICT_WWM]] |
| ; CHECK-NEXT: early-clobber [[COPY2]]:vgpr_32 = V_MOV_B32_e32 [[V_ADD_U32_e64_]], implicit $exec |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2: |
| ; CHECK-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[COPY4]], implicit-def $scc |
| ; CHECK-NEXT: $vgpr8 = COPY [[COPY2]] |
| ; CHECK-NEXT: $vgpr9 = COPY [[COPY3]] |
| ; CHECK-NEXT: $sgpr0 = COPY [[COPY]] |
| ; CHECK-NEXT: SI_CS_CHAIN_TC_W32 [[COPY1]], 0, 0, [[COPY]], amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9 |
| bb.0: |
| successors: %bb.1, %bb.2 |
| liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr8, $vgpr9 |
| %9:sreg_32 = COPY $sgpr0 |
| undef %1.sub0:ccr_sgpr_64 = COPY $sgpr1 |
| %1.sub1:ccr_sgpr_64 = COPY $sgpr2 |
| %40:vgpr_32 = COPY $vgpr9 |
| %36:vgpr_32 = COPY $vgpr8 |
| ; Pseudo that si-wqm is expected to replace with S_OR_SAVEEXEC_B32 -1 at the |
| ; top of this block. |
| %14:sreg_32_xm0_xexec = SI_INIT_WHOLE_WAVE implicit-def $exec, implicit $exec |
| ; %16 keeps the exec mask so that bb.2 can OR it back in. |
| %16:sreg_32_xm0_xexec = COPY $exec_lo, implicit-def $exec_lo |
| %38:sreg_32 = S_AND_B32 %16:sreg_32_xm0_xexec, %14:sreg_32_xm0_xexec, implicit-def dead $scc |
| $exec_lo = S_MOV_B32_term %38:sreg_32 |
| ; Skip bb.1 entirely when the resulting exec mask is zero. |
| S_CBRANCH_EXECZ %bb.2, implicit $exec |
| S_BRANCH %bb.1 |
| |
| bb.1: |
| ; Ordinary code: expected to stay ahead of ENTER_STRICT_WWM in the output. |
| %36:vgpr_32 = V_ADD_U32_e64 24, %36:vgpr_32, 0, implicit $exec |
| ; The next three instructions are the ones expected to end up between |
| ; ENTER_STRICT_WWM and EXIT_STRICT_WWM. |
| %20:sreg_32_xm0_xexec = IMPLICIT_DEF |
| %19:vgpr_32 = V_SET_INACTIVE_B32 0, %36:vgpr_32, 0, 71, undef %20, implicit $exec, implicit-def $scc |
| %18:vgpr_32 = V_ADD_U32_e64 42, %19:vgpr_32, 0, implicit $exec |
| ; Expected to become a V_MOV_B32_e32 into %40 placed after EXIT_STRICT_WWM. |
| %40:vgpr_32 = STRICT_WWM %18:vgpr_32, implicit $exec |
| |
| bb.2: |
| $exec_lo = S_OR_B32 $exec_lo, %16:sreg_32_xm0_xexec, implicit-def $scc |
| ; Note the swap relative to bb.0: %40 (initialised from $vgpr9, then written |
| ; by STRICT_WWM) goes out in $vgpr8, while %36 (initialised from $vgpr8) goes |
| ; out in $vgpr9. |
| $vgpr8 = COPY %40:vgpr_32 |
| $vgpr9 = COPY %36:vgpr_32 |
| $sgpr0 = COPY %9:sreg_32 |
| SI_CS_CHAIN_TC_W32 %1:ccr_sgpr_64, 0, 0, %9:sreg_32, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9 |
| ... |