| # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py |
| # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -run-pass=prologepilog,machine-cp -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s |
| |
| # $vgpr0 is both a WWM-reserved register and the register that carries the return value. |
| # The COPY that moves the return value to $vgpr0 should not be removed during machine-cp. The spill restore of the same register |
| # that follows is meant to reload only its inactive lanes. Marking the register itself as the tied-op in the spill reload |
| # (implicit $vgpr0(tied-def 0)) prevents the undesired optimization. |
| |
| --- |
| name: wwm_scratch_reg_spill_reload_of_outgoing_reg |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| wwmReservedRegs: ['$vgpr0'] |
| isEntryFunction: false |
| scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' |
| stackPtrOffsetReg: '$sgpr32' |
| frameOffsetReg: '$sgpr33' |
| body: | |
| bb.0: |
| liveins: $sgpr20, $vgpr1 |
| ; GCN-LABEL: name: wwm_scratch_reg_spill_reload_of_outgoing_reg |
| ; GCN: liveins: $sgpr20, $vgpr0, $vgpr1 |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec |
| ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) |
| ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 |
| ; GCN-NEXT: $vgpr0 = IMPLICIT_DEF |
| ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr20, 0, $vgpr0 |
| ; GCN-NEXT: $vgpr0 = COPY killed renamable $vgpr1, implicit $exec |
| ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec |
| ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $vgpr0(tied-def 0) :: (load (s32) from %stack.0, addrspace 5) |
| ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 |
| ; GCN-NEXT: SI_RETURN implicit $vgpr0 |
| $vgpr0 = IMPLICIT_DEF |
| $vgpr0 = V_WRITELANE_B32 killed $sgpr20, 0, $vgpr0 |
| $vgpr0 = COPY killed renamable $vgpr1, implicit $exec |
| SI_RETURN implicit $vgpr0 |
| ... |
| |
| # The reload of $vgpr0 requires the tied-op because $vgpr0 is a subregister of the outgoing tuple register $vgpr0_vgpr1. |
| # The reload of $vgpr2 doesn't need the tied-op as $vgpr2 isn't holding any return value. |
| --- |
| name: wwm_scratch_reg_spill_reload_of_outgoing_tuple_subreg |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| wwmReservedRegs: ['$vgpr0', '$vgpr2'] |
| isEntryFunction: false |
| scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' |
| stackPtrOffsetReg: '$sgpr32' |
| frameOffsetReg: '$sgpr33' |
| body: | |
| bb.0: |
| liveins: $sgpr20, $sgpr21, $vgpr1 |
| ; GCN-LABEL: name: wwm_scratch_reg_spill_reload_of_outgoing_tuple_subreg |
| ; GCN: liveins: $sgpr20, $sgpr21, $vgpr0, $vgpr1, $vgpr2 |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec |
| ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) |
| ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) |
| ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 |
| ; GCN-NEXT: $vgpr0 = IMPLICIT_DEF |
| ; GCN-NEXT: $vgpr2 = IMPLICIT_DEF |
| ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr20, 0, $vgpr0 |
| ; GCN-NEXT: $vgpr2 = V_WRITELANE_B32 killed $sgpr21, 0, $vgpr2 |
| ; GCN-NEXT: $vgpr0 = COPY $vgpr1, implicit $exec |
| ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec |
| ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $vgpr0(tied-def 0) :: (load (s32) from %stack.0, addrspace 5) |
| ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) |
| ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 |
| ; GCN-NEXT: SI_RETURN implicit $vgpr0_vgpr1 |
| $vgpr0 = IMPLICIT_DEF |
| $vgpr2 = IMPLICIT_DEF |
| $vgpr0 = V_WRITELANE_B32 killed $sgpr20, 0, $vgpr0 |
| $vgpr2 = V_WRITELANE_B32 killed $sgpr21, 0, $vgpr2 |
| $vgpr0 = COPY $vgpr1, implicit $exec |
| SI_RETURN implicit $vgpr0_vgpr1 |
| ... |
| |
| # The tied-op is not required in the spill reload of $vgpr2: it is WWM-reserved, but it is not part of the |
| # outgoing register $vgpr0_vgpr1 used by the return. |
| |
| --- |
| name: wwm_scratch_reg_spill_reload_different_outgoing_reg |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| wwmReservedRegs: ['$vgpr2'] |
| isEntryFunction: false |
| scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' |
| stackPtrOffsetReg: '$sgpr32' |
| frameOffsetReg: '$sgpr33' |
| body: | |
| bb.0: |
| liveins: $sgpr20, $vgpr1 |
| ; GCN-LABEL: name: wwm_scratch_reg_spill_reload_different_outgoing_reg |
| ; GCN: liveins: $sgpr20, $vgpr1, $vgpr2 |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec |
| ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) |
| ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 |
| ; GCN-NEXT: $vgpr2 = IMPLICIT_DEF |
| ; GCN-NEXT: $vgpr2 = V_WRITELANE_B32 killed $sgpr20, 0, $vgpr2 |
| ; GCN-NEXT: $vgpr0 = COPY $vgpr1, implicit $exec |
| ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec |
| ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) |
| ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 |
| ; GCN-NEXT: SI_RETURN implicit $vgpr0_vgpr1 |
| $vgpr2 = IMPLICIT_DEF |
| $vgpr2 = V_WRITELANE_B32 killed $sgpr20, 0, $vgpr2 |
| $vgpr0 = COPY $vgpr1, implicit $exec |
| SI_RETURN implicit $vgpr0_vgpr1 |
| ... |
| |
| # The tied-op is not required in the spill reload of $vgpr40, which is in the CSR range and does not hold the |
| # return value ($vgpr0). The checks expect its spill and reload to be bracketed by S_OR_SAVEEXEC_B64 -1. |
| --- |
| name: wwm_csr_spill_reload |
| tracksRegLiveness: true |
| machineFunctionInfo: |
| wwmReservedRegs: ['$vgpr40'] |
| isEntryFunction: false |
| scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' |
| stackPtrOffsetReg: '$sgpr32' |
| frameOffsetReg: '$sgpr33' |
| body: | |
| bb.0: |
| liveins: $sgpr20, $vgpr1 |
| ; GCN-LABEL: name: wwm_csr_spill_reload |
| ; GCN: liveins: $sgpr20, $vgpr1, $vgpr40 |
| ; GCN-NEXT: {{ $}} |
| ; GCN-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec |
| ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) |
| ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 |
| ; GCN-NEXT: $vgpr40 = IMPLICIT_DEF |
| ; GCN-NEXT: $vgpr40 = V_WRITELANE_B32 killed $sgpr20, 0, $vgpr40 |
| ; GCN-NEXT: $sgpr20 = V_READLANE_B32 $vgpr40, 0, implicit $exec |
| ; GCN-NEXT: $vgpr0 = COPY killed $vgpr1, implicit $exec |
| ; GCN-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec |
| ; GCN-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) |
| ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 |
| ; GCN-NEXT: SI_RETURN implicit $vgpr0 |
| $vgpr40 = IMPLICIT_DEF |
| $vgpr40 = V_WRITELANE_B32 killed $sgpr20, 0, $vgpr40 |
| $sgpr20 = V_READLANE_B32 $vgpr40, 0, implicit $exec |
| $vgpr0 = COPY killed $vgpr1, implicit $exec |
| SI_RETURN implicit $vgpr0 |
| ... |