| # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py |
| # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=CHECK,GFX900 %s |
| # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=CHECK,GFX942 %s |
| # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=CHECK,GFX12,GFX1200 %s |
| # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=CHECK,GFX12,GFX1250 %s |
| |
| # Spill of a single 32-bit VGPR ($vgpr0) that is still used after the spill |
| # (the trailing S_NOP reads it), so the source operand is not killed. |
| # Expected lowering: one BUFFER_STORE_DWORD_OFFSET on gfx900 and one |
| # SCRATCH_STORE_DWORD_SADDR on gfx942 / gfx12. |
| --- |
| name: spill_v32 |
| tracksRegLiveness: true |
| stack: |
| - { id: 0, type: spill-slot, size: 4, alignment: 4 } |
| machineFunctionInfo: |
| scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' |
| stackPtrOffsetReg: '$sgpr32' |
| frameOffsetReg: '$sgpr33' |
| body: | |
| bb.0: |
| liveins: $vgpr0 |
| |
| ; GFX900-LABEL: name: spill_v32 |
| ; GFX900: liveins: $vgpr0 |
| ; GFX900-NEXT: {{ $}} |
| ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5) |
| ; GFX900-NEXT: S_NOP 0, implicit $vgpr0 |
| ; |
| ; GFX942-LABEL: name: spill_v32 |
| ; GFX942: liveins: $vgpr0 |
| ; GFX942-NEXT: {{ $}} |
| ; GFX942-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5) |
| ; GFX942-NEXT: S_NOP 0, implicit $vgpr0 |
| ; |
| ; GFX12-LABEL: name: spill_v32 |
| ; GFX12: liveins: $vgpr0 |
| ; GFX12-NEXT: {{ $}} |
| ; GFX12-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5) |
| ; GFX12-NEXT: S_NOP 0, implicit $vgpr0 |
| SI_SPILL_V32_SAVE $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) |
| S_NOP 0, implicit $vgpr0 |
| ... |
| |
| # Same as spill_v32, but the spilled operand is marked killed and there is no |
| # later use. The expected store on every target keeps the killed flag on |
| # $vgpr0. |
| --- |
| name: spill_v32_kill |
| tracksRegLiveness: true |
| stack: |
| - { id: 0, type: spill-slot, size: 4, alignment: 4 } |
| machineFunctionInfo: |
| scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' |
| stackPtrOffsetReg: '$sgpr32' |
| frameOffsetReg: '$sgpr33' |
| body: | |
| bb.0: |
| liveins: $vgpr0 |
| |
| ; GFX900-LABEL: name: spill_v32_kill |
| ; GFX900: liveins: $vgpr0 |
| ; GFX900-NEXT: {{ $}} |
| ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5) |
| ; |
| ; GFX942-LABEL: name: spill_v32_kill |
| ; GFX942: liveins: $vgpr0 |
| ; GFX942-NEXT: {{ $}} |
| ; GFX942-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5) |
| ; |
| ; GFX12-LABEL: name: spill_v32_kill |
| ; GFX12: liveins: $vgpr0 |
| ; GFX12-NEXT: {{ $}} |
| ; GFX12-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5) |
| SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) |
| ... |
| |
| # Spill of a 64-bit VGPR pair ($vgpr0_vgpr1) that stays live afterwards. |
| # Expected lowering: gfx900 splits it into two dword buffer stores at offsets |
| # 0 and 4, each carrying an implicit use of the full pair (the first one also |
| # an implicit-def); gfx942 / gfx12 emit a single SCRATCH_STORE_DWORDX2_SADDR. |
| --- |
| name: spill_v64 |
| tracksRegLiveness: true |
| stack: |
| - { id: 0, type: spill-slot, size: 8, alignment: 4 } |
| machineFunctionInfo: |
| scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' |
| stackPtrOffsetReg: '$sgpr32' |
| frameOffsetReg: '$sgpr33' |
| body: | |
| bb.0: |
| liveins: $vgpr0_vgpr1 |
| |
| ; GFX900-LABEL: name: spill_v64 |
| ; GFX900: liveins: $vgpr0_vgpr1 |
| ; GFX900-NEXT: {{ $}} |
| ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5) |
| ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5) |
| ; GFX900-NEXT: S_NOP 0, implicit $vgpr0_vgpr1 |
| ; |
| ; GFX942-LABEL: name: spill_v64 |
| ; GFX942: liveins: $vgpr0_vgpr1 |
| ; GFX942-NEXT: {{ $}} |
| ; GFX942-NEXT: SCRATCH_STORE_DWORDX2_SADDR $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s64) into %stack.0, align 4, addrspace 5) |
| ; GFX942-NEXT: S_NOP 0, implicit $vgpr0_vgpr1 |
| ; |
| ; GFX12-LABEL: name: spill_v64 |
| ; GFX12: liveins: $vgpr0_vgpr1 |
| ; GFX12-NEXT: {{ $}} |
| ; GFX12-NEXT: SCRATCH_STORE_DWORDX2_SADDR $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s64) into %stack.0, align 4, addrspace 5) |
| ; GFX12-NEXT: S_NOP 0, implicit $vgpr0_vgpr1 |
| SI_SPILL_V64_SAVE $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) |
| S_NOP 0, implicit $vgpr0_vgpr1 |
| ... |
| |
| # Same as spill_v64, but the pair is killed by the spill. On gfx900 both |
| # per-dword stores are expected to mark their source killed, and only the last |
| # store carries `implicit killed` on the full pair. gfx942 / gfx12 keep the |
| # killed flag on the single DWORDX2 store. |
| --- |
| name: spill_v64_kill |
| tracksRegLiveness: true |
| stack: |
| - { id: 0, type: spill-slot, size: 8, alignment: 4 } |
| machineFunctionInfo: |
| scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' |
| stackPtrOffsetReg: '$sgpr32' |
| frameOffsetReg: '$sgpr33' |
| body: | |
| bb.0: |
| liveins: $vgpr0_vgpr1 |
| |
| ; GFX900-LABEL: name: spill_v64_kill |
| ; GFX900: liveins: $vgpr0_vgpr1 |
| ; GFX900-NEXT: {{ $}} |
| ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5) |
| ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5) |
| ; |
| ; GFX942-LABEL: name: spill_v64_kill |
| ; GFX942: liveins: $vgpr0_vgpr1 |
| ; GFX942-NEXT: {{ $}} |
| ; GFX942-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s64) into %stack.0, align 4, addrspace 5) |
| ; |
| ; GFX12-LABEL: name: spill_v64_kill |
| ; GFX12: liveins: $vgpr0_vgpr1 |
| ; GFX12-NEXT: {{ $}} |
| ; GFX12-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s64) into %stack.0, align 4, addrspace 5) |
| SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) |
| ... |
| |
| # Make sure there's no verifier error on the undef spill component when the value is killed. |
| |
| # Killed spill of $vgpr0_vgpr1 where only $vgpr0 is a live-in, so the high |
| # half ($vgpr1) has no definition when the pair is spilled. The expected |
| # output is identical to spill_v64_kill apart from the liveins line. |
| --- |
| name: spill_v64_undef_sub1_killed |
| tracksRegLiveness: true |
| stack: |
| - { id: 0, type: spill-slot, size: 8, alignment: 4 } |
| machineFunctionInfo: |
| scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' |
| stackPtrOffsetReg: '$sgpr32' |
| frameOffsetReg: '$sgpr33' |
| body: | |
| bb.0: |
| liveins: $vgpr0 |
| |
| ; GFX900-LABEL: name: spill_v64_undef_sub1_killed |
| ; GFX900: liveins: $vgpr0 |
| ; GFX900-NEXT: {{ $}} |
| ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5) |
| ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5) |
| ; |
| ; GFX942-LABEL: name: spill_v64_undef_sub1_killed |
| ; GFX942: liveins: $vgpr0 |
| ; GFX942-NEXT: {{ $}} |
| ; GFX942-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s64) into %stack.0, align 4, addrspace 5) |
| ; |
| ; GFX12-LABEL: name: spill_v64_undef_sub1_killed |
| ; GFX12: liveins: $vgpr0 |
| ; GFX12-NEXT: {{ $}} |
| ; GFX12-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s64) into %stack.0, align 4, addrspace 5) |
| SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) |
| ... |
| |
| # Killed spill of $vgpr0_vgpr1 where only $vgpr1 is a live-in, so the low |
| # half ($vgpr0) has no definition when the pair is spilled. On gfx900 the |
| # first emitted store therefore reads the undefined $vgpr0 with a killed flag. |
| --- |
| name: spill_v64_undef_sub0_killed |
| tracksRegLiveness: true |
| stack: |
| - { id: 0, type: spill-slot, size: 8, alignment: 4 } |
| machineFunctionInfo: |
| scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' |
| stackPtrOffsetReg: '$sgpr32' |
| frameOffsetReg: '$sgpr33' |
| body: | |
| bb.0: |
| liveins: $vgpr1 |
| |
| ; GFX900-LABEL: name: spill_v64_undef_sub0_killed |
| ; GFX900: liveins: $vgpr1 |
| ; GFX900-NEXT: {{ $}} |
| ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5) |
| ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5) |
| ; |
| ; GFX942-LABEL: name: spill_v64_undef_sub0_killed |
| ; GFX942: liveins: $vgpr1 |
| ; GFX942-NEXT: {{ $}} |
| ; GFX942-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s64) into %stack.0, align 4, addrspace 5) |
| ; |
| ; GFX12-LABEL: name: spill_v64_undef_sub0_killed |
| ; GFX12: liveins: $vgpr1 |
| ; GFX12-NEXT: {{ $}} |
| ; GFX12-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s64) into %stack.0, align 4, addrspace 5) |
| SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) |
| ... |
| |
| # Killed spill of a 128-bit tuple starting at $vgpr0. |
| # Expected lowering: gfx900 emits four dword buffer stores (offsets 0, 4, 8, |
| # 12); only the first and last carry the implicit operands for the full tuple. |
| # gfx942 / gfx12 emit a single SCRATCH_STORE_DWORDX4_SADDR. |
| --- |
| name: spill_v128_kill |
| tracksRegLiveness: true |
| stack: |
| - { id: 0, type: spill-slot, size: 16, alignment: 4 } |
| machineFunctionInfo: |
| scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' |
| stackPtrOffsetReg: '$sgpr32' |
| frameOffsetReg: '$sgpr33' |
| body: | |
| bb.0: |
| liveins: $vgpr0_vgpr1_vgpr2_vgpr3 |
| |
| ; GFX900-LABEL: name: spill_v128_kill |
| ; GFX900: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 |
| ; GFX900-NEXT: {{ $}} |
| ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5) |
| ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5) |
| ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5) |
| ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5) |
| ; |
| ; GFX942-LABEL: name: spill_v128_kill |
| ; GFX942: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 |
| ; GFX942-NEXT: {{ $}} |
| ; GFX942-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5) |
| ; |
| ; GFX12-LABEL: name: spill_v128_kill |
| ; GFX12: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 |
| ; GFX12-NEXT: {{ $}} |
| ; GFX12-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5) |
| SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, addrspace 5) |
| ... |
| |
| # Spill of an undef 32-bit VGPR; shared CHECK prefix, so all four RUN lines |
| # must produce matching output. |
| # NOTE(review): the only assertion after the label is a plain CHECK (not |
| # CHECK-NEXT) for the S_NOP, so the absence of a store is not strictly |
| # asserted. Presumably the undef spill is expected to be dropped - confirm. |
| --- |
| name: spill_v32_undef |
| tracksRegLiveness: true |
| stack: |
| - { id: 0, type: spill-slot, size: 4, alignment: 4 } |
| machineFunctionInfo: |
| scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' |
| stackPtrOffsetReg: '$sgpr32' |
| frameOffsetReg: '$sgpr33' |
| body: | |
| bb.0: |
| ; CHECK-LABEL: name: spill_v32_undef |
| ; CHECK: S_NOP 0, implicit undef $vgpr0 |
| SI_SPILL_V32_SAVE undef $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) |
| S_NOP 0, implicit undef $vgpr0 |
| ... |
| |
| # Spill of an undef 64-bit VGPR pair; shared CHECK prefix, so all four RUN |
| # lines must produce matching output. |
| # NOTE(review): the only assertion after the label is a plain CHECK (not |
| # CHECK-NEXT) for the S_NOP, so the absence of a store is not strictly |
| # asserted. Presumably the undef spill is expected to be dropped - confirm. |
| --- |
| name: spill_v64_undef |
| tracksRegLiveness: true |
| stack: |
| - { id: 0, type: spill-slot, size: 8, alignment: 4 } |
| machineFunctionInfo: |
| scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' |
| stackPtrOffsetReg: '$sgpr32' |
| frameOffsetReg: '$sgpr33' |
| body: | |
| bb.0: |
| ; CHECK-LABEL: name: spill_v64_undef |
| ; CHECK: S_NOP 0, implicit undef $vgpr0_vgpr1 |
| SI_SPILL_V64_SAVE undef $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5) |
| S_NOP 0, implicit undef $vgpr0_vgpr1 |
| ... |
| |
| # Killed spill of a 128-bit tuple that starts at an odd register ($vgpr1). |
| # This is where gfx1200 and gfx1250 diverge, hence the separate prefixes: |
| # gfx900 - four dword buffer stores; |
| # gfx942 - DWORD store of $vgpr1 followed by a DWORDX3 store at offset 4; |
| # gfx1200 - a single DWORDX4 store of the whole tuple; |
| # gfx1250 - same DWORD + DWORDX3 split as gfx942. |
| # NOTE(review): the split presumably reflects an even-alignment requirement |
| # for VGPR tuples on gfx942 / gfx1250 - confirm against the target definition. |
| --- |
| name: spill_v128_kill_unaligned |
| tracksRegLiveness: true |
| stack: |
| - { id: 0, type: spill-slot, size: 16, alignment: 4 } |
| machineFunctionInfo: |
| scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' |
| stackPtrOffsetReg: '$sgpr32' |
| frameOffsetReg: '$sgpr33' |
| body: | |
| bb.0: |
| liveins: $vgpr1_vgpr2_vgpr3_vgpr4 |
| |
| ; GFX900-LABEL: name: spill_v128_kill_unaligned |
| ; GFX900: liveins: $vgpr1_vgpr2_vgpr3_vgpr4 |
| ; GFX900-NEXT: {{ $}} |
| ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5) |
| ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5) |
| ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5) |
| ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec, implicit killed $vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5) |
| ; |
| ; GFX942-LABEL: name: spill_v128_kill_unaligned |
| ; GFX942: liveins: $vgpr1_vgpr2_vgpr3_vgpr4 |
| ; GFX942-NEXT: {{ $}} |
| ; GFX942-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5) |
| ; GFX942-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr2_vgpr3_vgpr4, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" store (s96) into %stack.0 + 4, align 4, addrspace 5) |
| ; |
| ; GFX1200-LABEL: name: spill_v128_kill_unaligned |
| ; GFX1200: liveins: $vgpr1_vgpr2_vgpr3_vgpr4 |
| ; GFX1200-NEXT: {{ $}} |
| ; GFX1200-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr1_vgpr2_vgpr3_vgpr4, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5) |
| ; |
| ; GFX1250-LABEL: name: spill_v128_kill_unaligned |
| ; GFX1250: liveins: $vgpr1_vgpr2_vgpr3_vgpr4 |
| ; GFX1250-NEXT: {{ $}} |
| ; GFX1250-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5) |
| ; GFX1250-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr2_vgpr3_vgpr4, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" store (s96) into %stack.0 + 4, align 4, addrspace 5) |
| SI_SPILL_V128_SAVE killed $vgpr1_vgpr2_vgpr3_vgpr4, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, addrspace 5) |
| ... |
| |
| # Non-killed spill of a five-register tuple starting at an odd register |
| # ($vgpr1..$vgpr5) via SI_SPILL_V160_SAVE. |
| # gfx900 - five dword buffer stores (offsets 0..16); |
| # gfx942 - DWORD store of $vgpr1, then DWORDX4 of $vgpr2..$vgpr5 at offset 4; |
| # gfx1200 - DWORDX4 of $vgpr1..$vgpr4, then DWORD of $vgpr5 at offset 16; |
| # gfx1250 - same DWORD + DWORDX4 split as gfx942. |
| # NOTE(review): the function is named "v128" but spills 160 bits, and both the |
| # stack slot (size: 16) and the input memory operand (s128) describe only 16 |
| # bytes while the expected stores reach %stack.0 + 16. Looks like a copy-paste |
| # leftover - confirm whether the name, slot size and memory operand should |
| # say 160 bits / 20 bytes. |
| --- |
| name: spill_v128_unaligned |
| tracksRegLiveness: true |
| stack: |
| - { id: 0, type: spill-slot, size: 16, alignment: 4 } |
| machineFunctionInfo: |
| scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' |
| stackPtrOffsetReg: '$sgpr32' |
| frameOffsetReg: '$sgpr33' |
| body: | |
| bb.0: |
| liveins: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 |
| |
| ; GFX900-LABEL: name: spill_v128_unaligned |
| ; GFX900: liveins: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 |
| ; GFX900-NEXT: {{ $}} |
| ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5) |
| ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5) |
| ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5) |
| ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5) |
| ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec, implicit $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5) |
| ; |
| ; GFX942-LABEL: name: spill_v128_unaligned |
| ; GFX942: liveins: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 |
| ; GFX942-NEXT: {{ $}} |
| ; GFX942-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5) |
| ; GFX942-NEXT: SCRATCH_STORE_DWORDX4_SADDR $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-thread-private" store (s128) into %stack.0 + 4, align 4, addrspace 5) |
| ; |
| ; GFX1200-LABEL: name: spill_v128_unaligned |
| ; GFX1200: liveins: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 |
| ; GFX1200-NEXT: {{ $}} |
| ; GFX1200-NEXT: SCRATCH_STORE_DWORDX4_SADDR $vgpr1_vgpr2_vgpr3_vgpr4, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5) |
| ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5) |
| ; |
| ; GFX1250-LABEL: name: spill_v128_unaligned |
| ; GFX1250: liveins: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 |
| ; GFX1250-NEXT: {{ $}} |
| ; GFX1250-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5) |
| ; GFX1250-NEXT: SCRATCH_STORE_DWORDX4_SADDR $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-thread-private" store (s128) into %stack.0 + 4, align 4, addrspace 5) |
| SI_SPILL_V160_SAVE $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, addrspace 5) |
| ... |
| |
| # Non-killed spill of a 256-bit tuple starting at $vgpr0. |
| # Expected lowering: gfx900 emits eight dword buffer stores (offsets 0..28); |
| # gfx942 / gfx12 emit two SCRATCH_STORE_DWORDX4_SADDR at offsets 0 and 16. |
| # NOTE(review): the stack slot (size: 16) and the input memory operand (s128) |
| # describe only 16 bytes, while the spill writes 32 bytes (expected stores |
| # reach %stack.0 + 28). Looks like a copy-paste leftover from the v128 tests - |
| # confirm whether these should be size: 32 / s256. |
| --- |
| name: spill_v256_aligned |
| tracksRegLiveness: true |
| stack: |
| - { id: 0, type: spill-slot, size: 16, alignment: 4 } |
| machineFunctionInfo: |
| scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' |
| stackPtrOffsetReg: '$sgpr32' |
| frameOffsetReg: '$sgpr33' |
| body: | |
| bb.0: |
| liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 |
| |
| ; GFX900-LABEL: name: spill_v256_aligned |
| ; GFX900: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 |
| ; GFX900-NEXT: {{ $}} |
| ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5) |
| ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5) |
| ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5) |
| ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5) |
| ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5) |
| ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5) |
| ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5) |
| ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5) |
| ; |
| ; GFX942-LABEL: name: spill_v256_aligned |
| ; GFX942: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 |
| ; GFX942-NEXT: {{ $}} |
| ; GFX942-NEXT: SCRATCH_STORE_DWORDX4_SADDR $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5) |
| ; GFX942-NEXT: SCRATCH_STORE_DWORDX4_SADDR $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-thread-private" store (s128) into %stack.0 + 16, align 4, addrspace 5) |
| ; |
| ; GFX12-LABEL: name: spill_v256_aligned |
| ; GFX12: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 |
| ; GFX12-NEXT: {{ $}} |
| ; GFX12-NEXT: SCRATCH_STORE_DWORDX4_SADDR $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5) |
| ; GFX12-NEXT: SCRATCH_STORE_DWORDX4_SADDR $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-thread-private" store (s128) into %stack.0 + 16, align 4, addrspace 5) |
| SI_SPILL_V256_SAVE $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, addrspace 5) |
| ... |
| |
| # Non-killed spill of a 256-bit tuple that starts at an odd register ($vgpr1). |
| # gfx900 - eight dword buffer stores (offsets 0..28); |
| # gfx942 - DWORD ($vgpr1), DWORDX4 at offset 4, DWORDX3 at offset 20; |
| # gfx1200 - two DWORDX4 stores at offsets 0 and 16; |
| # gfx1250 - same DWORD + DWORDX4 + DWORDX3 split as gfx942. |
| # In the three-store sequences the middle store carries no implicit operand |
| # for the full tuple; only the first and last do. |
| # NOTE(review): the stack slot (size: 16) and the input memory operand (s128) |
| # describe only 16 bytes, while the spill writes 32 bytes. Looks like a |
| # copy-paste leftover from the v128 tests - confirm whether these should be |
| # size: 32 / s256. |
| --- |
| name: spill_v256_unaligned |
| tracksRegLiveness: true |
| stack: |
| - { id: 0, type: spill-slot, size: 16, alignment: 4 } |
| machineFunctionInfo: |
| scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' |
| stackPtrOffsetReg: '$sgpr32' |
| frameOffsetReg: '$sgpr33' |
| body: | |
| bb.0: |
| liveins: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 |
| |
| ; GFX900-LABEL: name: spill_v256_unaligned |
| ; GFX900: liveins: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 |
| ; GFX900-NEXT: {{ $}} |
| ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8, implicit $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5) |
| ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5) |
| ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5) |
| ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5) |
| ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5) |
| ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5) |
| ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5) |
| ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec, implicit $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5) |
| ; |
| ; GFX942-LABEL: name: spill_v256_unaligned |
| ; GFX942: liveins: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 |
| ; GFX942-NEXT: {{ $}} |
| ; GFX942-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8, implicit $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5) |
| ; GFX942-NEXT: SCRATCH_STORE_DWORDX4_SADDR $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 4, align 4, addrspace 5) |
| ; GFX942-NEXT: SCRATCH_STORE_DWORDX3_SADDR $vgpr6_vgpr7_vgpr8, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr, implicit $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 :: ("amdgpu-thread-private" store (s96) into %stack.0 + 20, align 4, addrspace 5) |
| ; |
| ; GFX1200-LABEL: name: spill_v256_unaligned |
| ; GFX1200: liveins: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 |
| ; GFX1200-NEXT: {{ $}} |
| ; GFX1200-NEXT: SCRATCH_STORE_DWORDX4_SADDR $vgpr1_vgpr2_vgpr3_vgpr4, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8, implicit $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5) |
| ; GFX1200-NEXT: SCRATCH_STORE_DWORDX4_SADDR $vgpr5_vgpr6_vgpr7_vgpr8, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 :: ("amdgpu-thread-private" store (s128) into %stack.0 + 16, align 4, addrspace 5) |
| ; |
| ; GFX1250-LABEL: name: spill_v256_unaligned |
| ; GFX1250: liveins: $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 |
| ; GFX1250-NEXT: {{ $}} |
| ; GFX1250-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8, implicit $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5) |
| ; GFX1250-NEXT: SCRATCH_STORE_DWORDX4_SADDR $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 4, align 4, addrspace 5) |
| ; GFX1250-NEXT: SCRATCH_STORE_DWORDX3_SADDR $vgpr6_vgpr7_vgpr8, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr, implicit $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 :: ("amdgpu-thread-private" store (s96) into %stack.0 + 20, align 4, addrspace 5) |
| SI_SPILL_V256_SAVE $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, addrspace 5) |
| ... |