| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode < %s | FileCheck -check-prefix=GFX10_1 %s |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -mattr=+cumode < %s | FileCheck -check-prefix=GFX10_3 %s |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck -check-prefix=GFX11 %s |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck -check-prefix=GFX12 %s |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefix=GFX8 %s |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefixes=GFX9,GFX942 %s |
| |
| ; We aren't pressuring the SGPRs, so this can use the add with carry out pre-gfx9. |
| ; |
| ; Two stack objects are allocated: a 64-byte-aligned [4096 x i32] array and a |
| ; single i32. The first asm takes the array address in a VGPR ("v"); the second |
| ; takes the i32's address pinned to s55 together with an scc input, presumably |
| ; so that scc is live while the address is formed (hence "unavailable_scc"). |
| ; The checks record how each subtarget is expected to produce s55: a VALU add |
| ; followed by v_readfirstlane_b32 under the GFX8/GFX900/GFX10_1/GFX10_3 |
| ; prefixes, and a scalar add-with-carry (s_addc_u32 / s_add_co_ci_u32) followed |
| ; by s_bitcmp1_b32 and s_bitset0_b32 under the GFX942/GFX11/GFX12 prefixes. |
| ; NOTE(review): attribute group #0 is defined outside this excerpt. |
| define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 { |
| ; GFX10_1-LABEL: scalar_mov_materializes_frame_index_unavailable_scc: |
| ; GFX10_1: ; %bb.0: |
| ; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880 |
| ; GFX10_1-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill |
| ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 |
| ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 |
| ; GFX10_1-NEXT: v_writelane_b32 v1, s55, 0 |
| ; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo |
| ; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 |
| ; GFX10_1-NEXT: ;;#ASMSTART |
| ; GFX10_1-NEXT: ; use alloca0 v0 |
| ; GFX10_1-NEXT: ;;#ASMEND |
| ; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 |
| ; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 0x4040, v0 |
| ; GFX10_1-NEXT: v_readfirstlane_b32 s55, v0 |
| ; GFX10_1-NEXT: ;;#ASMSTART |
| ; GFX10_1-NEXT: ; use s55, scc |
| ; GFX10_1-NEXT: ;;#ASMEND |
| ; GFX10_1-NEXT: v_readlane_b32 s55, v1, 0 |
| ; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880 |
| ; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload |
| ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 |
| ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_1-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10_1-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc: |
| ; GFX10_3: ; %bb.0: |
| ; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880 |
| ; GFX10_3-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill |
| ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 |
| ; GFX10_3-NEXT: v_writelane_b32 v1, s55, 0 |
| ; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo |
| ; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 |
| ; GFX10_3-NEXT: ;;#ASMSTART |
| ; GFX10_3-NEXT: ; use alloca0 v0 |
| ; GFX10_3-NEXT: ;;#ASMEND |
| ; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 |
| ; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 0x4040, v0 |
| ; GFX10_3-NEXT: v_readfirstlane_b32 s55, v0 |
| ; GFX10_3-NEXT: ;;#ASMSTART |
| ; GFX10_3-NEXT: ; use s55, scc |
| ; GFX10_3-NEXT: ;;#ASMEND |
| ; GFX10_3-NEXT: v_readlane_b32 s55, v1, 0 |
| ; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880 |
| ; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload |
| ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_3-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10_3-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 |
| ; GFX11-NEXT: s_add_i32 s1, s32, 0x4044 |
| ; GFX11-NEXT: scratch_store_b32 off, v1, s1 ; 4-byte Folded Spill |
| ; GFX11-NEXT: s_mov_b32 exec_lo, s0 |
| ; GFX11-NEXT: s_add_i32 s0, s32, 64 |
| ; GFX11-NEXT: v_writelane_b32 v1, s55, 0 |
| ; GFX11-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX11-NEXT: s_and_b32 s0, 0, exec_lo |
| ; GFX11-NEXT: s_addc_u32 s0, s32, 0x4040 |
| ; GFX11-NEXT: ;;#ASMSTART |
| ; GFX11-NEXT: ; use alloca0 v0 |
| ; GFX11-NEXT: ;;#ASMEND |
| ; GFX11-NEXT: s_bitcmp1_b32 s0, 0 |
| ; GFX11-NEXT: s_bitset0_b32 s0, 0 |
| ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX11-NEXT: s_mov_b32 s55, s0 |
| ; GFX11-NEXT: ;;#ASMSTART |
| ; GFX11-NEXT: ; use s55, scc |
| ; GFX11-NEXT: ;;#ASMEND |
| ; GFX11-NEXT: v_readlane_b32 s55, v1, 0 |
| ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 |
| ; GFX11-NEXT: s_add_i32 s1, s32, 0x4044 |
| ; GFX11-NEXT: scratch_load_b32 v1, off, s1 ; 4-byte Folded Reload |
| ; GFX11-NEXT: s_mov_b32 exec_lo, s0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 |
| ; GFX12-NEXT: scratch_store_b32 off, v1, s32 offset:16388 ; 4-byte Folded Spill |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: s_mov_b32 exec_lo, s0 |
| ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX12-NEXT: s_and_b32 s0, 0, exec_lo |
| ; GFX12-NEXT: v_writelane_b32 v1, s55, 0 |
| ; GFX12-NEXT: s_add_co_ci_u32 s0, s32, 0x4000 |
| ; GFX12-NEXT: v_mov_b32_e32 v0, s32 |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: s_bitcmp1_b32 s0, 0 |
| ; GFX12-NEXT: s_bitset0_b32 s0, 0 |
| ; GFX12-NEXT: ;;#ASMSTART |
| ; GFX12-NEXT: ; use alloca0 v0 |
| ; GFX12-NEXT: ;;#ASMEND |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: s_mov_b32 s55, s0 |
| ; GFX12-NEXT: ;;#ASMSTART |
| ; GFX12-NEXT: ; use s55, scc |
| ; GFX12-NEXT: ;;#ASMEND |
| ; GFX12-NEXT: v_readlane_b32 s55, v1, 0 |
| ; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 |
| ; GFX12-NEXT: scratch_load_b32 v1, off, s32 offset:16388 ; 4-byte Folded Reload |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: s_mov_b32 exec_lo, s0 |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: scalar_mov_materializes_frame_index_unavailable_scc: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX8-NEXT: s_add_i32 s6, s32, 0x101100 |
| ; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill |
| ; GFX8-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 |
| ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 |
| ; GFX8-NEXT: v_writelane_b32 v1, s55, 0 |
| ; GFX8-NEXT: ;;#ASMSTART |
| ; GFX8-NEXT: ; use alloca0 v0 |
| ; GFX8-NEXT: ;;#ASMEND |
| ; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 |
| ; GFX8-NEXT: s_movk_i32 s55, 0x4040 |
| ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s55, v0 |
| ; GFX8-NEXT: v_readfirstlane_b32 s55, v0 |
| ; GFX8-NEXT: s_and_b64 s[4:5], 0, exec |
| ; GFX8-NEXT: ;;#ASMSTART |
| ; GFX8-NEXT: ; use s55, scc |
| ; GFX8-NEXT: ;;#ASMEND |
| ; GFX8-NEXT: v_readlane_b32 s55, v1, 0 |
| ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX8-NEXT: s_add_i32 s6, s32, 0x101100 |
| ; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload |
| ; GFX8-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc: |
| ; GFX900: ; %bb.0: |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX900-NEXT: s_add_i32 s6, s32, 0x101100 |
| ; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill |
| ; GFX900-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 |
| ; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 |
| ; GFX900-NEXT: ;;#ASMSTART |
| ; GFX900-NEXT: ; use alloca0 v0 |
| ; GFX900-NEXT: ;;#ASMEND |
| ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 |
| ; GFX900-NEXT: v_add_u32_e32 v0, 0x4040, v0 |
| ; GFX900-NEXT: v_writelane_b32 v1, s55, 0 |
| ; GFX900-NEXT: v_readfirstlane_b32 s55, v0 |
| ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec |
| ; GFX900-NEXT: ;;#ASMSTART |
| ; GFX900-NEXT: ; use s55, scc |
| ; GFX900-NEXT: ;;#ASMEND |
| ; GFX900-NEXT: v_readlane_b32 s55, v1, 0 |
| ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX900-NEXT: s_add_i32 s6, s32, 0x101100 |
| ; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload |
| ; GFX900-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc: |
| ; GFX942: ; %bb.0: |
| ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 |
| ; GFX942-NEXT: s_add_i32 s2, s32, 0x4044 |
| ; GFX942-NEXT: scratch_store_dword off, v1, s2 ; 4-byte Folded Spill |
| ; GFX942-NEXT: s_mov_b64 exec, s[0:1] |
| ; GFX942-NEXT: s_add_i32 s0, s32, 64 |
| ; GFX942-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX942-NEXT: s_and_b64 s[0:1], 0, exec |
| ; GFX942-NEXT: s_addc_u32 s0, s32, 0x4040 |
| ; GFX942-NEXT: s_bitcmp1_b32 s0, 0 |
| ; GFX942-NEXT: s_bitset0_b32 s0, 0 |
| ; GFX942-NEXT: v_writelane_b32 v1, s55, 0 |
| ; GFX942-NEXT: s_mov_b32 s55, s0 |
| ; GFX942-NEXT: ;;#ASMSTART |
| ; GFX942-NEXT: ; use alloca0 v0 |
| ; GFX942-NEXT: ;;#ASMEND |
| ; GFX942-NEXT: ;;#ASMSTART |
| ; GFX942-NEXT: ; use s55, scc |
| ; GFX942-NEXT: ;;#ASMEND |
| ; GFX942-NEXT: v_readlane_b32 s55, v1, 0 |
| ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 |
| ; GFX942-NEXT: s_add_i32 s2, s32, 0x4044 |
| ; GFX942-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload |
| ; GFX942-NEXT: s_mov_b64 exec, s[0:1] |
| ; GFX942-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-NEXT: s_setpc_b64 s[30:31] |
| ; Large, 64-byte-aligned array followed by a single i32 slot. |
| %alloca0 = alloca [4096 x i32], align 64, addrspace(5) |
| %alloca1 = alloca i32, align 4, addrspace(5) |
| ; The array address is consumed through a VGPR operand. |
| call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0) |
| ; The i32 slot's address is pinned to s55; the constant 0 is bound to scc. |
| call void asm sideeffect "; use $0, $1", "{s55},{scc}"(ptr addrspace(5) %alloca1, i32 0) |
| ret void |
| } |
| |
| ; %alloca1 should end up materializing with s_mov_b32, and scc is |
| ; available. |
| ; |
| ; The function allocates a 64-byte-aligned [4096 x i32] array and a single |
| ; i32. The first asm takes the array address in a VGPR; the second takes the |
| ; i32's address pinned to s55 and has no scc operand. Under every prefix the |
| ; checks expect s55 to be formed with a plain scalar add (s_addk_i32, |
| ; s_add_i32 or s_add_co_i32) and no v_readfirstlane_b32. |
| ; NOTE(review): attribute group #0 is defined outside this excerpt. |
| define void @scalar_mov_materializes_frame_index_dead_scc() #0 { |
| ; GFX10_1-LABEL: scalar_mov_materializes_frame_index_dead_scc: |
| ; GFX10_1: ; %bb.0: |
| ; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880 |
| ; GFX10_1-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill |
| ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 |
| ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_1-NEXT: v_writelane_b32 v1, s55, 0 |
| ; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 |
| ; GFX10_1-NEXT: s_lshr_b32 s55, s32, 5 |
| ; GFX10_1-NEXT: s_addk_i32 s55, 0x4040 |
| ; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 |
| ; GFX10_1-NEXT: ;;#ASMSTART |
| ; GFX10_1-NEXT: ; use alloca0 v0 |
| ; GFX10_1-NEXT: ;;#ASMEND |
| ; GFX10_1-NEXT: ;;#ASMSTART |
| ; GFX10_1-NEXT: ; use s55 |
| ; GFX10_1-NEXT: ;;#ASMEND |
| ; GFX10_1-NEXT: v_readlane_b32 s55, v1, 0 |
| ; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880 |
| ; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload |
| ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 |
| ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_1-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10_1-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10_3-LABEL: scalar_mov_materializes_frame_index_dead_scc: |
| ; GFX10_3: ; %bb.0: |
| ; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880 |
| ; GFX10_3-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill |
| ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_3-NEXT: v_writelane_b32 v1, s55, 0 |
| ; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 |
| ; GFX10_3-NEXT: s_lshr_b32 s55, s32, 5 |
| ; GFX10_3-NEXT: s_addk_i32 s55, 0x4040 |
| ; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 |
| ; GFX10_3-NEXT: ;;#ASMSTART |
| ; GFX10_3-NEXT: ; use alloca0 v0 |
| ; GFX10_3-NEXT: ;;#ASMEND |
| ; GFX10_3-NEXT: ;;#ASMSTART |
| ; GFX10_3-NEXT: ; use s55 |
| ; GFX10_3-NEXT: ;;#ASMEND |
| ; GFX10_3-NEXT: v_readlane_b32 s55, v1, 0 |
| ; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880 |
| ; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload |
| ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_3-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10_3-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: scalar_mov_materializes_frame_index_dead_scc: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 |
| ; GFX11-NEXT: s_add_i32 s1, s32, 0x4044 |
| ; GFX11-NEXT: scratch_store_b32 off, v1, s1 ; 4-byte Folded Spill |
| ; GFX11-NEXT: s_mov_b32 exec_lo, s0 |
| ; GFX11-NEXT: v_writelane_b32 v1, s55, 0 |
| ; GFX11-NEXT: s_add_i32 s0, s32, 64 |
| ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX11-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX11-NEXT: s_add_i32 s0, s32, 0x4040 |
| ; GFX11-NEXT: ;;#ASMSTART |
| ; GFX11-NEXT: ; use alloca0 v0 |
| ; GFX11-NEXT: ;;#ASMEND |
| ; GFX11-NEXT: s_mov_b32 s55, s0 |
| ; GFX11-NEXT: ;;#ASMSTART |
| ; GFX11-NEXT: ; use s55 |
| ; GFX11-NEXT: ;;#ASMEND |
| ; GFX11-NEXT: v_readlane_b32 s55, v1, 0 |
| ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 |
| ; GFX11-NEXT: s_add_i32 s1, s32, 0x4044 |
| ; GFX11-NEXT: scratch_load_b32 v1, off, s1 ; 4-byte Folded Reload |
| ; GFX11-NEXT: s_mov_b32 exec_lo, s0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: scalar_mov_materializes_frame_index_dead_scc: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 |
| ; GFX12-NEXT: scratch_store_b32 off, v1, s32 offset:16388 ; 4-byte Folded Spill |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: s_mov_b32 exec_lo, s0 |
| ; GFX12-NEXT: v_writelane_b32 v1, s55, 0 |
| ; GFX12-NEXT: s_add_co_i32 s0, s32, 0x4000 |
| ; GFX12-NEXT: v_mov_b32_e32 v0, s32 |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: s_mov_b32 s55, s0 |
| ; GFX12-NEXT: ;;#ASMSTART |
| ; GFX12-NEXT: ; use alloca0 v0 |
| ; GFX12-NEXT: ;;#ASMEND |
| ; GFX12-NEXT: ;;#ASMSTART |
| ; GFX12-NEXT: ; use s55 |
| ; GFX12-NEXT: ;;#ASMEND |
| ; GFX12-NEXT: v_readlane_b32 s55, v1, 0 |
| ; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 |
| ; GFX12-NEXT: scratch_load_b32 v1, off, s32 offset:16388 ; 4-byte Folded Reload |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: s_mov_b32 exec_lo, s0 |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: scalar_mov_materializes_frame_index_dead_scc: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX8-NEXT: s_add_i32 s6, s32, 0x101100 |
| ; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill |
| ; GFX8-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX8-NEXT: v_writelane_b32 v1, s55, 0 |
| ; GFX8-NEXT: s_lshr_b32 s55, s32, 6 |
| ; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 |
| ; GFX8-NEXT: s_addk_i32 s55, 0x4040 |
| ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 |
| ; GFX8-NEXT: ;;#ASMSTART |
| ; GFX8-NEXT: ; use alloca0 v0 |
| ; GFX8-NEXT: ;;#ASMEND |
| ; GFX8-NEXT: ;;#ASMSTART |
| ; GFX8-NEXT: ; use s55 |
| ; GFX8-NEXT: ;;#ASMEND |
| ; GFX8-NEXT: v_readlane_b32 s55, v1, 0 |
| ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX8-NEXT: s_add_i32 s6, s32, 0x101100 |
| ; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload |
| ; GFX8-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-LABEL: scalar_mov_materializes_frame_index_dead_scc: |
| ; GFX900: ; %bb.0: |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX900-NEXT: s_add_i32 s6, s32, 0x101100 |
| ; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill |
| ; GFX900-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX900-NEXT: v_writelane_b32 v1, s55, 0 |
| ; GFX900-NEXT: s_lshr_b32 s55, s32, 6 |
| ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 |
| ; GFX900-NEXT: s_addk_i32 s55, 0x4040 |
| ; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 |
| ; GFX900-NEXT: ;;#ASMSTART |
| ; GFX900-NEXT: ; use alloca0 v0 |
| ; GFX900-NEXT: ;;#ASMEND |
| ; GFX900-NEXT: ;;#ASMSTART |
| ; GFX900-NEXT: ; use s55 |
| ; GFX900-NEXT: ;;#ASMEND |
| ; GFX900-NEXT: v_readlane_b32 s55, v1, 0 |
| ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX900-NEXT: s_add_i32 s6, s32, 0x101100 |
| ; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload |
| ; GFX900-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-LABEL: scalar_mov_materializes_frame_index_dead_scc: |
| ; GFX942: ; %bb.0: |
| ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 |
| ; GFX942-NEXT: s_add_i32 s2, s32, 0x4044 |
| ; GFX942-NEXT: scratch_store_dword off, v1, s2 ; 4-byte Folded Spill |
| ; GFX942-NEXT: s_mov_b64 exec, s[0:1] |
| ; GFX942-NEXT: s_add_i32 s0, s32, 64 |
| ; GFX942-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX942-NEXT: s_add_i32 s0, s32, 0x4040 |
| ; GFX942-NEXT: v_writelane_b32 v1, s55, 0 |
| ; GFX942-NEXT: s_mov_b32 s55, s0 |
| ; GFX942-NEXT: ;;#ASMSTART |
| ; GFX942-NEXT: ; use alloca0 v0 |
| ; GFX942-NEXT: ;;#ASMEND |
| ; GFX942-NEXT: ;;#ASMSTART |
| ; GFX942-NEXT: ; use s55 |
| ; GFX942-NEXT: ;;#ASMEND |
| ; GFX942-NEXT: v_readlane_b32 s55, v1, 0 |
| ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 |
| ; GFX942-NEXT: s_add_i32 s2, s32, 0x4044 |
| ; GFX942-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload |
| ; GFX942-NEXT: s_mov_b64 exec, s[0:1] |
| ; GFX942-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-NEXT: s_setpc_b64 s[30:31] |
| ; Large, 64-byte-aligned array followed by a single i32 slot. |
| %alloca0 = alloca [4096 x i32], align 64, addrspace(5) |
| %alloca1 = alloca i32, align 4, addrspace(5) |
| ; The array address is consumed through a VGPR operand. |
| call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0) |
| ; Only s55 is constrained here; this asm has no scc operand. |
| call void asm sideeffect "; use $0", "{s55}"(ptr addrspace(5) %alloca1) |
| ret void |
| } |
| |
| ; Variant of the "unavailable scc" case that uses attribute group #1 (defined |
| ; outside this excerpt; presumably it forces a frame pointer - TODO confirm). |
| ; The IR allocates a 64-byte-aligned [4096 x i32] array and a single i32, |
| ; passes the array address to an asm in a VGPR, and passes the i32's address |
| ; in s55 together with an scc input to a second asm. Under every prefix the |
| ; checks expect the old s33 to be saved, s33 to be set from s32 and used as |
| ; the base for both stack addresses, and s33 to be restored before returning. |
| define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { |
| ; GFX10_1-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp: |
| ; GFX10_1: ; %bb.0: |
| ; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10_1-NEXT: s_mov_b32 s5, s33 |
| ; GFX10_1-NEXT: s_mov_b32 s33, s32 |
| ; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80880 |
| ; GFX10_1-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill |
| ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 |
| ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s33 |
| ; GFX10_1-NEXT: v_writelane_b32 v1, s55, 0 |
| ; GFX10_1-NEXT: s_add_i32 s32, s32, 0x81000 |
| ; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo |
| ; GFX10_1-NEXT: s_mov_b32 s32, s33 |
| ; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 |
| ; GFX10_1-NEXT: ;;#ASMSTART |
| ; GFX10_1-NEXT: ; use alloca0 v0 |
| ; GFX10_1-NEXT: ;;#ASMEND |
| ; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s33 |
| ; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 0x4040, v0 |
| ; GFX10_1-NEXT: v_readfirstlane_b32 s55, v0 |
| ; GFX10_1-NEXT: ;;#ASMSTART |
| ; GFX10_1-NEXT: ; use s55, scc |
| ; GFX10_1-NEXT: ;;#ASMEND |
| ; GFX10_1-NEXT: v_readlane_b32 s55, v1, 0 |
| ; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80880 |
| ; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload |
| ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 |
| ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_1-NEXT: s_mov_b32 s33, s5 |
| ; GFX10_1-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10_1-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp: |
| ; GFX10_3: ; %bb.0: |
| ; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10_3-NEXT: s_mov_b32 s5, s33 |
| ; GFX10_3-NEXT: s_mov_b32 s33, s32 |
| ; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80880 |
| ; GFX10_3-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill |
| ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s33 |
| ; GFX10_3-NEXT: v_writelane_b32 v1, s55, 0 |
| ; GFX10_3-NEXT: s_add_i32 s32, s32, 0x81000 |
| ; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo |
| ; GFX10_3-NEXT: s_mov_b32 s32, s33 |
| ; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 |
| ; GFX10_3-NEXT: ;;#ASMSTART |
| ; GFX10_3-NEXT: ; use alloca0 v0 |
| ; GFX10_3-NEXT: ;;#ASMEND |
| ; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s33 |
| ; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 0x4040, v0 |
| ; GFX10_3-NEXT: v_readfirstlane_b32 s55, v0 |
| ; GFX10_3-NEXT: ;;#ASMSTART |
| ; GFX10_3-NEXT: ; use s55, scc |
| ; GFX10_3-NEXT: ;;#ASMEND |
| ; GFX10_3-NEXT: v_readlane_b32 s55, v1, 0 |
| ; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80880 |
| ; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload |
| ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_3-NEXT: s_mov_b32 s33, s5 |
| ; GFX10_3-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10_3-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s1, s33 |
| ; GFX11-NEXT: s_mov_b32 s33, s32 |
| ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 |
| ; GFX11-NEXT: s_add_i32 s2, s33, 0x4044 |
| ; GFX11-NEXT: scratch_store_b32 off, v1, s2 ; 4-byte Folded Spill |
| ; GFX11-NEXT: s_mov_b32 exec_lo, s0 |
| ; GFX11-NEXT: s_addk_i32 s32, 0x4080 |
| ; GFX11-NEXT: s_add_i32 s0, s33, 64 |
| ; GFX11-NEXT: v_writelane_b32 v1, s55, 0 |
| ; GFX11-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX11-NEXT: s_and_b32 s0, 0, exec_lo |
| ; GFX11-NEXT: s_addc_u32 s0, s33, 0x4040 |
| ; GFX11-NEXT: ;;#ASMSTART |
| ; GFX11-NEXT: ; use alloca0 v0 |
| ; GFX11-NEXT: ;;#ASMEND |
| ; GFX11-NEXT: s_bitcmp1_b32 s0, 0 |
| ; GFX11-NEXT: s_bitset0_b32 s0, 0 |
| ; GFX11-NEXT: s_mov_b32 s32, s33 |
| ; GFX11-NEXT: s_mov_b32 s55, s0 |
| ; GFX11-NEXT: ;;#ASMSTART |
| ; GFX11-NEXT: ; use s55, scc |
| ; GFX11-NEXT: ;;#ASMEND |
| ; GFX11-NEXT: v_readlane_b32 s55, v1, 0 |
| ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 |
| ; GFX11-NEXT: s_add_i32 s2, s33, 0x4044 |
| ; GFX11-NEXT: scratch_load_b32 v1, off, s2 ; 4-byte Folded Reload |
| ; GFX11-NEXT: s_mov_b32 exec_lo, s0 |
| ; GFX11-NEXT: s_mov_b32 s33, s1 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: s_mov_b32 s1, s33 |
| ; GFX12-NEXT: s_mov_b32 s33, s32 |
| ; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 |
| ; GFX12-NEXT: scratch_store_b32 off, v1, s33 offset:16388 ; 4-byte Folded Spill |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: s_mov_b32 exec_lo, s0 |
| ; GFX12-NEXT: s_addk_co_i32 s32, 0x4040 |
| ; GFX12-NEXT: s_and_b32 s0, 0, exec_lo |
| ; GFX12-NEXT: v_writelane_b32 v1, s55, 0 |
| ; GFX12-NEXT: s_add_co_ci_u32 s0, s33, 0x4000 |
| ; GFX12-NEXT: v_mov_b32_e32 v0, s33 |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: s_bitcmp1_b32 s0, 0 |
| ; GFX12-NEXT: s_bitset0_b32 s0, 0 |
| ; GFX12-NEXT: ;;#ASMSTART |
| ; GFX12-NEXT: ; use alloca0 v0 |
| ; GFX12-NEXT: ;;#ASMEND |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: s_mov_b32 s55, s0 |
| ; GFX12-NEXT: ;;#ASMSTART |
| ; GFX12-NEXT: ; use s55, scc |
| ; GFX12-NEXT: ;;#ASMEND |
| ; GFX12-NEXT: v_readlane_b32 s55, v1, 0 |
| ; GFX12-NEXT: s_mov_b32 s32, s33 |
| ; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 |
| ; GFX12-NEXT: scratch_load_b32 v1, off, s33 offset:16388 ; 4-byte Folded Reload |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: s_mov_b32 exec_lo, s0 |
| ; GFX12-NEXT: s_mov_b32 s33, s1 |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: s_mov_b32 s6, s33 |
| ; GFX8-NEXT: s_mov_b32 s33, s32 |
| ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX8-NEXT: s_add_i32 s7, s33, 0x101100 |
| ; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s7 ; 4-byte Folded Spill |
| ; GFX8-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s33 |
| ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 |
| ; GFX8-NEXT: v_writelane_b32 v1, s55, 0 |
| ; GFX8-NEXT: ;;#ASMSTART |
| ; GFX8-NEXT: ; use alloca0 v0 |
| ; GFX8-NEXT: ;;#ASMEND |
| ; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s33 |
| ; GFX8-NEXT: s_movk_i32 s55, 0x4040 |
| ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s55, v0 |
| ; GFX8-NEXT: s_add_i32 s32, s32, 0x102000 |
| ; GFX8-NEXT: v_readfirstlane_b32 s55, v0 |
| ; GFX8-NEXT: s_and_b64 s[4:5], 0, exec |
| ; GFX8-NEXT: ;;#ASMSTART |
| ; GFX8-NEXT: ; use s55, scc |
| ; GFX8-NEXT: ;;#ASMEND |
| ; GFX8-NEXT: v_readlane_b32 s55, v1, 0 |
| ; GFX8-NEXT: s_mov_b32 s32, s33 |
| ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX8-NEXT: s_add_i32 s7, s33, 0x101100 |
| ; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s7 ; 4-byte Folded Reload |
| ; GFX8-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX8-NEXT: s_mov_b32 s33, s6 |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp: |
| ; GFX900: ; %bb.0: |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: s_mov_b32 s6, s33 |
| ; GFX900-NEXT: s_mov_b32 s33, s32 |
| ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX900-NEXT: s_add_i32 s7, s33, 0x101100 |
| ; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s7 ; 4-byte Folded Spill |
| ; GFX900-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s33 |
| ; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 |
| ; GFX900-NEXT: ;;#ASMSTART |
| ; GFX900-NEXT: ; use alloca0 v0 |
| ; GFX900-NEXT: ;;#ASMEND |
| ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s33 |
| ; GFX900-NEXT: v_add_u32_e32 v0, 0x4040, v0 |
| ; GFX900-NEXT: s_add_i32 s32, s32, 0x102000 |
| ; GFX900-NEXT: v_writelane_b32 v1, s55, 0 |
| ; GFX900-NEXT: v_readfirstlane_b32 s55, v0 |
| ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec |
| ; GFX900-NEXT: ;;#ASMSTART |
| ; GFX900-NEXT: ; use s55, scc |
| ; GFX900-NEXT: ;;#ASMEND |
| ; GFX900-NEXT: v_readlane_b32 s55, v1, 0 |
| ; GFX900-NEXT: s_mov_b32 s32, s33 |
| ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX900-NEXT: s_add_i32 s7, s33, 0x101100 |
| ; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s7 ; 4-byte Folded Reload |
| ; GFX900-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX900-NEXT: s_mov_b32 s33, s6 |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp: |
| ; GFX942: ; %bb.0: |
| ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-NEXT: s_mov_b32 s2, s33 |
| ; GFX942-NEXT: s_mov_b32 s33, s32 |
| ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 |
| ; GFX942-NEXT: s_add_i32 s3, s33, 0x4044 |
| ; GFX942-NEXT: scratch_store_dword off, v1, s3 ; 4-byte Folded Spill |
| ; GFX942-NEXT: s_mov_b64 exec, s[0:1] |
| ; GFX942-NEXT: s_addk_i32 s32, 0x4080 |
| ; GFX942-NEXT: s_add_i32 s0, s33, 64 |
| ; GFX942-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX942-NEXT: s_and_b64 s[0:1], 0, exec |
| ; GFX942-NEXT: s_addc_u32 s0, s33, 0x4040 |
| ; GFX942-NEXT: s_bitcmp1_b32 s0, 0 |
| ; GFX942-NEXT: s_bitset0_b32 s0, 0 |
| ; GFX942-NEXT: v_writelane_b32 v1, s55, 0 |
| ; GFX942-NEXT: s_mov_b32 s55, s0 |
| ; GFX942-NEXT: ;;#ASMSTART |
| ; GFX942-NEXT: ; use alloca0 v0 |
| ; GFX942-NEXT: ;;#ASMEND |
| ; GFX942-NEXT: ;;#ASMSTART |
| ; GFX942-NEXT: ; use s55, scc |
| ; GFX942-NEXT: ;;#ASMEND |
| ; GFX942-NEXT: v_readlane_b32 s55, v1, 0 |
| ; GFX942-NEXT: s_mov_b32 s32, s33 |
| ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 |
| ; GFX942-NEXT: s_add_i32 s3, s33, 0x4044 |
| ; GFX942-NEXT: scratch_load_dword v1, off, s3 ; 4-byte Folded Reload |
| ; GFX942-NEXT: s_mov_b64 exec, s[0:1] |
| ; GFX942-NEXT: s_mov_b32 s33, s2 |
| ; GFX942-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-NEXT: s_setpc_b64 s[30:31] |
| ; Large, 64-byte-aligned array followed by a single i32 slot. |
| %alloca0 = alloca [4096 x i32], align 64, addrspace(5) |
| %alloca1 = alloca i32, align 4, addrspace(5) |
| ; The array address is consumed through a VGPR operand. |
| call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0) |
| ; The i32 slot's address is pinned to s55; the constant 0 is bound to scc. |
| call void asm sideeffect "; use $0, $1", "{s55},{scc}"(ptr addrspace(5) %alloca1, i32 0) |
| ret void |
| } |
| |
| define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset() #0 { |
| ; GFX10_1-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset: |
| ; GFX10_1: ; %bb.0: |
| ; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80800 |
| ; GFX10_1-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill |
| ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 |
| ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_1-NEXT: v_lshrrev_b32_e64 v1, 5, s32 |
| ; GFX10_1-NEXT: v_writelane_b32 v0, s55, 0 |
| ; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo |
| ; GFX10_1-NEXT: v_add_nc_u32_e32 v1, 64, v1 |
| ; GFX10_1-NEXT: v_readfirstlane_b32 s55, v1 |
| ; GFX10_1-NEXT: ;;#ASMSTART |
| ; GFX10_1-NEXT: ; use s55, scc |
| ; GFX10_1-NEXT: ;;#ASMEND |
| ; GFX10_1-NEXT: v_readlane_b32 s55, v0, 0 |
| ; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80800 |
| ; GFX10_1-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload |
| ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 |
| ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_1-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10_1-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset: |
| ; GFX10_3: ; %bb.0: |
| ; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80800 |
| ; GFX10_3-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill |
| ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_3-NEXT: v_lshrrev_b32_e64 v1, 5, s32 |
| ; GFX10_3-NEXT: v_writelane_b32 v0, s55, 0 |
| ; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo |
| ; GFX10_3-NEXT: v_add_nc_u32_e32 v1, 64, v1 |
| ; GFX10_3-NEXT: v_readfirstlane_b32 s55, v1 |
| ; GFX10_3-NEXT: ;;#ASMSTART |
| ; GFX10_3-NEXT: ; use s55, scc |
| ; GFX10_3-NEXT: ;;#ASMEND |
| ; GFX10_3-NEXT: v_readlane_b32 s55, v0, 0 |
| ; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80800 |
| ; GFX10_3-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload |
| ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_3-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10_3-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 |
| ; GFX11-NEXT: s_add_i32 s1, s32, 0x4040 |
| ; GFX11-NEXT: scratch_store_b32 off, v0, s1 ; 4-byte Folded Spill |
| ; GFX11-NEXT: s_mov_b32 exec_lo, s0 |
| ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) |
| ; GFX11-NEXT: s_and_b32 s0, 0, exec_lo |
| ; GFX11-NEXT: v_writelane_b32 v0, s55, 0 |
| ; GFX11-NEXT: s_addc_u32 s0, s32, 64 |
| ; GFX11-NEXT: s_bitcmp1_b32 s0, 0 |
| ; GFX11-NEXT: s_bitset0_b32 s0, 0 |
| ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX11-NEXT: s_mov_b32 s55, s0 |
| ; GFX11-NEXT: ;;#ASMSTART |
| ; GFX11-NEXT: ; use s55, scc |
| ; GFX11-NEXT: ;;#ASMEND |
| ; GFX11-NEXT: v_readlane_b32 s55, v0, 0 |
| ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 |
| ; GFX11-NEXT: s_add_i32 s1, s32, 0x4040 |
| ; GFX11-NEXT: scratch_load_b32 v0, off, s1 ; 4-byte Folded Reload |
| ; GFX11-NEXT: s_mov_b32 exec_lo, s0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 |
| ; GFX12-NEXT: scratch_store_b32 off, v0, s32 offset:16384 ; 4-byte Folded Spill |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: s_mov_b32 exec_lo, s0 |
| ; GFX12-NEXT: v_writelane_b32 v0, s55, 0 |
| ; GFX12-NEXT: s_mov_b32 s55, s32 |
| ; GFX12-NEXT: s_and_b32 s0, 0, exec_lo |
| ; GFX12-NEXT: ;;#ASMSTART |
| ; GFX12-NEXT: ; use s55, scc |
| ; GFX12-NEXT: ;;#ASMEND |
| ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-NEXT: v_readlane_b32 s55, v0, 0 |
| ; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 |
| ; GFX12-NEXT: scratch_load_b32 v0, off, s32 offset:16384 ; 4-byte Folded Reload |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: s_mov_b32 exec_lo, s0 |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX8-NEXT: s_add_i32 s6, s32, 0x101000 |
| ; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill |
| ; GFX8-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX8-NEXT: v_writelane_b32 v0, s55, 0 |
| ; GFX8-NEXT: v_lshrrev_b32_e64 v1, 6, s32 |
| ; GFX8-NEXT: s_mov_b32 s55, 64 |
| ; GFX8-NEXT: v_add_u32_e32 v1, vcc, s55, v1 |
| ; GFX8-NEXT: v_readfirstlane_b32 s55, v1 |
| ; GFX8-NEXT: s_and_b64 s[4:5], 0, exec |
| ; GFX8-NEXT: ;;#ASMSTART |
| ; GFX8-NEXT: ; use s55, scc |
| ; GFX8-NEXT: ;;#ASMEND |
| ; GFX8-NEXT: v_readlane_b32 s55, v0, 0 |
| ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX8-NEXT: s_add_i32 s6, s32, 0x101000 |
| ; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload |
| ; GFX8-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset: |
| ; GFX900: ; %bb.0: |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX900-NEXT: s_add_i32 s6, s32, 0x101000 |
| ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill |
| ; GFX900-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX900-NEXT: v_lshrrev_b32_e64 v1, 6, s32 |
| ; GFX900-NEXT: v_add_u32_e32 v1, 64, v1 |
| ; GFX900-NEXT: v_writelane_b32 v0, s55, 0 |
| ; GFX900-NEXT: v_readfirstlane_b32 s55, v1 |
| ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec |
| ; GFX900-NEXT: ;;#ASMSTART |
| ; GFX900-NEXT: ; use s55, scc |
| ; GFX900-NEXT: ;;#ASMEND |
| ; GFX900-NEXT: v_readlane_b32 s55, v0, 0 |
| ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX900-NEXT: s_add_i32 s6, s32, 0x101000 |
| ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload |
| ; GFX900-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset: |
| ; GFX942: ; %bb.0: |
| ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 |
| ; GFX942-NEXT: s_add_i32 s2, s32, 0x4040 |
| ; GFX942-NEXT: scratch_store_dword off, v0, s2 ; 4-byte Folded Spill |
| ; GFX942-NEXT: s_mov_b64 exec, s[0:1] |
| ; GFX942-NEXT: s_and_b64 s[0:1], 0, exec |
| ; GFX942-NEXT: s_addc_u32 s0, s32, 64 |
| ; GFX942-NEXT: s_bitcmp1_b32 s0, 0 |
| ; GFX942-NEXT: s_bitset0_b32 s0, 0 |
| ; GFX942-NEXT: v_writelane_b32 v0, s55, 0 |
| ; GFX942-NEXT: s_mov_b32 s55, s0 |
| ; GFX942-NEXT: ;;#ASMSTART |
| ; GFX942-NEXT: ; use s55, scc |
| ; GFX942-NEXT: ;;#ASMEND |
| ; GFX942-NEXT: v_readlane_b32 s55, v0, 0 |
| ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 |
| ; GFX942-NEXT: s_add_i32 s2, s32, 0x4040 |
| ; GFX942-NEXT: scratch_load_dword v0, off, s2 ; 4-byte Folded Reload |
| ; GFX942-NEXT: s_mov_b64 exec, s[0:1] |
| ; GFX942-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-NEXT: s_setpc_b64 s[30:31] |
| %alloca0 = alloca [4096 x i32], align 64, addrspace(5) |
| call void asm sideeffect "; use $0, $1", "{s55},{scc}"(ptr addrspace(5) %alloca0, i32 0) |
| ret void |
| } |
| |
| ; The address of a 64-byte-aligned [4096 x i32] private (addrspace 5) alloca is |
| ; passed to inline asm in s55. SCC is not an operand of the asm here. Every |
| ; expected sequence forms the address in s55 with scalar instructions only |
| ; (s_lshr_b32 + s_add_i32, s_add_i32 + s_mov_b32, or a single s_mov_b32 on |
| ; gfx1200), and the incoming s55 is saved to and restored from lane 0 of v0. |
define void @scalar_mov_materializes_frame_index_available_scc_small_offset() #0 {
| ; GFX10_1-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset: |
| ; GFX10_1: ; %bb.0: |
| ; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80800 |
| ; GFX10_1-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill |
| ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 |
| ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_1-NEXT: v_writelane_b32 v0, s55, 0 |
| ; GFX10_1-NEXT: s_lshr_b32 s55, s32, 5 |
| ; GFX10_1-NEXT: s_add_i32 s55, s55, 64 |
| ; GFX10_1-NEXT: ;;#ASMSTART |
| ; GFX10_1-NEXT: ; use s55 |
| ; GFX10_1-NEXT: ;;#ASMEND |
| ; GFX10_1-NEXT: v_readlane_b32 s55, v0, 0 |
| ; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80800 |
| ; GFX10_1-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload |
| ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 |
| ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_1-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10_1-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10_3-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset: |
| ; GFX10_3: ; %bb.0: |
| ; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80800 |
| ; GFX10_3-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill |
| ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_3-NEXT: v_writelane_b32 v0, s55, 0 |
| ; GFX10_3-NEXT: s_lshr_b32 s55, s32, 5 |
| ; GFX10_3-NEXT: s_add_i32 s55, s55, 64 |
| ; GFX10_3-NEXT: ;;#ASMSTART |
| ; GFX10_3-NEXT: ; use s55 |
| ; GFX10_3-NEXT: ;;#ASMEND |
| ; GFX10_3-NEXT: v_readlane_b32 s55, v0, 0 |
| ; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80800 |
| ; GFX10_3-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload |
| ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_3-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10_3-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 |
| ; GFX11-NEXT: s_add_i32 s1, s32, 0x4040 |
| ; GFX11-NEXT: scratch_store_b32 off, v0, s1 ; 4-byte Folded Spill |
| ; GFX11-NEXT: s_mov_b32 exec_lo, s0 |
| ; GFX11-NEXT: v_writelane_b32 v0, s55, 0 |
| ; GFX11-NEXT: s_add_i32 s0, s32, 64 |
| ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) |
| ; GFX11-NEXT: s_mov_b32 s55, s0 |
| ; GFX11-NEXT: ;;#ASMSTART |
| ; GFX11-NEXT: ; use s55 |
| ; GFX11-NEXT: ;;#ASMEND |
| ; GFX11-NEXT: v_readlane_b32 s55, v0, 0 |
| ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 |
| ; GFX11-NEXT: s_add_i32 s1, s32, 0x4040 |
| ; GFX11-NEXT: scratch_load_b32 v0, off, s1 ; 4-byte Folded Reload |
| ; GFX11-NEXT: s_mov_b32 exec_lo, s0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 |
| ; GFX12-NEXT: scratch_store_b32 off, v0, s32 offset:16384 ; 4-byte Folded Spill |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: s_mov_b32 exec_lo, s0 |
| ; GFX12-NEXT: v_writelane_b32 v0, s55, 0 |
| ; GFX12-NEXT: s_mov_b32 s55, s32 |
| ; GFX12-NEXT: ;;#ASMSTART |
| ; GFX12-NEXT: ; use s55 |
| ; GFX12-NEXT: ;;#ASMEND |
| ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-NEXT: v_readlane_b32 s55, v0, 0 |
| ; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 |
| ; GFX12-NEXT: scratch_load_b32 v0, off, s32 offset:16384 ; 4-byte Folded Reload |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: s_mov_b32 exec_lo, s0 |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX8-NEXT: s_add_i32 s6, s32, 0x101000 |
| ; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill |
| ; GFX8-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX8-NEXT: v_writelane_b32 v0, s55, 0 |
| ; GFX8-NEXT: s_lshr_b32 s55, s32, 6 |
| ; GFX8-NEXT: s_add_i32 s55, s55, 64 |
| ; GFX8-NEXT: ;;#ASMSTART |
| ; GFX8-NEXT: ; use s55 |
| ; GFX8-NEXT: ;;#ASMEND |
| ; GFX8-NEXT: v_readlane_b32 s55, v0, 0 |
| ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX8-NEXT: s_add_i32 s6, s32, 0x101000 |
| ; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload |
| ; GFX8-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset: |
| ; GFX900: ; %bb.0: |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX900-NEXT: s_add_i32 s6, s32, 0x101000 |
| ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill |
| ; GFX900-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX900-NEXT: v_writelane_b32 v0, s55, 0 |
| ; GFX900-NEXT: s_lshr_b32 s55, s32, 6 |
| ; GFX900-NEXT: s_add_i32 s55, s55, 64 |
| ; GFX900-NEXT: ;;#ASMSTART |
| ; GFX900-NEXT: ; use s55 |
| ; GFX900-NEXT: ;;#ASMEND |
| ; GFX900-NEXT: v_readlane_b32 s55, v0, 0 |
| ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX900-NEXT: s_add_i32 s6, s32, 0x101000 |
| ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload |
| ; GFX900-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset: |
| ; GFX942: ; %bb.0: |
| ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 |
| ; GFX942-NEXT: s_add_i32 s2, s32, 0x4040 |
| ; GFX942-NEXT: scratch_store_dword off, v0, s2 ; 4-byte Folded Spill |
| ; GFX942-NEXT: s_mov_b64 exec, s[0:1] |
| ; GFX942-NEXT: s_add_i32 s0, s32, 64 |
| ; GFX942-NEXT: v_writelane_b32 v0, s55, 0 |
| ; GFX942-NEXT: s_mov_b32 s55, s0 |
| ; GFX942-NEXT: ;;#ASMSTART |
| ; GFX942-NEXT: ; use s55 |
| ; GFX942-NEXT: ;;#ASMEND |
| ; GFX942-NEXT: v_readlane_b32 s55, v0, 0 |
| ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 |
| ; GFX942-NEXT: s_add_i32 s2, s32, 0x4040 |
| ; GFX942-NEXT: scratch_load_dword v0, off, s2 ; 4-byte Folded Reload |
| ; GFX942-NEXT: s_mov_b64 exec, s[0:1] |
| ; GFX942-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-NEXT: s_setpc_b64 s[30:31] |
| %alloca0 = alloca [4096 x i32], align 64, addrspace(5) |
| ; The {s55} constraint pins the alloca address to physical register s55. |
| call void asm sideeffect "; use $0", "{s55}"(ptr addrspace(5) %alloca0) |
| ret void |
| } |
| |
| ; The address of a 64-byte-aligned [4096 x i32] private (addrspace 5) alloca is |
| ; passed to inline asm in s55, together with an i32 0 bound to SCC. The expected |
| ; code copies s32 into s33 on entry and addresses the object relative to s33. |
| ; The address is formed either on the VALU (v_lshrrev_b32 + add, then |
| ; v_readfirstlane_b32 into s55) or, on gfx1100 and gfx942, with an s_addc_u32 / |
| ; s_bitcmp1_b32 / s_bitset0_b32 sequence; gfx1200 needs only s_mov_b32. |
| ; NOTE(review): these sequences presumably exist so that a plain scalar add does |
| ; not clobber the SCC value the asm consumes, and attribute group #1 presumably |
| ; requests a frame pointer (hence the _fp suffix) -- confirm both. |
define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp() #1 {
| ; GFX10_1-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp: |
| ; GFX10_1: ; %bb.0: |
| ; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10_1-NEXT: s_mov_b32 s5, s33 |
| ; GFX10_1-NEXT: s_mov_b32 s33, s32 |
| ; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80800 |
| ; GFX10_1-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill |
| ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 |
| ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_1-NEXT: v_lshrrev_b32_e64 v1, 5, s33 |
| ; GFX10_1-NEXT: v_writelane_b32 v0, s55, 0 |
| ; GFX10_1-NEXT: s_add_i32 s32, s32, 0x81000 |
| ; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo |
| ; GFX10_1-NEXT: s_mov_b32 s32, s33 |
| ; GFX10_1-NEXT: v_add_nc_u32_e32 v1, 64, v1 |
| ; GFX10_1-NEXT: v_readfirstlane_b32 s55, v1 |
| ; GFX10_1-NEXT: ;;#ASMSTART |
| ; GFX10_1-NEXT: ; use s55, scc |
| ; GFX10_1-NEXT: ;;#ASMEND |
| ; GFX10_1-NEXT: v_readlane_b32 s55, v0, 0 |
| ; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80800 |
| ; GFX10_1-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload |
| ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 |
| ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_1-NEXT: s_mov_b32 s33, s5 |
| ; GFX10_1-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10_1-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp: |
| ; GFX10_3: ; %bb.0: |
| ; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10_3-NEXT: s_mov_b32 s5, s33 |
| ; GFX10_3-NEXT: s_mov_b32 s33, s32 |
| ; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80800 |
| ; GFX10_3-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill |
| ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_3-NEXT: v_lshrrev_b32_e64 v1, 5, s33 |
| ; GFX10_3-NEXT: v_writelane_b32 v0, s55, 0 |
| ; GFX10_3-NEXT: s_add_i32 s32, s32, 0x81000 |
| ; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo |
| ; GFX10_3-NEXT: s_mov_b32 s32, s33 |
| ; GFX10_3-NEXT: v_add_nc_u32_e32 v1, 64, v1 |
| ; GFX10_3-NEXT: v_readfirstlane_b32 s55, v1 |
| ; GFX10_3-NEXT: ;;#ASMSTART |
| ; GFX10_3-NEXT: ; use s55, scc |
| ; GFX10_3-NEXT: ;;#ASMEND |
| ; GFX10_3-NEXT: v_readlane_b32 s55, v0, 0 |
| ; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80800 |
| ; GFX10_3-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload |
| ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_3-NEXT: s_mov_b32 s33, s5 |
| ; GFX10_3-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10_3-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s1, s33 |
| ; GFX11-NEXT: s_mov_b32 s33, s32 |
| ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 |
| ; GFX11-NEXT: s_add_i32 s2, s33, 0x4040 |
| ; GFX11-NEXT: scratch_store_b32 off, v0, s2 ; 4-byte Folded Spill |
| ; GFX11-NEXT: s_mov_b32 exec_lo, s0 |
| ; GFX11-NEXT: s_addk_i32 s32, 0x4080 |
| ; GFX11-NEXT: s_and_b32 s0, 0, exec_lo |
| ; GFX11-NEXT: v_writelane_b32 v0, s55, 0 |
| ; GFX11-NEXT: s_addc_u32 s0, s33, 64 |
| ; GFX11-NEXT: s_mov_b32 s32, s33 |
| ; GFX11-NEXT: s_bitcmp1_b32 s0, 0 |
| ; GFX11-NEXT: s_bitset0_b32 s0, 0 |
| ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX11-NEXT: s_mov_b32 s55, s0 |
| ; GFX11-NEXT: ;;#ASMSTART |
| ; GFX11-NEXT: ; use s55, scc |
| ; GFX11-NEXT: ;;#ASMEND |
| ; GFX11-NEXT: v_readlane_b32 s55, v0, 0 |
| ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 |
| ; GFX11-NEXT: s_add_i32 s2, s33, 0x4040 |
| ; GFX11-NEXT: scratch_load_b32 v0, off, s2 ; 4-byte Folded Reload |
| ; GFX11-NEXT: s_mov_b32 exec_lo, s0 |
| ; GFX11-NEXT: s_mov_b32 s33, s1 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: s_mov_b32 s1, s33 |
| ; GFX12-NEXT: s_mov_b32 s33, s32 |
| ; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 |
| ; GFX12-NEXT: scratch_store_b32 off, v0, s33 offset:16384 ; 4-byte Folded Spill |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: s_mov_b32 exec_lo, s0 |
| ; GFX12-NEXT: v_writelane_b32 v0, s55, 0 |
| ; GFX12-NEXT: s_addk_co_i32 s32, 0x4040 |
| ; GFX12-NEXT: s_mov_b32 s55, s33 |
| ; GFX12-NEXT: s_and_b32 s0, 0, exec_lo |
| ; GFX12-NEXT: ;;#ASMSTART |
| ; GFX12-NEXT: ; use s55, scc |
| ; GFX12-NEXT: ;;#ASMEND |
| ; GFX12-NEXT: v_readlane_b32 s55, v0, 0 |
| ; GFX12-NEXT: s_mov_b32 s32, s33 |
| ; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 |
| ; GFX12-NEXT: scratch_load_b32 v0, off, s33 offset:16384 ; 4-byte Folded Reload |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: s_mov_b32 exec_lo, s0 |
| ; GFX12-NEXT: s_mov_b32 s33, s1 |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: s_mov_b32 s6, s33 |
| ; GFX8-NEXT: s_mov_b32 s33, s32 |
| ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX8-NEXT: s_add_i32 s7, s33, 0x101000 |
| ; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s7 ; 4-byte Folded Spill |
| ; GFX8-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX8-NEXT: v_writelane_b32 v0, s55, 0 |
| ; GFX8-NEXT: v_lshrrev_b32_e64 v1, 6, s33 |
| ; GFX8-NEXT: s_mov_b32 s55, 64 |
| ; GFX8-NEXT: v_add_u32_e32 v1, vcc, s55, v1 |
| ; GFX8-NEXT: s_add_i32 s32, s32, 0x102000 |
| ; GFX8-NEXT: v_readfirstlane_b32 s55, v1 |
| ; GFX8-NEXT: s_and_b64 s[4:5], 0, exec |
| ; GFX8-NEXT: ;;#ASMSTART |
| ; GFX8-NEXT: ; use s55, scc |
| ; GFX8-NEXT: ;;#ASMEND |
| ; GFX8-NEXT: v_readlane_b32 s55, v0, 0 |
| ; GFX8-NEXT: s_mov_b32 s32, s33 |
| ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX8-NEXT: s_add_i32 s7, s33, 0x101000 |
| ; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s7 ; 4-byte Folded Reload |
| ; GFX8-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX8-NEXT: s_mov_b32 s33, s6 |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp: |
| ; GFX900: ; %bb.0: |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: s_mov_b32 s6, s33 |
| ; GFX900-NEXT: s_mov_b32 s33, s32 |
| ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX900-NEXT: s_add_i32 s7, s33, 0x101000 |
| ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s7 ; 4-byte Folded Spill |
| ; GFX900-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX900-NEXT: v_lshrrev_b32_e64 v1, 6, s33 |
| ; GFX900-NEXT: v_add_u32_e32 v1, 64, v1 |
| ; GFX900-NEXT: s_add_i32 s32, s32, 0x102000 |
| ; GFX900-NEXT: v_writelane_b32 v0, s55, 0 |
| ; GFX900-NEXT: v_readfirstlane_b32 s55, v1 |
| ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec |
| ; GFX900-NEXT: ;;#ASMSTART |
| ; GFX900-NEXT: ; use s55, scc |
| ; GFX900-NEXT: ;;#ASMEND |
| ; GFX900-NEXT: v_readlane_b32 s55, v0, 0 |
| ; GFX900-NEXT: s_mov_b32 s32, s33 |
| ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX900-NEXT: s_add_i32 s7, s33, 0x101000 |
| ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s7 ; 4-byte Folded Reload |
| ; GFX900-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX900-NEXT: s_mov_b32 s33, s6 |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp: |
| ; GFX942: ; %bb.0: |
| ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-NEXT: s_mov_b32 s2, s33 |
| ; GFX942-NEXT: s_mov_b32 s33, s32 |
| ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 |
| ; GFX942-NEXT: s_add_i32 s3, s33, 0x4040 |
| ; GFX942-NEXT: scratch_store_dword off, v0, s3 ; 4-byte Folded Spill |
| ; GFX942-NEXT: s_mov_b64 exec, s[0:1] |
| ; GFX942-NEXT: s_addk_i32 s32, 0x4080 |
| ; GFX942-NEXT: s_and_b64 s[0:1], 0, exec |
| ; GFX942-NEXT: s_addc_u32 s0, s33, 64 |
| ; GFX942-NEXT: s_bitcmp1_b32 s0, 0 |
| ; GFX942-NEXT: s_bitset0_b32 s0, 0 |
| ; GFX942-NEXT: v_writelane_b32 v0, s55, 0 |
| ; GFX942-NEXT: s_mov_b32 s55, s0 |
| ; GFX942-NEXT: ;;#ASMSTART |
| ; GFX942-NEXT: ; use s55, scc |
| ; GFX942-NEXT: ;;#ASMEND |
| ; GFX942-NEXT: v_readlane_b32 s55, v0, 0 |
| ; GFX942-NEXT: s_mov_b32 s32, s33 |
| ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 |
| ; GFX942-NEXT: s_add_i32 s3, s33, 0x4040 |
| ; GFX942-NEXT: scratch_load_dword v0, off, s3 ; 4-byte Folded Reload |
| ; GFX942-NEXT: s_mov_b64 exec, s[0:1] |
| ; GFX942-NEXT: s_mov_b32 s33, s2 |
| ; GFX942-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-NEXT: s_setpc_b64 s[30:31] |
| %alloca0 = alloca [4096 x i32], align 64, addrspace(5) |
| ; Operand 0 (the alloca address) is pinned to s55; operand 1 (i32 0) is bound to scc. |
| call void asm sideeffect "; use $0, $1", "{s55},{scc}"(ptr addrspace(5) %alloca0, i32 0) |
| ret void |
| } |
| |
| ; The address of a 64-byte-aligned [4096 x i32] private (addrspace 5) alloca is |
| ; passed to inline asm in s55. SCC is not an operand of the asm here. The |
| ; expected code copies s32 into s33 on entry and forms the address relative to |
| ; s33 with scalar instructions only (s_lshr_b32 + s_add_i32, s_add_i32 + |
| ; s_mov_b32, or a single s_mov_b32 on gfx1200). |
| ; NOTE(review): attribute group #1 presumably requests a frame pointer (hence |
| ; the _fp suffix) -- confirm against its definition. |
define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp() #1 {
| ; GFX10_1-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp: |
| ; GFX10_1: ; %bb.0: |
| ; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10_1-NEXT: s_mov_b32 s4, s33 |
| ; GFX10_1-NEXT: s_mov_b32 s33, s32 |
| ; GFX10_1-NEXT: s_xor_saveexec_b32 s5, -1 |
| ; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80800 |
| ; GFX10_1-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill |
| ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 |
| ; GFX10_1-NEXT: s_mov_b32 exec_lo, s5 |
| ; GFX10_1-NEXT: v_writelane_b32 v0, s55, 0 |
| ; GFX10_1-NEXT: s_add_i32 s32, s32, 0x81000 |
| ; GFX10_1-NEXT: s_lshr_b32 s55, s33, 5 |
| ; GFX10_1-NEXT: s_mov_b32 s32, s33 |
| ; GFX10_1-NEXT: s_add_i32 s55, s55, 64 |
| ; GFX10_1-NEXT: ;;#ASMSTART |
| ; GFX10_1-NEXT: ; use s55 |
| ; GFX10_1-NEXT: ;;#ASMEND |
| ; GFX10_1-NEXT: v_readlane_b32 s55, v0, 0 |
| ; GFX10_1-NEXT: s_xor_saveexec_b32 s5, -1 |
| ; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80800 |
| ; GFX10_1-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload |
| ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 |
| ; GFX10_1-NEXT: s_mov_b32 exec_lo, s5 |
| ; GFX10_1-NEXT: s_mov_b32 s33, s4 |
| ; GFX10_1-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10_1-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10_3-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp: |
| ; GFX10_3: ; %bb.0: |
| ; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10_3-NEXT: s_mov_b32 s4, s33 |
| ; GFX10_3-NEXT: s_mov_b32 s33, s32 |
| ; GFX10_3-NEXT: s_xor_saveexec_b32 s5, -1 |
| ; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80800 |
| ; GFX10_3-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill |
| ; GFX10_3-NEXT: s_mov_b32 exec_lo, s5 |
| ; GFX10_3-NEXT: v_writelane_b32 v0, s55, 0 |
| ; GFX10_3-NEXT: s_add_i32 s32, s32, 0x81000 |
| ; GFX10_3-NEXT: s_lshr_b32 s55, s33, 5 |
| ; GFX10_3-NEXT: s_mov_b32 s32, s33 |
| ; GFX10_3-NEXT: s_add_i32 s55, s55, 64 |
| ; GFX10_3-NEXT: ;;#ASMSTART |
| ; GFX10_3-NEXT: ; use s55 |
| ; GFX10_3-NEXT: ;;#ASMEND |
| ; GFX10_3-NEXT: v_readlane_b32 s55, v0, 0 |
| ; GFX10_3-NEXT: s_xor_saveexec_b32 s5, -1 |
| ; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80800 |
| ; GFX10_3-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload |
| ; GFX10_3-NEXT: s_mov_b32 exec_lo, s5 |
| ; GFX10_3-NEXT: s_mov_b32 s33, s4 |
| ; GFX10_3-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10_3-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s0, s33 |
| ; GFX11-NEXT: s_mov_b32 s33, s32 |
| ; GFX11-NEXT: s_xor_saveexec_b32 s1, -1 |
| ; GFX11-NEXT: s_add_i32 s2, s33, 0x4040 |
| ; GFX11-NEXT: scratch_store_b32 off, v0, s2 ; 4-byte Folded Spill |
| ; GFX11-NEXT: s_mov_b32 exec_lo, s1 |
| ; GFX11-NEXT: v_writelane_b32 v0, s55, 0 |
| ; GFX11-NEXT: s_addk_i32 s32, 0x4080 |
| ; GFX11-NEXT: s_add_i32 s1, s33, 64 |
| ; GFX11-NEXT: s_mov_b32 s32, s33 |
| ; GFX11-NEXT: s_mov_b32 s55, s1 |
| ; GFX11-NEXT: ;;#ASMSTART |
| ; GFX11-NEXT: ; use s55 |
| ; GFX11-NEXT: ;;#ASMEND |
| ; GFX11-NEXT: v_readlane_b32 s55, v0, 0 |
| ; GFX11-NEXT: s_xor_saveexec_b32 s1, -1 |
| ; GFX11-NEXT: s_add_i32 s2, s33, 0x4040 |
| ; GFX11-NEXT: scratch_load_b32 v0, off, s2 ; 4-byte Folded Reload |
| ; GFX11-NEXT: s_mov_b32 exec_lo, s1 |
| ; GFX11-NEXT: s_mov_b32 s33, s0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: s_mov_b32 s0, s33 |
| ; GFX12-NEXT: s_mov_b32 s33, s32 |
| ; GFX12-NEXT: s_xor_saveexec_b32 s1, -1 |
| ; GFX12-NEXT: scratch_store_b32 off, v0, s33 offset:16384 ; 4-byte Folded Spill |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: s_mov_b32 exec_lo, s1 |
| ; GFX12-NEXT: v_writelane_b32 v0, s55, 0 |
| ; GFX12-NEXT: s_addk_co_i32 s32, 0x4040 |
| ; GFX12-NEXT: s_mov_b32 s55, s33 |
| ; GFX12-NEXT: ;;#ASMSTART |
| ; GFX12-NEXT: ; use s55 |
| ; GFX12-NEXT: ;;#ASMEND |
| ; GFX12-NEXT: s_mov_b32 s32, s33 |
| ; GFX12-NEXT: v_readlane_b32 s55, v0, 0 |
| ; GFX12-NEXT: s_xor_saveexec_b32 s1, -1 |
| ; GFX12-NEXT: scratch_load_b32 v0, off, s33 offset:16384 ; 4-byte Folded Reload |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: s_mov_b32 exec_lo, s1 |
| ; GFX12-NEXT: s_mov_b32 s33, s0 |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: s_mov_b32 s4, s33 |
| ; GFX8-NEXT: s_mov_b32 s33, s32 |
| ; GFX8-NEXT: s_xor_saveexec_b64 s[6:7], -1 |
| ; GFX8-NEXT: s_add_i32 s5, s33, 0x101000 |
| ; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill |
| ; GFX8-NEXT: s_mov_b64 exec, s[6:7] |
| ; GFX8-NEXT: s_add_i32 s32, s32, 0x102000 |
| ; GFX8-NEXT: v_writelane_b32 v0, s55, 0 |
| ; GFX8-NEXT: s_lshr_b32 s55, s33, 6 |
| ; GFX8-NEXT: s_add_i32 s55, s55, 64 |
| ; GFX8-NEXT: ;;#ASMSTART |
| ; GFX8-NEXT: ; use s55 |
| ; GFX8-NEXT: ;;#ASMEND |
| ; GFX8-NEXT: v_readlane_b32 s55, v0, 0 |
| ; GFX8-NEXT: s_mov_b32 s32, s33 |
| ; GFX8-NEXT: s_xor_saveexec_b64 s[6:7], -1 |
| ; GFX8-NEXT: s_add_i32 s5, s33, 0x101000 |
| ; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload |
| ; GFX8-NEXT: s_mov_b64 exec, s[6:7] |
| ; GFX8-NEXT: s_mov_b32 s33, s4 |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp: |
| ; GFX900: ; %bb.0: |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: s_mov_b32 s4, s33 |
| ; GFX900-NEXT: s_mov_b32 s33, s32 |
| ; GFX900-NEXT: s_xor_saveexec_b64 s[6:7], -1 |
| ; GFX900-NEXT: s_add_i32 s5, s33, 0x101000 |
| ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill |
| ; GFX900-NEXT: s_mov_b64 exec, s[6:7] |
| ; GFX900-NEXT: s_add_i32 s32, s32, 0x102000 |
| ; GFX900-NEXT: v_writelane_b32 v0, s55, 0 |
| ; GFX900-NEXT: s_lshr_b32 s55, s33, 6 |
| ; GFX900-NEXT: s_add_i32 s55, s55, 64 |
| ; GFX900-NEXT: ;;#ASMSTART |
| ; GFX900-NEXT: ; use s55 |
| ; GFX900-NEXT: ;;#ASMEND |
| ; GFX900-NEXT: v_readlane_b32 s55, v0, 0 |
| ; GFX900-NEXT: s_mov_b32 s32, s33 |
| ; GFX900-NEXT: s_xor_saveexec_b64 s[6:7], -1 |
| ; GFX900-NEXT: s_add_i32 s5, s33, 0x101000 |
| ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload |
| ; GFX900-NEXT: s_mov_b64 exec, s[6:7] |
| ; GFX900-NEXT: s_mov_b32 s33, s4 |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp: |
| ; GFX942: ; %bb.0: |
| ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-NEXT: s_mov_b32 s0, s33 |
| ; GFX942-NEXT: s_mov_b32 s33, s32 |
| ; GFX942-NEXT: s_xor_saveexec_b64 s[2:3], -1 |
| ; GFX942-NEXT: s_add_i32 s1, s33, 0x4040 |
| ; GFX942-NEXT: scratch_store_dword off, v0, s1 ; 4-byte Folded Spill |
| ; GFX942-NEXT: s_mov_b64 exec, s[2:3] |
| ; GFX942-NEXT: s_addk_i32 s32, 0x4080 |
| ; GFX942-NEXT: s_add_i32 s1, s33, 64 |
| ; GFX942-NEXT: v_writelane_b32 v0, s55, 0 |
| ; GFX942-NEXT: s_mov_b32 s55, s1 |
| ; GFX942-NEXT: ;;#ASMSTART |
| ; GFX942-NEXT: ; use s55 |
| ; GFX942-NEXT: ;;#ASMEND |
| ; GFX942-NEXT: v_readlane_b32 s55, v0, 0 |
| ; GFX942-NEXT: s_mov_b32 s32, s33 |
| ; GFX942-NEXT: s_xor_saveexec_b64 s[2:3], -1 |
| ; GFX942-NEXT: s_add_i32 s1, s33, 0x4040 |
| ; GFX942-NEXT: scratch_load_dword v0, off, s1 ; 4-byte Folded Reload |
| ; GFX942-NEXT: s_mov_b64 exec, s[2:3] |
| ; GFX942-NEXT: s_mov_b32 s33, s0 |
| ; GFX942-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-NEXT: s_setpc_b64 s[30:31] |
| %alloca0 = alloca [4096 x i32], align 64, addrspace(5) |
| ; The {s55} constraint pins the alloca address to physical register s55. |
| call void asm sideeffect "; use $0", "{s55}"(ptr addrspace(5) %alloca0) |
| ret void |
| } |
| |
| define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset() #0 { |
| ; GFX10_1-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: |
| ; GFX10_1: ; %bb.0: |
| ; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800 |
| ; GFX10_1-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill |
| ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 |
| ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_1-NEXT: v_writelane_b32 v1, s55, 0 |
| ; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 |
| ; GFX10_1-NEXT: s_lshr_b32 s4, s32, 5 |
| ; GFX10_1-NEXT: s_add_i32 s55, s4, 0x442c |
| ; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo |
| ; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 |
| ; GFX10_1-NEXT: ;;#ASMSTART |
| ; GFX10_1-NEXT: ; use alloca0 v0 |
| ; GFX10_1-NEXT: ;;#ASMEND |
| ; GFX10_1-NEXT: ;;#ASMSTART |
| ; GFX10_1-NEXT: ; use s55, scc |
| ; GFX10_1-NEXT: ;;#ASMEND |
| ; GFX10_1-NEXT: v_readlane_b32 s55, v1, 0 |
| ; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800 |
| ; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload |
| ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 |
| ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_1-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10_1-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: |
| ; GFX10_3: ; %bb.0: |
| ; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800 |
| ; GFX10_3-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill |
| ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_3-NEXT: v_writelane_b32 v1, s55, 0 |
| ; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 |
| ; GFX10_3-NEXT: s_lshr_b32 s4, s32, 5 |
| ; GFX10_3-NEXT: s_add_i32 s55, s4, 0x442c |
| ; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo |
| ; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 |
| ; GFX10_3-NEXT: ;;#ASMSTART |
| ; GFX10_3-NEXT: ; use alloca0 v0 |
| ; GFX10_3-NEXT: ;;#ASMEND |
| ; GFX10_3-NEXT: ;;#ASMSTART |
| ; GFX10_3-NEXT: ; use s55, scc |
| ; GFX10_3-NEXT: ;;#ASMEND |
| ; GFX10_3-NEXT: v_readlane_b32 s55, v1, 0 |
| ; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800 |
| ; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload |
| ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_3-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10_3-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 |
| ; GFX11-NEXT: s_add_i32 s1, s32, 0x8040 |
| ; GFX11-NEXT: scratch_store_b32 off, v1, s1 ; 4-byte Folded Spill |
| ; GFX11-NEXT: s_mov_b32 exec_lo, s0 |
| ; GFX11-NEXT: v_writelane_b32 v1, s55, 0 |
| ; GFX11-NEXT: s_add_i32 s0, s32, 64 |
| ; GFX11-NEXT: s_add_i32 s55, s32, 0x442c |
| ; GFX11-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX11-NEXT: s_and_b32 s0, 0, exec_lo |
| ; GFX11-NEXT: ;;#ASMSTART |
| ; GFX11-NEXT: ; use alloca0 v0 |
| ; GFX11-NEXT: ;;#ASMEND |
| ; GFX11-NEXT: ;;#ASMSTART |
| ; GFX11-NEXT: ; use s55, scc |
| ; GFX11-NEXT: ;;#ASMEND |
| ; GFX11-NEXT: v_readlane_b32 s55, v1, 0 |
| ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 |
| ; GFX11-NEXT: s_add_i32 s1, s32, 0x8040 |
| ; GFX11-NEXT: scratch_load_b32 v1, off, s1 ; 4-byte Folded Reload |
| ; GFX11-NEXT: s_mov_b32 exec_lo, s0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 |
| ; GFX12-NEXT: scratch_store_b32 off, v1, s32 offset:32768 ; 4-byte Folded Spill |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: s_mov_b32 exec_lo, s0 |
| ; GFX12-NEXT: v_writelane_b32 v1, s55, 0 |
| ; GFX12-NEXT: s_add_co_i32 s55, s32, 0x43ec |
| ; GFX12-NEXT: v_mov_b32_e32 v0, s32 |
| ; GFX12-NEXT: s_and_b32 s0, 0, exec_lo |
| ; GFX12-NEXT: ;;#ASMSTART |
| ; GFX12-NEXT: ; use alloca0 v0 |
| ; GFX12-NEXT: ;;#ASMEND |
| ; GFX12-NEXT: ;;#ASMSTART |
| ; GFX12-NEXT: ; use s55, scc |
| ; GFX12-NEXT: ;;#ASMEND |
| ; GFX12-NEXT: v_readlane_b32 s55, v1, 0 |
| ; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 |
| ; GFX12-NEXT: scratch_load_b32 v1, off, s32 offset:32768 ; 4-byte Folded Reload |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: s_mov_b32 exec_lo, s0 |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX8-NEXT: s_add_i32 s6, s32, 0x201000 |
| ; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill |
| ; GFX8-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX8-NEXT: s_lshr_b32 s4, s32, 6 |
| ; GFX8-NEXT: v_writelane_b32 v1, s55, 0 |
| ; GFX8-NEXT: s_add_i32 s55, s4, 0x442c |
| ; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 |
| ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 |
| ; GFX8-NEXT: ;;#ASMSTART |
| ; GFX8-NEXT: ; use alloca0 v0 |
| ; GFX8-NEXT: ;;#ASMEND |
| ; GFX8-NEXT: s_and_b64 s[4:5], 0, exec |
| ; GFX8-NEXT: ;;#ASMSTART |
| ; GFX8-NEXT: ; use s55, scc |
| ; GFX8-NEXT: ;;#ASMEND |
| ; GFX8-NEXT: v_readlane_b32 s55, v1, 0 |
| ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX8-NEXT: s_add_i32 s6, s32, 0x201000 |
| ; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload |
| ; GFX8-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: |
| ; GFX900: ; %bb.0: |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX900-NEXT: s_add_i32 s6, s32, 0x201000 |
| ; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill |
| ; GFX900-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX900-NEXT: s_lshr_b32 s4, s32, 6 |
| ; GFX900-NEXT: v_writelane_b32 v1, s55, 0 |
| ; GFX900-NEXT: s_add_i32 s55, s4, 0x442c |
| ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 |
| ; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 |
| ; GFX900-NEXT: ;;#ASMSTART |
| ; GFX900-NEXT: ; use alloca0 v0 |
| ; GFX900-NEXT: ;;#ASMEND |
| ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec |
| ; GFX900-NEXT: ;;#ASMSTART |
| ; GFX900-NEXT: ; use s55, scc |
| ; GFX900-NEXT: ;;#ASMEND |
| ; GFX900-NEXT: v_readlane_b32 s55, v1, 0 |
| ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX900-NEXT: s_add_i32 s6, s32, 0x201000 |
| ; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload |
| ; GFX900-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: |
| ; GFX942: ; %bb.0: |
| ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 |
| ; GFX942-NEXT: s_add_i32 s2, s32, 0x8040 |
| ; GFX942-NEXT: scratch_store_dword off, v1, s2 ; 4-byte Folded Spill |
| ; GFX942-NEXT: s_mov_b64 exec, s[0:1] |
| ; GFX942-NEXT: v_writelane_b32 v1, s55, 0 |
| ; GFX942-NEXT: s_add_i32 s55, s32, 0x442c |
| ; GFX942-NEXT: s_add_i32 s0, s32, 64 |
| ; GFX942-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX942-NEXT: ;;#ASMSTART |
| ; GFX942-NEXT: ; use alloca0 v0 |
| ; GFX942-NEXT: ;;#ASMEND |
| ; GFX942-NEXT: s_and_b64 s[0:1], 0, exec |
| ; GFX942-NEXT: ;;#ASMSTART |
| ; GFX942-NEXT: ; use s55, scc |
| ; GFX942-NEXT: ;;#ASMEND |
| ; GFX942-NEXT: v_readlane_b32 s55, v1, 0 |
| ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 |
| ; GFX942-NEXT: s_add_i32 s2, s32, 0x8040 |
| ; GFX942-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload |
| ; GFX942-NEXT: s_mov_b64 exec, s[0:1] |
| ; GFX942-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-NEXT: s_setpc_b64 s[30:31] |
| %alloca0 = alloca [4096 x i32], align 64, addrspace(5) |
| %alloca1 = alloca [4096 x i32], align 4, addrspace(5) |
| %alloca1.offset = getelementptr [4096 x i32], ptr addrspace(5) %alloca1, i32 0, i32 251 |
| call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0) |
| call void asm sideeffect "; use $0, $1", "{s55},{scc}"(ptr addrspace(5) %alloca1.offset, i32 0) |
| ret void |
| } |
| |
| ; Variant with a variable GEP index: %soffset (an inreg argument) indexes into |
| ; %alloca1, and the resulting address is bound to s55 by the same inline asm |
| ; that also takes an scc operand. |
| ; In the expected output for every prefix, the scalar adds that form s55 |
| ; (s_add_i32/s_add_co_i32 followed by s_addk_i32/s_addk_co_i32) appear before |
| ; the s_and_b32/s_and_b64 of 0 with exec that precedes the scc-using asm. |
| ; NOTE(review): presumably that s_and is what materializes the scc input, so |
| ; the adds must not be placed after it - confirm. |
| define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset(i32 inreg %soffset) #0 { |
| ; GFX10_1-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: |
| ; GFX10_1: ; %bb.0: |
| ; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800 |
| ; GFX10_1-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill |
| ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 |
| ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_1-NEXT: v_writelane_b32 v1, s55, 0 |
| ; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 |
| ; GFX10_1-NEXT: s_lshl_b32 s4, s16, 2 |
| ; GFX10_1-NEXT: s_lshr_b32 s55, s32, 5 |
| ; GFX10_1-NEXT: s_add_i32 s55, s55, s4 |
| ; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 |
| ; GFX10_1-NEXT: s_addk_i32 s55, 0x4040 |
| ; GFX10_1-NEXT: ;;#ASMSTART |
| ; GFX10_1-NEXT: ; use alloca0 v0 |
| ; GFX10_1-NEXT: ;;#ASMEND |
| ; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo |
| ; GFX10_1-NEXT: ;;#ASMSTART |
| ; GFX10_1-NEXT: ; use s55, scc |
| ; GFX10_1-NEXT: ;;#ASMEND |
| ; GFX10_1-NEXT: v_readlane_b32 s55, v1, 0 |
| ; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800 |
| ; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload |
| ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 |
| ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_1-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10_1-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: |
| ; GFX10_3: ; %bb.0: |
| ; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800 |
| ; GFX10_3-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill |
| ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_3-NEXT: v_writelane_b32 v1, s55, 0 |
| ; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 |
| ; GFX10_3-NEXT: s_lshl_b32 s4, s16, 2 |
| ; GFX10_3-NEXT: s_lshr_b32 s55, s32, 5 |
| ; GFX10_3-NEXT: s_add_i32 s55, s55, s4 |
| ; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 |
| ; GFX10_3-NEXT: s_addk_i32 s55, 0x4040 |
| ; GFX10_3-NEXT: ;;#ASMSTART |
| ; GFX10_3-NEXT: ; use alloca0 v0 |
| ; GFX10_3-NEXT: ;;#ASMEND |
| ; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo |
| ; GFX10_3-NEXT: ;;#ASMSTART |
| ; GFX10_3-NEXT: ; use s55, scc |
| ; GFX10_3-NEXT: ;;#ASMEND |
| ; GFX10_3-NEXT: v_readlane_b32 s55, v1, 0 |
| ; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800 |
| ; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload |
| ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_3-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10_3-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_xor_saveexec_b32 s1, -1 |
| ; GFX11-NEXT: s_add_i32 s2, s32, 0x8040 |
| ; GFX11-NEXT: scratch_store_b32 off, v1, s2 ; 4-byte Folded Spill |
| ; GFX11-NEXT: s_mov_b32 exec_lo, s1 |
| ; GFX11-NEXT: s_add_i32 s1, s32, 64 |
| ; GFX11-NEXT: v_writelane_b32 v1, s55, 0 |
| ; GFX11-NEXT: s_lshl_b32 s0, s0, 2 |
| ; GFX11-NEXT: v_mov_b32_e32 v0, s1 |
| ; GFX11-NEXT: s_add_i32 s55, s32, s0 |
| ; GFX11-NEXT: ;;#ASMSTART |
| ; GFX11-NEXT: ; use alloca0 v0 |
| ; GFX11-NEXT: ;;#ASMEND |
| ; GFX11-NEXT: s_addk_i32 s55, 0x4040 |
| ; GFX11-NEXT: s_and_b32 s0, 0, exec_lo |
| ; GFX11-NEXT: ;;#ASMSTART |
| ; GFX11-NEXT: ; use s55, scc |
| ; GFX11-NEXT: ;;#ASMEND |
| ; GFX11-NEXT: v_readlane_b32 s55, v1, 0 |
| ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 |
| ; GFX11-NEXT: s_add_i32 s1, s32, 0x8040 |
| ; GFX11-NEXT: scratch_load_b32 v1, off, s1 ; 4-byte Folded Reload |
| ; GFX11-NEXT: s_mov_b32 exec_lo, s0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: s_xor_saveexec_b32 s1, -1 |
| ; GFX12-NEXT: scratch_store_b32 off, v1, s32 offset:32768 ; 4-byte Folded Spill |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: s_mov_b32 exec_lo, s1 |
| ; GFX12-NEXT: v_writelane_b32 v1, s55, 0 |
| ; GFX12-NEXT: s_lshl_b32 s0, s0, 2 |
| ; GFX12-NEXT: v_mov_b32_e32 v0, s32 |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: s_add_co_i32 s55, s32, s0 |
| ; GFX12-NEXT: ;;#ASMSTART |
| ; GFX12-NEXT: ; use alloca0 v0 |
| ; GFX12-NEXT: ;;#ASMEND |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: s_addk_co_i32 s55, 0x4000 |
| ; GFX12-NEXT: s_and_b32 s0, 0, exec_lo |
| ; GFX12-NEXT: ;;#ASMSTART |
| ; GFX12-NEXT: ; use s55, scc |
| ; GFX12-NEXT: ;;#ASMEND |
| ; GFX12-NEXT: v_readlane_b32 s55, v1, 0 |
| ; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 |
| ; GFX12-NEXT: scratch_load_b32 v1, off, s32 offset:32768 ; 4-byte Folded Reload |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: s_mov_b32 exec_lo, s0 |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX8-NEXT: s_add_i32 s6, s32, 0x201000 |
| ; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill |
| ; GFX8-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX8-NEXT: v_writelane_b32 v1, s55, 0 |
| ; GFX8-NEXT: s_lshl_b32 s4, s16, 2 |
| ; GFX8-NEXT: s_lshr_b32 s55, s32, 6 |
| ; GFX8-NEXT: s_add_i32 s55, s55, s4 |
| ; GFX8-NEXT: s_addk_i32 s55, 0x4040 |
| ; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 |
| ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 |
| ; GFX8-NEXT: ;;#ASMSTART |
| ; GFX8-NEXT: ; use alloca0 v0 |
| ; GFX8-NEXT: ;;#ASMEND |
| ; GFX8-NEXT: s_and_b64 s[4:5], 0, exec |
| ; GFX8-NEXT: ;;#ASMSTART |
| ; GFX8-NEXT: ; use s55, scc |
| ; GFX8-NEXT: ;;#ASMEND |
| ; GFX8-NEXT: v_readlane_b32 s55, v1, 0 |
| ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX8-NEXT: s_add_i32 s6, s32, 0x201000 |
| ; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload |
| ; GFX8-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: |
| ; GFX900: ; %bb.0: |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX900-NEXT: s_add_i32 s6, s32, 0x201000 |
| ; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill |
| ; GFX900-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX900-NEXT: v_writelane_b32 v1, s55, 0 |
| ; GFX900-NEXT: s_lshl_b32 s4, s16, 2 |
| ; GFX900-NEXT: s_lshr_b32 s55, s32, 6 |
| ; GFX900-NEXT: s_add_i32 s55, s55, s4 |
| ; GFX900-NEXT: s_addk_i32 s55, 0x4040 |
| ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 |
| ; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 |
| ; GFX900-NEXT: ;;#ASMSTART |
| ; GFX900-NEXT: ; use alloca0 v0 |
| ; GFX900-NEXT: ;;#ASMEND |
| ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec |
| ; GFX900-NEXT: ;;#ASMSTART |
| ; GFX900-NEXT: ; use s55, scc |
| ; GFX900-NEXT: ;;#ASMEND |
| ; GFX900-NEXT: v_readlane_b32 s55, v1, 0 |
| ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX900-NEXT: s_add_i32 s6, s32, 0x201000 |
| ; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload |
| ; GFX900-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: |
| ; GFX942: ; %bb.0: |
| ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-NEXT: s_xor_saveexec_b64 s[2:3], -1 |
| ; GFX942-NEXT: s_add_i32 s1, s32, 0x8040 |
| ; GFX942-NEXT: scratch_store_dword off, v1, s1 ; 4-byte Folded Spill |
| ; GFX942-NEXT: s_mov_b64 exec, s[2:3] |
| ; GFX942-NEXT: s_lshl_b32 s0, s0, 2 |
| ; GFX942-NEXT: v_writelane_b32 v1, s55, 0 |
| ; GFX942-NEXT: s_add_i32 s55, s32, s0 |
| ; GFX942-NEXT: s_addk_i32 s55, 0x4040 |
| ; GFX942-NEXT: s_add_i32 s0, s32, 64 |
| ; GFX942-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX942-NEXT: ;;#ASMSTART |
| ; GFX942-NEXT: ; use alloca0 v0 |
| ; GFX942-NEXT: ;;#ASMEND |
| ; GFX942-NEXT: s_and_b64 s[0:1], 0, exec |
| ; GFX942-NEXT: ;;#ASMSTART |
| ; GFX942-NEXT: ; use s55, scc |
| ; GFX942-NEXT: ;;#ASMEND |
| ; GFX942-NEXT: v_readlane_b32 s55, v1, 0 |
| ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 |
| ; GFX942-NEXT: s_add_i32 s2, s32, 0x8040 |
| ; GFX942-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload |
| ; GFX942-NEXT: s_mov_b64 exec, s[0:1] |
| ; GFX942-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-NEXT: s_setpc_b64 s[30:31] |
| ; Two 16 KiB arrays in addrspace(5); only %alloca0 requests 64-byte alignment. |
| %alloca0 = alloca [4096 x i32], align 64, addrspace(5) |
| %alloca1 = alloca [4096 x i32], align 4, addrspace(5) |
| ; Element address &%alloca1[%soffset]; the index is a runtime value, so the |
| ; checks scale it with s_lshl_b32 by 2 and add it in a separate instruction. |
| %alloca1.offset = getelementptr [4096 x i32], ptr addrspace(5) %alloca1, i32 0, i32 %soffset |
| ; First asm consumes %alloca0's address through a "v" constraint (v0 in the checks). |
| call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0) |
| ; Second asm pins the offset address to s55 and the constant 0 to scc. |
| call void asm sideeffect "; use $0, $1", "{s55},{scc}"(ptr addrspace(5) %alloca1.offset, i32 0) |
| ret void |
| } |
| |
| ; Attribute groups for the test functions. #1 is #0 plus "frame-pointer"="all"; |
| ; the two groups are otherwise identical. |
| ; NOTE(review): #1 is presumably used by frame-pointer variants defined earlier in the file - confirm. |
| attributes #0 = { nounwind alignstack=64 "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="10,10" "no-realign-stack" } |
| attributes #1 = { nounwind alignstack=64 "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="10,10" "no-realign-stack" "frame-pointer"="all" } |
| ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: |
| ; GFX9: {{.*}} |