| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefix=GFX7 %s |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -mattr=+xnack < %s | FileCheck -check-prefix=GFX8 %s |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+xnack < %s | FileCheck -check-prefixes=GFX900 %s |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -mattr=+xnack < %s | FileCheck -check-prefixes=GFX942 %s |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10_1 %s |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefix=GFX10_3 %s |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s |
| |
| ; Aggregate returned by the "def" inline asm in |
| ; @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs. The fields |
| ; line up one-to-one with that asm's output constraints: six SGPR fields |
| ; (s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58), two VGPR fields |
| ; (v[0:15], v[16:22]) and a 64-bit vcc value. |
| %asm.output = type { <16 x i32>, <16 x i32>, <16 x i32>, <8 x i32>, <2 x i32>, i32, ; sgprs |
| <16 x i32>, <7 x i32>, ; vgprs |
| i64 ; vcc |
| } |
| |
| ; Same layout as %asm.output except that the second VGPR field has 5 elements |
| ; instead of 7. |
| ; NOTE(review): the user of this type is not visible in this part of the file; |
| ; presumably it is the __lowest_offset variant, whose checks reference |
| ; v[16:20] - confirm. |
| %asm.output2 = type { <16 x i32>, <16 x i32>, <16 x i32>, <8 x i32>, <2 x i32>, i32, ; sgprs |
| <16 x i32>, <5 x i32>, ; vgprs |
| i64 ; vcc |
| } |
| |
| ; Variant with only five SGPR fields (no trailing scalar i32) and a 6-element |
| ; second VGPR field. |
| ; NOTE(review): the user of this type is not visible in this part of the file. |
| %asm.output3 = type { <16 x i32>, <16 x i32>, <16 x i32>, <8 x i32>, <2 x i32>, ; sgprs |
| <16 x i32>, <6 x i32>, ; vgprs |
| i64 ; vcc |
| } |
| |
| ; %alloca1 should end up materializing with s_mov_b32, but scc is |
| ; unavailable. |
| ; |
| ; This is primarily to test gfx7 and gfx8, which do not have a vector |
| ; add without a carry-out. |
| ; |
| ; The function takes no arguments and returns void; all of the register |
| ; pressure is created by the three inline asm statements in its body. |
| ; |
| define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 { |
| ; GFX7-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX7-NEXT: s_add_i32 s6, s32, 0x101100 |
| ; GFX7-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill |
| ; GFX7-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX7-NEXT: v_writelane_b32 v23, s30, 0 |
| ; GFX7-NEXT: v_writelane_b32 v23, s31, 1 |
| ; GFX7-NEXT: v_writelane_b32 v23, s33, 2 |
| ; GFX7-NEXT: v_writelane_b32 v23, s34, 3 |
| ; GFX7-NEXT: v_writelane_b32 v23, s35, 4 |
| ; GFX7-NEXT: v_writelane_b32 v23, s36, 5 |
| ; GFX7-NEXT: v_writelane_b32 v23, s37, 6 |
| ; GFX7-NEXT: v_writelane_b32 v23, s38, 7 |
| ; GFX7-NEXT: v_writelane_b32 v23, s39, 8 |
| ; GFX7-NEXT: v_writelane_b32 v23, s48, 9 |
| ; GFX7-NEXT: v_writelane_b32 v23, s49, 10 |
| ; GFX7-NEXT: v_writelane_b32 v23, s50, 11 |
| ; GFX7-NEXT: v_writelane_b32 v23, s51, 12 |
| ; GFX7-NEXT: v_writelane_b32 v23, s52, 13 |
| ; GFX7-NEXT: v_writelane_b32 v23, s53, 14 |
| ; GFX7-NEXT: v_lshr_b32_e64 v0, s32, 6 |
| ; GFX7-NEXT: v_writelane_b32 v23, s54, 15 |
| ; GFX7-NEXT: v_add_i32_e32 v0, vcc, 64, v0 |
| ; GFX7-NEXT: s_and_b64 s[4:5], 0, exec |
| ; GFX7-NEXT: v_writelane_b32 v23, s55, 16 |
| ; GFX7-NEXT: ;;#ASMSTART |
| ; GFX7-NEXT: ; use alloca0 v0 |
| ; GFX7-NEXT: ;;#ASMEND |
| ; GFX7-NEXT: ;;#ASMSTART |
| ; GFX7-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc |
| ; GFX7-NEXT: ;;#ASMEND |
| ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], s32 |
| ; GFX7-NEXT: v_mov_b32_e32 v0, 0x4040 |
| ; GFX7-NEXT: v_mad_u32_u24 v0, v0, 64, s32 |
| ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 6, v0 |
| ; GFX7-NEXT: v_readfirstlane_b32 s54, v0 |
| ; GFX7-NEXT: buffer_load_dword v0, off, s[0:3], s32 |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: ;;#ASMSTART |
| ; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc |
| ; GFX7-NEXT: ;;#ASMEND |
| ; GFX7-NEXT: v_readlane_b32 s55, v23, 16 |
| ; GFX7-NEXT: v_readlane_b32 s54, v23, 15 |
| ; GFX7-NEXT: v_readlane_b32 s53, v23, 14 |
| ; GFX7-NEXT: v_readlane_b32 s52, v23, 13 |
| ; GFX7-NEXT: v_readlane_b32 s51, v23, 12 |
| ; GFX7-NEXT: v_readlane_b32 s50, v23, 11 |
| ; GFX7-NEXT: v_readlane_b32 s49, v23, 10 |
| ; GFX7-NEXT: v_readlane_b32 s48, v23, 9 |
| ; GFX7-NEXT: v_readlane_b32 s39, v23, 8 |
| ; GFX7-NEXT: v_readlane_b32 s38, v23, 7 |
| ; GFX7-NEXT: v_readlane_b32 s37, v23, 6 |
| ; GFX7-NEXT: v_readlane_b32 s36, v23, 5 |
| ; GFX7-NEXT: v_readlane_b32 s35, v23, 4 |
| ; GFX7-NEXT: v_readlane_b32 s34, v23, 3 |
| ; GFX7-NEXT: v_readlane_b32 s33, v23, 2 |
| ; GFX7-NEXT: v_readlane_b32 s31, v23, 1 |
| ; GFX7-NEXT: v_readlane_b32 s30, v23, 0 |
| ; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX7-NEXT: s_add_i32 s6, s32, 0x101100 |
| ; GFX7-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload |
| ; GFX7-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX8-NEXT: s_add_i32 s6, s32, 0x101100 |
| ; GFX8-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill |
| ; GFX8-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX8-NEXT: v_writelane_b32 v23, s30, 0 |
| ; GFX8-NEXT: v_writelane_b32 v23, s31, 1 |
| ; GFX8-NEXT: v_writelane_b32 v23, s33, 2 |
| ; GFX8-NEXT: v_writelane_b32 v23, s34, 3 |
| ; GFX8-NEXT: v_writelane_b32 v23, s35, 4 |
| ; GFX8-NEXT: v_writelane_b32 v23, s36, 5 |
| ; GFX8-NEXT: v_writelane_b32 v23, s37, 6 |
| ; GFX8-NEXT: v_writelane_b32 v23, s38, 7 |
| ; GFX8-NEXT: v_writelane_b32 v23, s39, 8 |
| ; GFX8-NEXT: v_writelane_b32 v23, s48, 9 |
| ; GFX8-NEXT: v_writelane_b32 v23, s49, 10 |
| ; GFX8-NEXT: v_writelane_b32 v23, s50, 11 |
| ; GFX8-NEXT: v_writelane_b32 v23, s51, 12 |
| ; GFX8-NEXT: v_writelane_b32 v23, s52, 13 |
| ; GFX8-NEXT: v_writelane_b32 v23, s53, 14 |
| ; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 |
| ; GFX8-NEXT: v_writelane_b32 v23, s54, 15 |
| ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 |
| ; GFX8-NEXT: s_and_b64 s[4:5], 0, exec |
| ; GFX8-NEXT: v_writelane_b32 v23, s55, 16 |
| ; GFX8-NEXT: ;;#ASMSTART |
| ; GFX8-NEXT: ; use alloca0 v0 |
| ; GFX8-NEXT: ;;#ASMEND |
| ; GFX8-NEXT: ;;#ASMSTART |
| ; GFX8-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc |
| ; GFX8-NEXT: ;;#ASMEND |
| ; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s32 |
| ; GFX8-NEXT: v_mov_b32_e32 v0, 0x4040 |
| ; GFX8-NEXT: v_mad_u32_u24 v0, v0, 64, s32 |
| ; GFX8-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 |
| ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 6, v0 |
| ; GFX8-NEXT: v_readfirstlane_b32 s54, v0 |
| ; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s32 |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: ;;#ASMSTART |
| ; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc |
| ; GFX8-NEXT: ;;#ASMEND |
| ; GFX8-NEXT: v_readlane_b32 s55, v23, 16 |
| ; GFX8-NEXT: v_readlane_b32 s54, v23, 15 |
| ; GFX8-NEXT: v_readlane_b32 s53, v23, 14 |
| ; GFX8-NEXT: v_readlane_b32 s52, v23, 13 |
| ; GFX8-NEXT: v_readlane_b32 s51, v23, 12 |
| ; GFX8-NEXT: v_readlane_b32 s50, v23, 11 |
| ; GFX8-NEXT: v_readlane_b32 s49, v23, 10 |
| ; GFX8-NEXT: v_readlane_b32 s48, v23, 9 |
| ; GFX8-NEXT: v_readlane_b32 s39, v23, 8 |
| ; GFX8-NEXT: v_readlane_b32 s38, v23, 7 |
| ; GFX8-NEXT: v_readlane_b32 s37, v23, 6 |
| ; GFX8-NEXT: v_readlane_b32 s36, v23, 5 |
| ; GFX8-NEXT: v_readlane_b32 s35, v23, 4 |
| ; GFX8-NEXT: v_readlane_b32 s34, v23, 3 |
| ; GFX8-NEXT: v_readlane_b32 s33, v23, 2 |
| ; GFX8-NEXT: v_readlane_b32 s31, v23, 1 |
| ; GFX8-NEXT: v_readlane_b32 s30, v23, 0 |
| ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX8-NEXT: s_add_i32 s6, s32, 0x101100 |
| ; GFX8-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload |
| ; GFX8-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs: |
| ; GFX900: ; %bb.0: |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX900-NEXT: s_add_i32 s6, s32, 0x101100 |
| ; GFX900-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill |
| ; GFX900-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX900-NEXT: v_writelane_b32 v23, s30, 0 |
| ; GFX900-NEXT: v_writelane_b32 v23, s31, 1 |
| ; GFX900-NEXT: v_writelane_b32 v23, s33, 2 |
| ; GFX900-NEXT: v_writelane_b32 v23, s34, 3 |
| ; GFX900-NEXT: v_writelane_b32 v23, s35, 4 |
| ; GFX900-NEXT: v_writelane_b32 v23, s36, 5 |
| ; GFX900-NEXT: v_writelane_b32 v23, s37, 6 |
| ; GFX900-NEXT: v_writelane_b32 v23, s38, 7 |
| ; GFX900-NEXT: v_writelane_b32 v23, s39, 8 |
| ; GFX900-NEXT: v_writelane_b32 v23, s48, 9 |
| ; GFX900-NEXT: v_writelane_b32 v23, s49, 10 |
| ; GFX900-NEXT: v_writelane_b32 v23, s50, 11 |
| ; GFX900-NEXT: v_writelane_b32 v23, s51, 12 |
| ; GFX900-NEXT: v_writelane_b32 v23, s52, 13 |
| ; GFX900-NEXT: v_writelane_b32 v23, s53, 14 |
| ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 |
| ; GFX900-NEXT: v_writelane_b32 v23, s54, 15 |
| ; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 |
| ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec |
| ; GFX900-NEXT: v_writelane_b32 v23, s55, 16 |
| ; GFX900-NEXT: ;;#ASMSTART |
| ; GFX900-NEXT: ; use alloca0 v0 |
| ; GFX900-NEXT: ;;#ASMEND |
| ; GFX900-NEXT: ;;#ASMSTART |
| ; GFX900-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc |
| ; GFX900-NEXT: ;;#ASMEND |
| ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 |
| ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 |
| ; GFX900-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 |
| ; GFX900-NEXT: v_add_u32_e32 v0, 0x4040, v0 |
| ; GFX900-NEXT: v_readfirstlane_b32 s54, v0 |
| ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) |
| ; GFX900-NEXT: ;;#ASMSTART |
| ; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc |
| ; GFX900-NEXT: ;;#ASMEND |
| ; GFX900-NEXT: v_readlane_b32 s55, v23, 16 |
| ; GFX900-NEXT: v_readlane_b32 s54, v23, 15 |
| ; GFX900-NEXT: v_readlane_b32 s53, v23, 14 |
| ; GFX900-NEXT: v_readlane_b32 s52, v23, 13 |
| ; GFX900-NEXT: v_readlane_b32 s51, v23, 12 |
| ; GFX900-NEXT: v_readlane_b32 s50, v23, 11 |
| ; GFX900-NEXT: v_readlane_b32 s49, v23, 10 |
| ; GFX900-NEXT: v_readlane_b32 s48, v23, 9 |
| ; GFX900-NEXT: v_readlane_b32 s39, v23, 8 |
| ; GFX900-NEXT: v_readlane_b32 s38, v23, 7 |
| ; GFX900-NEXT: v_readlane_b32 s37, v23, 6 |
| ; GFX900-NEXT: v_readlane_b32 s36, v23, 5 |
| ; GFX900-NEXT: v_readlane_b32 s35, v23, 4 |
| ; GFX900-NEXT: v_readlane_b32 s34, v23, 3 |
| ; GFX900-NEXT: v_readlane_b32 s33, v23, 2 |
| ; GFX900-NEXT: v_readlane_b32 s31, v23, 1 |
| ; GFX900-NEXT: v_readlane_b32 s30, v23, 0 |
| ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX900-NEXT: s_add_i32 s6, s32, 0x101100 |
| ; GFX900-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload |
| ; GFX900-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs: |
| ; GFX942: ; %bb.0: |
| ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 |
| ; GFX942-NEXT: s_add_i32 s2, s32, 0x4044 |
| ; GFX942-NEXT: scratch_store_dword off, v23, s2 ; 4-byte Folded Spill |
| ; GFX942-NEXT: s_mov_b64 exec, s[0:1] |
| ; GFX942-NEXT: v_writelane_b32 v23, s30, 0 |
| ; GFX942-NEXT: v_writelane_b32 v23, s31, 1 |
| ; GFX942-NEXT: v_writelane_b32 v23, s33, 2 |
| ; GFX942-NEXT: v_writelane_b32 v23, s34, 3 |
| ; GFX942-NEXT: v_writelane_b32 v23, s35, 4 |
| ; GFX942-NEXT: v_writelane_b32 v23, s36, 5 |
| ; GFX942-NEXT: v_writelane_b32 v23, s37, 6 |
| ; GFX942-NEXT: v_writelane_b32 v23, s38, 7 |
| ; GFX942-NEXT: v_writelane_b32 v23, s39, 8 |
| ; GFX942-NEXT: v_writelane_b32 v23, s48, 9 |
| ; GFX942-NEXT: v_writelane_b32 v23, s49, 10 |
| ; GFX942-NEXT: v_writelane_b32 v23, s50, 11 |
| ; GFX942-NEXT: v_writelane_b32 v23, s51, 12 |
| ; GFX942-NEXT: v_writelane_b32 v23, s52, 13 |
| ; GFX942-NEXT: v_writelane_b32 v23, s53, 14 |
| ; GFX942-NEXT: s_add_i32 s0, s32, 64 |
| ; GFX942-NEXT: v_writelane_b32 v23, s54, 15 |
| ; GFX942-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX942-NEXT: s_and_b64 s[60:61], 0, exec |
| ; GFX942-NEXT: v_writelane_b32 v23, s55, 16 |
| ; GFX942-NEXT: ;;#ASMSTART |
| ; GFX942-NEXT: ; use alloca0 v0 |
| ; GFX942-NEXT: ;;#ASMEND |
| ; GFX942-NEXT: ;;#ASMSTART |
| ; GFX942-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc |
| ; GFX942-NEXT: ;;#ASMEND |
| ; GFX942-NEXT: s_addc_u32 s59, s32, 0x4040 |
| ; GFX942-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 |
| ; GFX942-NEXT: s_bitcmp1_b32 s59, 0 |
| ; GFX942-NEXT: s_bitset0_b32 s59, 0 |
| ; GFX942-NEXT: s_mov_b32 s54, s59 |
| ; GFX942-NEXT: ;;#ASMSTART |
| ; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc |
| ; GFX942-NEXT: ;;#ASMEND |
| ; GFX942-NEXT: v_readlane_b32 s55, v23, 16 |
| ; GFX942-NEXT: v_readlane_b32 s54, v23, 15 |
| ; GFX942-NEXT: v_readlane_b32 s53, v23, 14 |
| ; GFX942-NEXT: v_readlane_b32 s52, v23, 13 |
| ; GFX942-NEXT: v_readlane_b32 s51, v23, 12 |
| ; GFX942-NEXT: v_readlane_b32 s50, v23, 11 |
| ; GFX942-NEXT: v_readlane_b32 s49, v23, 10 |
| ; GFX942-NEXT: v_readlane_b32 s48, v23, 9 |
| ; GFX942-NEXT: v_readlane_b32 s39, v23, 8 |
| ; GFX942-NEXT: v_readlane_b32 s38, v23, 7 |
| ; GFX942-NEXT: v_readlane_b32 s37, v23, 6 |
| ; GFX942-NEXT: v_readlane_b32 s36, v23, 5 |
| ; GFX942-NEXT: v_readlane_b32 s35, v23, 4 |
| ; GFX942-NEXT: v_readlane_b32 s34, v23, 3 |
| ; GFX942-NEXT: v_readlane_b32 s33, v23, 2 |
| ; GFX942-NEXT: v_readlane_b32 s31, v23, 1 |
| ; GFX942-NEXT: v_readlane_b32 s30, v23, 0 |
| ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 |
| ; GFX942-NEXT: s_add_i32 s2, s32, 0x4044 |
| ; GFX942-NEXT: scratch_load_dword v23, off, s2 ; 4-byte Folded Reload |
| ; GFX942-NEXT: s_mov_b64 exec, s[0:1] |
| ; GFX942-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10_1-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs: |
| ; GFX10_1: ; %bb.0: |
| ; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880 |
| ; GFX10_1-NEXT: buffer_store_dword v23, off, s[0:3], s5 ; 4-byte Folded Spill |
| ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 |
| ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_1-NEXT: v_writelane_b32 v23, s30, 0 |
| ; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 |
| ; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo |
| ; GFX10_1-NEXT: v_writelane_b32 v23, s31, 1 |
| ; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 |
| ; GFX10_1-NEXT: ;;#ASMSTART |
| ; GFX10_1-NEXT: ; use alloca0 v0 |
| ; GFX10_1-NEXT: ;;#ASMEND |
| ; GFX10_1-NEXT: v_writelane_b32 v23, s33, 2 |
| ; GFX10_1-NEXT: v_writelane_b32 v23, s34, 3 |
| ; GFX10_1-NEXT: v_writelane_b32 v23, s35, 4 |
| ; GFX10_1-NEXT: v_writelane_b32 v23, s36, 5 |
| ; GFX10_1-NEXT: v_writelane_b32 v23, s37, 6 |
| ; GFX10_1-NEXT: v_writelane_b32 v23, s38, 7 |
| ; GFX10_1-NEXT: v_writelane_b32 v23, s39, 8 |
| ; GFX10_1-NEXT: v_writelane_b32 v23, s48, 9 |
| ; GFX10_1-NEXT: v_writelane_b32 v23, s49, 10 |
| ; GFX10_1-NEXT: v_writelane_b32 v23, s50, 11 |
| ; GFX10_1-NEXT: v_writelane_b32 v23, s51, 12 |
| ; GFX10_1-NEXT: v_writelane_b32 v23, s52, 13 |
| ; GFX10_1-NEXT: v_writelane_b32 v23, s53, 14 |
| ; GFX10_1-NEXT: v_writelane_b32 v23, s54, 15 |
| ; GFX10_1-NEXT: v_writelane_b32 v23, s55, 16 |
| ; GFX10_1-NEXT: ;;#ASMSTART |
| ; GFX10_1-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc |
| ; GFX10_1-NEXT: ;;#ASMEND |
| ; GFX10_1-NEXT: v_lshrrev_b32_e64 v24, 5, s32 |
| ; GFX10_1-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 |
| ; GFX10_1-NEXT: v_add_nc_u32_e32 v24, 0x4040, v24 |
| ; GFX10_1-NEXT: v_readfirstlane_b32 s54, v24 |
| ; GFX10_1-NEXT: ;;#ASMSTART |
| ; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc |
| ; GFX10_1-NEXT: ;;#ASMEND |
| ; GFX10_1-NEXT: v_readlane_b32 s55, v23, 16 |
| ; GFX10_1-NEXT: v_readlane_b32 s54, v23, 15 |
| ; GFX10_1-NEXT: v_readlane_b32 s53, v23, 14 |
| ; GFX10_1-NEXT: v_readlane_b32 s52, v23, 13 |
| ; GFX10_1-NEXT: v_readlane_b32 s51, v23, 12 |
| ; GFX10_1-NEXT: v_readlane_b32 s50, v23, 11 |
| ; GFX10_1-NEXT: v_readlane_b32 s49, v23, 10 |
| ; GFX10_1-NEXT: v_readlane_b32 s48, v23, 9 |
| ; GFX10_1-NEXT: v_readlane_b32 s39, v23, 8 |
| ; GFX10_1-NEXT: v_readlane_b32 s38, v23, 7 |
| ; GFX10_1-NEXT: v_readlane_b32 s37, v23, 6 |
| ; GFX10_1-NEXT: v_readlane_b32 s36, v23, 5 |
| ; GFX10_1-NEXT: v_readlane_b32 s35, v23, 4 |
| ; GFX10_1-NEXT: v_readlane_b32 s34, v23, 3 |
| ; GFX10_1-NEXT: v_readlane_b32 s33, v23, 2 |
| ; GFX10_1-NEXT: v_readlane_b32 s31, v23, 1 |
| ; GFX10_1-NEXT: v_readlane_b32 s30, v23, 0 |
| ; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880 |
| ; GFX10_1-NEXT: buffer_load_dword v23, off, s[0:3], s5 ; 4-byte Folded Reload |
| ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 |
| ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_1-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10_1-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10_3-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs: |
| ; GFX10_3: ; %bb.0: |
| ; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880 |
| ; GFX10_3-NEXT: buffer_store_dword v23, off, s[0:3], s5 ; 4-byte Folded Spill |
| ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_3-NEXT: v_writelane_b32 v23, s30, 0 |
| ; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 |
| ; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo |
| ; GFX10_3-NEXT: v_writelane_b32 v23, s31, 1 |
| ; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 |
| ; GFX10_3-NEXT: ;;#ASMSTART |
| ; GFX10_3-NEXT: ; use alloca0 v0 |
| ; GFX10_3-NEXT: ;;#ASMEND |
| ; GFX10_3-NEXT: v_writelane_b32 v23, s33, 2 |
| ; GFX10_3-NEXT: v_writelane_b32 v23, s34, 3 |
| ; GFX10_3-NEXT: v_writelane_b32 v23, s35, 4 |
| ; GFX10_3-NEXT: v_writelane_b32 v23, s36, 5 |
| ; GFX10_3-NEXT: v_writelane_b32 v23, s37, 6 |
| ; GFX10_3-NEXT: v_writelane_b32 v23, s38, 7 |
| ; GFX10_3-NEXT: v_writelane_b32 v23, s39, 8 |
| ; GFX10_3-NEXT: v_writelane_b32 v23, s48, 9 |
| ; GFX10_3-NEXT: v_writelane_b32 v23, s49, 10 |
| ; GFX10_3-NEXT: v_writelane_b32 v23, s50, 11 |
| ; GFX10_3-NEXT: v_writelane_b32 v23, s51, 12 |
| ; GFX10_3-NEXT: v_writelane_b32 v23, s52, 13 |
| ; GFX10_3-NEXT: v_writelane_b32 v23, s53, 14 |
| ; GFX10_3-NEXT: v_writelane_b32 v23, s54, 15 |
| ; GFX10_3-NEXT: v_writelane_b32 v23, s55, 16 |
| ; GFX10_3-NEXT: ;;#ASMSTART |
| ; GFX10_3-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc |
| ; GFX10_3-NEXT: ;;#ASMEND |
| ; GFX10_3-NEXT: v_lshrrev_b32_e64 v24, 5, s32 |
| ; GFX10_3-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 |
| ; GFX10_3-NEXT: v_add_nc_u32_e32 v24, 0x4040, v24 |
| ; GFX10_3-NEXT: v_readfirstlane_b32 s54, v24 |
| ; GFX10_3-NEXT: ;;#ASMSTART |
| ; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc |
| ; GFX10_3-NEXT: ;;#ASMEND |
| ; GFX10_3-NEXT: v_readlane_b32 s55, v23, 16 |
| ; GFX10_3-NEXT: v_readlane_b32 s54, v23, 15 |
| ; GFX10_3-NEXT: v_readlane_b32 s53, v23, 14 |
| ; GFX10_3-NEXT: v_readlane_b32 s52, v23, 13 |
| ; GFX10_3-NEXT: v_readlane_b32 s51, v23, 12 |
| ; GFX10_3-NEXT: v_readlane_b32 s50, v23, 11 |
| ; GFX10_3-NEXT: v_readlane_b32 s49, v23, 10 |
| ; GFX10_3-NEXT: v_readlane_b32 s48, v23, 9 |
| ; GFX10_3-NEXT: v_readlane_b32 s39, v23, 8 |
| ; GFX10_3-NEXT: v_readlane_b32 s38, v23, 7 |
| ; GFX10_3-NEXT: v_readlane_b32 s37, v23, 6 |
| ; GFX10_3-NEXT: v_readlane_b32 s36, v23, 5 |
| ; GFX10_3-NEXT: v_readlane_b32 s35, v23, 4 |
| ; GFX10_3-NEXT: v_readlane_b32 s34, v23, 3 |
| ; GFX10_3-NEXT: v_readlane_b32 s33, v23, 2 |
| ; GFX10_3-NEXT: v_readlane_b32 s31, v23, 1 |
| ; GFX10_3-NEXT: v_readlane_b32 s30, v23, 0 |
| ; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880 |
| ; GFX10_3-NEXT: buffer_load_dword v23, off, s[0:3], s5 ; 4-byte Folded Reload |
| ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_3-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10_3-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 |
| ; GFX11-NEXT: s_add_i32 s1, s32, 0x4044 |
| ; GFX11-NEXT: scratch_store_b32 off, v23, s1 ; 4-byte Folded Spill |
| ; GFX11-NEXT: s_mov_b32 exec_lo, s0 |
| ; GFX11-NEXT: v_writelane_b32 v23, s30, 0 |
| ; GFX11-NEXT: s_add_i32 s0, s32, 64 |
| ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX11-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX11-NEXT: s_and_b32 s0, 0, exec_lo |
| ; GFX11-NEXT: v_writelane_b32 v23, s31, 1 |
| ; GFX11-NEXT: ;;#ASMSTART |
| ; GFX11-NEXT: ; use alloca0 v0 |
| ; GFX11-NEXT: ;;#ASMEND |
| ; GFX11-NEXT: v_writelane_b32 v23, s33, 2 |
| ; GFX11-NEXT: v_writelane_b32 v23, s34, 3 |
| ; GFX11-NEXT: v_writelane_b32 v23, s35, 4 |
| ; GFX11-NEXT: v_writelane_b32 v23, s36, 5 |
| ; GFX11-NEXT: v_writelane_b32 v23, s37, 6 |
| ; GFX11-NEXT: v_writelane_b32 v23, s38, 7 |
| ; GFX11-NEXT: v_writelane_b32 v23, s39, 8 |
| ; GFX11-NEXT: v_writelane_b32 v23, s48, 9 |
| ; GFX11-NEXT: v_writelane_b32 v23, s49, 10 |
| ; GFX11-NEXT: v_writelane_b32 v23, s50, 11 |
| ; GFX11-NEXT: v_writelane_b32 v23, s51, 12 |
| ; GFX11-NEXT: v_writelane_b32 v23, s52, 13 |
| ; GFX11-NEXT: v_writelane_b32 v23, s53, 14 |
| ; GFX11-NEXT: v_writelane_b32 v23, s54, 15 |
| ; GFX11-NEXT: v_writelane_b32 v23, s55, 16 |
| ; GFX11-NEXT: ;;#ASMSTART |
| ; GFX11-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc |
| ; GFX11-NEXT: ;;#ASMEND |
| ; GFX11-NEXT: s_addc_u32 s59, s32, 0x4040 |
| ; GFX11-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 |
| ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) |
| ; GFX11-NEXT: s_bitcmp1_b32 s59, 0 |
| ; GFX11-NEXT: s_bitset0_b32 s59, 0 |
| ; GFX11-NEXT: s_mov_b32 s54, s59 |
| ; GFX11-NEXT: ;;#ASMSTART |
| ; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc |
| ; GFX11-NEXT: ;;#ASMEND |
| ; GFX11-NEXT: v_readlane_b32 s55, v23, 16 |
| ; GFX11-NEXT: v_readlane_b32 s54, v23, 15 |
| ; GFX11-NEXT: v_readlane_b32 s53, v23, 14 |
| ; GFX11-NEXT: v_readlane_b32 s52, v23, 13 |
| ; GFX11-NEXT: v_readlane_b32 s51, v23, 12 |
| ; GFX11-NEXT: v_readlane_b32 s50, v23, 11 |
| ; GFX11-NEXT: v_readlane_b32 s49, v23, 10 |
| ; GFX11-NEXT: v_readlane_b32 s48, v23, 9 |
| ; GFX11-NEXT: v_readlane_b32 s39, v23, 8 |
| ; GFX11-NEXT: v_readlane_b32 s38, v23, 7 |
| ; GFX11-NEXT: v_readlane_b32 s37, v23, 6 |
| ; GFX11-NEXT: v_readlane_b32 s36, v23, 5 |
| ; GFX11-NEXT: v_readlane_b32 s35, v23, 4 |
| ; GFX11-NEXT: v_readlane_b32 s34, v23, 3 |
| ; GFX11-NEXT: v_readlane_b32 s33, v23, 2 |
| ; GFX11-NEXT: v_readlane_b32 s31, v23, 1 |
| ; GFX11-NEXT: v_readlane_b32 s30, v23, 0 |
| ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 |
| ; GFX11-NEXT: s_add_i32 s1, s32, 0x4044 |
| ; GFX11-NEXT: scratch_load_b32 v23, off, s1 ; 4-byte Folded Reload |
| ; GFX11-NEXT: s_mov_b32 exec_lo, s0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 |
| ; GFX12-NEXT: scratch_store_b32 off, v23, s32 offset:16388 ; 4-byte Folded Spill |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: s_mov_b32 exec_lo, s0 |
| ; GFX12-NEXT: v_writelane_b32 v23, s30, 0 |
| ; GFX12-NEXT: v_mov_b32_e32 v0, s32 |
| ; GFX12-NEXT: s_and_b32 s0, 0, exec_lo |
| ; GFX12-NEXT: ;;#ASMSTART |
| ; GFX12-NEXT: ; use alloca0 v0 |
| ; GFX12-NEXT: ;;#ASMEND |
| ; GFX12-NEXT: v_writelane_b32 v23, s31, 1 |
| ; GFX12-NEXT: v_writelane_b32 v23, s33, 2 |
| ; GFX12-NEXT: v_writelane_b32 v23, s34, 3 |
| ; GFX12-NEXT: v_writelane_b32 v23, s35, 4 |
| ; GFX12-NEXT: v_writelane_b32 v23, s36, 5 |
| ; GFX12-NEXT: v_writelane_b32 v23, s37, 6 |
| ; GFX12-NEXT: v_writelane_b32 v23, s38, 7 |
| ; GFX12-NEXT: v_writelane_b32 v23, s39, 8 |
| ; GFX12-NEXT: v_writelane_b32 v23, s48, 9 |
| ; GFX12-NEXT: v_writelane_b32 v23, s49, 10 |
| ; GFX12-NEXT: v_writelane_b32 v23, s50, 11 |
| ; GFX12-NEXT: v_writelane_b32 v23, s51, 12 |
| ; GFX12-NEXT: v_writelane_b32 v23, s52, 13 |
| ; GFX12-NEXT: v_writelane_b32 v23, s53, 14 |
| ; GFX12-NEXT: v_writelane_b32 v23, s54, 15 |
| ; GFX12-NEXT: v_writelane_b32 v23, s55, 16 |
| ; GFX12-NEXT: ;;#ASMSTART |
| ; GFX12-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc |
| ; GFX12-NEXT: ;;#ASMEND |
| ; GFX12-NEXT: s_add_co_ci_u32 s59, s32, 0x4000 |
| ; GFX12-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: s_bitcmp1_b32 s59, 0 |
| ; GFX12-NEXT: s_bitset0_b32 s59, 0 |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: s_mov_b32 s54, s59 |
| ; GFX12-NEXT: ;;#ASMSTART |
| ; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc |
| ; GFX12-NEXT: ;;#ASMEND |
| ; GFX12-NEXT: v_readlane_b32 s55, v23, 16 |
| ; GFX12-NEXT: v_readlane_b32 s54, v23, 15 |
| ; GFX12-NEXT: v_readlane_b32 s53, v23, 14 |
| ; GFX12-NEXT: v_readlane_b32 s52, v23, 13 |
| ; GFX12-NEXT: v_readlane_b32 s51, v23, 12 |
| ; GFX12-NEXT: v_readlane_b32 s50, v23, 11 |
| ; GFX12-NEXT: v_readlane_b32 s49, v23, 10 |
| ; GFX12-NEXT: v_readlane_b32 s48, v23, 9 |
| ; GFX12-NEXT: v_readlane_b32 s39, v23, 8 |
| ; GFX12-NEXT: v_readlane_b32 s38, v23, 7 |
| ; GFX12-NEXT: v_readlane_b32 s37, v23, 6 |
| ; GFX12-NEXT: v_readlane_b32 s36, v23, 5 |
| ; GFX12-NEXT: v_readlane_b32 s35, v23, 4 |
| ; GFX12-NEXT: v_readlane_b32 s34, v23, 3 |
| ; GFX12-NEXT: v_readlane_b32 s33, v23, 2 |
| ; GFX12-NEXT: v_readlane_b32 s31, v23, 1 |
| ; GFX12-NEXT: v_readlane_b32 s30, v23, 0 |
| ; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 |
| ; GFX12-NEXT: scratch_load_b32 v23, off, s32 offset:16388 ; 4-byte Folded Reload |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: s_mov_b32 exec_lo, s0 |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| ; Two addrspace(5) allocas: a large, 64-byte-aligned array and a single |
| ; i32. The address of the i32 (%alloca1) is the frame index under test. |
| %alloca0 = alloca [4096 x i32], align 64, addrspace(5) |
| %alloca1 = alloca i32, align 4, addrspace(5) |
| ; Hand the address of %alloca0 to an asm in a VGPR ("v" constraint). |
| call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0) |
| |
| ; Force no SGPRs to be available for the carry-out of the vector add. |
| ; The outputs are pinned to s[0:58], v[0:22] and vcc; they are kept live |
| ; by feeding every one of them into the "use" asm further down. |
| %asm = call %asm.output asm sideeffect |
| "; def $0, $1, $2, $3, $4, $5, $6, $7, $8", |
| "={s[0:15]},={s[16:31]},={s[32:47]},={s[48:55]},={s[56:57]},={s58},={v[0:15]},={v[16:22]},={vcc}"() |
| |
| ; Unpack the SGPR results (struct fields 0-5). |
| %s0 = extractvalue %asm.output %asm, 0 |
| %s1 = extractvalue %asm.output %asm, 1 |
| %s2 = extractvalue %asm.output %asm, 2 |
| %s3 = extractvalue %asm.output %asm, 3 |
| %s4 = extractvalue %asm.output %asm, 4 |
| %s5 = extractvalue %asm.output %asm, 5 |
| |
| ; Unpack the VGPR results (struct fields 6-7). |
| %v0 = extractvalue %asm.output %asm, 6 |
| %v1 = extractvalue %asm.output %asm, 7 |
| |
| ; Unpack the vcc result (struct field 8). |
| %vcc = extractvalue %asm.output %asm, 8 |
| |
| ; scc is unavailable since it is a live-in of this asm (the final {scc} |
| ; operand), while the address of %alloca1 has to be produced in {s54}. |
| call void asm sideeffect "; use $0, $1, $2, $3, $4, $5, $6, $7, $8, $9, $10", |
| "{s[0:15]},{s[16:31]},{s[32:47]},{s[48:55]},{s[56:57]},{s58},{v[0:15]},{v[16:22]},{vcc},{s54},{scc}"( |
| <16 x i32> %s0, |
| <16 x i32> %s1, |
| <16 x i32> %s2, |
| <8 x i32> %s3, |
| <2 x i32> %s4, |
| i32 %s5, |
| <16 x i32> %v0, |
| <7 x i32> %v1, |
| i64 %vcc, |
| ptr addrspace(5) %alloca1, |
| i32 0) ; use of scc |
| |
| ret void |
| } |
| |
| ; FIXME: This was meant to test the FI at offset 0, but other objects get |
| ; assigned there. Instead it shows a non-zero offset that is still an inline |
| ; immediate and can fold directly into the address computation. |
| define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset() #1 { |
| ; GFX7-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX7-NEXT: s_add_i32 s6, s32, 0x100400 |
| ; GFX7-NEXT: buffer_store_dword v21, off, s[0:3], s6 ; 4-byte Folded Spill |
| ; GFX7-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX7-NEXT: v_writelane_b32 v21, s30, 0 |
| ; GFX7-NEXT: v_writelane_b32 v21, s31, 1 |
| ; GFX7-NEXT: v_writelane_b32 v21, s33, 2 |
| ; GFX7-NEXT: v_writelane_b32 v21, s34, 3 |
| ; GFX7-NEXT: v_writelane_b32 v21, s35, 4 |
| ; GFX7-NEXT: v_writelane_b32 v21, s36, 5 |
| ; GFX7-NEXT: v_writelane_b32 v21, s37, 6 |
| ; GFX7-NEXT: v_writelane_b32 v21, s38, 7 |
| ; GFX7-NEXT: v_writelane_b32 v21, s39, 8 |
| ; GFX7-NEXT: v_writelane_b32 v21, s48, 9 |
| ; GFX7-NEXT: v_writelane_b32 v21, s49, 10 |
| ; GFX7-NEXT: v_writelane_b32 v21, s50, 11 |
| ; GFX7-NEXT: v_writelane_b32 v21, s51, 12 |
| ; GFX7-NEXT: v_writelane_b32 v21, s52, 13 |
| ; GFX7-NEXT: v_writelane_b32 v21, s53, 14 |
| ; GFX7-NEXT: v_writelane_b32 v21, s54, 15 |
| ; GFX7-NEXT: s_and_b64 s[4:5], 0, exec |
| ; GFX7-NEXT: v_writelane_b32 v21, s55, 16 |
| ; GFX7-NEXT: ;;#ASMSTART |
| ; GFX7-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc |
| ; GFX7-NEXT: ;;#ASMEND |
| ; GFX7-NEXT: v_mad_u32_u24 v22, 16, 64, s32 |
| ; GFX7-NEXT: v_lshrrev_b32_e32 v22, 6, v22 |
| ; GFX7-NEXT: v_readfirstlane_b32 s54, v22 |
| ; GFX7-NEXT: ;;#ASMSTART |
| ; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc |
| ; GFX7-NEXT: ;;#ASMEND |
| ; GFX7-NEXT: v_readlane_b32 s55, v21, 16 |
| ; GFX7-NEXT: v_readlane_b32 s54, v21, 15 |
| ; GFX7-NEXT: v_readlane_b32 s53, v21, 14 |
| ; GFX7-NEXT: v_readlane_b32 s52, v21, 13 |
| ; GFX7-NEXT: v_readlane_b32 s51, v21, 12 |
| ; GFX7-NEXT: v_readlane_b32 s50, v21, 11 |
| ; GFX7-NEXT: v_readlane_b32 s49, v21, 10 |
| ; GFX7-NEXT: v_readlane_b32 s48, v21, 9 |
| ; GFX7-NEXT: v_readlane_b32 s39, v21, 8 |
| ; GFX7-NEXT: v_readlane_b32 s38, v21, 7 |
| ; GFX7-NEXT: v_readlane_b32 s37, v21, 6 |
| ; GFX7-NEXT: v_readlane_b32 s36, v21, 5 |
| ; GFX7-NEXT: v_readlane_b32 s35, v21, 4 |
| ; GFX7-NEXT: v_readlane_b32 s34, v21, 3 |
| ; GFX7-NEXT: v_readlane_b32 s33, v21, 2 |
| ; GFX7-NEXT: v_readlane_b32 s31, v21, 1 |
| ; GFX7-NEXT: v_readlane_b32 s30, v21, 0 |
| ; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX7-NEXT: s_add_i32 s6, s32, 0x100400 |
| ; GFX7-NEXT: buffer_load_dword v21, off, s[0:3], s6 ; 4-byte Folded Reload |
| ; GFX7-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX8-NEXT: s_add_i32 s6, s32, 0x100400 |
| ; GFX8-NEXT: buffer_store_dword v21, off, s[0:3], s6 ; 4-byte Folded Spill |
| ; GFX8-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX8-NEXT: v_writelane_b32 v21, s30, 0 |
| ; GFX8-NEXT: v_writelane_b32 v21, s31, 1 |
| ; GFX8-NEXT: v_writelane_b32 v21, s33, 2 |
| ; GFX8-NEXT: v_writelane_b32 v21, s34, 3 |
| ; GFX8-NEXT: v_writelane_b32 v21, s35, 4 |
| ; GFX8-NEXT: v_writelane_b32 v21, s36, 5 |
| ; GFX8-NEXT: v_writelane_b32 v21, s37, 6 |
| ; GFX8-NEXT: v_writelane_b32 v21, s38, 7 |
| ; GFX8-NEXT: v_writelane_b32 v21, s39, 8 |
| ; GFX8-NEXT: v_writelane_b32 v21, s48, 9 |
| ; GFX8-NEXT: v_writelane_b32 v21, s49, 10 |
| ; GFX8-NEXT: v_writelane_b32 v21, s50, 11 |
| ; GFX8-NEXT: v_writelane_b32 v21, s51, 12 |
| ; GFX8-NEXT: v_writelane_b32 v21, s52, 13 |
| ; GFX8-NEXT: v_writelane_b32 v21, s53, 14 |
| ; GFX8-NEXT: v_writelane_b32 v21, s54, 15 |
| ; GFX8-NEXT: s_and_b64 s[4:5], 0, exec |
| ; GFX8-NEXT: v_writelane_b32 v21, s55, 16 |
| ; GFX8-NEXT: ;;#ASMSTART |
| ; GFX8-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc |
| ; GFX8-NEXT: ;;#ASMEND |
| ; GFX8-NEXT: v_mad_u32_u24 v22, 16, 64, s32 |
| ; GFX8-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 |
| ; GFX8-NEXT: v_lshrrev_b32_e32 v22, 6, v22 |
| ; GFX8-NEXT: v_readfirstlane_b32 s54, v22 |
| ; GFX8-NEXT: ;;#ASMSTART |
| ; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc |
| ; GFX8-NEXT: ;;#ASMEND |
| ; GFX8-NEXT: v_readlane_b32 s55, v21, 16 |
| ; GFX8-NEXT: v_readlane_b32 s54, v21, 15 |
| ; GFX8-NEXT: v_readlane_b32 s53, v21, 14 |
| ; GFX8-NEXT: v_readlane_b32 s52, v21, 13 |
| ; GFX8-NEXT: v_readlane_b32 s51, v21, 12 |
| ; GFX8-NEXT: v_readlane_b32 s50, v21, 11 |
| ; GFX8-NEXT: v_readlane_b32 s49, v21, 10 |
| ; GFX8-NEXT: v_readlane_b32 s48, v21, 9 |
| ; GFX8-NEXT: v_readlane_b32 s39, v21, 8 |
| ; GFX8-NEXT: v_readlane_b32 s38, v21, 7 |
| ; GFX8-NEXT: v_readlane_b32 s37, v21, 6 |
| ; GFX8-NEXT: v_readlane_b32 s36, v21, 5 |
| ; GFX8-NEXT: v_readlane_b32 s35, v21, 4 |
| ; GFX8-NEXT: v_readlane_b32 s34, v21, 3 |
| ; GFX8-NEXT: v_readlane_b32 s33, v21, 2 |
| ; GFX8-NEXT: v_readlane_b32 s31, v21, 1 |
| ; GFX8-NEXT: v_readlane_b32 s30, v21, 0 |
| ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX8-NEXT: s_add_i32 s6, s32, 0x100400 |
| ; GFX8-NEXT: buffer_load_dword v21, off, s[0:3], s6 ; 4-byte Folded Reload |
| ; GFX8-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset: |
| ; GFX900: ; %bb.0: |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX900-NEXT: s_add_i32 s6, s32, 0x100400 |
| ; GFX900-NEXT: buffer_store_dword v21, off, s[0:3], s6 ; 4-byte Folded Spill |
| ; GFX900-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX900-NEXT: v_writelane_b32 v21, s30, 0 |
| ; GFX900-NEXT: v_writelane_b32 v21, s31, 1 |
| ; GFX900-NEXT: v_writelane_b32 v21, s33, 2 |
| ; GFX900-NEXT: v_writelane_b32 v21, s34, 3 |
| ; GFX900-NEXT: v_writelane_b32 v21, s35, 4 |
| ; GFX900-NEXT: v_writelane_b32 v21, s36, 5 |
| ; GFX900-NEXT: v_writelane_b32 v21, s37, 6 |
| ; GFX900-NEXT: v_writelane_b32 v21, s38, 7 |
| ; GFX900-NEXT: v_writelane_b32 v21, s39, 8 |
| ; GFX900-NEXT: v_writelane_b32 v21, s48, 9 |
| ; GFX900-NEXT: v_writelane_b32 v21, s49, 10 |
| ; GFX900-NEXT: v_writelane_b32 v21, s50, 11 |
| ; GFX900-NEXT: v_writelane_b32 v21, s51, 12 |
| ; GFX900-NEXT: v_writelane_b32 v21, s52, 13 |
| ; GFX900-NEXT: v_writelane_b32 v21, s53, 14 |
| ; GFX900-NEXT: v_writelane_b32 v21, s54, 15 |
| ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec |
| ; GFX900-NEXT: v_writelane_b32 v21, s55, 16 |
| ; GFX900-NEXT: ;;#ASMSTART |
| ; GFX900-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc |
| ; GFX900-NEXT: ;;#ASMEND |
| ; GFX900-NEXT: v_lshrrev_b32_e64 v22, 6, s32 |
| ; GFX900-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 |
| ; GFX900-NEXT: v_add_u32_e32 v22, 16, v22 |
| ; GFX900-NEXT: v_readfirstlane_b32 s54, v22 |
| ; GFX900-NEXT: ;;#ASMSTART |
| ; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc |
| ; GFX900-NEXT: ;;#ASMEND |
| ; GFX900-NEXT: v_readlane_b32 s55, v21, 16 |
| ; GFX900-NEXT: v_readlane_b32 s54, v21, 15 |
| ; GFX900-NEXT: v_readlane_b32 s53, v21, 14 |
| ; GFX900-NEXT: v_readlane_b32 s52, v21, 13 |
| ; GFX900-NEXT: v_readlane_b32 s51, v21, 12 |
| ; GFX900-NEXT: v_readlane_b32 s50, v21, 11 |
| ; GFX900-NEXT: v_readlane_b32 s49, v21, 10 |
| ; GFX900-NEXT: v_readlane_b32 s48, v21, 9 |
| ; GFX900-NEXT: v_readlane_b32 s39, v21, 8 |
| ; GFX900-NEXT: v_readlane_b32 s38, v21, 7 |
| ; GFX900-NEXT: v_readlane_b32 s37, v21, 6 |
| ; GFX900-NEXT: v_readlane_b32 s36, v21, 5 |
| ; GFX900-NEXT: v_readlane_b32 s35, v21, 4 |
| ; GFX900-NEXT: v_readlane_b32 s34, v21, 3 |
| ; GFX900-NEXT: v_readlane_b32 s33, v21, 2 |
| ; GFX900-NEXT: v_readlane_b32 s31, v21, 1 |
| ; GFX900-NEXT: v_readlane_b32 s30, v21, 0 |
| ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX900-NEXT: s_add_i32 s6, s32, 0x100400 |
| ; GFX900-NEXT: buffer_load_dword v21, off, s[0:3], s6 ; 4-byte Folded Reload |
| ; GFX900-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset: |
| ; GFX942: ; %bb.0: |
| ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 |
| ; GFX942-NEXT: s_add_i32 s2, s32, 0x4010 |
| ; GFX942-NEXT: scratch_store_dword off, v21, s2 ; 4-byte Folded Spill |
| ; GFX942-NEXT: s_mov_b64 exec, s[0:1] |
| ; GFX942-NEXT: v_writelane_b32 v21, s30, 0 |
| ; GFX942-NEXT: v_writelane_b32 v21, s31, 1 |
| ; GFX942-NEXT: v_writelane_b32 v21, s33, 2 |
| ; GFX942-NEXT: v_writelane_b32 v21, s34, 3 |
| ; GFX942-NEXT: v_writelane_b32 v21, s35, 4 |
| ; GFX942-NEXT: v_writelane_b32 v21, s36, 5 |
| ; GFX942-NEXT: v_writelane_b32 v21, s37, 6 |
| ; GFX942-NEXT: v_writelane_b32 v21, s38, 7 |
| ; GFX942-NEXT: v_writelane_b32 v21, s39, 8 |
| ; GFX942-NEXT: v_writelane_b32 v21, s48, 9 |
| ; GFX942-NEXT: v_writelane_b32 v21, s49, 10 |
| ; GFX942-NEXT: v_writelane_b32 v21, s50, 11 |
| ; GFX942-NEXT: v_writelane_b32 v21, s51, 12 |
| ; GFX942-NEXT: v_writelane_b32 v21, s52, 13 |
| ; GFX942-NEXT: v_writelane_b32 v21, s53, 14 |
| ; GFX942-NEXT: v_writelane_b32 v21, s54, 15 |
| ; GFX942-NEXT: s_and_b64 s[60:61], 0, exec |
| ; GFX942-NEXT: v_writelane_b32 v21, s55, 16 |
| ; GFX942-NEXT: ;;#ASMSTART |
| ; GFX942-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc |
| ; GFX942-NEXT: ;;#ASMEND |
| ; GFX942-NEXT: s_addc_u32 s59, s32, 16 |
| ; GFX942-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 |
| ; GFX942-NEXT: s_bitcmp1_b32 s59, 0 |
| ; GFX942-NEXT: s_bitset0_b32 s59, 0 |
| ; GFX942-NEXT: s_mov_b32 s54, s59 |
| ; GFX942-NEXT: ;;#ASMSTART |
| ; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc |
| ; GFX942-NEXT: ;;#ASMEND |
| ; GFX942-NEXT: v_readlane_b32 s55, v21, 16 |
| ; GFX942-NEXT: v_readlane_b32 s54, v21, 15 |
| ; GFX942-NEXT: v_readlane_b32 s53, v21, 14 |
| ; GFX942-NEXT: v_readlane_b32 s52, v21, 13 |
| ; GFX942-NEXT: v_readlane_b32 s51, v21, 12 |
| ; GFX942-NEXT: v_readlane_b32 s50, v21, 11 |
| ; GFX942-NEXT: v_readlane_b32 s49, v21, 10 |
| ; GFX942-NEXT: v_readlane_b32 s48, v21, 9 |
| ; GFX942-NEXT: v_readlane_b32 s39, v21, 8 |
| ; GFX942-NEXT: v_readlane_b32 s38, v21, 7 |
| ; GFX942-NEXT: v_readlane_b32 s37, v21, 6 |
| ; GFX942-NEXT: v_readlane_b32 s36, v21, 5 |
| ; GFX942-NEXT: v_readlane_b32 s35, v21, 4 |
| ; GFX942-NEXT: v_readlane_b32 s34, v21, 3 |
| ; GFX942-NEXT: v_readlane_b32 s33, v21, 2 |
| ; GFX942-NEXT: v_readlane_b32 s31, v21, 1 |
| ; GFX942-NEXT: v_readlane_b32 s30, v21, 0 |
| ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 |
| ; GFX942-NEXT: s_add_i32 s2, s32, 0x4010 |
| ; GFX942-NEXT: scratch_load_dword v21, off, s2 ; 4-byte Folded Reload |
| ; GFX942-NEXT: s_mov_b64 exec, s[0:1] |
| ; GFX942-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10_1-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset: |
| ; GFX10_1: ; %bb.0: |
| ; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80200 |
| ; GFX10_1-NEXT: buffer_store_dword v21, off, s[0:3], s5 ; 4-byte Folded Spill |
| ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 |
| ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_1-NEXT: v_writelane_b32 v21, s30, 0 |
| ; GFX10_1-NEXT: s_and_b32 s59, 0, exec_lo |
| ; GFX10_1-NEXT: v_writelane_b32 v21, s31, 1 |
| ; GFX10_1-NEXT: v_writelane_b32 v21, s33, 2 |
| ; GFX10_1-NEXT: v_writelane_b32 v21, s34, 3 |
| ; GFX10_1-NEXT: v_writelane_b32 v21, s35, 4 |
| ; GFX10_1-NEXT: v_writelane_b32 v21, s36, 5 |
| ; GFX10_1-NEXT: v_writelane_b32 v21, s37, 6 |
| ; GFX10_1-NEXT: v_writelane_b32 v21, s38, 7 |
| ; GFX10_1-NEXT: v_writelane_b32 v21, s39, 8 |
| ; GFX10_1-NEXT: v_writelane_b32 v21, s48, 9 |
| ; GFX10_1-NEXT: v_writelane_b32 v21, s49, 10 |
| ; GFX10_1-NEXT: v_writelane_b32 v21, s50, 11 |
| ; GFX10_1-NEXT: v_writelane_b32 v21, s51, 12 |
| ; GFX10_1-NEXT: v_writelane_b32 v21, s52, 13 |
| ; GFX10_1-NEXT: v_writelane_b32 v21, s53, 14 |
| ; GFX10_1-NEXT: v_writelane_b32 v21, s54, 15 |
| ; GFX10_1-NEXT: v_writelane_b32 v21, s55, 16 |
| ; GFX10_1-NEXT: ;;#ASMSTART |
| ; GFX10_1-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc |
| ; GFX10_1-NEXT: ;;#ASMEND |
| ; GFX10_1-NEXT: v_lshrrev_b32_e64 v22, 5, s32 |
| ; GFX10_1-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 |
| ; GFX10_1-NEXT: v_add_nc_u32_e32 v22, 16, v22 |
| ; GFX10_1-NEXT: v_readfirstlane_b32 s54, v22 |
| ; GFX10_1-NEXT: ;;#ASMSTART |
| ; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc |
| ; GFX10_1-NEXT: ;;#ASMEND |
| ; GFX10_1-NEXT: v_readlane_b32 s55, v21, 16 |
| ; GFX10_1-NEXT: v_readlane_b32 s54, v21, 15 |
| ; GFX10_1-NEXT: v_readlane_b32 s53, v21, 14 |
| ; GFX10_1-NEXT: v_readlane_b32 s52, v21, 13 |
| ; GFX10_1-NEXT: v_readlane_b32 s51, v21, 12 |
| ; GFX10_1-NEXT: v_readlane_b32 s50, v21, 11 |
| ; GFX10_1-NEXT: v_readlane_b32 s49, v21, 10 |
| ; GFX10_1-NEXT: v_readlane_b32 s48, v21, 9 |
| ; GFX10_1-NEXT: v_readlane_b32 s39, v21, 8 |
| ; GFX10_1-NEXT: v_readlane_b32 s38, v21, 7 |
| ; GFX10_1-NEXT: v_readlane_b32 s37, v21, 6 |
| ; GFX10_1-NEXT: v_readlane_b32 s36, v21, 5 |
| ; GFX10_1-NEXT: v_readlane_b32 s35, v21, 4 |
| ; GFX10_1-NEXT: v_readlane_b32 s34, v21, 3 |
| ; GFX10_1-NEXT: v_readlane_b32 s33, v21, 2 |
| ; GFX10_1-NEXT: v_readlane_b32 s31, v21, 1 |
| ; GFX10_1-NEXT: v_readlane_b32 s30, v21, 0 |
| ; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80200 |
| ; GFX10_1-NEXT: buffer_load_dword v21, off, s[0:3], s5 ; 4-byte Folded Reload |
| ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 |
| ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_1-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10_1-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10_3-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset: |
| ; GFX10_3: ; %bb.0: |
| ; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80200 |
| ; GFX10_3-NEXT: buffer_store_dword v21, off, s[0:3], s5 ; 4-byte Folded Spill |
| ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_3-NEXT: v_writelane_b32 v21, s30, 0 |
| ; GFX10_3-NEXT: s_and_b32 s59, 0, exec_lo |
| ; GFX10_3-NEXT: v_writelane_b32 v21, s31, 1 |
| ; GFX10_3-NEXT: v_writelane_b32 v21, s33, 2 |
| ; GFX10_3-NEXT: v_writelane_b32 v21, s34, 3 |
| ; GFX10_3-NEXT: v_writelane_b32 v21, s35, 4 |
| ; GFX10_3-NEXT: v_writelane_b32 v21, s36, 5 |
| ; GFX10_3-NEXT: v_writelane_b32 v21, s37, 6 |
| ; GFX10_3-NEXT: v_writelane_b32 v21, s38, 7 |
| ; GFX10_3-NEXT: v_writelane_b32 v21, s39, 8 |
| ; GFX10_3-NEXT: v_writelane_b32 v21, s48, 9 |
| ; GFX10_3-NEXT: v_writelane_b32 v21, s49, 10 |
| ; GFX10_3-NEXT: v_writelane_b32 v21, s50, 11 |
| ; GFX10_3-NEXT: v_writelane_b32 v21, s51, 12 |
| ; GFX10_3-NEXT: v_writelane_b32 v21, s52, 13 |
| ; GFX10_3-NEXT: v_writelane_b32 v21, s53, 14 |
| ; GFX10_3-NEXT: v_writelane_b32 v21, s54, 15 |
| ; GFX10_3-NEXT: v_writelane_b32 v21, s55, 16 |
| ; GFX10_3-NEXT: ;;#ASMSTART |
| ; GFX10_3-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc |
| ; GFX10_3-NEXT: ;;#ASMEND |
| ; GFX10_3-NEXT: v_lshrrev_b32_e64 v22, 5, s32 |
| ; GFX10_3-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 |
| ; GFX10_3-NEXT: v_add_nc_u32_e32 v22, 16, v22 |
| ; GFX10_3-NEXT: v_readfirstlane_b32 s54, v22 |
| ; GFX10_3-NEXT: ;;#ASMSTART |
| ; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc |
| ; GFX10_3-NEXT: ;;#ASMEND |
| ; GFX10_3-NEXT: v_readlane_b32 s55, v21, 16 |
| ; GFX10_3-NEXT: v_readlane_b32 s54, v21, 15 |
| ; GFX10_3-NEXT: v_readlane_b32 s53, v21, 14 |
| ; GFX10_3-NEXT: v_readlane_b32 s52, v21, 13 |
| ; GFX10_3-NEXT: v_readlane_b32 s51, v21, 12 |
| ; GFX10_3-NEXT: v_readlane_b32 s50, v21, 11 |
| ; GFX10_3-NEXT: v_readlane_b32 s49, v21, 10 |
| ; GFX10_3-NEXT: v_readlane_b32 s48, v21, 9 |
| ; GFX10_3-NEXT: v_readlane_b32 s39, v21, 8 |
| ; GFX10_3-NEXT: v_readlane_b32 s38, v21, 7 |
| ; GFX10_3-NEXT: v_readlane_b32 s37, v21, 6 |
| ; GFX10_3-NEXT: v_readlane_b32 s36, v21, 5 |
| ; GFX10_3-NEXT: v_readlane_b32 s35, v21, 4 |
| ; GFX10_3-NEXT: v_readlane_b32 s34, v21, 3 |
| ; GFX10_3-NEXT: v_readlane_b32 s33, v21, 2 |
| ; GFX10_3-NEXT: v_readlane_b32 s31, v21, 1 |
| ; GFX10_3-NEXT: v_readlane_b32 s30, v21, 0 |
| ; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80200 |
| ; GFX10_3-NEXT: buffer_load_dword v21, off, s[0:3], s5 ; 4-byte Folded Reload |
| ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_3-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10_3-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 |
| ; GFX11-NEXT: s_add_i32 s1, s32, 0x4010 |
| ; GFX11-NEXT: scratch_store_b32 off, v21, s1 ; 4-byte Folded Spill |
| ; GFX11-NEXT: s_mov_b32 exec_lo, s0 |
| ; GFX11-NEXT: v_writelane_b32 v21, s30, 0 |
| ; GFX11-NEXT: s_and_b32 s59, 0, exec_lo |
| ; GFX11-NEXT: v_writelane_b32 v21, s31, 1 |
| ; GFX11-NEXT: v_writelane_b32 v21, s33, 2 |
| ; GFX11-NEXT: v_writelane_b32 v21, s34, 3 |
| ; GFX11-NEXT: v_writelane_b32 v21, s35, 4 |
| ; GFX11-NEXT: v_writelane_b32 v21, s36, 5 |
| ; GFX11-NEXT: v_writelane_b32 v21, s37, 6 |
| ; GFX11-NEXT: v_writelane_b32 v21, s38, 7 |
| ; GFX11-NEXT: v_writelane_b32 v21, s39, 8 |
| ; GFX11-NEXT: v_writelane_b32 v21, s48, 9 |
| ; GFX11-NEXT: v_writelane_b32 v21, s49, 10 |
| ; GFX11-NEXT: v_writelane_b32 v21, s50, 11 |
| ; GFX11-NEXT: v_writelane_b32 v21, s51, 12 |
| ; GFX11-NEXT: v_writelane_b32 v21, s52, 13 |
| ; GFX11-NEXT: v_writelane_b32 v21, s53, 14 |
| ; GFX11-NEXT: v_writelane_b32 v21, s54, 15 |
| ; GFX11-NEXT: v_writelane_b32 v21, s55, 16 |
| ; GFX11-NEXT: ;;#ASMSTART |
| ; GFX11-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc |
| ; GFX11-NEXT: ;;#ASMEND |
| ; GFX11-NEXT: s_addc_u32 s59, s32, 16 |
| ; GFX11-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 |
| ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) |
| ; GFX11-NEXT: s_bitcmp1_b32 s59, 0 |
| ; GFX11-NEXT: s_bitset0_b32 s59, 0 |
| ; GFX11-NEXT: s_mov_b32 s54, s59 |
| ; GFX11-NEXT: ;;#ASMSTART |
| ; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc |
| ; GFX11-NEXT: ;;#ASMEND |
| ; GFX11-NEXT: v_readlane_b32 s55, v21, 16 |
| ; GFX11-NEXT: v_readlane_b32 s54, v21, 15 |
| ; GFX11-NEXT: v_readlane_b32 s53, v21, 14 |
| ; GFX11-NEXT: v_readlane_b32 s52, v21, 13 |
| ; GFX11-NEXT: v_readlane_b32 s51, v21, 12 |
| ; GFX11-NEXT: v_readlane_b32 s50, v21, 11 |
| ; GFX11-NEXT: v_readlane_b32 s49, v21, 10 |
| ; GFX11-NEXT: v_readlane_b32 s48, v21, 9 |
| ; GFX11-NEXT: v_readlane_b32 s39, v21, 8 |
| ; GFX11-NEXT: v_readlane_b32 s38, v21, 7 |
| ; GFX11-NEXT: v_readlane_b32 s37, v21, 6 |
| ; GFX11-NEXT: v_readlane_b32 s36, v21, 5 |
| ; GFX11-NEXT: v_readlane_b32 s35, v21, 4 |
| ; GFX11-NEXT: v_readlane_b32 s34, v21, 3 |
| ; GFX11-NEXT: v_readlane_b32 s33, v21, 2 |
| ; GFX11-NEXT: v_readlane_b32 s31, v21, 1 |
| ; GFX11-NEXT: v_readlane_b32 s30, v21, 0 |
| ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 |
| ; GFX11-NEXT: s_add_i32 s1, s32, 0x4010 |
| ; GFX11-NEXT: scratch_load_b32 v21, off, s1 ; 4-byte Folded Reload |
| ; GFX11-NEXT: s_mov_b32 exec_lo, s0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowest_offset: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 |
| ; GFX12-NEXT: scratch_store_b32 off, v21, s32 offset:16384 ; 4-byte Folded Spill |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: s_mov_b32 exec_lo, s0 |
| ; GFX12-NEXT: v_writelane_b32 v21, s30, 0 |
| ; GFX12-NEXT: s_and_b32 s59, 0, exec_lo |
| ; GFX12-NEXT: v_writelane_b32 v21, s31, 1 |
| ; GFX12-NEXT: v_writelane_b32 v21, s33, 2 |
| ; GFX12-NEXT: v_writelane_b32 v21, s34, 3 |
| ; GFX12-NEXT: v_writelane_b32 v21, s35, 4 |
| ; GFX12-NEXT: v_writelane_b32 v21, s36, 5 |
| ; GFX12-NEXT: v_writelane_b32 v21, s37, 6 |
| ; GFX12-NEXT: v_writelane_b32 v21, s38, 7 |
| ; GFX12-NEXT: v_writelane_b32 v21, s39, 8 |
| ; GFX12-NEXT: v_writelane_b32 v21, s48, 9 |
| ; GFX12-NEXT: v_writelane_b32 v21, s49, 10 |
| ; GFX12-NEXT: v_writelane_b32 v21, s50, 11 |
| ; GFX12-NEXT: v_writelane_b32 v21, s51, 12 |
| ; GFX12-NEXT: v_writelane_b32 v21, s52, 13 |
| ; GFX12-NEXT: v_writelane_b32 v21, s53, 14 |
| ; GFX12-NEXT: v_writelane_b32 v21, s54, 15 |
| ; GFX12-NEXT: v_writelane_b32 v21, s55, 16 |
| ; GFX12-NEXT: ;;#ASMSTART |
| ; GFX12-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc |
| ; GFX12-NEXT: ;;#ASMEND |
| ; GFX12-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 |
| ; GFX12-NEXT: s_mov_b32 s54, s32 |
| ; GFX12-NEXT: ;;#ASMSTART |
| ; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc |
| ; GFX12-NEXT: ;;#ASMEND |
| ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX12-NEXT: v_readlane_b32 s55, v21, 16 |
| ; GFX12-NEXT: v_readlane_b32 s54, v21, 15 |
| ; GFX12-NEXT: v_readlane_b32 s53, v21, 14 |
| ; GFX12-NEXT: v_readlane_b32 s52, v21, 13 |
| ; GFX12-NEXT: v_readlane_b32 s51, v21, 12 |
| ; GFX12-NEXT: v_readlane_b32 s50, v21, 11 |
| ; GFX12-NEXT: v_readlane_b32 s49, v21, 10 |
| ; GFX12-NEXT: v_readlane_b32 s48, v21, 9 |
| ; GFX12-NEXT: v_readlane_b32 s39, v21, 8 |
| ; GFX12-NEXT: v_readlane_b32 s38, v21, 7 |
| ; GFX12-NEXT: v_readlane_b32 s37, v21, 6 |
| ; GFX12-NEXT: v_readlane_b32 s36, v21, 5 |
| ; GFX12-NEXT: v_readlane_b32 s35, v21, 4 |
| ; GFX12-NEXT: v_readlane_b32 s34, v21, 3 |
| ; GFX12-NEXT: v_readlane_b32 s33, v21, 2 |
| ; GFX12-NEXT: v_readlane_b32 s31, v21, 1 |
| ; GFX12-NEXT: v_readlane_b32 s30, v21, 0 |
| ; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 |
| ; GFX12-NEXT: scratch_load_b32 v21, off, s32 offset:16384 ; 4-byte Folded Reload |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: s_mov_b32 exec_lo, s0 |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %alloca0 = alloca [4096 x i32], align 16, addrspace(5) |
| |
| ; Force no SGPRs to be available for the carry-out of the vector add. |
| %asm = call %asm.output2 asm sideeffect |
| "; def $0, $1, $2, $3, $4, $5, $6, $7, $8", |
| "={s[0:15]},={s[16:31]},={s[32:47]},={s[48:55]},={s[56:57]},={s58},={v[0:15]},={v[16:20]},={vcc}"() |
| |
| %s0 = extractvalue %asm.output2 %asm, 0 |
| %s1 = extractvalue %asm.output2 %asm, 1 |
| %s2 = extractvalue %asm.output2 %asm, 2 |
| %s3 = extractvalue %asm.output2 %asm, 3 |
| %s4 = extractvalue %asm.output2 %asm, 4 |
| %s5 = extractvalue %asm.output2 %asm, 5 |
| |
| %v0 = extractvalue %asm.output2 %asm, 6 |
| %v1 = extractvalue %asm.output2 %asm, 7 |
| |
| %vcc = extractvalue %asm.output2 %asm, 8 |
| |
| ; scc is unavailable since it is live in to the asm use below ({scc} input) |
| call void asm sideeffect "; use $0, $1, $2, $3, $4, $5, $6, $7, $8, $9, $10", |
| "{s[0:15]},{s[16:31]},{s[32:47]},{s[48:55]},{s[56:57]},{s58},{v[0:15]},{v[16:20]},{vcc},{s54},{scc}"( |
| <16 x i32> %s0, |
| <16 x i32> %s1, |
| <16 x i32> %s2, |
| <8 x i32> %s3, |
| <2 x i32> %s4, |
| i32 %s5, |
| <16 x i32> %v0, |
| <5 x i32> %v1, |
| i64 %vcc, |
| ptr addrspace(5) %alloca0, |
| i32 0) ; use of scc |
| |
| ret void |
| } |
| |
| ; This case isn't using SGPRs yet. |
| ; FIXME: Should also use one more VGPR, but currently fails to allocate on gfx8. |
| define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset() #0 { |
| ; GFX7-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset: |
| ; GFX7: ; %bb.0: |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX7-NEXT: s_add_i32 s6, s32, 0x201000 |
| ; GFX7-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill |
| ; GFX7-NEXT: s_add_i32 s6, s32, 0x201100 |
| ; GFX7-NEXT: buffer_store_dword v22, off, s[0:3], s6 ; 4-byte Folded Spill |
| ; GFX7-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX7-NEXT: v_writelane_b32 v23, s28, 17 |
| ; GFX7-NEXT: v_writelane_b32 v23, s29, 18 |
| ; GFX7-NEXT: v_writelane_b32 v23, s30, 0 |
| ; GFX7-NEXT: v_writelane_b32 v23, s31, 1 |
| ; GFX7-NEXT: v_writelane_b32 v23, s33, 2 |
| ; GFX7-NEXT: v_writelane_b32 v23, s34, 3 |
| ; GFX7-NEXT: v_writelane_b32 v23, s35, 4 |
| ; GFX7-NEXT: v_writelane_b32 v23, s36, 5 |
| ; GFX7-NEXT: v_writelane_b32 v23, s37, 6 |
| ; GFX7-NEXT: v_writelane_b32 v23, s38, 7 |
| ; GFX7-NEXT: v_writelane_b32 v23, s39, 8 |
| ; GFX7-NEXT: v_writelane_b32 v23, s48, 9 |
| ; GFX7-NEXT: v_writelane_b32 v23, s49, 10 |
| ; GFX7-NEXT: v_writelane_b32 v23, s50, 11 |
| ; GFX7-NEXT: v_writelane_b32 v23, s51, 12 |
| ; GFX7-NEXT: v_writelane_b32 v23, s52, 13 |
| ; GFX7-NEXT: s_lshr_b32 s5, s32, 6 |
| ; GFX7-NEXT: v_writelane_b32 v23, s53, 14 |
| ; GFX7-NEXT: v_lshr_b32_e64 v0, s32, 6 |
| ; GFX7-NEXT: s_add_i32 s4, s5, 0x4240 |
| ; GFX7-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane |
| ; GFX7-NEXT: v_writelane_b32 v23, s54, 15 |
| ; GFX7-NEXT: v_add_i32_e32 v0, vcc, 64, v0 |
| ; GFX7-NEXT: v_writelane_b32 v22, s4, 0 |
| ; GFX7-NEXT: s_and_b64 s[4:5], 0, exec |
| ; GFX7-NEXT: v_writelane_b32 v23, s55, 16 |
| ; GFX7-NEXT: ;;#ASMSTART |
| ; GFX7-NEXT: ; use alloca0 v0 |
| ; GFX7-NEXT: ;;#ASMEND |
| ; GFX7-NEXT: ;;#ASMSTART |
| ; GFX7-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc |
| ; GFX7-NEXT: ;;#ASMEND |
| ; GFX7-NEXT: v_readlane_b32 s54, v22, 0 |
| ; GFX7-NEXT: ;;#ASMSTART |
| ; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc |
| ; GFX7-NEXT: ;;#ASMEND |
| ; GFX7-NEXT: v_readlane_b32 s55, v23, 16 |
| ; GFX7-NEXT: v_readlane_b32 s54, v23, 15 |
| ; GFX7-NEXT: v_readlane_b32 s53, v23, 14 |
| ; GFX7-NEXT: v_readlane_b32 s52, v23, 13 |
| ; GFX7-NEXT: v_readlane_b32 s51, v23, 12 |
| ; GFX7-NEXT: v_readlane_b32 s50, v23, 11 |
| ; GFX7-NEXT: v_readlane_b32 s49, v23, 10 |
| ; GFX7-NEXT: v_readlane_b32 s48, v23, 9 |
| ; GFX7-NEXT: v_readlane_b32 s39, v23, 8 |
| ; GFX7-NEXT: v_readlane_b32 s38, v23, 7 |
| ; GFX7-NEXT: v_readlane_b32 s37, v23, 6 |
| ; GFX7-NEXT: v_readlane_b32 s36, v23, 5 |
| ; GFX7-NEXT: v_readlane_b32 s35, v23, 4 |
| ; GFX7-NEXT: v_readlane_b32 s34, v23, 3 |
| ; GFX7-NEXT: v_readlane_b32 s33, v23, 2 |
| ; GFX7-NEXT: v_readlane_b32 s31, v23, 1 |
| ; GFX7-NEXT: v_readlane_b32 s30, v23, 0 |
| ; GFX7-NEXT: v_readlane_b32 s28, v23, 17 |
| ; GFX7-NEXT: v_readlane_b32 s29, v23, 18 |
| ; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX7-NEXT: s_add_i32 s6, s32, 0x201000 |
| ; GFX7-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload |
| ; GFX7-NEXT: s_add_i32 s6, s32, 0x201100 |
| ; GFX7-NEXT: buffer_load_dword v22, off, s[0:3], s6 ; 4-byte Folded Reload |
| ; GFX7-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX7-NEXT: s_waitcnt vmcnt(0) |
| ; GFX7-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX8-NEXT: s_add_i32 s6, s32, 0x201000 |
| ; GFX8-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill |
| ; GFX8-NEXT: s_add_i32 s6, s32, 0x201100 |
| ; GFX8-NEXT: buffer_store_dword v22, off, s[0:3], s6 ; 4-byte Folded Spill |
| ; GFX8-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX8-NEXT: v_writelane_b32 v23, s30, 0 |
| ; GFX8-NEXT: v_writelane_b32 v23, s31, 1 |
| ; GFX8-NEXT: v_writelane_b32 v23, s33, 2 |
| ; GFX8-NEXT: v_writelane_b32 v23, s34, 3 |
| ; GFX8-NEXT: v_writelane_b32 v23, s35, 4 |
| ; GFX8-NEXT: v_writelane_b32 v23, s36, 5 |
| ; GFX8-NEXT: v_writelane_b32 v23, s37, 6 |
| ; GFX8-NEXT: v_writelane_b32 v23, s38, 7 |
| ; GFX8-NEXT: v_writelane_b32 v23, s39, 8 |
| ; GFX8-NEXT: v_writelane_b32 v23, s48, 9 |
| ; GFX8-NEXT: v_writelane_b32 v23, s49, 10 |
| ; GFX8-NEXT: v_writelane_b32 v23, s50, 11 |
| ; GFX8-NEXT: v_writelane_b32 v23, s51, 12 |
| ; GFX8-NEXT: v_writelane_b32 v23, s52, 13 |
| ; GFX8-NEXT: s_lshr_b32 s5, s32, 6 |
| ; GFX8-NEXT: v_writelane_b32 v23, s53, 14 |
| ; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 |
| ; GFX8-NEXT: s_add_i32 s4, s5, 0x4240 |
| ; GFX8-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane |
| ; GFX8-NEXT: v_writelane_b32 v23, s54, 15 |
| ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 |
| ; GFX8-NEXT: v_writelane_b32 v22, s4, 0 |
| ; GFX8-NEXT: s_and_b64 s[4:5], 0, exec |
| ; GFX8-NEXT: v_writelane_b32 v23, s55, 16 |
| ; GFX8-NEXT: ;;#ASMSTART |
| ; GFX8-NEXT: ; use alloca0 v0 |
| ; GFX8-NEXT: ;;#ASMEND |
| ; GFX8-NEXT: ;;#ASMSTART |
| ; GFX8-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc |
| ; GFX8-NEXT: ;;#ASMEND |
| ; GFX8-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 |
| ; GFX8-NEXT: v_readlane_b32 s54, v22, 0 |
| ; GFX8-NEXT: ;;#ASMSTART |
| ; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc |
| ; GFX8-NEXT: ;;#ASMEND |
| ; GFX8-NEXT: v_readlane_b32 s55, v23, 16 |
| ; GFX8-NEXT: v_readlane_b32 s54, v23, 15 |
| ; GFX8-NEXT: v_readlane_b32 s53, v23, 14 |
| ; GFX8-NEXT: v_readlane_b32 s52, v23, 13 |
| ; GFX8-NEXT: v_readlane_b32 s51, v23, 12 |
| ; GFX8-NEXT: v_readlane_b32 s50, v23, 11 |
| ; GFX8-NEXT: v_readlane_b32 s49, v23, 10 |
| ; GFX8-NEXT: v_readlane_b32 s48, v23, 9 |
| ; GFX8-NEXT: v_readlane_b32 s39, v23, 8 |
| ; GFX8-NEXT: v_readlane_b32 s38, v23, 7 |
| ; GFX8-NEXT: v_readlane_b32 s37, v23, 6 |
| ; GFX8-NEXT: v_readlane_b32 s36, v23, 5 |
| ; GFX8-NEXT: v_readlane_b32 s35, v23, 4 |
| ; GFX8-NEXT: v_readlane_b32 s34, v23, 3 |
| ; GFX8-NEXT: v_readlane_b32 s33, v23, 2 |
| ; GFX8-NEXT: v_readlane_b32 s31, v23, 1 |
| ; GFX8-NEXT: v_readlane_b32 s30, v23, 0 |
| ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX8-NEXT: s_add_i32 s6, s32, 0x201000 |
| ; GFX8-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload |
| ; GFX8-NEXT: s_add_i32 s6, s32, 0x201100 |
| ; GFX8-NEXT: buffer_load_dword v22, off, s[0:3], s6 ; 4-byte Folded Reload |
| ; GFX8-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX900-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset: |
| ; GFX900: ; %bb.0: |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX900-NEXT: s_add_i32 s6, s32, 0x201000 |
| ; GFX900-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill |
| ; GFX900-NEXT: s_add_i32 s6, s32, 0x201100 |
| ; GFX900-NEXT: buffer_store_dword v22, off, s[0:3], s6 ; 4-byte Folded Spill |
| ; GFX900-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX900-NEXT: v_writelane_b32 v23, s30, 0 |
| ; GFX900-NEXT: v_writelane_b32 v23, s31, 1 |
| ; GFX900-NEXT: v_writelane_b32 v23, s33, 2 |
| ; GFX900-NEXT: v_writelane_b32 v23, s34, 3 |
| ; GFX900-NEXT: v_writelane_b32 v23, s35, 4 |
| ; GFX900-NEXT: v_writelane_b32 v23, s36, 5 |
| ; GFX900-NEXT: v_writelane_b32 v23, s37, 6 |
| ; GFX900-NEXT: v_writelane_b32 v23, s38, 7 |
| ; GFX900-NEXT: v_writelane_b32 v23, s39, 8 |
| ; GFX900-NEXT: v_writelane_b32 v23, s48, 9 |
| ; GFX900-NEXT: v_writelane_b32 v23, s49, 10 |
| ; GFX900-NEXT: v_writelane_b32 v23, s50, 11 |
| ; GFX900-NEXT: v_writelane_b32 v23, s51, 12 |
| ; GFX900-NEXT: v_writelane_b32 v23, s52, 13 |
| ; GFX900-NEXT: s_lshr_b32 s5, s32, 6 |
| ; GFX900-NEXT: v_writelane_b32 v23, s53, 14 |
| ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 |
| ; GFX900-NEXT: s_add_i32 s4, s5, 0x4240 |
| ; GFX900-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane |
| ; GFX900-NEXT: v_writelane_b32 v23, s54, 15 |
| ; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 |
| ; GFX900-NEXT: v_writelane_b32 v22, s4, 0 |
| ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec |
| ; GFX900-NEXT: v_writelane_b32 v23, s55, 16 |
| ; GFX900-NEXT: ;;#ASMSTART |
| ; GFX900-NEXT: ; use alloca0 v0 |
| ; GFX900-NEXT: ;;#ASMEND |
| ; GFX900-NEXT: ;;#ASMSTART |
| ; GFX900-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc |
| ; GFX900-NEXT: ;;#ASMEND |
| ; GFX900-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 |
| ; GFX900-NEXT: v_readlane_b32 s54, v22, 0 |
| ; GFX900-NEXT: ;;#ASMSTART |
| ; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc |
| ; GFX900-NEXT: ;;#ASMEND |
| ; GFX900-NEXT: v_readlane_b32 s55, v23, 16 |
| ; GFX900-NEXT: v_readlane_b32 s54, v23, 15 |
| ; GFX900-NEXT: v_readlane_b32 s53, v23, 14 |
| ; GFX900-NEXT: v_readlane_b32 s52, v23, 13 |
| ; GFX900-NEXT: v_readlane_b32 s51, v23, 12 |
| ; GFX900-NEXT: v_readlane_b32 s50, v23, 11 |
| ; GFX900-NEXT: v_readlane_b32 s49, v23, 10 |
| ; GFX900-NEXT: v_readlane_b32 s48, v23, 9 |
| ; GFX900-NEXT: v_readlane_b32 s39, v23, 8 |
| ; GFX900-NEXT: v_readlane_b32 s38, v23, 7 |
| ; GFX900-NEXT: v_readlane_b32 s37, v23, 6 |
| ; GFX900-NEXT: v_readlane_b32 s36, v23, 5 |
| ; GFX900-NEXT: v_readlane_b32 s35, v23, 4 |
| ; GFX900-NEXT: v_readlane_b32 s34, v23, 3 |
| ; GFX900-NEXT: v_readlane_b32 s33, v23, 2 |
| ; GFX900-NEXT: v_readlane_b32 s31, v23, 1 |
| ; GFX900-NEXT: v_readlane_b32 s30, v23, 0 |
| ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 |
| ; GFX900-NEXT: s_add_i32 s6, s32, 0x201000 |
| ; GFX900-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload |
| ; GFX900-NEXT: s_add_i32 s6, s32, 0x201100 |
| ; GFX900-NEXT: buffer_load_dword v22, off, s[0:3], s6 ; 4-byte Folded Reload |
| ; GFX900-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX900-NEXT: s_waitcnt vmcnt(0) |
| ; GFX900-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX942-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset: |
| ; GFX942: ; %bb.0: |
| ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 |
| ; GFX942-NEXT: s_add_i32 s2, s32, 0x8040 |
| ; GFX942-NEXT: scratch_store_dword off, v22, s2 ; 4-byte Folded Spill |
| ; GFX942-NEXT: s_mov_b64 exec, s[0:1] |
| ; GFX942-NEXT: v_writelane_b32 v22, s30, 0 |
| ; GFX942-NEXT: v_writelane_b32 v22, s31, 1 |
| ; GFX942-NEXT: v_writelane_b32 v22, s33, 2 |
| ; GFX942-NEXT: v_writelane_b32 v22, s34, 3 |
| ; GFX942-NEXT: v_writelane_b32 v22, s35, 4 |
| ; GFX942-NEXT: v_writelane_b32 v22, s36, 5 |
| ; GFX942-NEXT: v_writelane_b32 v22, s37, 6 |
| ; GFX942-NEXT: v_writelane_b32 v22, s38, 7 |
| ; GFX942-NEXT: v_writelane_b32 v22, s39, 8 |
| ; GFX942-NEXT: v_writelane_b32 v22, s48, 9 |
| ; GFX942-NEXT: v_writelane_b32 v22, s49, 10 |
| ; GFX942-NEXT: v_writelane_b32 v22, s50, 11 |
| ; GFX942-NEXT: v_writelane_b32 v22, s51, 12 |
| ; GFX942-NEXT: v_writelane_b32 v22, s52, 13 |
| ; GFX942-NEXT: v_writelane_b32 v22, s53, 14 |
| ; GFX942-NEXT: s_add_i32 s0, s32, 64 |
| ; GFX942-NEXT: v_writelane_b32 v22, s54, 15 |
| ; GFX942-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX942-NEXT: v_writelane_b32 v22, s55, 16 |
| ; GFX942-NEXT: ;;#ASMSTART |
| ; GFX942-NEXT: ; use alloca0 v0 |
| ; GFX942-NEXT: ;;#ASMEND |
| ; GFX942-NEXT: ;;#ASMSTART |
| ; GFX942-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc |
| ; GFX942-NEXT: ;;#ASMEND |
| ; GFX942-NEXT: s_add_i32 s58, s32, 0x4240 |
| ; GFX942-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 |
| ; GFX942-NEXT: s_and_b64 s[60:61], 0, exec |
| ; GFX942-NEXT: s_mov_b32 s54, s58 |
| ; GFX942-NEXT: ;;#ASMSTART |
| ; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc |
| ; GFX942-NEXT: ;;#ASMEND |
| ; GFX942-NEXT: v_readlane_b32 s55, v22, 16 |
| ; GFX942-NEXT: v_readlane_b32 s54, v22, 15 |
| ; GFX942-NEXT: v_readlane_b32 s53, v22, 14 |
| ; GFX942-NEXT: v_readlane_b32 s52, v22, 13 |
| ; GFX942-NEXT: v_readlane_b32 s51, v22, 12 |
| ; GFX942-NEXT: v_readlane_b32 s50, v22, 11 |
| ; GFX942-NEXT: v_readlane_b32 s49, v22, 10 |
| ; GFX942-NEXT: v_readlane_b32 s48, v22, 9 |
| ; GFX942-NEXT: v_readlane_b32 s39, v22, 8 |
| ; GFX942-NEXT: v_readlane_b32 s38, v22, 7 |
| ; GFX942-NEXT: v_readlane_b32 s37, v22, 6 |
| ; GFX942-NEXT: v_readlane_b32 s36, v22, 5 |
| ; GFX942-NEXT: v_readlane_b32 s35, v22, 4 |
| ; GFX942-NEXT: v_readlane_b32 s34, v22, 3 |
| ; GFX942-NEXT: v_readlane_b32 s33, v22, 2 |
| ; GFX942-NEXT: v_readlane_b32 s31, v22, 1 |
| ; GFX942-NEXT: v_readlane_b32 s30, v22, 0 |
| ; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 |
| ; GFX942-NEXT: s_add_i32 s2, s32, 0x8040 |
| ; GFX942-NEXT: scratch_load_dword v22, off, s2 ; 4-byte Folded Reload |
| ; GFX942-NEXT: s_mov_b64 exec, s[0:1] |
| ; GFX942-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10_1-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset: |
| ; GFX10_1: ; %bb.0: |
| ; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800 |
| ; GFX10_1-NEXT: buffer_store_dword v22, off, s[0:3], s5 ; 4-byte Folded Spill |
| ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 |
| ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_1-NEXT: v_writelane_b32 v22, s30, 0 |
| ; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 |
| ; GFX10_1-NEXT: s_lshr_b32 s4, s32, 5 |
| ; GFX10_1-NEXT: s_add_i32 s58, s4, 0x4240 |
| ; GFX10_1-NEXT: v_writelane_b32 v22, s31, 1 |
| ; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 |
| ; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo |
| ; GFX10_1-NEXT: ;;#ASMSTART |
| ; GFX10_1-NEXT: ; use alloca0 v0 |
| ; GFX10_1-NEXT: ;;#ASMEND |
| ; GFX10_1-NEXT: v_writelane_b32 v22, s33, 2 |
| ; GFX10_1-NEXT: v_writelane_b32 v22, s34, 3 |
| ; GFX10_1-NEXT: v_writelane_b32 v22, s35, 4 |
| ; GFX10_1-NEXT: v_writelane_b32 v22, s36, 5 |
| ; GFX10_1-NEXT: v_writelane_b32 v22, s37, 6 |
| ; GFX10_1-NEXT: v_writelane_b32 v22, s38, 7 |
| ; GFX10_1-NEXT: v_writelane_b32 v22, s39, 8 |
| ; GFX10_1-NEXT: v_writelane_b32 v22, s48, 9 |
| ; GFX10_1-NEXT: v_writelane_b32 v22, s49, 10 |
| ; GFX10_1-NEXT: v_writelane_b32 v22, s50, 11 |
| ; GFX10_1-NEXT: v_writelane_b32 v22, s51, 12 |
| ; GFX10_1-NEXT: v_writelane_b32 v22, s52, 13 |
| ; GFX10_1-NEXT: v_writelane_b32 v22, s53, 14 |
| ; GFX10_1-NEXT: v_writelane_b32 v22, s54, 15 |
| ; GFX10_1-NEXT: v_writelane_b32 v22, s55, 16 |
| ; GFX10_1-NEXT: ;;#ASMSTART |
| ; GFX10_1-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc |
| ; GFX10_1-NEXT: ;;#ASMEND |
| ; GFX10_1-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 |
| ; GFX10_1-NEXT: s_mov_b32 s54, s58 |
| ; GFX10_1-NEXT: ;;#ASMSTART |
| ; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc |
| ; GFX10_1-NEXT: ;;#ASMEND |
| ; GFX10_1-NEXT: v_readlane_b32 s55, v22, 16 |
| ; GFX10_1-NEXT: v_readlane_b32 s54, v22, 15 |
| ; GFX10_1-NEXT: v_readlane_b32 s53, v22, 14 |
| ; GFX10_1-NEXT: v_readlane_b32 s52, v22, 13 |
| ; GFX10_1-NEXT: v_readlane_b32 s51, v22, 12 |
| ; GFX10_1-NEXT: v_readlane_b32 s50, v22, 11 |
| ; GFX10_1-NEXT: v_readlane_b32 s49, v22, 10 |
| ; GFX10_1-NEXT: v_readlane_b32 s48, v22, 9 |
| ; GFX10_1-NEXT: v_readlane_b32 s39, v22, 8 |
| ; GFX10_1-NEXT: v_readlane_b32 s38, v22, 7 |
| ; GFX10_1-NEXT: v_readlane_b32 s37, v22, 6 |
| ; GFX10_1-NEXT: v_readlane_b32 s36, v22, 5 |
| ; GFX10_1-NEXT: v_readlane_b32 s35, v22, 4 |
| ; GFX10_1-NEXT: v_readlane_b32 s34, v22, 3 |
| ; GFX10_1-NEXT: v_readlane_b32 s33, v22, 2 |
| ; GFX10_1-NEXT: v_readlane_b32 s31, v22, 1 |
| ; GFX10_1-NEXT: v_readlane_b32 s30, v22, 0 |
| ; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800 |
| ; GFX10_1-NEXT: buffer_load_dword v22, off, s[0:3], s5 ; 4-byte Folded Reload |
| ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 |
| ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_1-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10_1-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX10_3-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset: |
| ; GFX10_3: ; %bb.0: |
| ; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800 |
| ; GFX10_3-NEXT: buffer_store_dword v22, off, s[0:3], s5 ; 4-byte Folded Spill |
| ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_3-NEXT: v_writelane_b32 v22, s30, 0 |
| ; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 |
| ; GFX10_3-NEXT: s_lshr_b32 s4, s32, 5 |
| ; GFX10_3-NEXT: s_add_i32 s58, s4, 0x4240 |
| ; GFX10_3-NEXT: v_writelane_b32 v22, s31, 1 |
| ; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 |
| ; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo |
| ; GFX10_3-NEXT: ;;#ASMSTART |
| ; GFX10_3-NEXT: ; use alloca0 v0 |
| ; GFX10_3-NEXT: ;;#ASMEND |
| ; GFX10_3-NEXT: v_writelane_b32 v22, s33, 2 |
| ; GFX10_3-NEXT: v_writelane_b32 v22, s34, 3 |
| ; GFX10_3-NEXT: v_writelane_b32 v22, s35, 4 |
| ; GFX10_3-NEXT: v_writelane_b32 v22, s36, 5 |
| ; GFX10_3-NEXT: v_writelane_b32 v22, s37, 6 |
| ; GFX10_3-NEXT: v_writelane_b32 v22, s38, 7 |
| ; GFX10_3-NEXT: v_writelane_b32 v22, s39, 8 |
| ; GFX10_3-NEXT: v_writelane_b32 v22, s48, 9 |
| ; GFX10_3-NEXT: v_writelane_b32 v22, s49, 10 |
| ; GFX10_3-NEXT: v_writelane_b32 v22, s50, 11 |
| ; GFX10_3-NEXT: v_writelane_b32 v22, s51, 12 |
| ; GFX10_3-NEXT: v_writelane_b32 v22, s52, 13 |
| ; GFX10_3-NEXT: v_writelane_b32 v22, s53, 14 |
| ; GFX10_3-NEXT: v_writelane_b32 v22, s54, 15 |
| ; GFX10_3-NEXT: v_writelane_b32 v22, s55, 16 |
| ; GFX10_3-NEXT: ;;#ASMSTART |
| ; GFX10_3-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc |
| ; GFX10_3-NEXT: ;;#ASMEND |
| ; GFX10_3-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 |
| ; GFX10_3-NEXT: s_mov_b32 s54, s58 |
| ; GFX10_3-NEXT: ;;#ASMSTART |
| ; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc |
| ; GFX10_3-NEXT: ;;#ASMEND |
| ; GFX10_3-NEXT: v_readlane_b32 s55, v22, 16 |
| ; GFX10_3-NEXT: v_readlane_b32 s54, v22, 15 |
| ; GFX10_3-NEXT: v_readlane_b32 s53, v22, 14 |
| ; GFX10_3-NEXT: v_readlane_b32 s52, v22, 13 |
| ; GFX10_3-NEXT: v_readlane_b32 s51, v22, 12 |
| ; GFX10_3-NEXT: v_readlane_b32 s50, v22, 11 |
| ; GFX10_3-NEXT: v_readlane_b32 s49, v22, 10 |
| ; GFX10_3-NEXT: v_readlane_b32 s48, v22, 9 |
| ; GFX10_3-NEXT: v_readlane_b32 s39, v22, 8 |
| ; GFX10_3-NEXT: v_readlane_b32 s38, v22, 7 |
| ; GFX10_3-NEXT: v_readlane_b32 s37, v22, 6 |
| ; GFX10_3-NEXT: v_readlane_b32 s36, v22, 5 |
| ; GFX10_3-NEXT: v_readlane_b32 s35, v22, 4 |
| ; GFX10_3-NEXT: v_readlane_b32 s34, v22, 3 |
| ; GFX10_3-NEXT: v_readlane_b32 s33, v22, 2 |
| ; GFX10_3-NEXT: v_readlane_b32 s31, v22, 1 |
| ; GFX10_3-NEXT: v_readlane_b32 s30, v22, 0 |
| ; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 |
| ; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800 |
| ; GFX10_3-NEXT: buffer_load_dword v22, off, s[0:3], s5 ; 4-byte Folded Reload |
| ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 |
| ; GFX10_3-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10_3-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 |
| ; GFX11-NEXT: s_add_i32 s1, s32, 0x8040 |
| ; GFX11-NEXT: scratch_store_b32 off, v22, s1 ; 4-byte Folded Spill |
| ; GFX11-NEXT: s_mov_b32 exec_lo, s0 |
| ; GFX11-NEXT: v_writelane_b32 v22, s30, 0 |
| ; GFX11-NEXT: s_add_i32 s0, s32, 64 |
| ; GFX11-NEXT: s_add_i32 s58, s32, 0x4240 |
| ; GFX11-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX11-NEXT: s_and_b32 s0, 0, exec_lo |
| ; GFX11-NEXT: v_writelane_b32 v22, s31, 1 |
| ; GFX11-NEXT: ;;#ASMSTART |
| ; GFX11-NEXT: ; use alloca0 v0 |
| ; GFX11-NEXT: ;;#ASMEND |
| ; GFX11-NEXT: v_writelane_b32 v22, s33, 2 |
| ; GFX11-NEXT: v_writelane_b32 v22, s34, 3 |
| ; GFX11-NEXT: v_writelane_b32 v22, s35, 4 |
| ; GFX11-NEXT: v_writelane_b32 v22, s36, 5 |
| ; GFX11-NEXT: v_writelane_b32 v22, s37, 6 |
| ; GFX11-NEXT: v_writelane_b32 v22, s38, 7 |
| ; GFX11-NEXT: v_writelane_b32 v22, s39, 8 |
| ; GFX11-NEXT: v_writelane_b32 v22, s48, 9 |
| ; GFX11-NEXT: v_writelane_b32 v22, s49, 10 |
| ; GFX11-NEXT: v_writelane_b32 v22, s50, 11 |
| ; GFX11-NEXT: v_writelane_b32 v22, s51, 12 |
| ; GFX11-NEXT: v_writelane_b32 v22, s52, 13 |
| ; GFX11-NEXT: v_writelane_b32 v22, s53, 14 |
| ; GFX11-NEXT: v_writelane_b32 v22, s54, 15 |
| ; GFX11-NEXT: v_writelane_b32 v22, s55, 16 |
| ; GFX11-NEXT: ;;#ASMSTART |
| ; GFX11-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc |
| ; GFX11-NEXT: ;;#ASMEND |
| ; GFX11-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 |
| ; GFX11-NEXT: s_mov_b32 s54, s58 |
| ; GFX11-NEXT: ;;#ASMSTART |
| ; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc |
| ; GFX11-NEXT: ;;#ASMEND |
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-NEXT: v_readlane_b32 s55, v22, 16 |
| ; GFX11-NEXT: v_readlane_b32 s54, v22, 15 |
| ; GFX11-NEXT: v_readlane_b32 s53, v22, 14 |
| ; GFX11-NEXT: v_readlane_b32 s52, v22, 13 |
| ; GFX11-NEXT: v_readlane_b32 s51, v22, 12 |
| ; GFX11-NEXT: v_readlane_b32 s50, v22, 11 |
| ; GFX11-NEXT: v_readlane_b32 s49, v22, 10 |
| ; GFX11-NEXT: v_readlane_b32 s48, v22, 9 |
| ; GFX11-NEXT: v_readlane_b32 s39, v22, 8 |
| ; GFX11-NEXT: v_readlane_b32 s38, v22, 7 |
| ; GFX11-NEXT: v_readlane_b32 s37, v22, 6 |
| ; GFX11-NEXT: v_readlane_b32 s36, v22, 5 |
| ; GFX11-NEXT: v_readlane_b32 s35, v22, 4 |
| ; GFX11-NEXT: v_readlane_b32 s34, v22, 3 |
| ; GFX11-NEXT: v_readlane_b32 s33, v22, 2 |
| ; GFX11-NEXT: v_readlane_b32 s31, v22, 1 |
| ; GFX11-NEXT: v_readlane_b32 s30, v22, 0 |
| ; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 |
| ; GFX11-NEXT: s_add_i32 s1, s32, 0x8040 |
| ; GFX11-NEXT: scratch_load_b32 v22, off, s1 ; 4-byte Folded Reload |
| ; GFX11-NEXT: s_mov_b32 exec_lo, s0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX12-LABEL: scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_immoffset: |
| ; GFX12: ; %bb.0: |
| ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX12-NEXT: s_wait_expcnt 0x0 |
| ; GFX12-NEXT: s_wait_samplecnt 0x0 |
| ; GFX12-NEXT: s_wait_bvhcnt 0x0 |
| ; GFX12-NEXT: s_wait_kmcnt 0x0 |
| ; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 |
| ; GFX12-NEXT: scratch_store_b32 off, v22, s32 offset:32768 ; 4-byte Folded Spill |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: s_mov_b32 exec_lo, s0 |
| ; GFX12-NEXT: v_writelane_b32 v22, s30, 0 |
| ; GFX12-NEXT: s_add_co_i32 s58, s32, 0x4200 |
| ; GFX12-NEXT: v_mov_b32_e32 v0, s32 |
| ; GFX12-NEXT: s_and_b32 s0, 0, exec_lo |
| ; GFX12-NEXT: ;;#ASMSTART |
| ; GFX12-NEXT: ; use alloca0 v0 |
| ; GFX12-NEXT: ;;#ASMEND |
| ; GFX12-NEXT: v_writelane_b32 v22, s31, 1 |
| ; GFX12-NEXT: v_writelane_b32 v22, s33, 2 |
| ; GFX12-NEXT: v_writelane_b32 v22, s34, 3 |
| ; GFX12-NEXT: v_writelane_b32 v22, s35, 4 |
| ; GFX12-NEXT: v_writelane_b32 v22, s36, 5 |
| ; GFX12-NEXT: v_writelane_b32 v22, s37, 6 |
| ; GFX12-NEXT: v_writelane_b32 v22, s38, 7 |
| ; GFX12-NEXT: v_writelane_b32 v22, s39, 8 |
| ; GFX12-NEXT: v_writelane_b32 v22, s48, 9 |
| ; GFX12-NEXT: v_writelane_b32 v22, s49, 10 |
| ; GFX12-NEXT: v_writelane_b32 v22, s50, 11 |
| ; GFX12-NEXT: v_writelane_b32 v22, s51, 12 |
| ; GFX12-NEXT: v_writelane_b32 v22, s52, 13 |
| ; GFX12-NEXT: v_writelane_b32 v22, s53, 14 |
| ; GFX12-NEXT: v_writelane_b32 v22, s54, 15 |
| ; GFX12-NEXT: v_writelane_b32 v22, s55, 16 |
| ; GFX12-NEXT: ;;#ASMSTART |
| ; GFX12-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc |
| ; GFX12-NEXT: ;;#ASMEND |
| ; GFX12-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: s_mov_b32 s54, s58 |
| ; GFX12-NEXT: ;;#ASMSTART |
| ; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc |
| ; GFX12-NEXT: ;;#ASMEND |
| ; GFX12-NEXT: v_readlane_b32 s55, v22, 16 |
| ; GFX12-NEXT: v_readlane_b32 s54, v22, 15 |
| ; GFX12-NEXT: v_readlane_b32 s53, v22, 14 |
| ; GFX12-NEXT: v_readlane_b32 s52, v22, 13 |
| ; GFX12-NEXT: v_readlane_b32 s51, v22, 12 |
| ; GFX12-NEXT: v_readlane_b32 s50, v22, 11 |
| ; GFX12-NEXT: v_readlane_b32 s49, v22, 10 |
| ; GFX12-NEXT: v_readlane_b32 s48, v22, 9 |
| ; GFX12-NEXT: v_readlane_b32 s39, v22, 8 |
| ; GFX12-NEXT: v_readlane_b32 s38, v22, 7 |
| ; GFX12-NEXT: v_readlane_b32 s37, v22, 6 |
| ; GFX12-NEXT: v_readlane_b32 s36, v22, 5 |
| ; GFX12-NEXT: v_readlane_b32 s35, v22, 4 |
| ; GFX12-NEXT: v_readlane_b32 s34, v22, 3 |
| ; GFX12-NEXT: v_readlane_b32 s33, v22, 2 |
| ; GFX12-NEXT: v_readlane_b32 s31, v22, 1 |
| ; GFX12-NEXT: v_readlane_b32 s30, v22, 0 |
| ; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 |
| ; GFX12-NEXT: scratch_load_b32 v22, off, s32 offset:32768 ; 4-byte Folded Reload |
| ; GFX12-NEXT: s_wait_alu 0xfffe |
| ; GFX12-NEXT: s_mov_b32 exec_lo, s0 |
| ; GFX12-NEXT: s_wait_loadcnt 0x0 |
| ; GFX12-NEXT: s_setpc_b64 s[30:31] |
| %alloca0 = alloca [4096 x i32], align 64, addrspace(5) |
| %alloca1 = alloca [4096 x i32], align 4, addrspace(5) |
| call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0) |
| |
| ; Force no SGPRs to be available for the carry-out of the vector add. |
| %asm = call %asm.output3 asm sideeffect |
| "; def $0, $1, $2, $3, $4, $5, $6, $7", |
| "={s[0:15]},={s[16:31]},={s[32:47]},={s[48:55]},={s[56:57]},={v[0:15]},={v[16:21]},={vcc}"() |
| |
| %s0 = extractvalue %asm.output3 %asm, 0 |
| %s1 = extractvalue %asm.output3 %asm, 1 |
| %s2 = extractvalue %asm.output3 %asm, 2 |
| %s3 = extractvalue %asm.output3 %asm, 3 |
| %s4 = extractvalue %asm.output3 %asm, 4 |
| |
| %v0 = extractvalue %asm.output3 %asm, 5 |
| %v1 = extractvalue %asm.output3 %asm, 6 |
| |
| %vcc = extractvalue %asm.output3 %asm, 7 |
| |
| %alloca1.offset = getelementptr [4096 x i32], ptr addrspace(5) %alloca1, i32 0, i32 128 |
| |
| ; scc is unavailable since it is live into the inline asm below, which takes it as the {scc} operand. |
| call void asm sideeffect "; use $0, $1, $2, $3, $4, $5, $6, $7, $8, $9", |
| "{s[0:15]},{s[16:31]},{s[32:47]},{s[48:55]},{s[56:57]},{v[0:15]},{v[16:21]},{vcc},{s54},{scc}"( |
| <16 x i32> %s0, |
| <16 x i32> %s1, |
| <16 x i32> %s2, |
| <8 x i32> %s3, |
| <2 x i32> %s4, |
| <16 x i32> %v0, |
| <6 x i32> %v1, |
| i64 %vcc, |
| ptr addrspace(5) %alloca1.offset, |
| i32 0) ; use of scc |
| |
| ret void |
| } |
| |
| ; For gfx8/gfx9, this should enforce a budget of 24 VGPRs, and 60 SGPRs (4 |
| ; are reserved at the end for xnack + vcc). |
| ; |
| ; NOTE(review): the register budget presumably follows from |
| ; "amdgpu-waves-per-eu"="10,10" fixing both the minimum and maximum waves |
| ; per EU at 10 -- confirm against the AMDGPU backend documentation. |
| ; |
| ; The two attribute groups below differ only in the requested stack |
| ; alignment: #0 uses alignstack=64, #1 uses alignstack=16. |
| attributes #0 = { nounwind alignstack=64 "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="10,10" "no-realign-stack" } |
| attributes #1 = { nounwind alignstack=16 "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="10,10" "no-realign-stack" } |