| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 < %s | FileCheck -check-prefix=GFX906 %s |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 < %s | FileCheck -check-prefix=GFX908 %s |
| |
| ; Due to high register pressure, the register allocator splits the live range of the WWM VGPR used for SGPR spills |
| ; and introduces a copy. That copy should be a whole-wave copy, with exec mask manipulation around it. |
| ; FIXME: The destination register of the whole-wave copy should have all of its lanes preserved by a |
| ; spill/restore in the function prologue/epilogue. Otherwise, the copy might unintentionally clobber its inactive lanes. |
| define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 { |
| ; GFX906-LABEL: preserve_wwm_copy_dstreg: |
| ; GFX906: ; %bb.0: |
| ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX906-NEXT: s_mov_b32 s16, s33 |
| ; GFX906-NEXT: s_mov_b32 s33, s32 |
| ; GFX906-NEXT: s_xor_saveexec_b64 s[18:19], -1 |
| ; GFX906-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill |
| ; GFX906-NEXT: s_mov_b64 exec, -1 |
| ; GFX906-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill |
| ; GFX906-NEXT: s_mov_b64 exec, s[18:19] |
| ; GFX906-NEXT: s_mov_b32 s21, s15 |
| ; GFX906-NEXT: ; implicit-def: $vgpr39 : SGPR spill to VGPR lane |
| ; GFX906-NEXT: s_mov_b32 s22, s14 |
| ; GFX906-NEXT: v_writelane_b32 v39, s21, 0 |
| ; GFX906-NEXT: v_writelane_b32 v39, s22, 1 |
| ; GFX906-NEXT: s_mov_b32 s23, s13 |
| ; GFX906-NEXT: v_writelane_b32 v39, s23, 2 |
| ; GFX906-NEXT: s_mov_b32 s24, s12 |
| ; GFX906-NEXT: v_writelane_b32 v39, s24, 3 |
| ; GFX906-NEXT: s_mov_b64 s[26:27], s[10:11] |
| ; GFX906-NEXT: v_writelane_b32 v39, s26, 4 |
| ; GFX906-NEXT: v_writelane_b32 v39, s27, 5 |
| ; GFX906-NEXT: v_writelane_b32 v39, s8, 6 |
| ; GFX906-NEXT: v_writelane_b32 v41, s16, 4 |
| ; GFX906-NEXT: v_writelane_b32 v39, s9, 7 |
| ; GFX906-NEXT: v_writelane_b32 v41, s34, 2 |
| ; GFX906-NEXT: v_writelane_b32 v39, s6, 8 |
| ; GFX906-NEXT: v_writelane_b32 v41, s35, 3 |
| ; GFX906-NEXT: v_writelane_b32 v39, s7, 9 |
| ; GFX906-NEXT: v_writelane_b32 v41, s30, 0 |
| ; GFX906-NEXT: v_writelane_b32 v39, s4, 10 |
| ; GFX906-NEXT: s_addk_i32 s32, 0x2800 |
| ; GFX906-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill |
| ; GFX906-NEXT: v_writelane_b32 v41, s31, 1 |
| ; GFX906-NEXT: v_mov_b32_e32 v32, v31 |
| ; GFX906-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill |
| ; GFX906-NEXT: s_nop 0 |
| ; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill |
| ; GFX906-NEXT: v_writelane_b32 v39, s5, 11 |
| ; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1 |
| ; GFX906-NEXT: s_mov_b64 exec, s[34:35] |
| ; GFX906-NEXT: ;;#ASMSTART |
| ; GFX906-NEXT: ; def v[0:31] |
| ; GFX906-NEXT: ;;#ASMEND |
| ; GFX906-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill |
| ; GFX906-NEXT: s_nop 0 |
| ; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v23, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v24, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v25, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v26, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v27, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v28, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v29, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v30, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill |
| ; GFX906-NEXT: buffer_store_dword v31, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill |
| ; GFX906-NEXT: ;;#ASMSTART |
| ; GFX906-NEXT: ; def v40 |
| ; GFX906-NEXT: ;;#ASMEND |
| ; GFX906-NEXT: ;;#ASMSTART |
| ; GFX906-NEXT: ; def s11 |
| ; GFX906-NEXT: ;;#ASMEND |
| ; GFX906-NEXT: v_writelane_b32 v39, s11, 12 |
| ; GFX906-NEXT: ;;#ASMSTART |
| ; GFX906-NEXT: ; def s12 |
| ; GFX906-NEXT: ;;#ASMEND |
| ; GFX906-NEXT: v_writelane_b32 v39, s12, 13 |
| ; GFX906-NEXT: ;;#ASMSTART |
| ; GFX906-NEXT: ; def s13 |
| ; GFX906-NEXT: ;;#ASMEND |
| ; GFX906-NEXT: v_writelane_b32 v39, s13, 14 |
| ; GFX906-NEXT: ;;#ASMSTART |
| ; GFX906-NEXT: ; def s14 |
| ; GFX906-NEXT: ;;#ASMEND |
| ; GFX906-NEXT: v_writelane_b32 v39, s14, 15 |
| ; GFX906-NEXT: ;;#ASMSTART |
| ; GFX906-NEXT: ; def s15 |
| ; GFX906-NEXT: ;;#ASMEND |
| ; GFX906-NEXT: v_writelane_b32 v39, s15, 16 |
| ; GFX906-NEXT: s_getpc_b64 s[10:11] |
| ; GFX906-NEXT: s_add_u32 s10, s10, foo@gotpcrel32@lo+4 |
| ; GFX906-NEXT: s_addc_u32 s11, s11, foo@gotpcrel32@hi+12 |
| ; GFX906-NEXT: ;;#ASMSTART |
| ; GFX906-NEXT: ; def s16 |
| ; GFX906-NEXT: ;;#ASMEND |
| ; GFX906-NEXT: v_writelane_b32 v39, s16, 17 |
| ; GFX906-NEXT: s_load_dwordx2 s[10:11], s[10:11], 0x0 |
| ; GFX906-NEXT: ;;#ASMSTART |
| ; GFX906-NEXT: ; def s17 |
| ; GFX906-NEXT: ;;#ASMEND |
| ; GFX906-NEXT: v_writelane_b32 v39, s17, 18 |
| ; GFX906-NEXT: ;;#ASMSTART |
| ; GFX906-NEXT: ; def s18 |
| ; GFX906-NEXT: ;;#ASMEND |
| ; GFX906-NEXT: v_writelane_b32 v39, s18, 19 |
| ; GFX906-NEXT: ;;#ASMSTART |
| ; GFX906-NEXT: ; def s19 |
| ; GFX906-NEXT: ;;#ASMEND |
| ; GFX906-NEXT: v_writelane_b32 v39, s19, 20 |
| ; GFX906-NEXT: ;;#ASMSTART |
| ; GFX906-NEXT: ; def s20 |
| ; GFX906-NEXT: ;;#ASMEND |
| ; GFX906-NEXT: v_writelane_b32 v39, s20, 21 |
| ; GFX906-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX906-NEXT: v_writelane_b32 v39, s10, 22 |
| ; GFX906-NEXT: v_writelane_b32 v39, s11, 23 |
| ; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1 |
| ; GFX906-NEXT: s_mov_b64 exec, s[34:35] |
| ; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1 |
| ; GFX906-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill |
| ; GFX906-NEXT: s_mov_b64 exec, s[34:35] |
| ; GFX906-NEXT: v_readlane_b32 s16, v39, 22 |
| ; GFX906-NEXT: s_mov_b32 s12, s24 |
| ; GFX906-NEXT: s_mov_b32 s13, s23 |
| ; GFX906-NEXT: s_mov_b32 s14, s22 |
| ; GFX906-NEXT: v_mov_b32_e32 v31, v32 |
| ; GFX906-NEXT: s_mov_b32 s15, s21 |
| ; GFX906-NEXT: s_mov_b64 s[10:11], s[26:27] |
| ; GFX906-NEXT: v_readlane_b32 s17, v39, 23 |
| ; GFX906-NEXT: v_mov_b32_e32 v40, v32 |
| ; GFX906-NEXT: s_swappc_b64 s[30:31], s[16:17] |
| ; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1 |
| ; GFX906-NEXT: buffer_load_dword v39, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload |
| ; GFX906-NEXT: s_mov_b64 exec, s[34:35] |
| ; GFX906-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-NEXT: v_readlane_b32 s11, v39, 12 |
| ; GFX906-NEXT: ;;#ASMSTART |
| ; GFX906-NEXT: ; use s11 |
| ; GFX906-NEXT: ;;#ASMEND |
| ; GFX906-NEXT: v_readlane_b32 s12, v39, 13 |
| ; GFX906-NEXT: ;;#ASMSTART |
| ; GFX906-NEXT: ; use s12 |
| ; GFX906-NEXT: ;;#ASMEND |
| ; GFX906-NEXT: v_readlane_b32 s13, v39, 14 |
| ; GFX906-NEXT: ;;#ASMSTART |
| ; GFX906-NEXT: ; use s13 |
| ; GFX906-NEXT: ;;#ASMEND |
| ; GFX906-NEXT: v_readlane_b32 s14, v39, 15 |
| ; GFX906-NEXT: ;;#ASMSTART |
| ; GFX906-NEXT: ; use s14 |
| ; GFX906-NEXT: ;;#ASMEND |
| ; GFX906-NEXT: v_readlane_b32 s15, v39, 16 |
| ; GFX906-NEXT: ;;#ASMSTART |
| ; GFX906-NEXT: ; use s15 |
| ; GFX906-NEXT: ;;#ASMEND |
| ; GFX906-NEXT: v_readlane_b32 s16, v39, 17 |
| ; GFX906-NEXT: ;;#ASMSTART |
| ; GFX906-NEXT: ; use s16 |
| ; GFX906-NEXT: ;;#ASMEND |
| ; GFX906-NEXT: v_readlane_b32 s17, v39, 18 |
| ; GFX906-NEXT: ;;#ASMSTART |
| ; GFX906-NEXT: ; use s17 |
| ; GFX906-NEXT: ;;#ASMEND |
| ; GFX906-NEXT: v_readlane_b32 s18, v39, 19 |
| ; GFX906-NEXT: ;;#ASMSTART |
| ; GFX906-NEXT: ; use s18 |
| ; GFX906-NEXT: ;;#ASMEND |
| ; GFX906-NEXT: v_readlane_b32 s19, v39, 20 |
| ; GFX906-NEXT: ;;#ASMSTART |
| ; GFX906-NEXT: ; use s19 |
| ; GFX906-NEXT: ;;#ASMEND |
| ; GFX906-NEXT: v_readlane_b32 s20, v39, 21 |
| ; GFX906-NEXT: ;;#ASMSTART |
| ; GFX906-NEXT: ; use s20 |
| ; GFX906-NEXT: ;;#ASMEND |
| ; GFX906-NEXT: ;;#ASMSTART |
| ; GFX906-NEXT: ; def s21 |
| ; GFX906-NEXT: ;;#ASMEND |
| ; GFX906-NEXT: v_writelane_b32 v39, s21, 12 |
| ; GFX906-NEXT: ;;#ASMSTART |
| ; GFX906-NEXT: ; def s22 |
| ; GFX906-NEXT: ;;#ASMEND |
| ; GFX906-NEXT: v_writelane_b32 v39, s22, 13 |
| ; GFX906-NEXT: ;;#ASMSTART |
| ; GFX906-NEXT: ; def s23 |
| ; GFX906-NEXT: ;;#ASMEND |
| ; GFX906-NEXT: v_writelane_b32 v39, s23, 14 |
| ; GFX906-NEXT: ;;#ASMSTART |
| ; GFX906-NEXT: ; def s24 |
| ; GFX906-NEXT: ;;#ASMEND |
| ; GFX906-NEXT: v_writelane_b32 v39, s24, 15 |
| ; GFX906-NEXT: ;;#ASMSTART |
| ; GFX906-NEXT: ; def s25 |
| ; GFX906-NEXT: ;;#ASMEND |
| ; GFX906-NEXT: v_writelane_b32 v39, s25, 16 |
| ; GFX906-NEXT: ;;#ASMSTART |
| ; GFX906-NEXT: ; def s26 |
| ; GFX906-NEXT: ;;#ASMEND |
| ; GFX906-NEXT: v_writelane_b32 v39, s26, 17 |
| ; GFX906-NEXT: ;;#ASMSTART |
| ; GFX906-NEXT: ; def s27 |
| ; GFX906-NEXT: ;;#ASMEND |
| ; GFX906-NEXT: v_writelane_b32 v39, s27, 18 |
| ; GFX906-NEXT: ;;#ASMSTART |
| ; GFX906-NEXT: ; def s28 |
| ; GFX906-NEXT: ;;#ASMEND |
| ; GFX906-NEXT: v_writelane_b32 v39, s28, 19 |
| ; GFX906-NEXT: ;;#ASMSTART |
| ; GFX906-NEXT: ; def s29 |
| ; GFX906-NEXT: ;;#ASMEND |
| ; GFX906-NEXT: v_writelane_b32 v39, s29, 20 |
| ; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1 |
| ; GFX906-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill |
| ; GFX906-NEXT: s_mov_b64 exec, s[34:35] |
| ; GFX906-NEXT: v_readlane_b32 s4, v39, 10 |
| ; GFX906-NEXT: v_readlane_b32 s6, v39, 8 |
| ; GFX906-NEXT: v_readlane_b32 s8, v39, 6 |
| ; GFX906-NEXT: v_readlane_b32 s10, v39, 4 |
| ; GFX906-NEXT: v_readlane_b32 s16, v39, 22 |
| ; GFX906-NEXT: v_readlane_b32 s12, v39, 3 |
| ; GFX906-NEXT: v_mov_b32_e32 v31, v40 |
| ; GFX906-NEXT: v_readlane_b32 s13, v39, 2 |
| ; GFX906-NEXT: v_readlane_b32 s14, v39, 1 |
| ; GFX906-NEXT: v_readlane_b32 s15, v39, 0 |
| ; GFX906-NEXT: v_readlane_b32 s5, v39, 11 |
| ; GFX906-NEXT: v_readlane_b32 s7, v39, 9 |
| ; GFX906-NEXT: v_readlane_b32 s9, v39, 7 |
| ; GFX906-NEXT: v_readlane_b32 s11, v39, 5 |
| ; GFX906-NEXT: v_readlane_b32 s17, v39, 23 |
| ; GFX906-NEXT: s_swappc_b64 s[30:31], s[16:17] |
| ; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1 |
| ; GFX906-NEXT: buffer_load_dword v39, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload |
| ; GFX906-NEXT: s_mov_b64 exec, s[34:35] |
| ; GFX906-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-NEXT: v_readlane_b32 s4, v39, 10 |
| ; GFX906-NEXT: v_readlane_b32 s6, v39, 8 |
| ; GFX906-NEXT: v_readlane_b32 s8, v39, 6 |
| ; GFX906-NEXT: v_readlane_b32 s10, v39, 4 |
| ; GFX906-NEXT: v_readlane_b32 s16, v39, 22 |
| ; GFX906-NEXT: v_readlane_b32 s5, v39, 11 |
| ; GFX906-NEXT: v_readlane_b32 s7, v39, 9 |
| ; GFX906-NEXT: v_readlane_b32 s9, v39, 7 |
| ; GFX906-NEXT: v_readlane_b32 s11, v39, 5 |
| ; GFX906-NEXT: v_readlane_b32 s12, v39, 3 |
| ; GFX906-NEXT: v_readlane_b32 s13, v39, 2 |
| ; GFX906-NEXT: v_readlane_b32 s14, v39, 1 |
| ; GFX906-NEXT: v_readlane_b32 s15, v39, 0 |
| ; GFX906-NEXT: v_mov_b32_e32 v31, v40 |
| ; GFX906-NEXT: v_readlane_b32 s17, v39, 23 |
| ; GFX906-NEXT: v_readlane_b32 s21, v39, 12 |
| ; GFX906-NEXT: ;;#ASMSTART |
| ; GFX906-NEXT: ; use s21 |
| ; GFX906-NEXT: ;;#ASMEND |
| ; GFX906-NEXT: v_readlane_b32 s22, v39, 13 |
| ; GFX906-NEXT: ;;#ASMSTART |
| ; GFX906-NEXT: ; use s22 |
| ; GFX906-NEXT: ;;#ASMEND |
| ; GFX906-NEXT: v_readlane_b32 s23, v39, 14 |
| ; GFX906-NEXT: ;;#ASMSTART |
| ; GFX906-NEXT: ; use s23 |
| ; GFX906-NEXT: ;;#ASMEND |
| ; GFX906-NEXT: v_readlane_b32 s24, v39, 15 |
| ; GFX906-NEXT: ;;#ASMSTART |
| ; GFX906-NEXT: ; use s24 |
| ; GFX906-NEXT: ;;#ASMEND |
| ; GFX906-NEXT: v_readlane_b32 s25, v39, 16 |
| ; GFX906-NEXT: ;;#ASMSTART |
| ; GFX906-NEXT: ; use s25 |
| ; GFX906-NEXT: ;;#ASMEND |
| ; GFX906-NEXT: v_readlane_b32 s26, v39, 17 |
| ; GFX906-NEXT: ;;#ASMSTART |
| ; GFX906-NEXT: ; use s26 |
| ; GFX906-NEXT: ;;#ASMEND |
| ; GFX906-NEXT: v_readlane_b32 s27, v39, 18 |
| ; GFX906-NEXT: ;;#ASMSTART |
| ; GFX906-NEXT: ; use s27 |
| ; GFX906-NEXT: ;;#ASMEND |
| ; GFX906-NEXT: v_readlane_b32 s28, v39, 19 |
| ; GFX906-NEXT: ;;#ASMSTART |
| ; GFX906-NEXT: ; use s28 |
| ; GFX906-NEXT: ;;#ASMEND |
| ; GFX906-NEXT: v_readlane_b32 s29, v39, 20 |
| ; GFX906-NEXT: ;;#ASMSTART |
| ; GFX906-NEXT: ; use s29 |
| ; GFX906-NEXT: ;;#ASMEND |
| ; GFX906-NEXT: s_swappc_b64 s[30:31], s[16:17] |
| ; GFX906-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:60 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:68 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:76 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:100 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v23, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v24, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v25, off, s[0:3], s33 offset:112 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v26, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v27, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v28, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v29, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v30, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload |
| ; GFX906-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload |
| ; GFX906-NEXT: v_readlane_b32 s31, v41, 1 |
| ; GFX906-NEXT: v_readlane_b32 s30, v41, 0 |
| ; GFX906-NEXT: s_mov_b32 s32, s33 |
| ; GFX906-NEXT: v_readlane_b32 s4, v41, 4 |
| ; GFX906-NEXT: v_readlane_b32 s34, v41, 2 |
| ; GFX906-NEXT: v_readlane_b32 s35, v41, 3 |
| ; GFX906-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-NEXT: flat_store_dwordx4 v[0:1], v[30:33] offset:112 |
| ; GFX906-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-NEXT: flat_store_dwordx4 v[0:1], v[26:29] offset:96 |
| ; GFX906-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-NEXT: flat_store_dwordx4 v[0:1], v[22:25] offset:80 |
| ; GFX906-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-NEXT: flat_store_dwordx4 v[0:1], v[18:21] offset:64 |
| ; GFX906-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-NEXT: flat_store_dwordx4 v[0:1], v[14:17] offset:48 |
| ; GFX906-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-NEXT: flat_store_dwordx4 v[0:1], v[10:13] offset:32 |
| ; GFX906-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-NEXT: flat_store_dwordx4 v[0:1], v[6:9] offset:16 |
| ; GFX906-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-NEXT: flat_store_dwordx4 v[0:1], v[2:5] |
| ; GFX906-NEXT: s_waitcnt vmcnt(0) |
| ; GFX906-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload |
| ; GFX906-NEXT: s_xor_saveexec_b64 s[6:7], -1 |
| ; GFX906-NEXT: buffer_load_dword v39, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload |
| ; GFX906-NEXT: s_mov_b64 exec, -1 |
| ; GFX906-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:148 ; 4-byte Folded Reload |
| ; GFX906-NEXT: s_mov_b64 exec, s[6:7] |
| ; GFX906-NEXT: s_mov_b32 s33, s4 |
| ; GFX906-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; GFX906-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX908-LABEL: preserve_wwm_copy_dstreg: |
| ; GFX908: ; %bb.0: |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX908-NEXT: s_mov_b32 s16, s33 |
| ; GFX908-NEXT: s_mov_b32 s33, s32 |
| ; GFX908-NEXT: s_xor_saveexec_b64 s[18:19], -1 |
| ; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill |
| ; GFX908-NEXT: s_mov_b64 exec, s[18:19] |
| ; GFX908-NEXT: v_mov_b32_e32 v2, s16 |
| ; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:164 ; 4-byte Folded Spill |
| ; GFX908-NEXT: v_mov_b32_e32 v2, s34 |
| ; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:156 ; 4-byte Folded Spill |
| ; GFX908-NEXT: v_mov_b32_e32 v2, s35 |
| ; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:160 ; 4-byte Folded Spill |
| ; GFX908-NEXT: s_addk_i32 s32, 0x2c00 |
| ; GFX908-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill |
| ; GFX908-NEXT: s_mov_b64 s[16:17], exec |
| ; GFX908-NEXT: s_mov_b64 exec, 1 |
| ; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:168 |
| ; GFX908-NEXT: v_writelane_b32 v2, s30, 0 |
| ; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill |
| ; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:168 |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: s_mov_b64 exec, s[16:17] |
| ; GFX908-NEXT: s_mov_b64 s[16:17], exec |
| ; GFX908-NEXT: s_mov_b64 exec, 1 |
| ; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:168 |
| ; GFX908-NEXT: v_writelane_b32 v2, s31, 0 |
| ; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill |
| ; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:168 |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: s_mov_b64 exec, s[16:17] |
| ; GFX908-NEXT: s_mov_b32 s21, s15 |
| ; GFX908-NEXT: ; implicit-def: $vgpr39 : SGPR spill to VGPR lane |
| ; GFX908-NEXT: s_mov_b32 s22, s14 |
| ; GFX908-NEXT: v_writelane_b32 v39, s21, 0 |
| ; GFX908-NEXT: v_writelane_b32 v39, s22, 1 |
| ; GFX908-NEXT: s_mov_b32 s23, s13 |
| ; GFX908-NEXT: v_writelane_b32 v39, s23, 2 |
| ; GFX908-NEXT: s_mov_b32 s24, s12 |
| ; GFX908-NEXT: v_writelane_b32 v39, s24, 3 |
| ; GFX908-NEXT: s_mov_b64 s[26:27], s[10:11] |
| ; GFX908-NEXT: v_writelane_b32 v39, s26, 4 |
| ; GFX908-NEXT: v_writelane_b32 v39, s27, 5 |
| ; GFX908-NEXT: v_writelane_b32 v39, s8, 6 |
| ; GFX908-NEXT: v_writelane_b32 v39, s9, 7 |
| ; GFX908-NEXT: v_writelane_b32 v39, s6, 8 |
| ; GFX908-NEXT: v_writelane_b32 v39, s7, 9 |
| ; GFX908-NEXT: v_writelane_b32 v39, s4, 10 |
| ; GFX908-NEXT: v_mov_b32_e32 v32, v31 |
| ; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill |
| ; GFX908-NEXT: s_nop 0 |
| ; GFX908-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill |
| ; GFX908-NEXT: v_writelane_b32 v39, s5, 11 |
| ; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1 |
| ; GFX908-NEXT: s_mov_b64 exec, s[34:35] |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; def v[0:31] |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill |
| ; GFX908-NEXT: s_nop 0 |
| ; GFX908-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill |
| ; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill |
| ; GFX908-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill |
| ; GFX908-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill |
| ; GFX908-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill |
| ; GFX908-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill |
| ; GFX908-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill |
| ; GFX908-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill |
| ; GFX908-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill |
| ; GFX908-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill |
| ; GFX908-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill |
| ; GFX908-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill |
| ; GFX908-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill |
| ; GFX908-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill |
| ; GFX908-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill |
| ; GFX908-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill |
| ; GFX908-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill |
| ; GFX908-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill |
| ; GFX908-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill |
| ; GFX908-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill |
| ; GFX908-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill |
| ; GFX908-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill |
| ; GFX908-NEXT: buffer_store_dword v23, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill |
| ; GFX908-NEXT: buffer_store_dword v24, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill |
| ; GFX908-NEXT: buffer_store_dword v25, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill |
| ; GFX908-NEXT: buffer_store_dword v26, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill |
| ; GFX908-NEXT: buffer_store_dword v27, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill |
| ; GFX908-NEXT: buffer_store_dword v28, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill |
| ; GFX908-NEXT: buffer_store_dword v29, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill |
| ; GFX908-NEXT: buffer_store_dword v30, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill |
| ; GFX908-NEXT: buffer_store_dword v31, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; def v40 |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; def s11 |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: v_writelane_b32 v39, s11, 12 |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; def s12 |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: v_writelane_b32 v39, s12, 13 |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; def s13 |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: v_writelane_b32 v39, s13, 14 |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; def s14 |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: v_writelane_b32 v39, s14, 15 |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; def s15 |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: v_writelane_b32 v39, s15, 16 |
| ; GFX908-NEXT: s_getpc_b64 s[10:11] |
| ; GFX908-NEXT: s_add_u32 s10, s10, foo@gotpcrel32@lo+4 |
| ; GFX908-NEXT: s_addc_u32 s11, s11, foo@gotpcrel32@hi+12 |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; def s16 |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: v_writelane_b32 v39, s16, 17 |
| ; GFX908-NEXT: s_load_dwordx2 s[10:11], s[10:11], 0x0 |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; def s17 |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: v_writelane_b32 v39, s17, 18 |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; def s18 |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: v_writelane_b32 v39, s18, 19 |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; def s19 |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: v_writelane_b32 v39, s19, 20 |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; def s20 |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: v_writelane_b32 v39, s20, 21 |
| ; GFX908-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX908-NEXT: v_writelane_b32 v39, s10, 22 |
| ; GFX908-NEXT: v_writelane_b32 v39, s11, 23 |
| ; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1 |
| ; GFX908-NEXT: s_mov_b64 exec, s[34:35] |
| ; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1 |
| ; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill |
| ; GFX908-NEXT: s_mov_b64 exec, s[34:35] |
| ; GFX908-NEXT: v_readlane_b32 s16, v39, 22 |
| ; GFX908-NEXT: s_mov_b32 s12, s24 |
| ; GFX908-NEXT: s_mov_b32 s13, s23 |
| ; GFX908-NEXT: s_mov_b32 s14, s22 |
| ; GFX908-NEXT: v_mov_b32_e32 v31, v32 |
| ; GFX908-NEXT: s_mov_b32 s15, s21 |
| ; GFX908-NEXT: s_mov_b64 s[10:11], s[26:27] |
| ; GFX908-NEXT: v_readlane_b32 s17, v39, 23 |
| ; GFX908-NEXT: v_mov_b32_e32 v40, v32 |
| ; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17] |
| ; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1 |
| ; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload |
| ; GFX908-NEXT: s_mov_b64 exec, s[34:35] |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: v_readlane_b32 s11, v39, 12 |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; use s11 |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: v_readlane_b32 s12, v39, 13 |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; use s12 |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: v_readlane_b32 s13, v39, 14 |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; use s13 |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: v_readlane_b32 s14, v39, 15 |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; use s14 |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: v_readlane_b32 s15, v39, 16 |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; use s15 |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: v_readlane_b32 s16, v39, 17 |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; use s16 |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: v_readlane_b32 s17, v39, 18 |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; use s17 |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: v_readlane_b32 s18, v39, 19 |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; use s18 |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: v_readlane_b32 s19, v39, 20 |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; use s19 |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: v_readlane_b32 s20, v39, 21 |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; use s20 |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; def s21 |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: v_writelane_b32 v39, s21, 12 |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; def s22 |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: v_writelane_b32 v39, s22, 13 |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; def s23 |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: v_writelane_b32 v39, s23, 14 |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; def s24 |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: v_writelane_b32 v39, s24, 15 |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; def s25 |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: v_writelane_b32 v39, s25, 16 |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; def s26 |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: v_writelane_b32 v39, s26, 17 |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; def s27 |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: v_writelane_b32 v39, s27, 18 |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; def s28 |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: v_writelane_b32 v39, s28, 19 |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; def s29 |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: v_writelane_b32 v39, s29, 20 |
| ; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1 |
| ; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill |
| ; GFX908-NEXT: s_mov_b64 exec, s[34:35] |
| ; GFX908-NEXT: v_readlane_b32 s4, v39, 10 |
| ; GFX908-NEXT: v_readlane_b32 s6, v39, 8 |
| ; GFX908-NEXT: v_readlane_b32 s8, v39, 6 |
| ; GFX908-NEXT: v_readlane_b32 s10, v39, 4 |
| ; GFX908-NEXT: v_readlane_b32 s16, v39, 22 |
| ; GFX908-NEXT: v_readlane_b32 s12, v39, 3 |
| ; GFX908-NEXT: v_mov_b32_e32 v31, v40 |
| ; GFX908-NEXT: v_readlane_b32 s13, v39, 2 |
| ; GFX908-NEXT: v_readlane_b32 s14, v39, 1 |
| ; GFX908-NEXT: v_readlane_b32 s15, v39, 0 |
| ; GFX908-NEXT: v_readlane_b32 s5, v39, 11 |
| ; GFX908-NEXT: v_readlane_b32 s7, v39, 9 |
| ; GFX908-NEXT: v_readlane_b32 s9, v39, 7 |
| ; GFX908-NEXT: v_readlane_b32 s11, v39, 5 |
| ; GFX908-NEXT: v_readlane_b32 s17, v39, 23 |
| ; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17] |
| ; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1 |
| ; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload |
| ; GFX908-NEXT: s_mov_b64 exec, s[34:35] |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: v_readlane_b32 s4, v39, 10 |
| ; GFX908-NEXT: v_readlane_b32 s6, v39, 8 |
| ; GFX908-NEXT: v_readlane_b32 s8, v39, 6 |
| ; GFX908-NEXT: v_readlane_b32 s10, v39, 4 |
| ; GFX908-NEXT: v_readlane_b32 s16, v39, 22 |
| ; GFX908-NEXT: v_readlane_b32 s5, v39, 11 |
| ; GFX908-NEXT: v_readlane_b32 s7, v39, 9 |
| ; GFX908-NEXT: v_readlane_b32 s9, v39, 7 |
| ; GFX908-NEXT: v_readlane_b32 s11, v39, 5 |
| ; GFX908-NEXT: v_readlane_b32 s12, v39, 3 |
| ; GFX908-NEXT: v_readlane_b32 s13, v39, 2 |
| ; GFX908-NEXT: v_readlane_b32 s14, v39, 1 |
| ; GFX908-NEXT: v_readlane_b32 s15, v39, 0 |
| ; GFX908-NEXT: v_mov_b32_e32 v31, v40 |
| ; GFX908-NEXT: v_readlane_b32 s17, v39, 23 |
| ; GFX908-NEXT: v_readlane_b32 s21, v39, 12 |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; use s21 |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: v_readlane_b32 s22, v39, 13 |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; use s22 |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: v_readlane_b32 s23, v39, 14 |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; use s23 |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: v_readlane_b32 s24, v39, 15 |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; use s24 |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: v_readlane_b32 s25, v39, 16 |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; use s25 |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: v_readlane_b32 s26, v39, 17 |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; use s26 |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: v_readlane_b32 s27, v39, 18 |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; use s27 |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: v_readlane_b32 s28, v39, 19 |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; use s28 |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: v_readlane_b32 s29, v39, 20 |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; use s29 |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17] |
| ; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload |
| ; GFX908-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload |
| ; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload |
| ; GFX908-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload |
| ; GFX908-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload |
| ; GFX908-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload |
| ; GFX908-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload |
| ; GFX908-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload |
| ; GFX908-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload |
| ; GFX908-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload |
| ; GFX908-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:60 ; 4-byte Folded Reload |
| ; GFX908-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload |
| ; GFX908-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:68 ; 4-byte Folded Reload |
| ; GFX908-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload |
| ; GFX908-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:76 ; 4-byte Folded Reload |
| ; GFX908-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload |
| ; GFX908-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload |
| ; GFX908-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload |
| ; GFX908-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload |
| ; GFX908-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload |
| ; GFX908-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:100 ; 4-byte Folded Reload |
| ; GFX908-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload |
| ; GFX908-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload |
| ; GFX908-NEXT: buffer_load_dword v23, off, s[0:3], s33 offset:112 ; 4-byte Folded Reload |
| ; GFX908-NEXT: buffer_load_dword v24, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload |
| ; GFX908-NEXT: buffer_load_dword v25, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload |
| ; GFX908-NEXT: buffer_load_dword v26, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload |
| ; GFX908-NEXT: buffer_load_dword v27, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload |
| ; GFX908-NEXT: buffer_load_dword v28, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload |
| ; GFX908-NEXT: buffer_load_dword v29, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload |
| ; GFX908-NEXT: buffer_load_dword v30, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload |
| ; GFX908-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload |
| ; GFX908-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:148 ; 4-byte Folded Reload |
| ; GFX908-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:152 ; 4-byte Folded Reload |
| ; GFX908-NEXT: s_mov_b64 s[4:5], exec |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: flat_store_dwordx4 v[0:1], v[30:33] offset:112 |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: flat_store_dwordx4 v[0:1], v[26:29] offset:96 |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: flat_store_dwordx4 v[0:1], v[22:25] offset:80 |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: flat_store_dwordx4 v[0:1], v[18:21] offset:64 |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: flat_store_dwordx4 v[0:1], v[14:17] offset:48 |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: flat_store_dwordx4 v[0:1], v[10:13] offset:32 |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: flat_store_dwordx4 v[0:1], v[6:9] offset:16 |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: flat_store_dwordx4 v[0:1], v[2:5] |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: s_mov_b64 exec, 1 |
| ; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:168 |
| ; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: v_readlane_b32 s31, v0, 0 |
| ; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:168 |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX908-NEXT: s_mov_b64 s[4:5], exec |
| ; GFX908-NEXT: s_mov_b64 exec, 1 |
| ; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:168 |
| ; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: v_readlane_b32 s30, v0, 0 |
| ; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:168 |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: s_mov_b64 exec, s[4:5] |
| ; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:164 ; 4-byte Folded Reload |
| ; GFX908-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload |
| ; GFX908-NEXT: s_mov_b32 s32, s33 |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: v_readfirstlane_b32 s4, v0 |
| ; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:156 ; 4-byte Folded Reload |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: v_readfirstlane_b32 s34, v0 |
| ; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:160 ; 4-byte Folded Reload |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: v_readfirstlane_b32 s35, v0 |
| ; GFX908-NEXT: s_xor_saveexec_b64 s[6:7], -1 |
| ; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload |
| ; GFX908-NEXT: s_mov_b64 exec, s[6:7] |
| ; GFX908-NEXT: s_mov_b32 s33, s4 |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; GFX908-NEXT: s_setpc_b64 s[30:31] |
| %vreg0 = call <32 x float> asm sideeffect "; def $0", "=v"() |
| %v40 = call i32 asm sideeffect "; def $0","=${v40}"() |
| |
| %s11 = call i32 asm sideeffect "; def $0","=${s11}"() |
| %s12 = call i32 asm sideeffect "; def $0","=${s12}"() |
| %s13 = call i32 asm sideeffect "; def $0","=${s13}"() |
| %s14 = call i32 asm sideeffect "; def $0","=${s14}"() |
| %s15 = call i32 asm sideeffect "; def $0","=${s15}"() |
| %s16 = call i32 asm sideeffect "; def $0","=${s16}"() |
| %s17 = call i32 asm sideeffect "; def $0","=${s17}"() |
| %s18 = call i32 asm sideeffect "; def $0","=${s18}"() |
| %s19 = call i32 asm sideeffect "; def $0","=${s19}"() |
| %s20 = call i32 asm sideeffect "; def $0","=${s20}"() |
| call void @foo() |
| call void asm sideeffect "; use $0","${s11}"(i32 %s11) |
| call void asm sideeffect "; use $0","${s12}"(i32 %s12) |
| call void asm sideeffect "; use $0","${s13}"(i32 %s13) |
| call void asm sideeffect "; use $0","${s14}"(i32 %s14) |
| call void asm sideeffect "; use $0","${s15}"(i32 %s15) |
| call void asm sideeffect "; use $0","${s16}"(i32 %s16) |
| call void asm sideeffect "; use $0","${s17}"(i32 %s17) |
| call void asm sideeffect "; use $0","${s18}"(i32 %s18) |
| call void asm sideeffect "; use $0","${s19}"(i32 %s19) |
| call void asm sideeffect "; use $0","${s20}"(i32 %s20) |
| |
| %s21 = call i32 asm sideeffect "; def $0","=${s21}"() |
| %s22 = call i32 asm sideeffect "; def $0","=${s22}"() |
| %s23 = call i32 asm sideeffect "; def $0","=${s23}"() |
| %s24 = call i32 asm sideeffect "; def $0","=${s24}"() |
| %s25 = call i32 asm sideeffect "; def $0","=${s25}"() |
| %s26 = call i32 asm sideeffect "; def $0","=${s26}"() |
| %s27 = call i32 asm sideeffect "; def $0","=${s27}"() |
| %s28 = call i32 asm sideeffect "; def $0","=${s28}"() |
| %s29 = call i32 asm sideeffect "; def $0","=${s29}"() |
| call void @foo() |
| call void asm sideeffect "; use $0","${s21}"(i32 %s21) |
| call void asm sideeffect "; use $0","${s22}"(i32 %s22) |
| call void asm sideeffect "; use $0","${s23}"(i32 %s23) |
| call void asm sideeffect "; use $0","${s24}"(i32 %s24) |
| call void asm sideeffect "; use $0","${s25}"(i32 %s25) |
| call void asm sideeffect "; use $0","${s26}"(i32 %s26) |
| call void asm sideeffect "; use $0","${s27}"(i32 %s27) |
| call void asm sideeffect "; use $0","${s28}"(i32 %s28) |
| call void asm sideeffect "; use $0","${s29}"(i32 %s29) |
| |
| call void @foo() |
| |
| store volatile <32 x float> %vreg0, ptr %parg0 |
| |
| ret void |
| } |
| |
| ; External function with no body in this file; the test function calls it three times. |
| declare void @foo() |
| |
| ; Attribute group #0 is attached to @preserve_wwm_copy_dstreg. |
| ; NOTE(review): these two attributes presumably cap the VGPR/SGPR budget to create the high |
| ; register pressure the file header describes -- confirm against the AMDGPU backend docs. |
| attributes #0 = { "amdgpu-num-vgpr"="42" "amdgpu-num-sgpr"="40"} |
| |