blob: 7bc09057da897af05b26b6630055e851a3af7b04 [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10 %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX11 %s
declare hidden amdgpu_gfx void @external_void_func_void() #0
define amdgpu_gfx void @test_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void() #0 {
; GFX9-LABEL: test_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s34, s33
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
; GFX9-NEXT: v_writelane_b32 v40, s4, 0
; GFX9-NEXT: v_writelane_b32 v40, s5, 1
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 2
; GFX9-NEXT: v_writelane_b32 v41, s34, 0
; GFX9-NEXT: v_writelane_b32 v40, s31, 3
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: v_readlane_b32 s31, v40, 3
; GFX9-NEXT: v_readlane_b32 s30, v40, 2
; GFX9-NEXT: v_readlane_b32 s5, v40, 1
; GFX9-NEXT: v_readlane_b32 s4, v40, 0
; GFX9-NEXT: v_readlane_b32 s34, v41, 0
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: s_mov_b32 s33, s34
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s35
; GFX10-NEXT: v_writelane_b32 v40, s4, 0
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: v_writelane_b32 v41, s34, 0
; GFX10-NEXT: v_writelane_b32 v40, s5, 1
; GFX10-NEXT: s_getpc_b64 s[4:5]
; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s30, 2
; GFX10-NEXT: v_writelane_b32 v40, s31, 3
; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX10-NEXT: v_readlane_b32 s31, v40, 3
; GFX10-NEXT: v_readlane_b32 s30, v40, 2
; GFX10-NEXT: v_readlane_b32 s5, v40, 1
; GFX10-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-NEXT: v_readlane_b32 s34, v41, 0
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33
; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s35
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: s_mov_b32 s33, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: scratch_store_b32 off, v40, s33
; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:4
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: v_writelane_b32 v40, s4, 0
; GFX11-NEXT: s_add_i32 s32, s32, 16
; GFX11-NEXT: v_writelane_b32 v41, s0, 0
; GFX11-NEXT: v_writelane_b32 v40, s5, 1
; GFX11-NEXT: s_getpc_b64 s[4:5]
; GFX11-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; GFX11-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
; GFX11-NEXT: v_writelane_b32 v40, s30, 2
; GFX11-NEXT: v_writelane_b32 v40, s31, 3
; GFX11-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_readlane_b32 s31, v40, 3
; GFX11-NEXT: v_readlane_b32 s30, v40, 2
; GFX11-NEXT: v_readlane_b32 s5, v40, 1
; GFX11-NEXT: v_readlane_b32 s4, v40, 0
; GFX11-NEXT: v_readlane_b32 s0, v41, 0
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: scratch_load_b32 v40, off, s33
; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:4
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: s_add_i32 s32, s32, -16
; GFX11-NEXT: s_mov_b32 s33, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @external_void_func_void()
call void asm sideeffect "", ""() #0
call amdgpu_gfx void @external_void_func_void()
ret void
}
define amdgpu_gfx void @void_func_void_clobber_s28_s29() #1 {
; GFX9-LABEL: void_func_void_clobber_s28_s29:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_xor_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: v_writelane_b32 v0, s28, 0
; GFX9-NEXT: v_writelane_b32 v0, s29, 1
; GFX9-NEXT: v_writelane_b32 v0, s30, 2
; GFX9-NEXT: v_writelane_b32 v0, s31, 3
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; clobber
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; clobber
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: v_readlane_b32 s31, v0, 3
; GFX9-NEXT: v_readlane_b32 s30, v0, 2
; GFX9-NEXT: v_readlane_b32 s29, v0, 1
; GFX9-NEXT: v_readlane_b32 s28, v0, 0
; GFX9-NEXT: s_xor_saveexec_b64 s[34:35], -1
; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-NEXT: s_mov_b64 exec, s[34:35]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: void_func_void_clobber_s28_s29:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_xor_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: v_writelane_b32 v0, s28, 0
; GFX10-NEXT: v_writelane_b32 v0, s29, 1
; GFX10-NEXT: v_writelane_b32 v0, s30, 2
; GFX10-NEXT: v_writelane_b32 v0, s31, 3
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; clobber
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; clobber
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: v_readlane_b32 s31, v0, 3
; GFX10-NEXT: v_readlane_b32 s30, v0, 2
; GFX10-NEXT: v_readlane_b32 s29, v0, 1
; GFX10-NEXT: v_readlane_b32 s28, v0, 0
; GFX10-NEXT: s_xor_saveexec_b32 s34, -1
; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_void_clobber_s28_s29:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
; GFX11-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: v_writelane_b32 v0, s28, 0
; GFX11-NEXT: v_writelane_b32 v0, s29, 1
; GFX11-NEXT: v_writelane_b32 v0, s30, 2
; GFX11-NEXT: v_writelane_b32 v0, s31, 3
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; clobber
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; clobber
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_readlane_b32 s31, v0, 3
; GFX11-NEXT: v_readlane_b32 s30, v0, 2
; GFX11-NEXT: v_readlane_b32 s29, v0, 1
; GFX11-NEXT: v_readlane_b32 s28, v0, 0
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
; GFX11-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
call void asm sideeffect "; clobber", "~{s[30:31]}"() #0
call void asm sideeffect "; clobber", "~{s[28:29]}"() #0
ret void
}
define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(ptr addrspace(1) %out) #0 {
; GFX9-LABEL: test_call_void_func_void_mayclobber_s31:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s34, s33
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
; GFX9-NEXT: v_writelane_b32 v40, s4, 0
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 1
; GFX9-NEXT: v_writelane_b32 v41, s34, 0
; GFX9-NEXT: v_writelane_b32 v40, s31, 2
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def s31
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_mov_b32 s4, s31
; GFX9-NEXT: s_getpc_b64 s[34:35]
; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT: s_mov_b32 s31, s4
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s31
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: v_readlane_b32 s31, v40, 2
; GFX9-NEXT: v_readlane_b32 s30, v40, 1
; GFX9-NEXT: v_readlane_b32 s4, v40, 0
; GFX9-NEXT: v_readlane_b32 s34, v41, 0
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: s_mov_b32 s33, s34
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_void_func_void_mayclobber_s31:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s35
; GFX10-NEXT: v_writelane_b32 v40, s4, 0
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: v_writelane_b32 v41, s34, 0
; GFX10-NEXT: s_getpc_b64 s[34:35]
; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s30, 1
; GFX10-NEXT: v_writelane_b32 v40, s31, 2
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; def s31
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_mov_b32 s4, s31
; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT: s_mov_b32 s31, s4
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s31
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: v_readlane_b32 s31, v40, 2
; GFX10-NEXT: v_readlane_b32 s30, v40, 1
; GFX10-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-NEXT: v_readlane_b32 s34, v41, 0
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33
; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s35
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: s_mov_b32 s33, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_void_func_void_mayclobber_s31:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: scratch_store_b32 off, v40, s33
; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:4
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: v_writelane_b32 v40, s4, 0
; GFX11-NEXT: s_add_i32 s32, s32, 16
; GFX11-NEXT: v_writelane_b32 v41, s0, 0
; GFX11-NEXT: s_getpc_b64 s[0:1]
; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
; GFX11-NEXT: v_writelane_b32 v40, s30, 1
; GFX11-NEXT: v_writelane_b32 v40, s31, 2
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; def s31
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_mov_b32 s4, s31
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT: s_mov_b32 s31, s4
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s31
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: v_readlane_b32 s31, v40, 2
; GFX11-NEXT: v_readlane_b32 s30, v40, 1
; GFX11-NEXT: v_readlane_b32 s4, v40, 0
; GFX11-NEXT: v_readlane_b32 s0, v41, 0
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: scratch_load_b32 v40, off, s33
; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:4
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: s_add_i32 s32, s32, -16
; GFX11-NEXT: s_mov_b32 s33, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%s31 = call i32 asm sideeffect "; def $0", "={s31}"()
call amdgpu_gfx void @external_void_func_void()
call void asm sideeffect "; use $0", "{s31}"(i32 %s31)
ret void
}
define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(ptr addrspace(1) %out) #0 {
; GFX9-LABEL: test_call_void_func_void_mayclobber_v31:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s34, s33
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_writelane_b32 v42, s34, 0
; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def v31
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: v_mov_b32_e32 v41, v31
; GFX9-NEXT: s_getpc_b64 s[34:35]
; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT: v_mov_b32_e32 v31, v41
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use v31
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
; GFX9-NEXT: v_readlane_b32 s34, v42, 0
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: s_mov_b32 s33, s34
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_void_func_void_mayclobber_v31:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s35
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: v_writelane_b32 v42, s34, 0
; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; def v31
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-NEXT: v_mov_b32_e32 v41, v31
; GFX10-NEXT: s_getpc_b64 s[34:35]
; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT: v_mov_b32_e32 v31, v41
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use v31
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-NEXT: v_readlane_b32 s34, v42, 0
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4
; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:8
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s35
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: s_mov_b32 s33, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_void_func_void_mayclobber_v31:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:4
; GFX11-NEXT: scratch_store_b32 off, v42, s33 offset:8
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: v_writelane_b32 v40, s30, 0
; GFX11-NEXT: s_add_i32 s32, s32, 16
; GFX11-NEXT: v_writelane_b32 v42, s0, 0
; GFX11-NEXT: scratch_store_b32 off, v41, s33 ; 4-byte Folded Spill
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; def v31
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: v_writelane_b32 v40, s31, 1
; GFX11-NEXT: v_mov_b32_e32 v41, v31
; GFX11-NEXT: s_getpc_b64 s[0:1]
; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT: v_mov_b32_e32 v31, v41
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use v31
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: scratch_load_b32 v41, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT: v_readlane_b32 s31, v40, 1
; GFX11-NEXT: v_readlane_b32 s30, v40, 0
; GFX11-NEXT: v_readlane_b32 s0, v42, 0
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:4
; GFX11-NEXT: scratch_load_b32 v42, off, s33 offset:8
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: s_add_i32 s32, s32, -16
; GFX11-NEXT: s_mov_b32 s33, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%v31 = call i32 asm sideeffect "; def $0", "={v31}"()
call amdgpu_gfx void @external_void_func_void()
call void asm sideeffect "; use $0", "{v31}"(i32 %v31)
ret void
}
define amdgpu_gfx void @test_call_void_func_void_preserves_s33(ptr addrspace(1) %out) #0 {
; GFX9-LABEL: test_call_void_func_void_preserves_s33:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s34, s33
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
; GFX9-NEXT: v_writelane_b32 v40, s4, 0
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 1
; GFX9-NEXT: v_writelane_b32 v41, s34, 0
; GFX9-NEXT: v_writelane_b32 v40, s31, 2
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def s33
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_mov_b32 s4, s33
; GFX9-NEXT: s_getpc_b64 s[34:35]
; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT: s_mov_b32 s33, s4
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s33
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: v_readlane_b32 s31, v40, 2
; GFX9-NEXT: v_readlane_b32 s30, v40, 1
; GFX9-NEXT: v_readlane_b32 s4, v40, 0
; GFX9-NEXT: v_readlane_b32 s34, v41, 0
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: s_mov_b32 s33, s34
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_void_func_void_preserves_s33:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s35
; GFX10-NEXT: v_writelane_b32 v40, s4, 0
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: v_writelane_b32 v41, s34, 0
; GFX10-NEXT: s_getpc_b64 s[34:35]
; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; def s33
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: v_writelane_b32 v40, s30, 1
; GFX10-NEXT: s_mov_b32 s4, s33
; GFX10-NEXT: v_writelane_b32 v40, s31, 2
; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT: s_mov_b32 s33, s4
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s33
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: v_readlane_b32 s31, v40, 2
; GFX10-NEXT: v_readlane_b32 s30, v40, 1
; GFX10-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-NEXT: v_readlane_b32 s34, v41, 0
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33
; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s35
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: s_mov_b32 s33, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_void_func_void_preserves_s33:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: scratch_store_b32 off, v40, s33
; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:4
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: v_writelane_b32 v40, s4, 0
; GFX11-NEXT: s_add_i32 s32, s32, 16
; GFX11-NEXT: v_writelane_b32 v41, s0, 0
; GFX11-NEXT: s_getpc_b64 s[0:1]
; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; def s33
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: v_writelane_b32 v40, s30, 1
; GFX11-NEXT: s_mov_b32 s4, s33
; GFX11-NEXT: v_writelane_b32 v40, s31, 2
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT: s_mov_b32 s33, s4
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s33
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_readlane_b32 s31, v40, 2
; GFX11-NEXT: v_readlane_b32 s30, v40, 1
; GFX11-NEXT: v_readlane_b32 s4, v40, 0
; GFX11-NEXT: v_readlane_b32 s0, v41, 0
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: scratch_load_b32 v40, off, s33
; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:4
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: s_add_i32 s32, s32, -16
; GFX11-NEXT: s_mov_b32 s33, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%s33 = call i32 asm sideeffect "; def $0", "={s33}"()
call amdgpu_gfx void @external_void_func_void()
call void asm sideeffect "; use $0", "{s33}"(i32 %s33)
ret void
}
define amdgpu_gfx void @test_call_void_func_void_preserves_s34(ptr addrspace(1) %out) #0 {
; GFX9-LABEL: test_call_void_func_void_preserves_s34:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s34, s33
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
; GFX9-NEXT: v_writelane_b32 v40, s4, 0
; GFX9-NEXT: v_writelane_b32 v41, s34, 0
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 1
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def s34
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: v_writelane_b32 v40, s31, 2
; GFX9-NEXT: s_mov_b32 s4, s34
; GFX9-NEXT: s_getpc_b64 s[34:35]
; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT: s_mov_b32 s34, s4
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s34
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: v_readlane_b32 s31, v40, 2
; GFX9-NEXT: v_readlane_b32 s30, v40, 1
; GFX9-NEXT: v_readlane_b32 s4, v40, 0
; GFX9-NEXT: v_readlane_b32 s34, v41, 0
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: s_mov_b32 s33, s34
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_void_func_void_preserves_s34:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s35
; GFX10-NEXT: v_writelane_b32 v40, s4, 0
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: v_writelane_b32 v41, s34, 0
; GFX10-NEXT: s_getpc_b64 s[36:37]
; GFX10-NEXT: s_add_u32 s36, s36, external_void_func_void@rel32@lo+4
; GFX10-NEXT: s_addc_u32 s37, s37, external_void_func_void@rel32@hi+12
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; def s34
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: v_writelane_b32 v40, s30, 1
; GFX10-NEXT: s_mov_b32 s4, s34
; GFX10-NEXT: v_writelane_b32 v40, s31, 2
; GFX10-NEXT: s_swappc_b64 s[30:31], s[36:37]
; GFX10-NEXT: s_mov_b32 s34, s4
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s34
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: v_readlane_b32 s31, v40, 2
; GFX10-NEXT: v_readlane_b32 s30, v40, 1
; GFX10-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-NEXT: v_readlane_b32 s34, v41, 0
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33
; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s35
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: s_mov_b32 s33, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_void_func_void_preserves_s34:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: scratch_store_b32 off, v40, s33
; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:4
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: v_writelane_b32 v40, s4, 0
; GFX11-NEXT: s_add_i32 s32, s32, 16
; GFX11-NEXT: v_writelane_b32 v41, s0, 0
; GFX11-NEXT: s_getpc_b64 s[0:1]
; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; def s34
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: v_writelane_b32 v40, s30, 1
; GFX11-NEXT: s_mov_b32 s4, s34
; GFX11-NEXT: v_writelane_b32 v40, s31, 2
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT: s_mov_b32 s34, s4
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s34
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_readlane_b32 s31, v40, 2
; GFX11-NEXT: v_readlane_b32 s30, v40, 1
; GFX11-NEXT: v_readlane_b32 s4, v40, 0
; GFX11-NEXT: v_readlane_b32 s0, v41, 0
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: scratch_load_b32 v40, off, s33
; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:4
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: s_add_i32 s32, s32, -16
; GFX11-NEXT: s_mov_b32 s33, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%s34 = call i32 asm sideeffect "; def $0", "={s34}"()
call amdgpu_gfx void @external_void_func_void()
call void asm sideeffect "; use $0", "{s34}"(i32 %s34)
ret void
}
define amdgpu_gfx void @test_call_void_func_void_preserves_v40(ptr addrspace(1) %out) #0 {
; GFX9-LABEL: test_call_void_func_void_preserves_v40:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s34, s33
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v41, s30, 0
; GFX9-NEXT: v_writelane_b32 v42, s34, 0
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT: v_writelane_b32 v41, s31, 1
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def v40
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_getpc_b64 s[34:35]
; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use v40
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT: v_readlane_b32 s31, v41, 1
; GFX9-NEXT: v_readlane_b32 s30, v41, 0
; GFX9-NEXT: v_readlane_b32 s34, v42, 0
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: s_mov_b32 s33, s34
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_void_func_void_preserves_v40:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s35
; GFX10-NEXT: v_writelane_b32 v41, s30, 0
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: v_writelane_b32 v42, s34, 0
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; def v40
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: v_writelane_b32 v41, s31, 1
; GFX10-NEXT: s_getpc_b64 s[34:35]
; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use v40
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX10-NEXT: v_readlane_b32 s31, v41, 1
; GFX10-NEXT: v_readlane_b32 s30, v41, 0
; GFX10-NEXT: v_readlane_b32 s34, v42, 0
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4
; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:8
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s35
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: s_mov_b32 s33, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_void_func_void_preserves_v40:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:4
; GFX11-NEXT: scratch_store_b32 off, v42, s33 offset:8
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: v_writelane_b32 v41, s30, 0
; GFX11-NEXT: s_add_i32 s32, s32, 16
; GFX11-NEXT: v_writelane_b32 v42, s0, 0
; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; def v40
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: v_writelane_b32 v41, s31, 1
; GFX11-NEXT: s_getpc_b64 s[0:1]
; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use v40
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT: v_readlane_b32 s31, v41, 1
; GFX11-NEXT: v_readlane_b32 s30, v41, 0
; GFX11-NEXT: v_readlane_b32 s0, v42, 0
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:4
; GFX11-NEXT: scratch_load_b32 v42, off, s33 offset:8
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: s_add_i32 s32, s32, -16
; GFX11-NEXT: s_mov_b32 s33, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%v40 = call i32 asm sideeffect "; def $0", "={v40}"()
call amdgpu_gfx void @external_void_func_void()
call void asm sideeffect "; use $0", "{v40}"(i32 %v40)
ret void
}
define hidden void @void_func_void_clobber_s33() #1 {
; GFX9-LABEL: void_func_void_clobber_s33:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-NEXT: v_writelane_b32 v0, s33, 0
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; clobber
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: v_readlane_b32 s33, v0, 0
; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: void_func_void_clobber_s33:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_xor_saveexec_b32 s4, -1
; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s4
; GFX10-NEXT: v_writelane_b32 v0, s33, 0
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; clobber
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: v_readlane_b32 s33, v0, 0
; GFX10-NEXT: s_xor_saveexec_b32 s4, -1
; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s4
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_void_clobber_s33:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
; GFX11-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: v_writelane_b32 v0, s33, 0
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; clobber
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_readlane_b32 s33, v0, 0
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
; GFX11-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
call void asm sideeffect "; clobber", "~{s33}"() #0
ret void
}
define hidden void @void_func_void_clobber_s34() #1 {
; GFX9-LABEL: void_func_void_clobber_s34:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-NEXT: v_writelane_b32 v0, s34, 0
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; clobber
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: v_readlane_b32 s34, v0, 0
; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: void_func_void_clobber_s34:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_xor_saveexec_b32 s4, -1
; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s4
; GFX10-NEXT: v_writelane_b32 v0, s34, 0
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; clobber
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: v_readlane_b32 s34, v0, 0
; GFX10-NEXT: s_xor_saveexec_b32 s4, -1
; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s4
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_void_clobber_s34:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
; GFX11-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: v_writelane_b32 v0, s34, 0
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; clobber
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_readlane_b32 s34, v0, 0
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
; GFX11-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
call void asm sideeffect "; clobber", "~{s34}"() #0
ret void
}
define amdgpu_gfx void @test_call_void_func_void_clobber_s33() #0 {
; GFX9-LABEL: test_call_void_func_void_clobber_s33:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s34, s33
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_writelane_b32 v41, s34, 0
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_getpc_b64 s[34:35]
; GFX9-NEXT: s_add_u32 s34, s34, void_func_void_clobber_s33@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s35, s35, void_func_void_clobber_s33@rel32@hi+12
; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
; GFX9-NEXT: v_readlane_b32 s34, v41, 0
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: s_mov_b32 s33, s34
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_void_func_void_clobber_s33:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s35
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: v_writelane_b32 v41, s34, 0
; GFX10-NEXT: s_getpc_b64 s[34:35]
; GFX10-NEXT: s_add_u32 s34, s34, void_func_void_clobber_s33@rel32@lo+4
; GFX10-NEXT: s_addc_u32 s35, s35, void_func_void_clobber_s33@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-NEXT: v_readlane_b32 s34, v41, 0
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33
; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s35
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: s_mov_b32 s33, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_void_func_void_clobber_s33:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: scratch_store_b32 off, v40, s33
; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:4
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: v_writelane_b32 v40, s30, 0
; GFX11-NEXT: s_add_i32 s32, s32, 16
; GFX11-NEXT: v_writelane_b32 v41, s0, 0
; GFX11-NEXT: s_getpc_b64 s[0:1]
; GFX11-NEXT: s_add_u32 s0, s0, void_func_void_clobber_s33@rel32@lo+4
; GFX11-NEXT: s_addc_u32 s1, s1, void_func_void_clobber_s33@rel32@hi+12
; GFX11-NEXT: v_writelane_b32 v40, s31, 1
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_readlane_b32 s31, v40, 1
; GFX11-NEXT: v_readlane_b32 s30, v40, 0
; GFX11-NEXT: v_readlane_b32 s0, v41, 0
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: scratch_load_b32 v40, off, s33
; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:4
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: s_add_i32 s32, s32, -16
; GFX11-NEXT: s_mov_b32 s33, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @void_func_void_clobber_s33()
ret void
}
define amdgpu_gfx void @test_call_void_func_void_clobber_s34() #0 {
; GFX9-LABEL: test_call_void_func_void_clobber_s34:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s34, s33
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_writelane_b32 v41, s34, 0
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_getpc_b64 s[34:35]
; GFX9-NEXT: s_add_u32 s34, s34, void_func_void_clobber_s34@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s35, s35, void_func_void_clobber_s34@rel32@hi+12
; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
; GFX9-NEXT: v_readlane_b32 s34, v41, 0
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: s_mov_b32 s33, s34
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_call_void_func_void_clobber_s34:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s35
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: v_writelane_b32 v41, s34, 0
; GFX10-NEXT: s_getpc_b64 s[34:35]
; GFX10-NEXT: s_add_u32 s34, s34, void_func_void_clobber_s34@rel32@lo+4
; GFX10-NEXT: s_addc_u32 s35, s35, void_func_void_clobber_s34@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-NEXT: v_readlane_b32 s34, v41, 0
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33
; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s35
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: s_mov_b32 s33, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_void_func_void_clobber_s34:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: scratch_store_b32 off, v40, s33
; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:4
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: v_writelane_b32 v40, s30, 0
; GFX11-NEXT: s_add_i32 s32, s32, 16
; GFX11-NEXT: v_writelane_b32 v41, s0, 0
; GFX11-NEXT: s_getpc_b64 s[0:1]
; GFX11-NEXT: s_add_u32 s0, s0, void_func_void_clobber_s34@rel32@lo+4
; GFX11-NEXT: s_addc_u32 s1, s1, void_func_void_clobber_s34@rel32@hi+12
; GFX11-NEXT: v_writelane_b32 v40, s31, 1
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_readlane_b32 s31, v40, 1
; GFX11-NEXT: v_readlane_b32 s30, v40, 0
; GFX11-NEXT: v_readlane_b32 s0, v41, 0
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: scratch_load_b32 v40, off, s33
; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:4
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: s_add_i32 s32, s32, -16
; GFX11-NEXT: s_mov_b32 s33, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
call amdgpu_gfx void @void_func_void_clobber_s34()
ret void
}
define amdgpu_gfx void @callee_saved_sgpr_kernel() #1 {
; GFX9-LABEL: callee_saved_sgpr_kernel:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s34, s33
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
; GFX9-NEXT: v_writelane_b32 v40, s4, 0
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 1
; GFX9-NEXT: v_writelane_b32 v41, s34, 0
; GFX9-NEXT: v_writelane_b32 v40, s31, 2
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def s40
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_mov_b32 s4, s40
; GFX9-NEXT: s_getpc_b64 s[34:35]
; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s4
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: v_readlane_b32 s31, v40, 2
; GFX9-NEXT: v_readlane_b32 s30, v40, 1
; GFX9-NEXT: v_readlane_b32 s4, v40, 0
; GFX9-NEXT: v_readlane_b32 s34, v41, 0
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: s_mov_b32 s33, s34
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: callee_saved_sgpr_kernel:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s35
; GFX10-NEXT: v_writelane_b32 v40, s4, 0
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: v_writelane_b32 v41, s34, 0
; GFX10-NEXT: s_getpc_b64 s[34:35]
; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; def s40
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: v_writelane_b32 v40, s30, 1
; GFX10-NEXT: s_mov_b32 s4, s40
; GFX10-NEXT: v_writelane_b32 v40, s31, 2
; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s4
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: v_readlane_b32 s31, v40, 2
; GFX10-NEXT: v_readlane_b32 s30, v40, 1
; GFX10-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-NEXT: v_readlane_b32 s34, v41, 0
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33
; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s35
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: s_mov_b32 s33, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: callee_saved_sgpr_kernel:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: scratch_store_b32 off, v40, s33
; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:4
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: v_writelane_b32 v40, s4, 0
; GFX11-NEXT: s_add_i32 s32, s32, 16
; GFX11-NEXT: v_writelane_b32 v41, s0, 0
; GFX11-NEXT: s_getpc_b64 s[0:1]
; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; def s40
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: v_writelane_b32 v40, s30, 1
; GFX11-NEXT: s_mov_b32 s4, s40
; GFX11-NEXT: v_writelane_b32 v40, s31, 2
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s4
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_readlane_b32 s31, v40, 2
; GFX11-NEXT: v_readlane_b32 s30, v40, 1
; GFX11-NEXT: v_readlane_b32 s4, v40, 0
; GFX11-NEXT: v_readlane_b32 s0, v41, 0
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: scratch_load_b32 v40, off, s33
; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:4
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: s_add_i32 s32, s32, -16
; GFX11-NEXT: s_mov_b32 s33, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%s40 = call i32 asm sideeffect "; def s40", "={s40}"() #0
call amdgpu_gfx void @external_void_func_void()
call void asm sideeffect "; use $0", "s"(i32 %s40) #0
ret void
}
define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 {
; GFX9-LABEL: callee_saved_sgpr_vgpr_kernel:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s34, s33
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
; GFX9-NEXT: v_writelane_b32 v40, s4, 0
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: v_writelane_b32 v40, s30, 1
; GFX9-NEXT: v_writelane_b32 v42, s34, 0
; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT: v_writelane_b32 v40, s31, 2
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def s40
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_mov_b32 s4, s40
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def v32
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: v_mov_b32_e32 v41, v32
; GFX9-NEXT: s_getpc_b64 s[34:35]
; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s4
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use v41
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT: v_readlane_b32 s31, v40, 2
; GFX9-NEXT: v_readlane_b32 s30, v40, 1
; GFX9-NEXT: v_readlane_b32 s4, v40, 0
; GFX9-NEXT: v_readlane_b32 s34, v42, 0
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: s_mov_b32 s33, s34
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: callee_saved_sgpr_vgpr_kernel:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX10-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s35
; GFX10-NEXT: v_writelane_b32 v40, s4, 0
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: v_writelane_b32 v42, s34, 0
; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; def s40
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: v_writelane_b32 v40, s30, 1
; GFX10-NEXT: s_mov_b32 s4, s40
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; def v32
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: v_mov_b32_e32 v41, v32
; GFX10-NEXT: s_getpc_b64 s[34:35]
; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
; GFX10-NEXT: v_writelane_b32 v40, s31, 2
; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s4
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use v41
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX10-NEXT: v_readlane_b32 s31, v40, 2
; GFX10-NEXT: v_readlane_b32 s30, v40, 1
; GFX10-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-NEXT: v_readlane_b32 s34, v42, 0
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4
; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:8
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s35
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
; GFX10-NEXT: s_mov_b32 s33, s34
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: callee_saved_sgpr_vgpr_kernel:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:4
; GFX11-NEXT: scratch_store_b32 off, v42, s33 offset:8
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: v_writelane_b32 v40, s4, 0
; GFX11-NEXT: s_add_i32 s32, s32, 16
; GFX11-NEXT: v_writelane_b32 v42, s0, 0
; GFX11-NEXT: scratch_store_b32 off, v41, s33 ; 4-byte Folded Spill
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; def s40
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: v_writelane_b32 v40, s30, 1
; GFX11-NEXT: s_mov_b32 s4, s40
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; def v32
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: v_mov_b32_e32 v41, v32
; GFX11-NEXT: s_getpc_b64 s[0:1]
; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
; GFX11-NEXT: v_writelane_b32 v40, s31, 2
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s4
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use v41
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: scratch_load_b32 v41, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT: v_readlane_b32 s31, v40, 2
; GFX11-NEXT: v_readlane_b32 s30, v40, 1
; GFX11-NEXT: v_readlane_b32 s4, v40, 0
; GFX11-NEXT: v_readlane_b32 s0, v42, 0
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:4
; GFX11-NEXT: scratch_load_b32 v42, off, s33 offset:8
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: s_add_i32 s32, s32, -16
; GFX11-NEXT: s_mov_b32 s33, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
%s40 = call i32 asm sideeffect "; def s40", "={s40}"() #0
%v32 = call i32 asm sideeffect "; def v32", "={v32}"() #0
call amdgpu_gfx void @external_void_func_void()
call void asm sideeffect "; use $0", "s"(i32 %s40) #0
call void asm sideeffect "; use $0", "v"(i32 %v32) #0
ret void
}
attributes #0 = { nounwind }
attributes #1 = { nounwind noinline }