; blob: 61a195f9c314fffce460b2d5fb3dff221d773682 [file] [log] [blame] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 < %s | FileCheck -check-prefixes=GCN,MUBUF %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -enable-ipra=0 < %s | FileCheck -check-prefixes=GCN,MUBUF %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 < %s | FileCheck -check-prefixes=GCN,MUBUF %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=GCN,FLATSCR %s
; External callee shared by the call tests in this file. It is only declared
; here, so its body is not visible to the tests that call it. Attribute group
; #3 marks it nounwind and attaches the "amdgpu-no-*" string attributes.
declare hidden void @external_void_func_void() #3
; Kernel that calls @external_void_func_void twice, with an empty
; side-effecting inline asm statement between the two calls.
; The FLATSCR checks expect the callee address to be formed once in s[34:35]
; (s_getpc_b64 plus the two rel32 additions) and reused by both s_swappc_b64
; instructions, with s32 set to 0 before the first call.
; Only FLATSCR check lines are present for this kernel.
; NOTE(review): the name mentions "clobber_s30_s31", but the asm below has an
; empty constraint string and declares no clobbers - confirm this is intended.
define amdgpu_kernel void @test_kernel_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void() #0 {
; FLATSCR-LABEL: test_kernel_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
; FLATSCR: ; %bb.0:
; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s8, s13
; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
; FLATSCR-NEXT: s_getpc_b64 s[34:35]
; FLATSCR-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
; FLATSCR-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
; FLATSCR-NEXT: s_mov_b32 s32, 0
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[34:35]
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[34:35]
; FLATSCR-NEXT: s_endpgm
; Two calls separated by an empty asm statement that has side effects only.
call void @external_void_func_void()
call void asm sideeffect "", ""() #0
call void @external_void_func_void()
ret void
}
; Non-kernel function with the same body as the kernel variant above: two calls
; to @external_void_func_void separated by an empty side-effecting inline asm.
; Both check variants expect:
;   - v40 to be spilled to scratch at s33 and reloaded before returning,
;   - s30, s31, s34, s35 and the incoming s33 value (first copied to s4 in the
;     MUBUF checks, s0 in the FLATSCR checks) to be written to lanes 0-4 of v40
;     and read back after the calls,
;   - the callee address to be formed once in s[34:35] and used by both
;     s_swappc_b64 instructions.
; The two variants differ in the scratch instructions used and in the s32
; adjustment (s_addk_i32 0x400 versus s_add_i32 16).
; NOTE(review): as in the kernel variant, the asm declares no clobbers even
; though the name mentions "clobber_s30_s31" - confirm this is intended.
define void @test_func_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void() #0 {
; MUBUF-LABEL: test_func_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
; MUBUF: ; %bb.0:
; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MUBUF-NEXT: s_mov_b32 s4, s33
; MUBUF-NEXT: s_mov_b32 s33, s32
; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1
; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; MUBUF-NEXT: s_mov_b64 exec, s[6:7]
; MUBUF-NEXT: v_writelane_b32 v40, s4, 4
; MUBUF-NEXT: v_writelane_b32 v40, s30, 0
; MUBUF-NEXT: v_writelane_b32 v40, s31, 1
; MUBUF-NEXT: s_addk_i32 s32, 0x400
; MUBUF-NEXT: v_writelane_b32 v40, s34, 2
; MUBUF-NEXT: v_writelane_b32 v40, s35, 3
; MUBUF-NEXT: s_getpc_b64 s[34:35]
; MUBUF-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
; MUBUF-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
; MUBUF-NEXT: s_swappc_b64 s[30:31], s[34:35]
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ;;#ASMEND
; MUBUF-NEXT: s_swappc_b64 s[30:31], s[34:35]
; MUBUF-NEXT: v_readlane_b32 s35, v40, 3
; MUBUF-NEXT: v_readlane_b32 s34, v40, 2
; MUBUF-NEXT: v_readlane_b32 s31, v40, 1
; MUBUF-NEXT: v_readlane_b32 s30, v40, 0
; MUBUF-NEXT: s_mov_b32 s32, s33
; MUBUF-NEXT: v_readlane_b32 s4, v40, 4
; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1
; MUBUF-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; MUBUF-NEXT: s_mov_b64 exec, s[6:7]
; MUBUF-NEXT: s_mov_b32 s33, s4
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: s_setpc_b64 s[30:31]
;
; FLATSCR-LABEL: test_func_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
; FLATSCR: ; %bb.0:
; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FLATSCR-NEXT: s_mov_b32 s0, s33
; FLATSCR-NEXT: s_mov_b32 s33, s32
; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1
; FLATSCR-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
; FLATSCR-NEXT: s_mov_b64 exec, s[2:3]
; FLATSCR-NEXT: v_writelane_b32 v40, s0, 4
; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0
; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1
; FLATSCR-NEXT: s_add_i32 s32, s32, 16
; FLATSCR-NEXT: v_writelane_b32 v40, s34, 2
; FLATSCR-NEXT: v_writelane_b32 v40, s35, 3
; FLATSCR-NEXT: s_getpc_b64 s[34:35]
; FLATSCR-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
; FLATSCR-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[34:35]
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[34:35]
; FLATSCR-NEXT: v_readlane_b32 s35, v40, 3
; FLATSCR-NEXT: v_readlane_b32 s34, v40, 2
; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1
; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0
; FLATSCR-NEXT: s_mov_b32 s32, s33
; FLATSCR-NEXT: v_readlane_b32 s0, v40, 4
; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1
; FLATSCR-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
; FLATSCR-NEXT: s_mov_b64 exec, s[2:3]
; FLATSCR-NEXT: s_mov_b32 s33, s0
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_setpc_b64 s[30:31]
; Two calls separated by an empty asm statement that has side effects only.
call void @external_void_func_void()
call void asm sideeffect "", ""() #0
call void @external_void_func_void()
ret void
}
; Non-kernel function that calls @external_void_func_void twice back to back,
; with nothing between the calls.
; Both check variants expect the same prologue and epilogue as the function
; above (v40 spilled to scratch; s30, s31, s34, s35 and the saved s33 value
; kept in lanes 0-4 of v40) and two consecutive s_swappc_b64 instructions that
; share the callee address held in s[34:35].
define void @test_func_call_external_void_funcx2() #0 {
; MUBUF-LABEL: test_func_call_external_void_funcx2:
; MUBUF: ; %bb.0:
; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MUBUF-NEXT: s_mov_b32 s4, s33
; MUBUF-NEXT: s_mov_b32 s33, s32
; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1
; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; MUBUF-NEXT: s_mov_b64 exec, s[6:7]
; MUBUF-NEXT: v_writelane_b32 v40, s4, 4
; MUBUF-NEXT: v_writelane_b32 v40, s30, 0
; MUBUF-NEXT: v_writelane_b32 v40, s31, 1
; MUBUF-NEXT: s_addk_i32 s32, 0x400
; MUBUF-NEXT: v_writelane_b32 v40, s34, 2
; MUBUF-NEXT: v_writelane_b32 v40, s35, 3
; MUBUF-NEXT: s_getpc_b64 s[34:35]
; MUBUF-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
; MUBUF-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
; MUBUF-NEXT: s_swappc_b64 s[30:31], s[34:35]
; MUBUF-NEXT: s_swappc_b64 s[30:31], s[34:35]
; MUBUF-NEXT: v_readlane_b32 s35, v40, 3
; MUBUF-NEXT: v_readlane_b32 s34, v40, 2
; MUBUF-NEXT: v_readlane_b32 s31, v40, 1
; MUBUF-NEXT: v_readlane_b32 s30, v40, 0
; MUBUF-NEXT: s_mov_b32 s32, s33
; MUBUF-NEXT: v_readlane_b32 s4, v40, 4
; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1
; MUBUF-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; MUBUF-NEXT: s_mov_b64 exec, s[6:7]
; MUBUF-NEXT: s_mov_b32 s33, s4
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: s_setpc_b64 s[30:31]
;
; FLATSCR-LABEL: test_func_call_external_void_funcx2:
; FLATSCR: ; %bb.0:
; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FLATSCR-NEXT: s_mov_b32 s0, s33
; FLATSCR-NEXT: s_mov_b32 s33, s32
; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1
; FLATSCR-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
; FLATSCR-NEXT: s_mov_b64 exec, s[2:3]
; FLATSCR-NEXT: v_writelane_b32 v40, s0, 4
; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0
; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1
; FLATSCR-NEXT: s_add_i32 s32, s32, 16
; FLATSCR-NEXT: v_writelane_b32 v40, s34, 2
; FLATSCR-NEXT: v_writelane_b32 v40, s35, 3
; FLATSCR-NEXT: s_getpc_b64 s[34:35]
; FLATSCR-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4
; FLATSCR-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[34:35]
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[34:35]
; FLATSCR-NEXT: v_readlane_b32 s35, v40, 3
; FLATSCR-NEXT: v_readlane_b32 s34, v40, 2
; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1
; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0
; FLATSCR-NEXT: s_mov_b32 s32, s33
; FLATSCR-NEXT: v_readlane_b32 s0, v40, 4
; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1
; FLATSCR-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
; FLATSCR-NEXT: s_mov_b64 exec, s[2:3]
; FLATSCR-NEXT: s_mov_b32 s33, s0
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_setpc_b64 s[30:31]
call void @external_void_func_void()
call void @external_void_func_void()
ret void
}
; Function with no calls whose only instruction is an inline asm statement that
; lists s[30:31] as clobbered.
; Both check variants expect s30 and s31 to be written to lanes 0 and 1 of v0
; before the asm and read back afterwards, because the final s_setpc_b64 uses
; s[30:31]. v0 itself is stored to scratch at s32 and reloaded inside
; s_xor_saveexec_b64 / "s_mov_b64 exec" regions.
define void @void_func_void_clobber_s30_s31() #2 {
; MUBUF-LABEL: void_func_void_clobber_s30_s31:
; MUBUF: ; %bb.0:
; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MUBUF-NEXT: s_xor_saveexec_b64 s[4:5], -1
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
; MUBUF-NEXT: s_mov_b64 exec, s[4:5]
; MUBUF-NEXT: v_writelane_b32 v0, s30, 0
; MUBUF-NEXT: v_writelane_b32 v0, s31, 1
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ; clobber
; MUBUF-NEXT: ;;#ASMEND
; MUBUF-NEXT: v_readlane_b32 s31, v0, 1
; MUBUF-NEXT: v_readlane_b32 s30, v0, 0
; MUBUF-NEXT: s_xor_saveexec_b64 s[4:5], -1
; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; MUBUF-NEXT: s_mov_b64 exec, s[4:5]
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: s_setpc_b64 s[30:31]
;
; FLATSCR-LABEL: void_func_void_clobber_s30_s31:
; FLATSCR: ; %bb.0:
; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FLATSCR-NEXT: s_xor_saveexec_b64 s[0:1], -1
; FLATSCR-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill
; FLATSCR-NEXT: s_mov_b64 exec, s[0:1]
; FLATSCR-NEXT: v_writelane_b32 v0, s30, 0
; FLATSCR-NEXT: v_writelane_b32 v0, s31, 1
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; clobber
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: v_readlane_b32 s31, v0, 1
; FLATSCR-NEXT: v_readlane_b32 s30, v0, 0
; FLATSCR-NEXT: s_xor_saveexec_b64 s[0:1], -1
; FLATSCR-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload
; FLATSCR-NEXT: s_mov_b64 exec, s[0:1]
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_setpc_b64 s[30:31]
; The asm emits only the text "; clobber"; the constraint marks s[30:31] as
; clobbered.
call void asm sideeffect "; clobber", "~{s[30:31]}"() #0
ret void
}
; Function whose only instruction is an empty inline asm that lists vcc as
; clobbered. It is the callee of @test_call_void_func_void_clobber_vcc.
; The GCN checks (a prefix shared by every run line) expect no save or restore
; code here: only the entry s_waitcnt, the asm markers and the return.
define hidden void @void_func_void_clobber_vcc() #2 {
; GCN-LABEL: void_func_void_clobber_vcc:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: s_setpc_b64 s[30:31]
call void asm sideeffect "", "~{vcc}"() #0
ret void
}
; Kernel that defines a 64-bit value in vcc via inline asm, calls
; @void_func_void_clobber_vcc, performs two volatile loads, and then passes the
; value back to inline asm in vcc.
; The FLATSCR checks expect vcc to be copied into s[34:35] before the call and
; copied back into vcc before the "use vcc" asm, so the value lives in a
; different register pair across the call.
; The %out argument is not referenced in the body.
; Only FLATSCR check lines are present for this kernel.
; NOTE(review): the s4-s14 / v31 setup before the call is presumably implicit
; argument passing for a callee without the "amdgpu-no-*" attributes - confirm.
define amdgpu_kernel void @test_call_void_func_void_clobber_vcc(ptr addrspace(1) %out) #0 {
; FLATSCR-LABEL: test_call_void_func_void_clobber_vcc:
; FLATSCR: ; %bb.0:
; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s8, s13
; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
; FLATSCR-NEXT: s_add_u32 s8, s4, 8
; FLATSCR-NEXT: s_addc_u32 s9, s5, 0
; FLATSCR-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; FLATSCR-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; FLATSCR-NEXT: s_mov_b32 s14, s12
; FLATSCR-NEXT: s_mov_b32 s13, s11
; FLATSCR-NEXT: s_mov_b32 s12, s10
; FLATSCR-NEXT: s_mov_b64 s[10:11], s[6:7]
; FLATSCR-NEXT: s_getpc_b64 s[16:17]
; FLATSCR-NEXT: s_add_u32 s16, s16, void_func_void_clobber_vcc@rel32@lo+4
; FLATSCR-NEXT: s_addc_u32 s17, s17, void_func_void_clobber_vcc@rel32@hi+12
; FLATSCR-NEXT: v_or3_b32 v31, v0, v1, v2
; FLATSCR-NEXT: s_mov_b64 s[4:5], s[0:1]
; FLATSCR-NEXT: s_mov_b64 s[6:7], s[2:3]
; FLATSCR-NEXT: s_mov_b32 s32, 0
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; def vcc
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: s_mov_b64 s[34:35], vcc
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[16:17]
; FLATSCR-NEXT: global_load_dword v0, v[0:1], off glc
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_mov_b64 vcc, s[34:35]
; FLATSCR-NEXT: global_load_dword v0, v[0:1], off glc
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: ; kill: killed $vgpr0_vgpr1
; FLATSCR-NEXT: ; kill: killed $vgpr0_vgpr1
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; use vcc
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: s_endpgm
; %vcc is pinned to the vcc register by the "={vcc}" output constraint.
%vcc = call i64 asm sideeffect "; def $0", "={vcc}"()
call void @void_func_void_clobber_vcc()
; Volatile loads from a poison pointer; their results are never used.
%val0 = load volatile i32, ptr addrspace(1) poison
%val1 = load volatile i32, ptr addrspace(1) poison
call void asm sideeffect "; use $0", "{vcc}"(i64 %vcc)
ret void
}
; Kernel that defines a value in s31 via inline asm, calls
; @external_void_func_void, and then uses the value in s31 again.
; The FLATSCR checks expect s31 to be copied to s33 before the call and copied
; back from s33 afterwards; the call instruction itself writes s[30:31].
; The %out argument is not referenced in the body.
; Only FLATSCR check lines are present for this kernel.
define amdgpu_kernel void @test_call_void_func_void_mayclobber_s31(ptr addrspace(1) %out) #0 {
; FLATSCR-LABEL: test_call_void_func_void_mayclobber_s31:
; FLATSCR: ; %bb.0:
; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s8, s13
; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
; FLATSCR-NEXT: s_getpc_b64 s[0:1]
; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
; FLATSCR-NEXT: s_mov_b32 s32, 0
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; def s31
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: s_mov_b32 s33, s31
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
; FLATSCR-NEXT: s_mov_b32 s31, s33
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; use s31
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: s_endpgm
; Def and use are both pinned to s31 through explicit register constraints.
%s31 = call i32 asm sideeffect "; def $0", "={s31}"()
call void @external_void_func_void()
call void asm sideeffect "; use $0", "{s31}"(i32 %s31)
ret void
}
; Kernel that defines a value in v31 via inline asm, calls
; @external_void_func_void, and then uses the value in v31 again.
; The FLATSCR checks expect v31 to be copied to v40 before the call and copied
; back from v40 afterwards.
; The %out argument is not referenced in the body.
; Only FLATSCR check lines are present for this kernel.
define amdgpu_kernel void @test_call_void_func_void_mayclobber_v31(ptr addrspace(1) %out) #0 {
; FLATSCR-LABEL: test_call_void_func_void_mayclobber_v31:
; FLATSCR: ; %bb.0:
; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s8, s13
; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
; FLATSCR-NEXT: s_getpc_b64 s[0:1]
; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
; FLATSCR-NEXT: s_mov_b32 s32, 0
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; def v31
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: v_mov_b32_e32 v40, v31
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
; FLATSCR-NEXT: v_mov_b32_e32 v31, v40
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; use v31
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: s_endpgm
; Def and use are both pinned to v31 through explicit register constraints.
%v31 = call i32 asm sideeffect "; def $0", "={v31}"()
call void @external_void_func_void()
call void asm sideeffect "; use $0", "{v31}"(i32 %v31)
ret void
}
; FIXME: What is the expected behavior for reserved registers here?
; Kernel that defines a value in s33 via inline asm, calls
; @external_void_func_void, and then uses the value in s33 again.
; The FLATSCR checks expect no copy of s33 around the call: the "def s33" asm,
; the s_swappc_b64 and the "use s33" asm follow each other directly.
; The %out argument is not referenced in the body.
; Only FLATSCR check lines are present for this kernel.
define amdgpu_kernel void @test_call_void_func_void_preserves_s33(ptr addrspace(1) %out) #0 {
; FLATSCR-LABEL: test_call_void_func_void_preserves_s33:
; FLATSCR: ; %bb.0:
; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s8, s13
; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
; FLATSCR-NEXT: s_getpc_b64 s[0:1]
; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
; FLATSCR-NEXT: s_mov_b32 s32, 0
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; def s33
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; use s33
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: s_endpgm
; Def and use are both pinned to s33 through explicit register constraints.
%s33 = call i32 asm sideeffect "; def $0", "={s33}"()
call void @external_void_func_void()
call void asm sideeffect "; use $0", "{s33}"(i32 %s33)
ret void
}
; Kernel that defines a value in s34 via inline asm, calls
; @external_void_func_void, and then uses the value in s34 again.
; The FLATSCR checks expect no copy of s34 around the call: the "def s34" asm,
; the s_swappc_b64 and the "use s34" asm follow each other directly.
; The %out argument is not referenced in the body.
; Only FLATSCR check lines are present for this kernel.
define amdgpu_kernel void @test_call_void_func_void_preserves_s34(ptr addrspace(1) %out) #0 {
; FLATSCR-LABEL: test_call_void_func_void_preserves_s34:
; FLATSCR: ; %bb.0:
; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s8, s13
; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
; FLATSCR-NEXT: s_getpc_b64 s[0:1]
; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
; FLATSCR-NEXT: s_mov_b32 s32, 0
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; def s34
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; use s34
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: s_endpgm
; Def and use are both pinned to s34 through explicit register constraints.
%s34 = call i32 asm sideeffect "; def $0", "={s34}"()
call void @external_void_func_void()
call void asm sideeffect "; use $0", "{s34}"(i32 %s34)
ret void
}
; Kernel that defines a value in v40 via inline asm, calls
; @external_void_func_void, and then uses the value in v40 again.
; The FLATSCR checks expect no copy of v40 around the call: the "def v40" asm,
; the s_swappc_b64 and the "use v40" asm follow each other directly.
; The %out argument is not referenced in the body.
; Only FLATSCR check lines are present for this kernel.
define amdgpu_kernel void @test_call_void_func_void_preserves_v40(ptr addrspace(1) %out) #0 {
; FLATSCR-LABEL: test_call_void_func_void_preserves_v40:
; FLATSCR: ; %bb.0:
; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s8, s13
; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
; FLATSCR-NEXT: s_getpc_b64 s[0:1]
; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
; FLATSCR-NEXT: s_mov_b32 s32, 0
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; def v40
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; use v40
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: s_endpgm
; Def and use are both pinned to v40 through explicit register constraints.
%v40 = call i32 asm sideeffect "; def $0", "={v40}"()
call void @external_void_func_void()
call void asm sideeffect "; use $0", "{v40}"(i32 %v40)
ret void
}
; Function whose only instruction is an inline asm statement that lists s33 as
; clobbered. It is the callee of @test_call_void_func_void_clobber_s33.
; Both check variants expect s33 to be written to lane 0 of v0 before the asm
; and read back afterwards, with v0 itself stored to scratch at s32 and
; reloaded inside s_xor_saveexec_b64 / "s_mov_b64 exec" regions.
define hidden void @void_func_void_clobber_s33() #2 {
; MUBUF-LABEL: void_func_void_clobber_s33:
; MUBUF: ; %bb.0:
; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MUBUF-NEXT: s_xor_saveexec_b64 s[4:5], -1
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
; MUBUF-NEXT: s_mov_b64 exec, s[4:5]
; MUBUF-NEXT: v_writelane_b32 v0, s33, 0
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ; clobber
; MUBUF-NEXT: ;;#ASMEND
; MUBUF-NEXT: v_readlane_b32 s33, v0, 0
; MUBUF-NEXT: s_xor_saveexec_b64 s[4:5], -1
; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; MUBUF-NEXT: s_mov_b64 exec, s[4:5]
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: s_setpc_b64 s[30:31]
;
; FLATSCR-LABEL: void_func_void_clobber_s33:
; FLATSCR: ; %bb.0:
; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FLATSCR-NEXT: s_xor_saveexec_b64 s[0:1], -1
; FLATSCR-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill
; FLATSCR-NEXT: s_mov_b64 exec, s[0:1]
; FLATSCR-NEXT: v_writelane_b32 v0, s33, 0
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; clobber
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: v_readlane_b32 s33, v0, 0
; FLATSCR-NEXT: s_xor_saveexec_b64 s[0:1], -1
; FLATSCR-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload
; FLATSCR-NEXT: s_mov_b64 exec, s[0:1]
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_setpc_b64 s[30:31]
call void asm sideeffect "; clobber", "~{s33}"() #0
ret void
}
; Function whose only instruction is an inline asm statement that lists s34 as
; clobbered. It is the callee of @test_call_void_func_void_clobber_s34.
; Both check variants expect s34 to be written to lane 0 of v0 before the asm
; and read back afterwards, with v0 itself stored to scratch at s32 and
; reloaded inside s_xor_saveexec_b64 / "s_mov_b64 exec" regions.
define hidden void @void_func_void_clobber_s34() #2 {
; MUBUF-LABEL: void_func_void_clobber_s34:
; MUBUF: ; %bb.0:
; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MUBUF-NEXT: s_xor_saveexec_b64 s[4:5], -1
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
; MUBUF-NEXT: s_mov_b64 exec, s[4:5]
; MUBUF-NEXT: v_writelane_b32 v0, s34, 0
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ; clobber
; MUBUF-NEXT: ;;#ASMEND
; MUBUF-NEXT: v_readlane_b32 s34, v0, 0
; MUBUF-NEXT: s_xor_saveexec_b64 s[4:5], -1
; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; MUBUF-NEXT: s_mov_b64 exec, s[4:5]
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: s_setpc_b64 s[30:31]
;
; FLATSCR-LABEL: void_func_void_clobber_s34:
; FLATSCR: ; %bb.0:
; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FLATSCR-NEXT: s_xor_saveexec_b64 s[0:1], -1
; FLATSCR-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill
; FLATSCR-NEXT: s_mov_b64 exec, s[0:1]
; FLATSCR-NEXT: v_writelane_b32 v0, s34, 0
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; clobber
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: v_readlane_b32 s34, v0, 0
; FLATSCR-NEXT: s_xor_saveexec_b64 s[0:1], -1
; FLATSCR-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload
; FLATSCR-NEXT: s_mov_b64 exec, s[0:1]
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_setpc_b64 s[30:31]
call void asm sideeffect "; clobber", "~{s34}"() #0
ret void
}
; Kernel whose only action is a call to @void_func_void_clobber_s33.
; The FLATSCR checks expect the call to be preceded by register setup
; (s[4:5] through s14 are rewritten and v31 is built with v_or3_b32 from v0 and
; the shifted v1 / v2 values) and followed directly by s_endpgm.
; Only FLATSCR check lines are present for this kernel.
; NOTE(review): the setup is presumably implicit argument passing for a callee
; that lacks the "amdgpu-no-*" attributes - confirm.
define amdgpu_kernel void @test_call_void_func_void_clobber_s33() #0 {
; FLATSCR-LABEL: test_call_void_func_void_clobber_s33:
; FLATSCR: ; %bb.0:
; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s8, s13
; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
; FLATSCR-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; FLATSCR-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; FLATSCR-NEXT: s_mov_b32 s14, s12
; FLATSCR-NEXT: s_mov_b32 s13, s11
; FLATSCR-NEXT: s_mov_b32 s12, s10
; FLATSCR-NEXT: s_mov_b64 s[10:11], s[6:7]
; FLATSCR-NEXT: s_mov_b64 s[8:9], s[4:5]
; FLATSCR-NEXT: s_getpc_b64 s[16:17]
; FLATSCR-NEXT: s_add_u32 s16, s16, void_func_void_clobber_s33@rel32@lo+4
; FLATSCR-NEXT: s_addc_u32 s17, s17, void_func_void_clobber_s33@rel32@hi+12
; FLATSCR-NEXT: v_or3_b32 v31, v0, v1, v2
; FLATSCR-NEXT: s_mov_b64 s[4:5], s[0:1]
; FLATSCR-NEXT: s_mov_b64 s[6:7], s[2:3]
; FLATSCR-NEXT: s_mov_b32 s32, 0
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[16:17]
; FLATSCR-NEXT: s_endpgm
call void @void_func_void_clobber_s33()
ret void
}
; Kernel whose only action is a call to @void_func_void_clobber_s34.
; The FLATSCR checks expect the call to be preceded by register setup
; (s[4:5] through s14 are rewritten and v31 is built with v_or3_b32 from v0 and
; the shifted v1 / v2 values) and followed directly by s_endpgm.
; Only FLATSCR check lines are present for this kernel.
; NOTE(review): the setup is presumably implicit argument passing for a callee
; that lacks the "amdgpu-no-*" attributes - confirm.
define amdgpu_kernel void @test_call_void_func_void_clobber_s34() #0 {
; FLATSCR-LABEL: test_call_void_func_void_clobber_s34:
; FLATSCR: ; %bb.0:
; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s8, s13
; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
; FLATSCR-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; FLATSCR-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; FLATSCR-NEXT: s_mov_b32 s14, s12
; FLATSCR-NEXT: s_mov_b32 s13, s11
; FLATSCR-NEXT: s_mov_b32 s12, s10
; FLATSCR-NEXT: s_mov_b64 s[10:11], s[6:7]
; FLATSCR-NEXT: s_mov_b64 s[8:9], s[4:5]
; FLATSCR-NEXT: s_getpc_b64 s[16:17]
; FLATSCR-NEXT: s_add_u32 s16, s16, void_func_void_clobber_s34@rel32@lo+4
; FLATSCR-NEXT: s_addc_u32 s17, s17, void_func_void_clobber_s34@rel32@hi+12
; FLATSCR-NEXT: v_or3_b32 v31, v0, v1, v2
; FLATSCR-NEXT: s_mov_b64 s[4:5], s[0:1]
; FLATSCR-NEXT: s_mov_b64 s[6:7], s[2:3]
; FLATSCR-NEXT: s_mov_b32 s32, 0
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[16:17]
; FLATSCR-NEXT: s_endpgm
call void @void_func_void_clobber_s34()
ret void
}
; Non-kernel function that defines a value in s40 via inline asm, calls
; @external_void_func_void, and then uses the value in an inline asm operand
; with the generic "s" constraint (no specific register requested).
; Both check variants expect the value to be copied from s40 to s34 before the
; call and the asm after the call to read s34. The previous contents of s34
; are kept in lane 2 of v40, next to s30 / s31 (lanes 0 and 1) and the saved
; s33 value (lane 3); v40 itself is spilled to scratch at s33.
define void @callee_saved_sgpr_func() #2 {
; MUBUF-LABEL: callee_saved_sgpr_func:
; MUBUF: ; %bb.0:
; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MUBUF-NEXT: s_mov_b32 s4, s33
; MUBUF-NEXT: s_mov_b32 s33, s32
; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1
; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; MUBUF-NEXT: s_mov_b64 exec, s[6:7]
; MUBUF-NEXT: v_writelane_b32 v40, s4, 3
; MUBUF-NEXT: s_addk_i32 s32, 0x400
; MUBUF-NEXT: v_writelane_b32 v40, s30, 0
; MUBUF-NEXT: v_writelane_b32 v40, s31, 1
; MUBUF-NEXT: s_getpc_b64 s[4:5]
; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; MUBUF-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
; MUBUF-NEXT: v_writelane_b32 v40, s34, 2
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ; def s40
; MUBUF-NEXT: ;;#ASMEND
; MUBUF-NEXT: s_mov_b32 s34, s40
; MUBUF-NEXT: s_swappc_b64 s[30:31], s[4:5]
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ; use s34
; MUBUF-NEXT: ;;#ASMEND
; MUBUF-NEXT: v_readlane_b32 s34, v40, 2
; MUBUF-NEXT: v_readlane_b32 s31, v40, 1
; MUBUF-NEXT: v_readlane_b32 s30, v40, 0
; MUBUF-NEXT: s_mov_b32 s32, s33
; MUBUF-NEXT: v_readlane_b32 s4, v40, 3
; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1
; MUBUF-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; MUBUF-NEXT: s_mov_b64 exec, s[6:7]
; MUBUF-NEXT: s_mov_b32 s33, s4
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: s_setpc_b64 s[30:31]
;
; FLATSCR-LABEL: callee_saved_sgpr_func:
; FLATSCR: ; %bb.0:
; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FLATSCR-NEXT: s_mov_b32 s0, s33
; FLATSCR-NEXT: s_mov_b32 s33, s32
; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1
; FLATSCR-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
; FLATSCR-NEXT: s_mov_b64 exec, s[2:3]
; FLATSCR-NEXT: v_writelane_b32 v40, s0, 3
; FLATSCR-NEXT: s_add_i32 s32, s32, 16
; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0
; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1
; FLATSCR-NEXT: s_getpc_b64 s[0:1]
; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
; FLATSCR-NEXT: v_writelane_b32 v40, s34, 2
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; def s40
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: s_mov_b32 s34, s40
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; use s34
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: v_readlane_b32 s34, v40, 2
; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1
; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0
; FLATSCR-NEXT: s_mov_b32 s32, s33
; FLATSCR-NEXT: v_readlane_b32 s0, v40, 3
; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1
; FLATSCR-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
; FLATSCR-NEXT: s_mov_b64 exec, s[2:3]
; FLATSCR-NEXT: s_mov_b32 s33, s0
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_setpc_b64 s[30:31]
; The def is pinned to s40; the use only requires an SGPR operand ("s").
%s40 = call i32 asm sideeffect "; def s40", "={s40}"() #0
call void @external_void_func_void()
call void asm sideeffect "; use $0", "s"(i32 %s40) #0
ret void
}
; Kernel variant of @callee_saved_sgpr_func: defines a value in s40 via inline
; asm, calls @external_void_func_void, and uses the value through an "s"
; constrained asm operand.
; The FLATSCR checks expect the value to be copied from s40 to s33 before the
; call and the asm after the call to read s33; no lane writes or scratch
; spills appear in the expected output.
; Only FLATSCR check lines are present for this kernel.
define amdgpu_kernel void @callee_saved_sgpr_kernel() #2 {
; FLATSCR-LABEL: callee_saved_sgpr_kernel:
; FLATSCR: ; %bb.0:
; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s8, s13
; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
; FLATSCR-NEXT: s_getpc_b64 s[0:1]
; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
; FLATSCR-NEXT: s_mov_b32 s32, 0
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; def s40
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: s_mov_b32 s33, s40
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; use s33
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: s_endpgm
; The def is pinned to s40; the use only requires an SGPR operand ("s").
%s40 = call i32 asm sideeffect "; def s40", "={s40}"() #0
call void @external_void_func_void()
call void asm sideeffect "; use $0", "s"(i32 %s40) #0
ret void
}
; First call preserved VGPR is used so it can't be used for SGPR spills.
; Non-kernel function that defines one value in s40 and one in v40 via inline
; asm, calls @external_void_func_void, and then uses both values through
; generic "s" and "v" asm operands.
; Both check variants expect:
;   - the SGPR value to be copied from s40 to s34 before the call and read from
;     s34 afterwards,
;   - the VGPR value to stay in v40 across the call, with the incoming v40
;     contents stored to scratch at s33 and reloaded before returning,
;   - v41 (not v40) to hold the lane copies of s30, s31, s34 and the saved s33
;     value, with v41 itself spilled to scratch at s33 offset:4.
define void @callee_saved_sgpr_vgpr_func() #2 {
; MUBUF-LABEL: callee_saved_sgpr_vgpr_func:
; MUBUF: ; %bb.0:
; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; MUBUF-NEXT: s_mov_b32 s4, s33
; MUBUF-NEXT: s_mov_b32 s33, s32
; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1
; MUBUF-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; MUBUF-NEXT: s_mov_b64 exec, s[6:7]
; MUBUF-NEXT: v_writelane_b32 v41, s4, 3
; MUBUF-NEXT: s_addk_i32 s32, 0x400
; MUBUF-NEXT: v_writelane_b32 v41, s30, 0
; MUBUF-NEXT: v_writelane_b32 v41, s31, 1
; MUBUF-NEXT: s_getpc_b64 s[4:5]
; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; MUBUF-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12
; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; MUBUF-NEXT: v_writelane_b32 v41, s34, 2
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ; def s40
; MUBUF-NEXT: ;;#ASMEND
; MUBUF-NEXT: s_mov_b32 s34, s40
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ; def v40
; MUBUF-NEXT: ;;#ASMEND
; MUBUF-NEXT: s_swappc_b64 s[30:31], s[4:5]
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ; use s34
; MUBUF-NEXT: ;;#ASMEND
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ; use v40
; MUBUF-NEXT: ;;#ASMEND
; MUBUF-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; MUBUF-NEXT: v_readlane_b32 s34, v41, 2
; MUBUF-NEXT: v_readlane_b32 s31, v41, 1
; MUBUF-NEXT: v_readlane_b32 s30, v41, 0
; MUBUF-NEXT: s_mov_b32 s32, s33
; MUBUF-NEXT: v_readlane_b32 s4, v41, 3
; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1
; MUBUF-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; MUBUF-NEXT: s_mov_b64 exec, s[6:7]
; MUBUF-NEXT: s_mov_b32 s33, s4
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: s_setpc_b64 s[30:31]
;
; FLATSCR-LABEL: callee_saved_sgpr_vgpr_func:
; FLATSCR: ; %bb.0:
; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FLATSCR-NEXT: s_mov_b32 s0, s33
; FLATSCR-NEXT: s_mov_b32 s33, s32
; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1
; FLATSCR-NEXT: scratch_store_dword off, v41, s33 offset:4 ; 4-byte Folded Spill
; FLATSCR-NEXT: s_mov_b64 exec, s[2:3]
; FLATSCR-NEXT: v_writelane_b32 v41, s0, 3
; FLATSCR-NEXT: s_add_i32 s32, s32, 16
; FLATSCR-NEXT: v_writelane_b32 v41, s30, 0
; FLATSCR-NEXT: v_writelane_b32 v41, s31, 1
; FLATSCR-NEXT: s_getpc_b64 s[0:1]
; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
; FLATSCR-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill
; FLATSCR-NEXT: v_writelane_b32 v41, s34, 2
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; def s40
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: s_mov_b32 s34, s40
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; def v40
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; use s34
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; use v40
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
; FLATSCR-NEXT: v_readlane_b32 s34, v41, 2
; FLATSCR-NEXT: v_readlane_b32 s31, v41, 1
; FLATSCR-NEXT: v_readlane_b32 s30, v41, 0
; FLATSCR-NEXT: s_mov_b32 s32, s33
; FLATSCR-NEXT: v_readlane_b32 s0, v41, 3
; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1
; FLATSCR-NEXT: scratch_load_dword v41, off, s33 offset:4 ; 4-byte Folded Reload
; FLATSCR-NEXT: s_mov_b64 exec, s[2:3]
; FLATSCR-NEXT: s_mov_b32 s33, s0
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_setpc_b64 s[30:31]
; Defs are pinned to s40 and v40; the uses accept any SGPR ("s") / VGPR ("v").
%s40 = call i32 asm sideeffect "; def s40", "={s40}"() #0
%v40 = call i32 asm sideeffect "; def v40", "={v40}"() #0
call void @external_void_func_void()
call void asm sideeffect "; use $0", "s"(i32 %s40) #0
call void asm sideeffect "; use $0", "v"(i32 %v40) #0
ret void
}
; Kernel that defines one value in s40 and one in v32 via inline asm, calls
; @external_void_func_void, and then uses both values through generic "s" and
; "v" asm operands.
; The FLATSCR checks expect the SGPR value to be copied from s40 to s33 and the
; VGPR value from v32 to v40 before the call, and the asm statements after the
; call to read s33 and v40; no scratch spills appear in the expected output.
; Only FLATSCR check lines are present for this kernel.
define amdgpu_kernel void @callee_saved_sgpr_vgpr_kernel() #2 {
; FLATSCR-LABEL: callee_saved_sgpr_vgpr_kernel:
; FLATSCR: ; %bb.0:
; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s8, s13
; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s9, 0
; FLATSCR-NEXT: s_getpc_b64 s[0:1]
; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4
; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12
; FLATSCR-NEXT: s_mov_b32 s32, 0
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; def s40
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: s_mov_b32 s33, s40
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; def v32
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: v_mov_b32_e32 v40, v32
; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1]
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; use s33
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ; use v40
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: s_endpgm
; Defs are pinned to s40 and v32; the uses accept any SGPR ("s") / VGPR ("v").
%s40 = call i32 asm sideeffect "; def s40", "={s40}"() #0
%v32 = call i32 asm sideeffect "; def v32", "={v32}"() #0
call void @external_void_func_void()
call void asm sideeffect "; use $0", "s"(i32 %s40) #0
call void asm sideeffect "; use $0", "v"(i32 %v32) #0
ret void
}
; #0: used by most test functions and kernels and on the inline asm call sites.
attributes #0 = { nounwind }
; #1: not referenced by any definition or call in the visible part of this file.
attributes #1 = { nounwind readnone }
; #2: used by the clobber helper functions and the callee_saved_* tests;
; noinline keeps those functions as separate call targets.
attributes #2 = { nounwind noinline }
; #3: used only by the @external_void_func_void declaration; every "amdgpu-no-*"
; string attribute is attached to it.
attributes #3 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }