| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=CHECK,CHECK-TRUE16 %s |
| ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=CHECK,CHECK-FAKE16 %s |
| |
; Make sure we set up a stack pointer and reserve 112 * 4 bytes at the beginning of the stack.
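; (For reference: 112 * 4 = 448 = 0x1c0, which matches the value conditionally
; moved into s33 in the checks below.)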
| |
| define amdgpu_cs void @amdgpu_cs() #0 { |
| ; CHECK-LABEL: amdgpu_cs: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2) |
| ; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; CHECK-NEXT: s_cmp_lg_u32 0, s33 |
| ; CHECK-NEXT: s_cmovk_i32 s33, 0x1c0 |
| ; CHECK-NEXT: s_alloc_vgpr 0 |
| ; CHECK-NEXT: s_endpgm |
| ret void |
| } |
| |
| define amdgpu_kernel void @kernel() #0 { |
| ; CHECK-LABEL: kernel: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2) |
| ; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; CHECK-NEXT: s_cmp_lg_u32 0, s33 |
| ; CHECK-NEXT: s_cmovk_i32 s33, 0x1c0 |
| ; CHECK-NEXT: s_alloc_vgpr 0 |
| ; CHECK-NEXT: s_endpgm |
| ret void |
| } |
| |
| define amdgpu_cs void @with_local() #0 { |
| ; CHECK-TRUE16-LABEL: with_local: |
| ; CHECK-TRUE16: ; %bb.0: |
| ; CHECK-TRUE16-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2) |
| ; CHECK-TRUE16-NEXT: v_mov_b16_e32 v0.l, 13 |
| ; CHECK-TRUE16-NEXT: s_cmp_lg_u32 0, s33 |
| ; CHECK-TRUE16-NEXT: s_cmovk_i32 s33, 0x1c0 |
| ; CHECK-TRUE16-NEXT: scratch_store_b8 off, v0, s33 scope:SCOPE_SYS |
| ; CHECK-TRUE16-NEXT: s_wait_storecnt 0x0 |
| ; CHECK-TRUE16-NEXT: s_alloc_vgpr 0 |
| ; CHECK-TRUE16-NEXT: s_endpgm |
| ; |
| ; CHECK-FAKE16-LABEL: with_local: |
| ; CHECK-FAKE16: ; %bb.0: |
| ; CHECK-FAKE16-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2) |
| ; CHECK-FAKE16-NEXT: v_mov_b32_e32 v0, 13 |
| ; CHECK-FAKE16-NEXT: s_cmp_lg_u32 0, s33 |
| ; CHECK-FAKE16-NEXT: s_cmovk_i32 s33, 0x1c0 |
| ; CHECK-FAKE16-NEXT: scratch_store_b8 off, v0, s33 scope:SCOPE_SYS |
| ; CHECK-FAKE16-NEXT: s_wait_storecnt 0x0 |
| ; CHECK-FAKE16-NEXT: s_alloc_vgpr 0 |
| ; CHECK-FAKE16-NEXT: s_endpgm |
| %local = alloca i32, addrspace(5) |
| store volatile i8 13, ptr addrspace(5) %local |
| ret void |
| } |
| |
| ; Check that we generate s_cselect for SP if we can fit |
| ; the offset in an inline constant. |
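; (Worked out from the checks below: when the area is reserved the SP is
; 0x1d0 = 0x1c0 + 16, and otherwise it is 16; since 16 fits in an inline
; constant, a single s_cselect_b32 with one literal operand is enough.)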
| define amdgpu_cs void @with_calls_inline_const() #0 { |
| ; CHECK-TRUE16-LABEL: with_calls_inline_const: |
| ; CHECK-TRUE16: ; %bb.0: |
| ; CHECK-TRUE16-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2) |
| ; CHECK-TRUE16-NEXT: v_mov_b16_e32 v0.l, 15 |
| ; CHECK-TRUE16-NEXT: s_cmp_lg_u32 0, s33 |
| ; CHECK-TRUE16-NEXT: s_mov_b32 s1, callee@abs32@hi |
| ; CHECK-TRUE16-NEXT: s_cmovk_i32 s33, 0x1c0 |
| ; CHECK-TRUE16-NEXT: s_mov_b32 s0, callee@abs32@lo |
| ; CHECK-TRUE16-NEXT: scratch_store_b8 off, v0, s33 scope:SCOPE_SYS |
| ; CHECK-TRUE16-NEXT: s_wait_storecnt 0x0 |
| ; CHECK-TRUE16-NEXT: v_mov_b32_e32 v0, 0x47 |
| ; CHECK-TRUE16-NEXT: s_cselect_b32 s32, 0x1d0, 16 |
| ; CHECK-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] |
| ; CHECK-TRUE16-NEXT: s_alloc_vgpr 0 |
| ; CHECK-TRUE16-NEXT: s_endpgm |
| ; |
| ; CHECK-FAKE16-LABEL: with_calls_inline_const: |
| ; CHECK-FAKE16: ; %bb.0: |
| ; CHECK-FAKE16-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2) |
| ; CHECK-FAKE16-NEXT: v_mov_b32_e32 v0, 15 |
| ; CHECK-FAKE16-NEXT: s_cmp_lg_u32 0, s33 |
| ; CHECK-FAKE16-NEXT: s_mov_b32 s1, callee@abs32@hi |
| ; CHECK-FAKE16-NEXT: s_cmovk_i32 s33, 0x1c0 |
| ; CHECK-FAKE16-NEXT: s_mov_b32 s0, callee@abs32@lo |
| ; CHECK-FAKE16-NEXT: scratch_store_b8 off, v0, s33 scope:SCOPE_SYS |
| ; CHECK-FAKE16-NEXT: s_wait_storecnt 0x0 |
| ; CHECK-FAKE16-NEXT: v_mov_b32_e32 v0, 0x47 |
| ; CHECK-FAKE16-NEXT: s_cselect_b32 s32, 0x1d0, 16 |
| ; CHECK-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] |
| ; CHECK-FAKE16-NEXT: s_alloc_vgpr 0 |
| ; CHECK-FAKE16-NEXT: s_endpgm |
| %local = alloca i32, addrspace(5) |
| store volatile i8 15, ptr addrspace(5) %local |
| call amdgpu_gfx void @callee(i32 71) |
| ret void |
| } |
| |
| ; Check that we generate s_mov + s_cmovk if we can't |
| ; fit the offset for SP in an inline constant. |
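; (Worked out from the checks below: when the area is reserved the SP is
; 0x2c0 = 0x1c0 + 0x100, and otherwise it is 0x100; 0x100 does not fit in an
; inline constant, so presumably a single s_cselect would need two literal
; operands, and we fall back to s_movk_i32 + s_cmovk_i32 instead.)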
| define amdgpu_cs void @with_calls_no_inline_const() #0 { |
| ; CHECK-TRUE16-LABEL: with_calls_no_inline_const: |
| ; CHECK-TRUE16: ; %bb.0: |
| ; CHECK-TRUE16-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2) |
| ; CHECK-TRUE16-NEXT: v_mov_b16_e32 v0.l, 15 |
| ; CHECK-TRUE16-NEXT: s_cmp_lg_u32 0, s33 |
| ; CHECK-TRUE16-NEXT: s_mov_b32 s1, callee@abs32@hi |
| ; CHECK-TRUE16-NEXT: s_cmovk_i32 s33, 0x1c0 |
| ; CHECK-TRUE16-NEXT: s_mov_b32 s0, callee@abs32@lo |
| ; CHECK-TRUE16-NEXT: scratch_store_b8 off, v0, s33 scope:SCOPE_SYS |
| ; CHECK-TRUE16-NEXT: s_wait_storecnt 0x0 |
| ; CHECK-TRUE16-NEXT: v_mov_b32_e32 v0, 0x47 |
| ; CHECK-TRUE16-NEXT: s_movk_i32 s32, 0x100 |
| ; CHECK-TRUE16-NEXT: s_cmovk_i32 s32, 0x2c0 |
| ; CHECK-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] |
| ; CHECK-TRUE16-NEXT: s_alloc_vgpr 0 |
| ; CHECK-TRUE16-NEXT: s_endpgm |
| ; |
| ; CHECK-FAKE16-LABEL: with_calls_no_inline_const: |
| ; CHECK-FAKE16: ; %bb.0: |
| ; CHECK-FAKE16-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2) |
| ; CHECK-FAKE16-NEXT: v_mov_b32_e32 v0, 15 |
| ; CHECK-FAKE16-NEXT: s_cmp_lg_u32 0, s33 |
| ; CHECK-FAKE16-NEXT: s_mov_b32 s1, callee@abs32@hi |
| ; CHECK-FAKE16-NEXT: s_cmovk_i32 s33, 0x1c0 |
| ; CHECK-FAKE16-NEXT: s_mov_b32 s0, callee@abs32@lo |
| ; CHECK-FAKE16-NEXT: scratch_store_b8 off, v0, s33 scope:SCOPE_SYS |
| ; CHECK-FAKE16-NEXT: s_wait_storecnt 0x0 |
| ; CHECK-FAKE16-NEXT: v_mov_b32_e32 v0, 0x47 |
| ; CHECK-FAKE16-NEXT: s_movk_i32 s32, 0x100 |
| ; CHECK-FAKE16-NEXT: s_cmovk_i32 s32, 0x2c0 |
| ; CHECK-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] |
| ; CHECK-FAKE16-NEXT: s_alloc_vgpr 0 |
| ; CHECK-FAKE16-NEXT: s_endpgm |
| %local = alloca i32, i32 61, addrspace(5) |
| store volatile i8 15, ptr addrspace(5) %local |
| call amdgpu_gfx void @callee(i32 71) |
| ret void |
| } |
| |
| define amdgpu_cs void @with_spills() #0 { |
| ; CHECK-LABEL: with_spills: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2) |
| ; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; CHECK-NEXT: s_cmp_lg_u32 0, s33 |
| ; CHECK-NEXT: s_cmovk_i32 s33, 0x1c0 |
| ; CHECK-NEXT: s_alloc_vgpr 0 |
| ; CHECK-NEXT: s_endpgm |
| call void asm "; spills", "~{v40},~{v42}"() |
| ret void |
| } |
| |
| define amdgpu_cs void @realign_stack(<32 x i32> %x) #0 { |
| ; CHECK-LABEL: realign_stack: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2) |
| ; CHECK-NEXT: s_mov_b32 s1, callee@abs32@hi |
| ; CHECK-NEXT: s_cmp_lg_u32 0, s33 |
| ; CHECK-NEXT: s_mov_b32 s0, callee@abs32@lo |
| ; CHECK-NEXT: s_cmovk_i32 s33, 0x200 |
| ; CHECK-NEXT: s_movk_i32 s32, 0x100 |
| ; CHECK-NEXT: s_clause 0x7 |
| ; CHECK-NEXT: scratch_store_b128 off, v[28:31], s33 offset:112 |
| ; CHECK-NEXT: scratch_store_b128 off, v[24:27], s33 offset:96 |
| ; CHECK-NEXT: scratch_store_b128 off, v[20:23], s33 offset:80 |
| ; CHECK-NEXT: scratch_store_b128 off, v[16:19], s33 offset:64 |
| ; CHECK-NEXT: scratch_store_b128 off, v[12:15], s33 offset:48 |
| ; CHECK-NEXT: scratch_store_b128 off, v[8:11], s33 offset:32 |
| ; CHECK-NEXT: scratch_store_b128 off, v[4:7], s33 offset:16 |
| ; CHECK-NEXT: scratch_store_b128 off, v[0:3], s33 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 0x47 |
| ; CHECK-NEXT: s_cmovk_i32 s32, 0x300 |
| ; CHECK-NEXT: s_swappc_b64 s[30:31], s[0:1] |
| ; CHECK-NEXT: s_alloc_vgpr 0 |
| ; CHECK-NEXT: s_endpgm |
| %v = alloca <32 x i32>, align 128, addrspace(5) |
| store <32 x i32> %x, ptr addrspace(5) %v |
| call amdgpu_gfx void @callee(i32 71) |
| ret void |
| } |
| |
| define amdgpu_cs void @frame_pointer_none() #1 { |
| ; CHECK-TRUE16-LABEL: frame_pointer_none: |
| ; CHECK-TRUE16: ; %bb.0: |
| ; CHECK-TRUE16-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2) |
| ; CHECK-TRUE16-NEXT: v_mov_b16_e32 v0.l, 13 |
| ; CHECK-TRUE16-NEXT: s_cmp_lg_u32 0, s33 |
| ; CHECK-TRUE16-NEXT: s_cmovk_i32 s33, 0x1c0 |
| ; CHECK-TRUE16-NEXT: scratch_store_b8 off, v0, s33 scope:SCOPE_SYS |
| ; CHECK-TRUE16-NEXT: s_wait_storecnt 0x0 |
| ; CHECK-TRUE16-NEXT: s_alloc_vgpr 0 |
| ; CHECK-TRUE16-NEXT: s_endpgm |
| ; |
| ; CHECK-FAKE16-LABEL: frame_pointer_none: |
| ; CHECK-FAKE16: ; %bb.0: |
| ; CHECK-FAKE16-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2) |
| ; CHECK-FAKE16-NEXT: v_mov_b32_e32 v0, 13 |
| ; CHECK-FAKE16-NEXT: s_cmp_lg_u32 0, s33 |
| ; CHECK-FAKE16-NEXT: s_cmovk_i32 s33, 0x1c0 |
| ; CHECK-FAKE16-NEXT: scratch_store_b8 off, v0, s33 scope:SCOPE_SYS |
| ; CHECK-FAKE16-NEXT: s_wait_storecnt 0x0 |
| ; CHECK-FAKE16-NEXT: s_alloc_vgpr 0 |
| ; CHECK-FAKE16-NEXT: s_endpgm |
| %local = alloca i32, addrspace(5) |
| store volatile i8 13, ptr addrspace(5) %local |
| ret void |
| } |
| |
| define amdgpu_cs void @frame_pointer_all() #2 { |
| ; CHECK-TRUE16-LABEL: frame_pointer_all: |
| ; CHECK-TRUE16: ; %bb.0: |
| ; CHECK-TRUE16-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2) |
| ; CHECK-TRUE16-NEXT: v_mov_b16_e32 v0.l, 13 |
| ; CHECK-TRUE16-NEXT: s_cmp_lg_u32 0, s33 |
| ; CHECK-TRUE16-NEXT: s_cmovk_i32 s33, 0x1c0 |
| ; CHECK-TRUE16-NEXT: scratch_store_b8 off, v0, s33 scope:SCOPE_SYS |
| ; CHECK-TRUE16-NEXT: s_wait_storecnt 0x0 |
| ; CHECK-TRUE16-NEXT: s_alloc_vgpr 0 |
| ; CHECK-TRUE16-NEXT: s_endpgm |
| ; |
| ; CHECK-FAKE16-LABEL: frame_pointer_all: |
| ; CHECK-FAKE16: ; %bb.0: |
| ; CHECK-FAKE16-NEXT: s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 2) |
| ; CHECK-FAKE16-NEXT: v_mov_b32_e32 v0, 13 |
| ; CHECK-FAKE16-NEXT: s_cmp_lg_u32 0, s33 |
| ; CHECK-FAKE16-NEXT: s_cmovk_i32 s33, 0x1c0 |
| ; CHECK-FAKE16-NEXT: scratch_store_b8 off, v0, s33 scope:SCOPE_SYS |
| ; CHECK-FAKE16-NEXT: s_wait_storecnt 0x0 |
| ; CHECK-FAKE16-NEXT: s_alloc_vgpr 0 |
| ; CHECK-FAKE16-NEXT: s_endpgm |
| %local = alloca i32, addrspace(5) |
| store volatile i8 13, ptr addrspace(5) %local |
| ret void |
| } |
| |
| ; Non-entry functions and graphics shaders don't need to worry about CWSR. |
| define amdgpu_gs void @amdgpu_gs() #0 { |
| ; CHECK-TRUE16-LABEL: amdgpu_gs: |
| ; CHECK-TRUE16: ; %bb.0: |
| ; CHECK-TRUE16-NEXT: v_mov_b16_e32 v0.l, 15 |
| ; CHECK-TRUE16-NEXT: s_mov_b32 s1, callee@abs32@hi |
| ; CHECK-TRUE16-NEXT: s_mov_b32 s0, callee@abs32@lo |
| ; CHECK-TRUE16-NEXT: s_mov_b32 s32, 16 |
| ; CHECK-TRUE16-NEXT: scratch_store_b8 off, v0, off scope:SCOPE_SYS |
| ; CHECK-TRUE16-NEXT: s_wait_storecnt 0x0 |
| ; CHECK-TRUE16-NEXT: v_mov_b32_e32 v0, 0x47 |
| ; CHECK-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] |
| ; CHECK-TRUE16-NEXT: s_alloc_vgpr 0 |
| ; CHECK-TRUE16-NEXT: s_endpgm |
| ; |
| ; CHECK-FAKE16-LABEL: amdgpu_gs: |
| ; CHECK-FAKE16: ; %bb.0: |
| ; CHECK-FAKE16-NEXT: v_mov_b32_e32 v0, 15 |
| ; CHECK-FAKE16-NEXT: s_mov_b32 s1, callee@abs32@hi |
| ; CHECK-FAKE16-NEXT: s_mov_b32 s0, callee@abs32@lo |
| ; CHECK-FAKE16-NEXT: s_mov_b32 s32, 16 |
| ; CHECK-FAKE16-NEXT: scratch_store_b8 off, v0, off scope:SCOPE_SYS |
| ; CHECK-FAKE16-NEXT: s_wait_storecnt 0x0 |
| ; CHECK-FAKE16-NEXT: v_mov_b32_e32 v0, 0x47 |
| ; CHECK-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] |
| ; CHECK-FAKE16-NEXT: s_alloc_vgpr 0 |
| ; CHECK-FAKE16-NEXT: s_endpgm |
| %local = alloca i32, addrspace(5) |
| store volatile i8 15, ptr addrspace(5) %local |
| call amdgpu_gfx void @callee(i32 71) |
| ret void |
| } |
| |
| define amdgpu_gfx void @amdgpu_gfx() #0 { |
| ; CHECK-TRUE16-LABEL: amdgpu_gfx: |
| ; CHECK-TRUE16: ; %bb.0: |
| ; CHECK-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; CHECK-TRUE16-NEXT: s_wait_expcnt 0x0 |
| ; CHECK-TRUE16-NEXT: s_wait_samplecnt 0x0 |
| ; CHECK-TRUE16-NEXT: s_wait_bvhcnt 0x0 |
| ; CHECK-TRUE16-NEXT: s_wait_kmcnt 0x0 |
| ; CHECK-TRUE16-NEXT: s_mov_b32 s0, s33 |
| ; CHECK-TRUE16-NEXT: s_mov_b32 s33, s32 |
| ; CHECK-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 |
| ; CHECK-TRUE16-NEXT: scratch_store_b32 off, v40, s33 offset:4 ; 4-byte Folded Spill |
| ; CHECK-TRUE16-NEXT: s_wait_alu 0xfffe |
| ; CHECK-TRUE16-NEXT: s_mov_b32 exec_lo, s1 |
| ; CHECK-TRUE16-NEXT: v_writelane_b32 v40, s0, 2 |
| ; CHECK-TRUE16-NEXT: v_mov_b16_e32 v0.l, 15 |
| ; CHECK-TRUE16-NEXT: s_mov_b32 s1, callee@abs32@hi |
| ; CHECK-TRUE16-NEXT: s_mov_b32 s0, callee@abs32@lo |
| ; CHECK-TRUE16-NEXT: s_add_co_i32 s32, s32, 16 |
| ; CHECK-TRUE16-NEXT: v_writelane_b32 v40, s30, 0 |
| ; CHECK-TRUE16-NEXT: s_wait_storecnt 0x0 |
| ; CHECK-TRUE16-NEXT: scratch_store_b8 off, v0, s33 scope:SCOPE_SYS |
| ; CHECK-TRUE16-NEXT: s_wait_storecnt 0x0 |
| ; CHECK-TRUE16-NEXT: v_mov_b32_e32 v0, 0x47 |
| ; CHECK-TRUE16-NEXT: v_writelane_b32 v40, s31, 1 |
| ; CHECK-TRUE16-NEXT: s_wait_alu 0xfffe |
| ; CHECK-TRUE16-NEXT: s_swappc_b64 s[30:31], s[0:1] |
| ; CHECK-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; CHECK-TRUE16-NEXT: v_readlane_b32 s31, v40, 1 |
| ; CHECK-TRUE16-NEXT: v_readlane_b32 s30, v40, 0 |
| ; CHECK-TRUE16-NEXT: s_mov_b32 s32, s33 |
| ; CHECK-TRUE16-NEXT: v_readlane_b32 s0, v40, 2 |
| ; CHECK-TRUE16-NEXT: s_or_saveexec_b32 s1, -1 |
| ; CHECK-TRUE16-NEXT: scratch_load_b32 v40, off, s33 offset:4 ; 4-byte Folded Reload |
| ; CHECK-TRUE16-NEXT: s_wait_alu 0xfffe |
| ; CHECK-TRUE16-NEXT: s_mov_b32 exec_lo, s1 |
| ; CHECK-TRUE16-NEXT: s_mov_b32 s33, s0 |
| ; CHECK-TRUE16-NEXT: s_wait_loadcnt 0x0 |
| ; CHECK-TRUE16-NEXT: s_wait_alu 0xfffe |
| ; CHECK-TRUE16-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; CHECK-FAKE16-LABEL: amdgpu_gfx: |
| ; CHECK-FAKE16: ; %bb.0: |
| ; CHECK-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; CHECK-FAKE16-NEXT: s_wait_expcnt 0x0 |
| ; CHECK-FAKE16-NEXT: s_wait_samplecnt 0x0 |
| ; CHECK-FAKE16-NEXT: s_wait_bvhcnt 0x0 |
| ; CHECK-FAKE16-NEXT: s_wait_kmcnt 0x0 |
| ; CHECK-FAKE16-NEXT: s_mov_b32 s0, s33 |
| ; CHECK-FAKE16-NEXT: s_mov_b32 s33, s32 |
| ; CHECK-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 |
| ; CHECK-FAKE16-NEXT: scratch_store_b32 off, v40, s33 offset:4 ; 4-byte Folded Spill |
| ; CHECK-FAKE16-NEXT: s_wait_alu 0xfffe |
| ; CHECK-FAKE16-NEXT: s_mov_b32 exec_lo, s1 |
| ; CHECK-FAKE16-NEXT: v_writelane_b32 v40, s0, 2 |
| ; CHECK-FAKE16-NEXT: v_mov_b32_e32 v0, 15 |
| ; CHECK-FAKE16-NEXT: s_mov_b32 s1, callee@abs32@hi |
| ; CHECK-FAKE16-NEXT: s_mov_b32 s0, callee@abs32@lo |
| ; CHECK-FAKE16-NEXT: s_add_co_i32 s32, s32, 16 |
| ; CHECK-FAKE16-NEXT: v_writelane_b32 v40, s30, 0 |
| ; CHECK-FAKE16-NEXT: s_wait_storecnt 0x0 |
| ; CHECK-FAKE16-NEXT: scratch_store_b8 off, v0, s33 scope:SCOPE_SYS |
| ; CHECK-FAKE16-NEXT: s_wait_storecnt 0x0 |
| ; CHECK-FAKE16-NEXT: v_mov_b32_e32 v0, 0x47 |
| ; CHECK-FAKE16-NEXT: v_writelane_b32 v40, s31, 1 |
| ; CHECK-FAKE16-NEXT: s_wait_alu 0xfffe |
| ; CHECK-FAKE16-NEXT: s_swappc_b64 s[30:31], s[0:1] |
| ; CHECK-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; CHECK-FAKE16-NEXT: v_readlane_b32 s31, v40, 1 |
| ; CHECK-FAKE16-NEXT: v_readlane_b32 s30, v40, 0 |
| ; CHECK-FAKE16-NEXT: s_mov_b32 s32, s33 |
| ; CHECK-FAKE16-NEXT: v_readlane_b32 s0, v40, 2 |
| ; CHECK-FAKE16-NEXT: s_or_saveexec_b32 s1, -1 |
| ; CHECK-FAKE16-NEXT: scratch_load_b32 v40, off, s33 offset:4 ; 4-byte Folded Reload |
| ; CHECK-FAKE16-NEXT: s_wait_alu 0xfffe |
| ; CHECK-FAKE16-NEXT: s_mov_b32 exec_lo, s1 |
| ; CHECK-FAKE16-NEXT: s_mov_b32 s33, s0 |
| ; CHECK-FAKE16-NEXT: s_wait_loadcnt 0x0 |
| ; CHECK-FAKE16-NEXT: s_wait_alu 0xfffe |
| ; CHECK-FAKE16-NEXT: s_setpc_b64 s[30:31] |
| %local = alloca i32, addrspace(5) |
| store volatile i8 15, ptr addrspace(5) %local |
| call amdgpu_gfx void @callee(i32 71) |
| ret void |
| } |
| |
| define void @default() #0 { |
| ; CHECK-LABEL: default: |
| ; CHECK: ; %bb.0: |
| ; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; CHECK-NEXT: s_wait_expcnt 0x0 |
| ; CHECK-NEXT: s_wait_samplecnt 0x0 |
| ; CHECK-NEXT: s_wait_bvhcnt 0x0 |
| ; CHECK-NEXT: s_wait_kmcnt 0x0 |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| ret void |
| } |
| |
| declare amdgpu_gfx void @callee(i32) #0 |
| |
| attributes #0 = { nounwind "amdgpu-dynamic-vgpr-block-size"="16" } |
| attributes #1 = { nounwind "frame-pointer"="none" "amdgpu-dynamic-vgpr-block-size"="16" } |
| attributes #2 = { nounwind "frame-pointer"="all" "amdgpu-dynamic-vgpr-block-size"="16" } |