| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 |
| ; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX11 %s |
| ; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX10 %s |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX11 %s |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX10 %s |
| |
| ; Trivial amdgpu_cs_chain_preserve function: it takes one inreg and one |
| ; non-inreg {ptr, i32, <4 x i32>} argument and returns immediately. |
| ; All four run configurations are expected to emit nothing but the initial |
| ; s_waitcnt followed by s_endpgm. |
| define amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_no_stack({ptr, i32, <4 x i32>} inreg %a, {ptr, i32, <4 x i32>} %b) { |
| ; GISEL-GFX11-LABEL: amdgpu_cs_chain_preserve_no_stack: |
| ; GISEL-GFX11: ; %bb.0: |
| ; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GISEL-GFX11-NEXT: s_endpgm |
| ; |
| ; GISEL-GFX10-LABEL: amdgpu_cs_chain_preserve_no_stack: |
| ; GISEL-GFX10: ; %bb.0: |
| ; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GISEL-GFX10-NEXT: s_endpgm |
| ; |
| ; DAGISEL-GFX11-LABEL: amdgpu_cs_chain_preserve_no_stack: |
| ; DAGISEL-GFX11: ; %bb.0: |
| ; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; DAGISEL-GFX11-NEXT: s_endpgm |
| ; |
| ; DAGISEL-GFX10-LABEL: amdgpu_cs_chain_preserve_no_stack: |
| ; DAGISEL-GFX10: ; %bb.0: |
| ; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; DAGISEL-GFX10-NEXT: s_endpgm |
| ret void |
| } |
| |
| ; amdgpu_cs caller that chains into the amdgpu_cs_chain_preserve function |
| ; @chain_preserve_callee. The inline asm clobbers v0, v8, v16 and s0, so the |
| ; expected output keeps the incoming s0 in s3 and the incoming v0 in v3 across |
| ; the asm, then copies them into s0 and v8 before the final s_setpc_b64. |
| ; No store or reload of v16 is expected here. On gfx1030 the expected output |
| ; additionally builds s[48:51] from s[100:103] before the jump. |
| define amdgpu_cs void @cs_to_chain_preserve(<3 x i32> inreg %a, <3 x i32> %b) { |
| ; GISEL-GFX11-LABEL: cs_to_chain_preserve: |
| ; GISEL-GFX11: ; %bb.0: |
| ; GISEL-GFX11-NEXT: v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v10, v2 |
| ; GISEL-GFX11-NEXT: s_mov_b32 s3, s0 |
| ; GISEL-GFX11-NEXT: ;;#ASMSTART |
| ; GISEL-GFX11-NEXT: s_nop |
| ; GISEL-GFX11-NEXT: ;;#ASMEND |
| ; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo |
| ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GISEL-GFX11-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v1 |
| ; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi |
| ; GISEL-GFX11-NEXT: s_mov_b32 s0, s3 |
| ; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 |
| ; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5] |
| ; |
| ; GISEL-GFX10-LABEL: cs_to_chain_preserve: |
| ; GISEL-GFX10: ; %bb.0: |
| ; GISEL-GFX10-NEXT: s_getpc_b64 s[100:101] |
| ; GISEL-GFX10-NEXT: s_mov_b32 s100, s0 |
| ; GISEL-GFX10-NEXT: v_mov_b32_e32 v3, v0 |
| ; GISEL-GFX10-NEXT: s_load_dwordx4 s[100:103], s[100:101], 0x10 |
| ; GISEL-GFX10-NEXT: v_mov_b32_e32 v9, v1 |
| ; GISEL-GFX10-NEXT: v_mov_b32_e32 v10, v2 |
| ; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo |
| ; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi |
| ; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) |
| ; GISEL-GFX10-NEXT: s_bitset0_b32 s103, 21 |
| ; GISEL-GFX10-NEXT: s_add_u32 s100, s100, s3 |
| ; GISEL-GFX10-NEXT: s_mov_b32 s3, s0 |
| ; GISEL-GFX10-NEXT: ;;#ASMSTART |
| ; GISEL-GFX10-NEXT: s_nop |
| ; GISEL-GFX10-NEXT: ;;#ASMEND |
| ; GISEL-GFX10-NEXT: s_addc_u32 s101, s101, 0 |
| ; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v3 |
| ; GISEL-GFX10-NEXT: s_mov_b64 s[48:49], s[100:101] |
| ; GISEL-GFX10-NEXT: s_mov_b32 s0, s3 |
| ; GISEL-GFX10-NEXT: s_mov_b64 s[50:51], s[102:103] |
| ; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 |
| ; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5] |
| ; |
| ; DAGISEL-GFX11-LABEL: cs_to_chain_preserve: |
| ; DAGISEL-GFX11: ; %bb.0: |
| ; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v10, v2 |
| ; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0 |
| ; DAGISEL-GFX11-NEXT: ;;#ASMSTART |
| ; DAGISEL-GFX11-NEXT: s_nop |
| ; DAGISEL-GFX11-NEXT: ;;#ASMEND |
| ; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi |
| ; DAGISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v1 |
| ; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo |
| ; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3 |
| ; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 |
| ; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5] |
| ; |
| ; DAGISEL-GFX10-LABEL: cs_to_chain_preserve: |
| ; DAGISEL-GFX10: ; %bb.0: |
| ; DAGISEL-GFX10-NEXT: s_getpc_b64 s[100:101] |
| ; DAGISEL-GFX10-NEXT: s_mov_b32 s100, s0 |
| ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v3, v0 |
| ; DAGISEL-GFX10-NEXT: s_load_dwordx4 s[100:103], s[100:101], 0x10 |
| ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v9, v1 |
| ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v10, v2 |
| ; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi |
| ; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo |
| ; DAGISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) |
| ; DAGISEL-GFX10-NEXT: s_bitset0_b32 s103, 21 |
| ; DAGISEL-GFX10-NEXT: s_add_u32 s100, s100, s3 |
| ; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0 |
| ; DAGISEL-GFX10-NEXT: ;;#ASMSTART |
| ; DAGISEL-GFX10-NEXT: s_nop |
| ; DAGISEL-GFX10-NEXT: ;;#ASMEND |
| ; DAGISEL-GFX10-NEXT: s_addc_u32 s101, s101, 0 |
| ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v3 |
| ; DAGISEL-GFX10-NEXT: s_mov_b64 s[48:49], s[100:101] |
| ; DAGISEL-GFX10-NEXT: s_mov_b64 s[50:51], s[102:103] |
| ; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3 |
| ; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 |
| ; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5] |
| ; Clobber the first SGPR/VGPR argument registers plus v16 ahead of the chain call. |
| call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"() |
| ; Operands: callee, i32 -1, inreg <3 x i32>, non-inreg <3 x i32>, i32 0. |
| ; The expected output writes -1 to exec_lo right before the jump. |
| call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_preserve_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0) |
| unreachable |
| } |
| |
| ; amdgpu_cs_chain caller that chains into the amdgpu_cs_chain_preserve |
| ; function @chain_preserve_callee. The inline asm clobbers v0, v8, v16 and s0; |
| ; the expected output keeps the incoming v8 in v1 and the incoming s0 in s3 |
| ; across the asm and copies them back before the s_setpc_b64 jump. |
| ; No scratch store or reload of v16 is expected for this caller. |
| define amdgpu_cs_chain void @chain_to_chain_preserve(<3 x i32> inreg %a, <3 x i32> %b) { |
| ; GISEL-GFX11-LABEL: chain_to_chain_preserve: |
| ; GISEL-GFX11: ; %bb.0: |
| ; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8 |
| ; GISEL-GFX11-NEXT: s_mov_b32 s3, s0 |
| ; GISEL-GFX11-NEXT: ;;#ASMSTART |
| ; GISEL-GFX11-NEXT: s_nop |
| ; GISEL-GFX11-NEXT: ;;#ASMEND |
| ; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo |
| ; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi |
| ; GISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1 |
| ; GISEL-GFX11-NEXT: s_mov_b32 s0, s3 |
| ; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 |
| ; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5] |
| ; |
| ; GISEL-GFX10-LABEL: chain_to_chain_preserve: |
| ; GISEL-GFX10: ; %bb.0: |
| ; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8 |
| ; GISEL-GFX10-NEXT: s_mov_b32 s3, s0 |
| ; GISEL-GFX10-NEXT: ;;#ASMSTART |
| ; GISEL-GFX10-NEXT: s_nop |
| ; GISEL-GFX10-NEXT: ;;#ASMEND |
| ; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo |
| ; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi |
| ; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1 |
| ; GISEL-GFX10-NEXT: s_mov_b32 s0, s3 |
| ; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 |
| ; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5] |
| ; |
| ; DAGISEL-GFX11-LABEL: chain_to_chain_preserve: |
| ; DAGISEL-GFX11: ; %bb.0: |
| ; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8 |
| ; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0 |
| ; DAGISEL-GFX11-NEXT: ;;#ASMSTART |
| ; DAGISEL-GFX11-NEXT: s_nop |
| ; DAGISEL-GFX11-NEXT: ;;#ASMEND |
| ; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi |
| ; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo |
| ; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1 |
| ; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3 |
| ; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 |
| ; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5] |
| ; |
| ; DAGISEL-GFX10-LABEL: chain_to_chain_preserve: |
| ; DAGISEL-GFX10: ; %bb.0: |
| ; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8 |
| ; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0 |
| ; DAGISEL-GFX10-NEXT: ;;#ASMSTART |
| ; DAGISEL-GFX10-NEXT: s_nop |
| ; DAGISEL-GFX10-NEXT: ;;#ASMEND |
| ; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi |
| ; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo |
| ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1 |
| ; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3 |
| ; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 |
| ; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5] |
| ; Clobber v0, v8, v16 and s0 ahead of the chain call. |
| call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"() |
| call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_preserve_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0) |
| unreachable |
| } |
| |
| ; amdgpu_cs_chain_preserve caller that chains into another |
| ; amdgpu_cs_chain_preserve function (@chain_preserve_callee). |
| ; The inline asm clobbers v0, v8, v16 and s0. The expected output stores v16 |
| ; to scratch before the asm and reloads it afterwards, and keeps the incoming |
| ; v8 in v1 and the incoming s0 in s3 until just before the s_setpc_b64 jump. |
| define amdgpu_cs_chain_preserve void @chain_preserve_to_chain_preserve(<3 x i32> inreg %a, <3 x i32> %b) { |
| ; GISEL-GFX11-LABEL: chain_preserve_to_chain_preserve: |
| ; GISEL-GFX11: ; %bb.0: |
| ; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GISEL-GFX11-NEXT: scratch_store_b32 off, v16, off ; 4-byte Folded Spill |
| ; GISEL-GFX11-NEXT: s_mov_b32 s3, s0 |
| ; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8 |
| ; GISEL-GFX11-NEXT: ;;#ASMSTART |
| ; GISEL-GFX11-NEXT: s_nop |
| ; GISEL-GFX11-NEXT: ;;#ASMEND |
| ; GISEL-GFX11-NEXT: scratch_load_b32 v16, off, off ; 4-byte Folded Reload |
| ; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo |
| ; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi |
| ; GISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1 |
| ; GISEL-GFX11-NEXT: s_mov_b32 s0, s3 |
| ; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 |
| ; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5] |
| ; |
| ; GISEL-GFX10-LABEL: chain_preserve_to_chain_preserve: |
| ; GISEL-GFX10: ; %bb.0: |
| ; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], 0 ; 4-byte Folded Spill |
| ; GISEL-GFX10-NEXT: s_mov_b32 s3, s0 |
| ; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8 |
| ; GISEL-GFX10-NEXT: ;;#ASMSTART |
| ; GISEL-GFX10-NEXT: s_nop |
| ; GISEL-GFX10-NEXT: ;;#ASMEND |
| ; GISEL-GFX10-NEXT: buffer_load_dword v16, off, s[48:51], 0 ; 4-byte Folded Reload |
| ; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo |
| ; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi |
| ; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1 |
| ; GISEL-GFX10-NEXT: s_mov_b32 s0, s3 |
| ; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 |
| ; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5] |
| ; |
| ; DAGISEL-GFX11-LABEL: chain_preserve_to_chain_preserve: |
| ; DAGISEL-GFX11: ; %bb.0: |
| ; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v16, off ; 4-byte Folded Spill |
| ; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8 |
| ; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0 |
| ; DAGISEL-GFX11-NEXT: ;;#ASMSTART |
| ; DAGISEL-GFX11-NEXT: s_nop |
| ; DAGISEL-GFX11-NEXT: ;;#ASMEND |
| ; DAGISEL-GFX11-NEXT: scratch_load_b32 v16, off, off ; 4-byte Folded Reload |
| ; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi |
| ; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1 |
| ; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo |
| ; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3 |
| ; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 |
| ; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5] |
| ; |
| ; DAGISEL-GFX10-LABEL: chain_preserve_to_chain_preserve: |
| ; DAGISEL-GFX10: ; %bb.0: |
| ; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; DAGISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], 0 ; 4-byte Folded Spill |
| ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8 |
| ; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0 |
| ; DAGISEL-GFX10-NEXT: ;;#ASMSTART |
| ; DAGISEL-GFX10-NEXT: s_nop |
| ; DAGISEL-GFX10-NEXT: ;;#ASMEND |
| ; DAGISEL-GFX10-NEXT: buffer_load_dword v16, off, s[48:51], 0 ; 4-byte Folded Reload |
| ; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_preserve_callee@abs32@hi |
| ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1 |
| ; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_preserve_callee@abs32@lo |
| ; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3 |
| ; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 |
| ; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5] |
| ; The v16 clobber is what the scratch store/reload in the expected output protects. |
| call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"() |
| call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_preserve_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0) |
| unreachable |
| } |
| |
| ; amdgpu_cs_chain_preserve caller that chains into @chain_callee, which is |
| ; declared in this file with the amdgpu_cs_chain calling convention. |
| ; The inline asm clobbers v0, v8, v16 and s0. The expected output stores v16 |
| ; to scratch before the asm and reloads it afterwards, and keeps the incoming |
| ; v8 in v1 and the incoming s0 in s3 until just before the s_setpc_b64 jump. |
| define amdgpu_cs_chain_preserve void @chain_preserve_to_chain(<3 x i32> inreg %a, <3 x i32> %b) { |
| ; GISEL-GFX11-LABEL: chain_preserve_to_chain: |
| ; GISEL-GFX11: ; %bb.0: |
| ; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GISEL-GFX11-NEXT: scratch_store_b32 off, v16, off ; 4-byte Folded Spill |
| ; GISEL-GFX11-NEXT: s_mov_b32 s3, s0 |
| ; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8 |
| ; GISEL-GFX11-NEXT: ;;#ASMSTART |
| ; GISEL-GFX11-NEXT: s_nop |
| ; GISEL-GFX11-NEXT: ;;#ASMEND |
| ; GISEL-GFX11-NEXT: scratch_load_b32 v16, off, off ; 4-byte Folded Reload |
| ; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo |
| ; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi |
| ; GISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1 |
| ; GISEL-GFX11-NEXT: s_mov_b32 s0, s3 |
| ; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 |
| ; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5] |
| ; |
| ; GISEL-GFX10-LABEL: chain_preserve_to_chain: |
| ; GISEL-GFX10: ; %bb.0: |
| ; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], 0 ; 4-byte Folded Spill |
| ; GISEL-GFX10-NEXT: s_mov_b32 s3, s0 |
| ; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8 |
| ; GISEL-GFX10-NEXT: ;;#ASMSTART |
| ; GISEL-GFX10-NEXT: s_nop |
| ; GISEL-GFX10-NEXT: ;;#ASMEND |
| ; GISEL-GFX10-NEXT: buffer_load_dword v16, off, s[48:51], 0 ; 4-byte Folded Reload |
| ; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo |
| ; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi |
| ; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1 |
| ; GISEL-GFX10-NEXT: s_mov_b32 s0, s3 |
| ; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 |
| ; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5] |
| ; |
| ; DAGISEL-GFX11-LABEL: chain_preserve_to_chain: |
| ; DAGISEL-GFX11: ; %bb.0: |
| ; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v16, off ; 4-byte Folded Spill |
| ; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8 |
| ; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0 |
| ; DAGISEL-GFX11-NEXT: ;;#ASMSTART |
| ; DAGISEL-GFX11-NEXT: s_nop |
| ; DAGISEL-GFX11-NEXT: ;;#ASMEND |
| ; DAGISEL-GFX11-NEXT: scratch_load_b32 v16, off, off ; 4-byte Folded Reload |
| ; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi |
| ; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1 |
| ; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo |
| ; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3 |
| ; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 |
| ; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5] |
| ; |
| ; DAGISEL-GFX10-LABEL: chain_preserve_to_chain: |
| ; DAGISEL-GFX10: ; %bb.0: |
| ; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; DAGISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], 0 ; 4-byte Folded Spill |
| ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8 |
| ; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0 |
| ; DAGISEL-GFX10-NEXT: ;;#ASMSTART |
| ; DAGISEL-GFX10-NEXT: s_nop |
| ; DAGISEL-GFX10-NEXT: ;;#ASMEND |
| ; DAGISEL-GFX10-NEXT: buffer_load_dword v16, off, s[48:51], 0 ; 4-byte Folded Reload |
| ; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi |
| ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1 |
| ; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo |
| ; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3 |
| ; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 |
| ; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5] |
| ; Clobber v0, v8, v16 and s0 ahead of the chain call. |
| call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"() |
| call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0) |
| unreachable |
| } |
| |
| ; amdgpu_cs_chain_preserve caller that computes a value through |
| ; llvm.amdgcn.set.inactive / llvm.amdgcn.wwm and passes it as element 0 of the |
| ; VGPR argument of a chain call to @chain_callee. |
| ; The expected output contains an s_or_saveexec_b32 with -1, a v_cndmask_b32 |
| ; choosing between the constants 4 and 3, and a restore of exec_lo before the |
| ; inline asm. As the asm clobbers v16, a scratch store/reload of v16 around it |
| ; is expected as well; the selected value reaches v8 via v1 and v2. |
| define amdgpu_cs_chain_preserve void @chain_preserve_to_chain_wwm(<3 x i32> inreg %a, <3 x i32> %b) { |
| ; GISEL-GFX11-LABEL: chain_preserve_to_chain_wwm: |
| ; GISEL-GFX11: ; %bb.0: |
| ; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GISEL-GFX11-NEXT: scratch_store_b32 off, v16, off ; 4-byte Folded Spill |
| ; GISEL-GFX11-NEXT: s_mov_b32 s3, s0 |
| ; GISEL-GFX11-NEXT: s_or_saveexec_b32 s0, -1 |
| ; GISEL-GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GISEL-GFX11-NEXT: v_cndmask_b32_e64 v1, 4, 3, s0 |
| ; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, s0 |
| ; GISEL-GFX11-NEXT: ;;#ASMSTART |
| ; GISEL-GFX11-NEXT: s_nop |
| ; GISEL-GFX11-NEXT: ;;#ASMEND |
| ; GISEL-GFX11-NEXT: scratch_load_b32 v16, off, off ; 4-byte Folded Reload |
| ; GISEL-GFX11-NEXT: v_mov_b32_e32 v2, v1 |
| ; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo |
| ; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi |
| ; GISEL-GFX11-NEXT: s_mov_b32 s0, s3 |
| ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GISEL-GFX11-NEXT: v_mov_b32_e32 v8, v2 |
| ; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 |
| ; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5] |
| ; |
| ; GISEL-GFX10-LABEL: chain_preserve_to_chain_wwm: |
| ; GISEL-GFX10: ; %bb.0: |
| ; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], 0 ; 4-byte Folded Spill |
| ; GISEL-GFX10-NEXT: s_mov_b32 s3, s0 |
| ; GISEL-GFX10-NEXT: s_or_saveexec_b32 s0, -1 |
| ; GISEL-GFX10-NEXT: v_cndmask_b32_e64 v1, 4, 3, s0 |
| ; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, s0 |
| ; GISEL-GFX10-NEXT: ;;#ASMSTART |
| ; GISEL-GFX10-NEXT: s_nop |
| ; GISEL-GFX10-NEXT: ;;#ASMEND |
| ; GISEL-GFX10-NEXT: buffer_load_dword v16, off, s[48:51], 0 ; 4-byte Folded Reload |
| ; GISEL-GFX10-NEXT: v_mov_b32_e32 v2, v1 |
| ; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo |
| ; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi |
| ; GISEL-GFX10-NEXT: s_mov_b32 s0, s3 |
| ; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v2 |
| ; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 |
| ; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5] |
| ; |
| ; DAGISEL-GFX11-LABEL: chain_preserve_to_chain_wwm: |
| ; DAGISEL-GFX11: ; %bb.0: |
| ; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v16, off ; 4-byte Folded Spill |
| ; DAGISEL-GFX11-NEXT: s_or_saveexec_b32 s4, -1 |
| ; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0 |
| ; DAGISEL-GFX11-NEXT: v_cndmask_b32_e64 v1, 4, 3, s4 |
| ; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, s4 |
| ; DAGISEL-GFX11-NEXT: ;;#ASMSTART |
| ; DAGISEL-GFX11-NEXT: s_nop |
| ; DAGISEL-GFX11-NEXT: ;;#ASMEND |
| ; DAGISEL-GFX11-NEXT: scratch_load_b32 v16, off, off ; 4-byte Folded Reload |
| ; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v2, v1 |
| ; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi |
| ; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo |
| ; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3 |
| ; DAGISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v8, v2 |
| ; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 |
| ; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5] |
| ; |
| ; DAGISEL-GFX10-LABEL: chain_preserve_to_chain_wwm: |
| ; DAGISEL-GFX10: ; %bb.0: |
| ; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; DAGISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], 0 ; 4-byte Folded Spill |
| ; DAGISEL-GFX10-NEXT: s_or_saveexec_b32 s4, -1 |
| ; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0 |
| ; DAGISEL-GFX10-NEXT: v_cndmask_b32_e64 v1, 4, 3, s4 |
| ; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, s4 |
| ; DAGISEL-GFX10-NEXT: ;;#ASMSTART |
| ; DAGISEL-GFX10-NEXT: s_nop |
| ; DAGISEL-GFX10-NEXT: ;;#ASMEND |
| ; DAGISEL-GFX10-NEXT: buffer_load_dword v16, off, s[48:51], 0 ; 4-byte Folded Reload |
| ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v2, v1 |
| ; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi |
| ; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo |
| ; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3 |
| ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v2 |
| ; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 |
| ; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5] |
| ; %i is produced before the clobbering asm and consumed (through wwm) after it, |
| ; so its value has to stay live across the asm. |
| %i = call i32 @llvm.amdgcn.set.inactive(i32 3, i32 4) |
| call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"() |
| %w = call i32 @llvm.amdgcn.wwm(i32 %i) |
| ; Replace element 0 of the VGPR argument with the wwm result. |
| %c = insertelement <3 x i32> %b, i32 %w, i32 0 |
| call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %c, i32 0) |
| unreachable |
| } |
| |
| ; Variant of the chain_preserve -> chain test in which the inline asm clobbers |
| ; every register v0 through v7 in addition to v8, v16 and s0. |
| ; The expected output stores both v11 and v16 to scratch on entry, keeps the |
| ; incoming v8 in v11 across the asm, copies it back to v8, and reloads v16 and |
| ; v11 before the s_setpc_b64 jump. |
| ; NOTE(review): v11 is presumably picked because v0-v7 are all clobbered; |
| ; the choice of register is up to the register allocator. |
| define amdgpu_cs_chain_preserve void @chain_preserve_to_chain_use_all_v0_v7(<3 x i32> inreg %a, <3 x i32> %b) { |
| ; GISEL-GFX11-LABEL: chain_preserve_to_chain_use_all_v0_v7: |
| ; GISEL-GFX11: ; %bb.0: |
| ; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GISEL-GFX11-NEXT: s_clause 0x1 |
| ; GISEL-GFX11-NEXT: scratch_store_b32 off, v11, off offset:4 |
| ; GISEL-GFX11-NEXT: scratch_store_b32 off, v16, off |
| ; GISEL-GFX11-NEXT: v_mov_b32_e32 v11, v8 |
| ; GISEL-GFX11-NEXT: s_mov_b32 s3, s0 |
| ; GISEL-GFX11-NEXT: ;;#ASMSTART |
| ; GISEL-GFX11-NEXT: s_nop |
| ; GISEL-GFX11-NEXT: ;;#ASMEND |
| ; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo |
| ; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi |
| ; GISEL-GFX11-NEXT: v_mov_b32_e32 v8, v11 |
| ; GISEL-GFX11-NEXT: s_clause 0x1 |
| ; GISEL-GFX11-NEXT: scratch_load_b32 v16, off, off |
| ; GISEL-GFX11-NEXT: scratch_load_b32 v11, off, off offset:4 |
| ; GISEL-GFX11-NEXT: s_mov_b32 s0, s3 |
| ; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 |
| ; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5] |
| ; |
| ; GISEL-GFX10-LABEL: chain_preserve_to_chain_use_all_v0_v7: |
| ; GISEL-GFX10: ; %bb.0: |
| ; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GISEL-GFX10-NEXT: buffer_store_dword v11, off, s[48:51], 0 offset:4 ; 4-byte Folded Spill |
| ; GISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], 0 ; 4-byte Folded Spill |
| ; GISEL-GFX10-NEXT: v_mov_b32_e32 v11, v8 |
| ; GISEL-GFX10-NEXT: s_mov_b32 s3, s0 |
| ; GISEL-GFX10-NEXT: ;;#ASMSTART |
| ; GISEL-GFX10-NEXT: s_nop |
| ; GISEL-GFX10-NEXT: ;;#ASMEND |
| ; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo |
| ; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi |
| ; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v11 |
| ; GISEL-GFX10-NEXT: s_clause 0x1 |
| ; GISEL-GFX10-NEXT: buffer_load_dword v16, off, s[48:51], 0 |
| ; GISEL-GFX10-NEXT: buffer_load_dword v11, off, s[48:51], 0 offset:4 |
| ; GISEL-GFX10-NEXT: s_mov_b32 s0, s3 |
| ; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 |
| ; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5] |
| ; |
| ; DAGISEL-GFX11-LABEL: chain_preserve_to_chain_use_all_v0_v7: |
| ; DAGISEL-GFX11: ; %bb.0: |
| ; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; DAGISEL-GFX11-NEXT: s_clause 0x1 |
| ; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v11, off offset:4 |
| ; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v16, off |
| ; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v11, v8 |
| ; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0 |
| ; DAGISEL-GFX11-NEXT: ;;#ASMSTART |
| ; DAGISEL-GFX11-NEXT: s_nop |
| ; DAGISEL-GFX11-NEXT: ;;#ASMEND |
| ; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi |
| ; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo |
| ; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v8, v11 |
| ; DAGISEL-GFX11-NEXT: s_clause 0x1 |
| ; DAGISEL-GFX11-NEXT: scratch_load_b32 v16, off, off |
| ; DAGISEL-GFX11-NEXT: scratch_load_b32 v11, off, off offset:4 |
| ; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3 |
| ; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 |
| ; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5] |
| ; |
| ; DAGISEL-GFX10-LABEL: chain_preserve_to_chain_use_all_v0_v7: |
| ; DAGISEL-GFX10: ; %bb.0: |
| ; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; DAGISEL-GFX10-NEXT: buffer_store_dword v11, off, s[48:51], 0 offset:4 ; 4-byte Folded Spill |
| ; DAGISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], 0 ; 4-byte Folded Spill |
| ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v11, v8 |
| ; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0 |
| ; DAGISEL-GFX10-NEXT: ;;#ASMSTART |
| ; DAGISEL-GFX10-NEXT: s_nop |
| ; DAGISEL-GFX10-NEXT: ;;#ASMEND |
| ; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi |
| ; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo |
| ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v11 |
| ; DAGISEL-GFX10-NEXT: s_clause 0x1 |
| ; DAGISEL-GFX10-NEXT: buffer_load_dword v16, off, s[48:51], 0 |
| ; DAGISEL-GFX10-NEXT: buffer_load_dword v11, off, s[48:51], 0 offset:4 |
| ; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3 |
| ; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 |
| ; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5] |
| ; Clobber list covers v0-v7 as well as v8, v16 and s0. |
| call void asm "s_nop", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v16},~{s0}"() |
| call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0) |
| unreachable |
| } |
| |
| ; amdgpu_cs_chain_preserve caller that receives <3 x i32> arguments but chains |
| ; into @chain_preserve_callee_2 with only the first two elements of each |
| ; (via the v2i32 overload of llvm.amdgcn.cs.chain). |
| ; The inline asm clobbers v0, v8, v16 and s0. The expected output stores and |
| ; reloads v16 around the asm and keeps the incoming s0 in s2 and the incoming |
| ; v8 in v1 until just before the s_setpc_b64 jump. |
| define amdgpu_cs_chain_preserve void @chain_preserve_to_chain_preserve_fewer_args(<3 x i32> inreg %a, <3 x i32> %b) { |
| ; GISEL-GFX11-LABEL: chain_preserve_to_chain_preserve_fewer_args: |
| ; GISEL-GFX11: ; %bb.0: |
| ; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GISEL-GFX11-NEXT: scratch_store_b32 off, v16, off ; 4-byte Folded Spill |
| ; GISEL-GFX11-NEXT: s_mov_b32 s2, s0 |
| ; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8 |
| ; GISEL-GFX11-NEXT: ;;#ASMSTART |
| ; GISEL-GFX11-NEXT: s_nop |
| ; GISEL-GFX11-NEXT: ;;#ASMEND |
| ; GISEL-GFX11-NEXT: scratch_load_b32 v16, off, off ; 4-byte Folded Reload |
| ; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_preserve_callee_2@abs32@lo |
| ; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_preserve_callee_2@abs32@hi |
| ; GISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1 |
| ; GISEL-GFX11-NEXT: s_mov_b32 s0, s2 |
| ; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 |
| ; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5] |
| ; |
| ; GISEL-GFX10-LABEL: chain_preserve_to_chain_preserve_fewer_args: |
| ; GISEL-GFX10: ; %bb.0: |
| ; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], 0 ; 4-byte Folded Spill |
| ; GISEL-GFX10-NEXT: s_mov_b32 s2, s0 |
| ; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8 |
| ; GISEL-GFX10-NEXT: ;;#ASMSTART |
| ; GISEL-GFX10-NEXT: s_nop |
| ; GISEL-GFX10-NEXT: ;;#ASMEND |
| ; GISEL-GFX10-NEXT: buffer_load_dword v16, off, s[48:51], 0 ; 4-byte Folded Reload |
| ; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_preserve_callee_2@abs32@lo |
| ; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_preserve_callee_2@abs32@hi |
| ; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1 |
| ; GISEL-GFX10-NEXT: s_mov_b32 s0, s2 |
| ; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 |
| ; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5] |
| ; |
| ; DAGISEL-GFX11-LABEL: chain_preserve_to_chain_preserve_fewer_args: |
| ; DAGISEL-GFX11: ; %bb.0: |
| ; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v16, off ; 4-byte Folded Spill |
| ; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v1, v8 |
| ; DAGISEL-GFX11-NEXT: s_mov_b32 s2, s0 |
| ; DAGISEL-GFX11-NEXT: ;;#ASMSTART |
| ; DAGISEL-GFX11-NEXT: s_nop |
| ; DAGISEL-GFX11-NEXT: ;;#ASMEND |
| ; DAGISEL-GFX11-NEXT: scratch_load_b32 v16, off, off ; 4-byte Folded Reload |
| ; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_preserve_callee_2@abs32@hi |
| ; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v8, v1 |
| ; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_preserve_callee_2@abs32@lo |
| ; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s2 |
| ; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 |
| ; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5] |
| ; |
| ; DAGISEL-GFX10-LABEL: chain_preserve_to_chain_preserve_fewer_args: |
| ; DAGISEL-GFX10: ; %bb.0: |
| ; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; DAGISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], 0 ; 4-byte Folded Spill |
| ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, v8 |
| ; DAGISEL-GFX10-NEXT: s_mov_b32 s2, s0 |
| ; DAGISEL-GFX10-NEXT: ;;#ASMSTART |
| ; DAGISEL-GFX10-NEXT: s_nop |
| ; DAGISEL-GFX10-NEXT: ;;#ASMEND |
| ; DAGISEL-GFX10-NEXT: buffer_load_dword v16, off, s[48:51], 0 ; 4-byte Folded Reload |
| ; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_preserve_callee_2@abs32@hi |
| ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v1 |
| ; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_preserve_callee_2@abs32@lo |
| ; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s2 |
| ; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 |
| ; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5] |
| ; Keep only elements 0 and 1 of each incoming three-element vector. |
| %s = shufflevector <3 x i32> %a, <3 x i32> zeroinitializer, <2 x i32> <i32 0, i32 1> |
| %v = shufflevector <3 x i32> %b, <3 x i32> zeroinitializer, <2 x i32> <i32 0, i32 1> |
| call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"() |
| call void(ptr, i32, <2 x i32>, <2 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v2i32(ptr @chain_preserve_callee_2, i32 -1, <2 x i32> inreg %s, <2 x i32> %v, i32 0) |
| unreachable |
| } |
| |
| ; Note that amdgpu_cs_chain_preserve functions are not allowed to call |
| ; llvm.amdgcn.cs.chain with more vgpr args than they received as parameters. |
| |
| ; amdgpu_cs_chain_preserve function with a 32-byte-aligned private |
| ; (addrspace(5)) alloca of eight <4 x i32> elements; it does a volatile store |
| ; of <1, 2, 3, 4> to the element selected by %idx and returns. |
| ; As the function name says, no stack realignment is wanted: the expected |
| ; output only shifts the index in v8 left by 4 to form the scratch offset, |
| ; stores the four dwords, and ends with s_endpgm. |
| define amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_dont_realign_stack(i32 %idx) { |
| ; GISEL-GFX11-LABEL: amdgpu_cs_chain_preserve_dont_realign_stack: |
| ; GISEL-GFX11: ; %bb.0: |
| ; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GISEL-GFX11-NEXT: s_mov_b32 s3, 4 |
| ; GISEL-GFX11-NEXT: s_mov_b32 s2, 3 |
| ; GISEL-GFX11-NEXT: s_mov_b32 s1, 2 |
| ; GISEL-GFX11-NEXT: s_mov_b32 s0, 1 |
| ; GISEL-GFX11-NEXT: v_lshlrev_b32_e32 v0, 4, v8 |
| ; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GISEL-GFX11-NEXT: v_mov_b32_e32 v4, v0 |
| ; GISEL-GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3 |
| ; GISEL-GFX11-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2 |
| ; GISEL-GFX11-NEXT: scratch_store_b128 v4, v[0:3], off dlc |
| ; GISEL-GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GISEL-GFX11-NEXT: s_endpgm |
| ; |
| ; GISEL-GFX10-LABEL: amdgpu_cs_chain_preserve_dont_realign_stack: |
| ; GISEL-GFX10: ; %bb.0: |
| ; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GISEL-GFX10-NEXT: v_lshlrev_b32_e32 v0, 4, v8 |
| ; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 1 |
| ; GISEL-GFX10-NEXT: v_mov_b32_e32 v2, 2 |
| ; GISEL-GFX10-NEXT: v_mov_b32_e32 v3, 3 |
| ; GISEL-GFX10-NEXT: v_mov_b32_e32 v4, 4 |
| ; GISEL-GFX10-NEXT: buffer_store_dword v1, v0, s[48:51], 0 offen |
| ; GISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GISEL-GFX10-NEXT: buffer_store_dword v2, v0, s[48:51], 0 offen offset:4 |
| ; GISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GISEL-GFX10-NEXT: buffer_store_dword v3, v0, s[48:51], 0 offen offset:8 |
| ; GISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GISEL-GFX10-NEXT: buffer_store_dword v4, v0, s[48:51], 0 offen offset:12 |
| ; GISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GISEL-GFX10-NEXT: s_endpgm |
| ; |
| ; DAGISEL-GFX11-LABEL: amdgpu_cs_chain_preserve_dont_realign_stack: |
| ; DAGISEL-GFX11: ; %bb.0: |
| ; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2 |
| ; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4 |
| ; DAGISEL-GFX11-NEXT: v_lshl_add_u32 v4, v8, 4, 0 |
| ; DAGISEL-GFX11-NEXT: scratch_store_b128 v4, v[0:3], off dlc |
| ; DAGISEL-GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; DAGISEL-GFX11-NEXT: s_endpgm |
| ; |
| ; DAGISEL-GFX10-LABEL: amdgpu_cs_chain_preserve_dont_realign_stack: |
| ; DAGISEL-GFX10: ; %bb.0: |
| ; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v0, 4 |
| ; DAGISEL-GFX10-NEXT: v_lshl_add_u32 v1, v8, 4, 0 |
| ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v2, 3 |
| ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v3, 2 |
| ; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v4, 1 |
| ; DAGISEL-GFX10-NEXT: buffer_store_dword v0, v1, s[48:51], 0 offen offset:12 |
| ; DAGISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; DAGISEL-GFX10-NEXT: buffer_store_dword v2, v1, s[48:51], 0 offen offset:8 |
| ; DAGISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; DAGISEL-GFX10-NEXT: buffer_store_dword v3, v1, s[48:51], 0 offen offset:4 |
| ; DAGISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; DAGISEL-GFX10-NEXT: buffer_store_dword v4, v1, s[48:51], 0 offen |
| ; DAGISEL-GFX10-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; DAGISEL-GFX10-NEXT: s_endpgm |
| ; Over-aligned (32-byte) alloca in the private address space. |
| %alloca.align32 = alloca [8 x <4 x i32>], align 32, addrspace(5) |
| %gep0 = getelementptr inbounds [8 x <4 x i32>], ptr addrspace(5) %alloca.align32, i32 0, i32 %idx |
| ; The store is volatile, so it has to show up in the generated code. |
| store volatile <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr addrspace(5) %gep0, align 32 |
| ret void |
| } |
| |
| ; Chain intrinsic overload and callees used by the <3 x i32> tests. |
| declare void @llvm.amdgcn.cs.chain.v3i32(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) |
| declare amdgpu_cs_chain_preserve void @chain_preserve_callee(<3 x i32> inreg, <3 x i32>) |
| declare amdgpu_cs_chain void @chain_callee(<3 x i32> inreg, <3 x i32>) |
| |
| ; Chain intrinsic overload and callee used by the fewer-arguments test. |
| declare void @llvm.amdgcn.cs.chain.v2i32(ptr, i32, <2 x i32>, <2 x i32>, i32, ...) |
| declare amdgpu_cs_chain_preserve void @chain_preserve_callee_2(<2 x i32> inreg, <2 x i32>) |
| |
| ; Intrinsics used by the wwm test. |
| declare i32 @llvm.amdgcn.set.inactive(i32, i32) |
| declare i32 @llvm.amdgcn.wwm(i32) |