| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX90A %s |
| ; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX908 %s |
| |
| ; Callee that does nothing but return. Both RUN configurations share the same |
| ; check lines here, which expect only the entry wait and the return jump. |
| define void @func_empty() #0 { |
| ; GCN-LABEL: func_empty: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| ret void |
| } |
| |
| ; Callee whose inline asm lists a3 as a clobber. The shared check lines expect |
| ; the asm to be emitted with no AGPR save/restore around it on either target. |
| define void @func_areg_4() #0 { |
| ; GCN-LABEL: func_areg_4: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: ; use agpr3 |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| call void asm sideeffect "; use agpr3", "~{a3}" () |
| ret void |
| } |
| |
| ; Callee whose inline asm lists a31 as a clobber. The shared check lines expect |
| ; the asm to be emitted with no AGPR save/restore around it on either target. |
| define void @func_areg_32() #0 { |
| ; GCN-LABEL: func_areg_32: |
| ; GCN: ; %bb.0: |
| ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GCN-NEXT: ;;#ASMSTART |
| ; GCN-NEXT: ; use agpr31 |
| ; GCN-NEXT: ;;#ASMEND |
| ; GCN-NEXT: s_setpc_b64 s[30:31] |
| call void asm sideeffect "; use agpr31", "~{a31}" () |
| ret void |
| } |
| |
| ; Callee whose inline asm lists a32 as a clobber. The gfx90a checks expect a32 |
| ; to be copied into v0 before the asm and written back afterwards; the gfx908 |
| ; checks expect no save/restore at all. |
| ; NOTE(review): presumably a32 is treated as callee-saved on gfx90a but not on |
| ; gfx908 - confirm against the AMDGPU calling convention. |
| define void @func_areg_33() #0 { |
| ; GFX90A-LABEL: func_areg_33: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX90A-NEXT: v_accvgpr_read_b32 v0, a32 ; Reload Reuse |
| ; GFX90A-NEXT: ;;#ASMSTART |
| ; GFX90A-NEXT: ; use agpr32 |
| ; GFX90A-NEXT: ;;#ASMEND |
| ; GFX90A-NEXT: v_accvgpr_write_b32 a32, v0 ; Reload Reuse |
| ; GFX90A-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX908-LABEL: func_areg_33: |
| ; GFX908: ; %bb.0: |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; use agpr32 |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: s_setpc_b64 s[30:31] |
| call void asm sideeffect "; use agpr32", "~{a32}" () |
| ret void |
| } |
| |
| |
| ; Callee whose inline asm lists a63 as a clobber. The gfx90a checks expect a63 |
| ; to be copied into v0 before the asm and written back afterwards; the gfx908 |
| ; checks expect no save/restore at all. |
| define void @func_areg_64() #0 { |
| ; GFX90A-LABEL: func_areg_64: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX90A-NEXT: v_accvgpr_read_b32 v0, a63 ; Reload Reuse |
| ; GFX90A-NEXT: ;;#ASMSTART |
| ; GFX90A-NEXT: ; use agpr63 |
| ; GFX90A-NEXT: ;;#ASMEND |
| ; GFX90A-NEXT: v_accvgpr_write_b32 a63, v0 ; Reload Reuse |
| ; GFX90A-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX908-LABEL: func_areg_64: |
| ; GFX908: ; %bb.0: |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; use agpr63 |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: s_setpc_b64 s[30:31] |
| call void asm sideeffect "; use agpr63", "~{a63}" () |
| ret void |
| } |
| |
| ; Callee whose inline asm lists both a31 and a63 as clobbers. The gfx90a checks |
| ; expect only a63 to be saved to v0 and restored around the asm (a31 is left |
| ; alone); the gfx908 checks expect no save/restore for either register. |
| define void @func_areg_31_63() #0 { |
| ; GFX90A-LABEL: func_areg_31_63: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX90A-NEXT: v_accvgpr_read_b32 v0, a63 ; Reload Reuse |
| ; GFX90A-NEXT: ;;#ASMSTART |
| ; GFX90A-NEXT: ; use agpr31, agpr63 |
| ; GFX90A-NEXT: ;;#ASMEND |
| ; GFX90A-NEXT: v_accvgpr_write_b32 a63, v0 ; Reload Reuse |
| ; GFX90A-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX908-LABEL: func_areg_31_63: |
| ; GFX908: ; %bb.0: |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; use agpr31, agpr63 |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: s_setpc_b64 s[30:31] |
| call void asm sideeffect "; use agpr31, agpr63", "~{a31},~{a63}" () |
| ret void |
| } |
| |
| ; Declaration only: no body for this function is given at this point in the |
| ; file, so its register usage cannot be seen from the IR here. |
| declare void @func_unknown() #0 |
| |
| ; Kernel that defines a <32 x float> through inline asm with an "=a" output, |
| ; calls func_empty, and then volatile-stores the value, so the value is live |
| ; across the call. |
| ; The gfx90a checks expect the value to sit in a[0:31] across the call and to |
| ; be stored directly from AGPRs. The gfx908 checks expect a[0:31] to be copied |
| ; into VGPRs (v3-v30 and v32-v35; v31 carries the packed work-item ID) before |
| ; the call, with the stores reading those VGPRs afterwards. |
| define amdgpu_kernel void @test_call_empty() #0 { |
| ; GFX90A-LABEL: test_call_empty: |
| ; GFX90A: ; %bb.0: ; %bb |
| ; GFX90A-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0 |
| ; GFX90A-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1 |
| ; GFX90A-NEXT: s_mov_b32 s22, -1 |
| ; GFX90A-NEXT: s_mov_b32 s23, 0xe00000 |
| ; GFX90A-NEXT: s_add_u32 s20, s20, s11 |
| ; GFX90A-NEXT: s_addc_u32 s21, s21, 0 |
| ; GFX90A-NEXT: s_mov_b32 s12, s8 |
| ; GFX90A-NEXT: s_add_u32 s8, s4, 36 |
| ; GFX90A-NEXT: s_mov_b32 s13, s9 |
| ; GFX90A-NEXT: s_addc_u32 s9, s5, 0 |
| ; GFX90A-NEXT: s_getpc_b64 s[4:5] |
| ; GFX90A-NEXT: s_add_u32 s4, s4, func_empty@gotpcrel32@lo+4 |
| ; GFX90A-NEXT: s_addc_u32 s5, s5, func_empty@gotpcrel32@hi+12 |
| ; GFX90A-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 |
| ; GFX90A-NEXT: s_mov_b32 s14, s10 |
| ; GFX90A-NEXT: s_mov_b64 s[10:11], s[6:7] |
| ; GFX90A-NEXT: s_mov_b64 s[4:5], s[0:1] |
| ; GFX90A-NEXT: s_mov_b64 s[6:7], s[2:3] |
| ; GFX90A-NEXT: s_mov_b64 s[0:1], s[20:21] |
| ; GFX90A-NEXT: v_mov_b32_e32 v31, v0 |
| ; GFX90A-NEXT: s_mov_b64 s[2:3], s[22:23] |
| ; GFX90A-NEXT: s_mov_b32 s32, 0 |
| ; GFX90A-NEXT: ;;#ASMSTART |
| ; GFX90A-NEXT: ; def a[0:31] |
| ; GFX90A-NEXT: ;;#ASMEND |
| ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX90A-NEXT: s_swappc_b64 s[30:31], s[16:17] |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[28:31], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[24:27], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[20:23], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[16:19], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[12:15], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[8:11], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[4:7], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[0:3], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: s_endpgm |
| ; |
| ; GFX908-LABEL: test_call_empty: |
| ; GFX908: ; %bb.0: ; %bb |
| ; GFX908-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0 |
| ; GFX908-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1 |
| ; GFX908-NEXT: s_mov_b32 s22, -1 |
| ; GFX908-NEXT: s_mov_b32 s23, 0xe00000 |
| ; GFX908-NEXT: s_add_u32 s20, s20, s11 |
| ; GFX908-NEXT: s_addc_u32 s21, s21, 0 |
| ; GFX908-NEXT: s_mov_b32 s12, s8 |
| ; GFX908-NEXT: s_add_u32 s8, s4, 36 |
| ; GFX908-NEXT: s_mov_b32 s13, s9 |
| ; GFX908-NEXT: s_addc_u32 s9, s5, 0 |
| ; GFX908-NEXT: s_getpc_b64 s[4:5] |
| ; GFX908-NEXT: s_add_u32 s4, s4, func_empty@gotpcrel32@lo+4 |
| ; GFX908-NEXT: s_addc_u32 s5, s5, func_empty@gotpcrel32@hi+12 |
| ; GFX908-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 |
| ; GFX908-NEXT: s_mov_b32 s14, s10 |
| ; GFX908-NEXT: s_mov_b64 s[10:11], s[6:7] |
| ; GFX908-NEXT: v_lshlrev_b32_e32 v2, 20, v2 |
| ; GFX908-NEXT: v_lshlrev_b32_e32 v1, 10, v1 |
| ; GFX908-NEXT: s_mov_b64 s[4:5], s[0:1] |
| ; GFX908-NEXT: s_mov_b64 s[6:7], s[2:3] |
| ; GFX908-NEXT: s_mov_b64 s[0:1], s[20:21] |
| ; GFX908-NEXT: v_or3_b32 v31, v0, v1, v2 |
| ; GFX908-NEXT: s_mov_b64 s[2:3], s[22:23] |
| ; GFX908-NEXT: s_mov_b32 s32, 0 |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; def a[0:31] |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: v_accvgpr_read_b32 v6, a3 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v5, a2 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v4, a1 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v3, a0 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v10, a7 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v9, a6 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v8, a5 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v7, a4 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v14, a11 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v13, a10 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v12, a9 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v11, a8 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v18, a15 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v17, a14 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v16, a13 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v15, a12 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v22, a19 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v21, a18 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v20, a17 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v19, a16 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v26, a23 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v25, a22 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v24, a21 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v23, a20 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v30, a27 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v29, a26 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v28, a25 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v27, a24 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v35, a31 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v34, a30 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v33, a29 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v32, a28 |
| ; GFX908-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17] |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[32:35], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[27:30], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[23:26], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[19:22], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[15:18], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[11:14], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[7:10], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[3:6], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: s_endpgm |
| bb: |
| %reg = call <32 x float> asm sideeffect "; def $0", "=a"() |
| call void @func_empty() |
| store volatile <32 x float> %reg, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Kernel that defines a <32 x float> through inline asm with an "=a" output, |
| ; calls func_areg_4 (whose asm clobbers a3), and then volatile-stores the |
| ; value, so the value is live across the call. |
| ; The gfx90a checks expect the value to be placed in a[4:35], i.e. entirely |
| ; above a3, and stored directly from AGPRs after the call. The gfx908 checks |
| ; expect a[0:31] to be copied into VGPRs (v3-v30 and v32-v35) before the call, |
| ; with the stores reading those VGPRs afterwards. |
| define amdgpu_kernel void @test_call_areg4() #0 { |
| ; GFX90A-LABEL: test_call_areg4: |
| ; GFX90A: ; %bb.0: ; %bb |
| ; GFX90A-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0 |
| ; GFX90A-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1 |
| ; GFX90A-NEXT: s_mov_b32 s22, -1 |
| ; GFX90A-NEXT: s_mov_b32 s23, 0xe00000 |
| ; GFX90A-NEXT: s_add_u32 s20, s20, s11 |
| ; GFX90A-NEXT: s_addc_u32 s21, s21, 0 |
| ; GFX90A-NEXT: s_mov_b32 s12, s8 |
| ; GFX90A-NEXT: s_add_u32 s8, s4, 36 |
| ; GFX90A-NEXT: s_mov_b32 s13, s9 |
| ; GFX90A-NEXT: s_addc_u32 s9, s5, 0 |
| ; GFX90A-NEXT: s_getpc_b64 s[4:5] |
| ; GFX90A-NEXT: s_add_u32 s4, s4, func_areg_4@gotpcrel32@lo+4 |
| ; GFX90A-NEXT: s_addc_u32 s5, s5, func_areg_4@gotpcrel32@hi+12 |
| ; GFX90A-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 |
| ; GFX90A-NEXT: s_mov_b32 s14, s10 |
| ; GFX90A-NEXT: s_mov_b64 s[10:11], s[6:7] |
| ; GFX90A-NEXT: s_mov_b64 s[4:5], s[0:1] |
| ; GFX90A-NEXT: s_mov_b64 s[6:7], s[2:3] |
| ; GFX90A-NEXT: s_mov_b64 s[0:1], s[20:21] |
| ; GFX90A-NEXT: v_mov_b32_e32 v31, v0 |
| ; GFX90A-NEXT: s_mov_b64 s[2:3], s[22:23] |
| ; GFX90A-NEXT: s_mov_b32 s32, 0 |
| ; GFX90A-NEXT: ;;#ASMSTART |
| ; GFX90A-NEXT: ; def a[4:35] |
| ; GFX90A-NEXT: ;;#ASMEND |
| ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX90A-NEXT: s_swappc_b64 s[30:31], s[16:17] |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[32:35], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[28:31], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[24:27], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[20:23], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[16:19], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[12:15], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[8:11], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[4:7], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: s_endpgm |
| ; |
| ; GFX908-LABEL: test_call_areg4: |
| ; GFX908: ; %bb.0: ; %bb |
| ; GFX908-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0 |
| ; GFX908-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1 |
| ; GFX908-NEXT: s_mov_b32 s22, -1 |
| ; GFX908-NEXT: s_mov_b32 s23, 0xe00000 |
| ; GFX908-NEXT: s_add_u32 s20, s20, s11 |
| ; GFX908-NEXT: s_addc_u32 s21, s21, 0 |
| ; GFX908-NEXT: s_mov_b32 s12, s8 |
| ; GFX908-NEXT: s_add_u32 s8, s4, 36 |
| ; GFX908-NEXT: s_mov_b32 s13, s9 |
| ; GFX908-NEXT: s_addc_u32 s9, s5, 0 |
| ; GFX908-NEXT: s_getpc_b64 s[4:5] |
| ; GFX908-NEXT: s_add_u32 s4, s4, func_areg_4@gotpcrel32@lo+4 |
| ; GFX908-NEXT: s_addc_u32 s5, s5, func_areg_4@gotpcrel32@hi+12 |
| ; GFX908-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 |
| ; GFX908-NEXT: s_mov_b32 s14, s10 |
| ; GFX908-NEXT: s_mov_b64 s[10:11], s[6:7] |
| ; GFX908-NEXT: v_lshlrev_b32_e32 v2, 20, v2 |
| ; GFX908-NEXT: v_lshlrev_b32_e32 v1, 10, v1 |
| ; GFX908-NEXT: s_mov_b64 s[4:5], s[0:1] |
| ; GFX908-NEXT: s_mov_b64 s[6:7], s[2:3] |
| ; GFX908-NEXT: s_mov_b64 s[0:1], s[20:21] |
| ; GFX908-NEXT: v_or3_b32 v31, v0, v1, v2 |
| ; GFX908-NEXT: s_mov_b64 s[2:3], s[22:23] |
| ; GFX908-NEXT: s_mov_b32 s32, 0 |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; def a[0:31] |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: v_accvgpr_read_b32 v6, a3 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v5, a2 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v4, a1 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v3, a0 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v10, a7 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v9, a6 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v8, a5 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v7, a4 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v14, a11 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v13, a10 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v12, a9 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v11, a8 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v18, a15 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v17, a14 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v16, a13 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v15, a12 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v22, a19 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v21, a18 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v20, a17 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v19, a16 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v26, a23 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v25, a22 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v24, a21 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v23, a20 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v30, a27 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v29, a26 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v28, a25 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v27, a24 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v35, a31 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v34, a30 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v33, a29 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v32, a28 |
| ; GFX908-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17] |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[32:35], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[27:30], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[23:26], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[19:22], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[15:18], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[11:14], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[7:10], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[3:6], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: s_endpgm |
| bb: |
| %reg = call <32 x float> asm sideeffect "; def $0", "=a"() |
| call void @func_areg_4() |
| store volatile <32 x float> %reg, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Kernel that defines a <32 x float> through inline asm with an "=a" output, |
| ; calls func_areg_32 (whose asm clobbers a31), and then volatile-stores the |
| ; value, so the value is live across the call. |
| ; The gfx90a checks expect the value to be placed in a[32:63], i.e. entirely |
| ; above a31, and stored directly from AGPRs after the call. The gfx908 checks |
| ; expect a[0:31] to be copied into VGPRs (v3-v30 and v32-v35) before the call, |
| ; with the stores reading those VGPRs afterwards. |
| define amdgpu_kernel void @test_call_areg32() #0 { |
| ; GFX90A-LABEL: test_call_areg32: |
| ; GFX90A: ; %bb.0: ; %bb |
| ; GFX90A-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0 |
| ; GFX90A-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1 |
| ; GFX90A-NEXT: s_mov_b32 s22, -1 |
| ; GFX90A-NEXT: s_mov_b32 s23, 0xe00000 |
| ; GFX90A-NEXT: s_add_u32 s20, s20, s11 |
| ; GFX90A-NEXT: s_addc_u32 s21, s21, 0 |
| ; GFX90A-NEXT: s_mov_b32 s12, s8 |
| ; GFX90A-NEXT: s_add_u32 s8, s4, 36 |
| ; GFX90A-NEXT: s_mov_b32 s13, s9 |
| ; GFX90A-NEXT: s_addc_u32 s9, s5, 0 |
| ; GFX90A-NEXT: s_getpc_b64 s[4:5] |
| ; GFX90A-NEXT: s_add_u32 s4, s4, func_areg_32@gotpcrel32@lo+4 |
| ; GFX90A-NEXT: s_addc_u32 s5, s5, func_areg_32@gotpcrel32@hi+12 |
| ; GFX90A-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 |
| ; GFX90A-NEXT: s_mov_b32 s14, s10 |
| ; GFX90A-NEXT: s_mov_b64 s[10:11], s[6:7] |
| ; GFX90A-NEXT: s_mov_b64 s[4:5], s[0:1] |
| ; GFX90A-NEXT: s_mov_b64 s[6:7], s[2:3] |
| ; GFX90A-NEXT: s_mov_b64 s[0:1], s[20:21] |
| ; GFX90A-NEXT: v_mov_b32_e32 v31, v0 |
| ; GFX90A-NEXT: s_mov_b64 s[2:3], s[22:23] |
| ; GFX90A-NEXT: s_mov_b32 s32, 0 |
| ; GFX90A-NEXT: ;;#ASMSTART |
| ; GFX90A-NEXT: ; def a[32:63] |
| ; GFX90A-NEXT: ;;#ASMEND |
| ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX90A-NEXT: s_swappc_b64 s[30:31], s[16:17] |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[60:63], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[56:59], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[52:55], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[48:51], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[44:47], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[40:43], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[36:39], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[32:35], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: s_endpgm |
| ; |
| ; GFX908-LABEL: test_call_areg32: |
| ; GFX908: ; %bb.0: ; %bb |
| ; GFX908-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0 |
| ; GFX908-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1 |
| ; GFX908-NEXT: s_mov_b32 s22, -1 |
| ; GFX908-NEXT: s_mov_b32 s23, 0xe00000 |
| ; GFX908-NEXT: s_add_u32 s20, s20, s11 |
| ; GFX908-NEXT: s_addc_u32 s21, s21, 0 |
| ; GFX908-NEXT: s_mov_b32 s12, s8 |
| ; GFX908-NEXT: s_add_u32 s8, s4, 36 |
| ; GFX908-NEXT: s_mov_b32 s13, s9 |
| ; GFX908-NEXT: s_addc_u32 s9, s5, 0 |
| ; GFX908-NEXT: s_getpc_b64 s[4:5] |
| ; GFX908-NEXT: s_add_u32 s4, s4, func_areg_32@gotpcrel32@lo+4 |
| ; GFX908-NEXT: s_addc_u32 s5, s5, func_areg_32@gotpcrel32@hi+12 |
| ; GFX908-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 |
| ; GFX908-NEXT: s_mov_b32 s14, s10 |
| ; GFX908-NEXT: s_mov_b64 s[10:11], s[6:7] |
| ; GFX908-NEXT: v_lshlrev_b32_e32 v2, 20, v2 |
| ; GFX908-NEXT: v_lshlrev_b32_e32 v1, 10, v1 |
| ; GFX908-NEXT: s_mov_b64 s[4:5], s[0:1] |
| ; GFX908-NEXT: s_mov_b64 s[6:7], s[2:3] |
| ; GFX908-NEXT: s_mov_b64 s[0:1], s[20:21] |
| ; GFX908-NEXT: v_or3_b32 v31, v0, v1, v2 |
| ; GFX908-NEXT: s_mov_b64 s[2:3], s[22:23] |
| ; GFX908-NEXT: s_mov_b32 s32, 0 |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; def a[0:31] |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: v_accvgpr_read_b32 v6, a3 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v5, a2 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v4, a1 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v3, a0 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v10, a7 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v9, a6 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v8, a5 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v7, a4 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v14, a11 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v13, a10 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v12, a9 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v11, a8 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v18, a15 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v17, a14 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v16, a13 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v15, a12 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v22, a19 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v21, a18 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v20, a17 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v19, a16 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v26, a23 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v25, a22 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v24, a21 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v23, a20 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v30, a27 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v29, a26 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v28, a25 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v27, a24 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v35, a31 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v34, a30 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v33, a29 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v32, a28 |
| ; GFX908-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17] |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[32:35], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[27:30], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[23:26], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[19:22], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[15:18], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[11:14], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[7:10], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[3:6], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: s_endpgm |
| bb: |
| %reg = call <32 x float> asm sideeffect "; def $0", "=a"() |
| call void @func_areg_32() |
| store volatile <32 x float> %reg, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Kernel that defines a <32 x float> through inline asm with an "=a" output, |
| ; calls func_areg_64 (whose asm clobbers only a63), and then volatile-stores |
| ; the value, so the value is live across the call. |
| ; The gfx90a checks expect the value to sit in a[0:31] across the call and to |
| ; be stored directly from AGPRs. The gfx908 checks expect a[0:31] to be copied |
| ; into VGPRs (v3-v30 and v32-v35) before the call, with the stores reading |
| ; those VGPRs afterwards. |
| define amdgpu_kernel void @test_call_areg64() #0 { |
| ; GFX90A-LABEL: test_call_areg64: |
| ; GFX90A: ; %bb.0: ; %bb |
| ; GFX90A-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0 |
| ; GFX90A-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1 |
| ; GFX90A-NEXT: s_mov_b32 s22, -1 |
| ; GFX90A-NEXT: s_mov_b32 s23, 0xe00000 |
| ; GFX90A-NEXT: s_add_u32 s20, s20, s11 |
| ; GFX90A-NEXT: s_addc_u32 s21, s21, 0 |
| ; GFX90A-NEXT: s_mov_b32 s12, s8 |
| ; GFX90A-NEXT: s_add_u32 s8, s4, 36 |
| ; GFX90A-NEXT: s_mov_b32 s13, s9 |
| ; GFX90A-NEXT: s_addc_u32 s9, s5, 0 |
| ; GFX90A-NEXT: s_getpc_b64 s[4:5] |
| ; GFX90A-NEXT: s_add_u32 s4, s4, func_areg_64@gotpcrel32@lo+4 |
| ; GFX90A-NEXT: s_addc_u32 s5, s5, func_areg_64@gotpcrel32@hi+12 |
| ; GFX90A-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 |
| ; GFX90A-NEXT: s_mov_b32 s14, s10 |
| ; GFX90A-NEXT: s_mov_b64 s[10:11], s[6:7] |
| ; GFX90A-NEXT: s_mov_b64 s[4:5], s[0:1] |
| ; GFX90A-NEXT: s_mov_b64 s[6:7], s[2:3] |
| ; GFX90A-NEXT: s_mov_b64 s[0:1], s[20:21] |
| ; GFX90A-NEXT: v_mov_b32_e32 v31, v0 |
| ; GFX90A-NEXT: s_mov_b64 s[2:3], s[22:23] |
| ; GFX90A-NEXT: s_mov_b32 s32, 0 |
| ; GFX90A-NEXT: ;;#ASMSTART |
| ; GFX90A-NEXT: ; def a[0:31] |
| ; GFX90A-NEXT: ;;#ASMEND |
| ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX90A-NEXT: s_swappc_b64 s[30:31], s[16:17] |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[28:31], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[24:27], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[20:23], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[16:19], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[12:15], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[8:11], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[4:7], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[0:3], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: s_endpgm |
| ; |
| ; GFX908-LABEL: test_call_areg64: |
| ; GFX908: ; %bb.0: ; %bb |
| ; GFX908-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0 |
| ; GFX908-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1 |
| ; GFX908-NEXT: s_mov_b32 s22, -1 |
| ; GFX908-NEXT: s_mov_b32 s23, 0xe00000 |
| ; GFX908-NEXT: s_add_u32 s20, s20, s11 |
| ; GFX908-NEXT: s_addc_u32 s21, s21, 0 |
| ; GFX908-NEXT: s_mov_b32 s12, s8 |
| ; GFX908-NEXT: s_add_u32 s8, s4, 36 |
| ; GFX908-NEXT: s_mov_b32 s13, s9 |
| ; GFX908-NEXT: s_addc_u32 s9, s5, 0 |
| ; GFX908-NEXT: s_getpc_b64 s[4:5] |
| ; GFX908-NEXT: s_add_u32 s4, s4, func_areg_64@gotpcrel32@lo+4 |
| ; GFX908-NEXT: s_addc_u32 s5, s5, func_areg_64@gotpcrel32@hi+12 |
| ; GFX908-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 |
| ; GFX908-NEXT: s_mov_b32 s14, s10 |
| ; GFX908-NEXT: s_mov_b64 s[10:11], s[6:7] |
| ; GFX908-NEXT: v_lshlrev_b32_e32 v2, 20, v2 |
| ; GFX908-NEXT: v_lshlrev_b32_e32 v1, 10, v1 |
| ; GFX908-NEXT: s_mov_b64 s[4:5], s[0:1] |
| ; GFX908-NEXT: s_mov_b64 s[6:7], s[2:3] |
| ; GFX908-NEXT: s_mov_b64 s[0:1], s[20:21] |
| ; GFX908-NEXT: v_or3_b32 v31, v0, v1, v2 |
| ; GFX908-NEXT: s_mov_b64 s[2:3], s[22:23] |
| ; GFX908-NEXT: s_mov_b32 s32, 0 |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; def a[0:31] |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: v_accvgpr_read_b32 v6, a3 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v5, a2 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v4, a1 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v3, a0 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v10, a7 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v9, a6 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v8, a5 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v7, a4 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v14, a11 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v13, a10 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v12, a9 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v11, a8 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v18, a15 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v17, a14 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v16, a13 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v15, a12 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v22, a19 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v21, a18 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v20, a17 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v19, a16 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v26, a23 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v25, a22 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v24, a21 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v23, a20 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v30, a27 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v29, a26 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v28, a25 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v27, a24 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v35, a31 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v34, a30 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v33, a29 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v32, a28 |
| ; GFX908-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17] |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[32:35], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[27:30], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[23:26], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[19:22], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[15:18], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[11:14], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[7:10], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[3:6], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: s_endpgm |
| bb: |
| %reg = call <32 x float> asm sideeffect "; def $0", "=a"() |
| call void @func_areg_64() |
| store volatile <32 x float> %reg, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Kernel that defines a <32 x float> through inline asm with an "=a" output, |
| ; calls func_areg_31_63 (whose asm clobbers a31 and a63), and then |
| ; volatile-stores the value, so the value is live across the call. |
| ; The gfx90a checks expect the value to be placed in a[32:63], i.e. entirely |
| ; above a31, and stored directly from AGPRs after the call. The gfx908 checks |
| ; expect a[0:31] to be copied into VGPRs (v3-v30 and v32-v35) before the call, |
| ; with the stores reading those VGPRs afterwards. |
| define amdgpu_kernel void @test_call_areg31_63() #0 { |
| ; GFX90A-LABEL: test_call_areg31_63: |
| ; GFX90A: ; %bb.0: ; %bb |
| ; GFX90A-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0 |
| ; GFX90A-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1 |
| ; GFX90A-NEXT: s_mov_b32 s22, -1 |
| ; GFX90A-NEXT: s_mov_b32 s23, 0xe00000 |
| ; GFX90A-NEXT: s_add_u32 s20, s20, s11 |
| ; GFX90A-NEXT: s_addc_u32 s21, s21, 0 |
| ; GFX90A-NEXT: s_mov_b32 s12, s8 |
| ; GFX90A-NEXT: s_add_u32 s8, s4, 36 |
| ; GFX90A-NEXT: s_mov_b32 s13, s9 |
| ; GFX90A-NEXT: s_addc_u32 s9, s5, 0 |
| ; GFX90A-NEXT: s_getpc_b64 s[4:5] |
| ; GFX90A-NEXT: s_add_u32 s4, s4, func_areg_31_63@gotpcrel32@lo+4 |
| ; GFX90A-NEXT: s_addc_u32 s5, s5, func_areg_31_63@gotpcrel32@hi+12 |
| ; GFX90A-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 |
| ; GFX90A-NEXT: s_mov_b32 s14, s10 |
| ; GFX90A-NEXT: s_mov_b64 s[10:11], s[6:7] |
| ; GFX90A-NEXT: s_mov_b64 s[4:5], s[0:1] |
| ; GFX90A-NEXT: s_mov_b64 s[6:7], s[2:3] |
| ; GFX90A-NEXT: s_mov_b64 s[0:1], s[20:21] |
| ; GFX90A-NEXT: v_mov_b32_e32 v31, v0 |
| ; GFX90A-NEXT: s_mov_b64 s[2:3], s[22:23] |
| ; GFX90A-NEXT: s_mov_b32 s32, 0 |
| ; GFX90A-NEXT: ;;#ASMSTART |
| ; GFX90A-NEXT: ; def a[32:63] |
| ; GFX90A-NEXT: ;;#ASMEND |
| ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX90A-NEXT: s_swappc_b64 s[30:31], s[16:17] |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[60:63], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[56:59], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[52:55], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[48:51], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[44:47], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[40:43], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[36:39], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[32:35], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: s_endpgm |
| ; |
| ; GFX908-LABEL: test_call_areg31_63: |
| ; GFX908: ; %bb.0: ; %bb |
| ; GFX908-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0 |
| ; GFX908-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1 |
| ; GFX908-NEXT: s_mov_b32 s22, -1 |
| ; GFX908-NEXT: s_mov_b32 s23, 0xe00000 |
| ; GFX908-NEXT: s_add_u32 s20, s20, s11 |
| ; GFX908-NEXT: s_addc_u32 s21, s21, 0 |
| ; GFX908-NEXT: s_mov_b32 s12, s8 |
| ; GFX908-NEXT: s_add_u32 s8, s4, 36 |
| ; GFX908-NEXT: s_mov_b32 s13, s9 |
| ; GFX908-NEXT: s_addc_u32 s9, s5, 0 |
| ; GFX908-NEXT: s_getpc_b64 s[4:5] |
| ; GFX908-NEXT: s_add_u32 s4, s4, func_areg_31_63@gotpcrel32@lo+4 |
| ; GFX908-NEXT: s_addc_u32 s5, s5, func_areg_31_63@gotpcrel32@hi+12 |
| ; GFX908-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 |
| ; GFX908-NEXT: s_mov_b32 s14, s10 |
| ; GFX908-NEXT: s_mov_b64 s[10:11], s[6:7] |
| ; GFX908-NEXT: v_lshlrev_b32_e32 v2, 20, v2 |
| ; GFX908-NEXT: v_lshlrev_b32_e32 v1, 10, v1 |
| ; GFX908-NEXT: s_mov_b64 s[4:5], s[0:1] |
| ; GFX908-NEXT: s_mov_b64 s[6:7], s[2:3] |
| ; GFX908-NEXT: s_mov_b64 s[0:1], s[20:21] |
| ; GFX908-NEXT: v_or3_b32 v31, v0, v1, v2 |
| ; GFX908-NEXT: s_mov_b64 s[2:3], s[22:23] |
| ; GFX908-NEXT: s_mov_b32 s32, 0 |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; def a[0:31] |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: v_accvgpr_read_b32 v6, a3 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v5, a2 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v4, a1 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v3, a0 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v10, a7 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v9, a6 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v8, a5 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v7, a4 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v14, a11 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v13, a10 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v12, a9 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v11, a8 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v18, a15 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v17, a14 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v16, a13 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v15, a12 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v22, a19 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v21, a18 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v20, a17 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v19, a16 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v26, a23 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v25, a22 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v24, a21 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v23, a20 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v30, a27 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v29, a26 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v28, a25 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v27, a24 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v35, a31 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v34, a30 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v33, a29 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v32, a28 |
| ; GFX908-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17] |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[32:35], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[27:30], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[23:26], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[19:22], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[15:18], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[11:14], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[7:10], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[3:6], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: s_endpgm |
| bb: |
| %reg = call <32 x float> asm sideeffect "; def $0", "=a"() |
| call void @func_areg_31_63() |
| store volatile <32 x float> %reg, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Kernel that keeps a <32 x float> inline-asm result live across a call to |
| ; @func_unknown: the value is defined before the call and stored (volatile) |
| ; after it. |
| ; Expected codegen, as recorded by the autogenerated checks in the body: |
| ;  - on gfx90a the value is defined in a[32:63], stays there over the call, |
| ;    and is stored directly from those registers; |
| ;  - on gfx908 the value is defined in a[0:31], read into VGPRs |
| ;    (v40-v47, v56-v63, v72-v79, v88-v95) before the call, and stored from |
| ;    those VGPRs afterwards. |
| ; NOTE(review): @func_unknown is not declared in the visible part of the file; |
| ; presumably it is an external declaration with no known register usage, which |
| ; would explain why the value is kept out of the low AGPRs - confirm. |
| define amdgpu_kernel void @test_call_unknown() #0 { |
| ; GFX90A-LABEL: test_call_unknown: |
| ; GFX90A: ; %bb.0: ; %bb |
| ; GFX90A-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 |
| ; GFX90A-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 |
| ; GFX90A-NEXT: s_mov_b32 s38, -1 |
| ; GFX90A-NEXT: s_mov_b32 s39, 0xe00000 |
| ; GFX90A-NEXT: s_add_u32 s36, s36, s11 |
| ; GFX90A-NEXT: s_addc_u32 s37, s37, 0 |
| ; GFX90A-NEXT: s_mov_b32 s12, s8 |
| ; GFX90A-NEXT: s_add_u32 s8, s4, 36 |
| ; GFX90A-NEXT: s_mov_b32 s13, s9 |
| ; GFX90A-NEXT: s_addc_u32 s9, s5, 0 |
| ; GFX90A-NEXT: s_getpc_b64 s[4:5] |
| ; GFX90A-NEXT: s_add_u32 s4, s4, func_unknown@gotpcrel32@lo+4 |
| ; GFX90A-NEXT: s_addc_u32 s5, s5, func_unknown@gotpcrel32@hi+12 |
| ; GFX90A-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 |
| ; GFX90A-NEXT: s_mov_b32 s14, s10 |
| ; GFX90A-NEXT: s_mov_b64 s[10:11], s[6:7] |
| ; GFX90A-NEXT: s_mov_b64 s[4:5], s[0:1] |
| ; GFX90A-NEXT: s_mov_b64 s[6:7], s[2:3] |
| ; GFX90A-NEXT: s_mov_b64 s[0:1], s[36:37] |
| ; GFX90A-NEXT: v_mov_b32_e32 v31, v0 |
| ; GFX90A-NEXT: s_mov_b64 s[2:3], s[38:39] |
| ; GFX90A-NEXT: s_mov_b32 s32, 0 |
| ; GFX90A-NEXT: ;;#ASMSTART |
| ; GFX90A-NEXT: ; def a[32:63] |
| ; GFX90A-NEXT: ;;#ASMEND |
| ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX90A-NEXT: s_swappc_b64 s[30:31], s[16:17] |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[60:63], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[56:59], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[52:55], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[48:51], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[44:47], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[40:43], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[36:39], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_store_dwordx4 v[0:1], a[32:35], off |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: s_endpgm |
| ; |
| ; GFX908-LABEL: test_call_unknown: |
| ; GFX908: ; %bb.0: ; %bb |
| ; GFX908-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 |
| ; GFX908-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 |
| ; GFX908-NEXT: s_mov_b32 s38, -1 |
| ; GFX908-NEXT: s_mov_b32 s39, 0xe00000 |
| ; GFX908-NEXT: s_add_u32 s36, s36, s11 |
| ; GFX908-NEXT: s_addc_u32 s37, s37, 0 |
| ; GFX908-NEXT: s_mov_b32 s12, s8 |
| ; GFX908-NEXT: s_add_u32 s8, s4, 36 |
| ; GFX908-NEXT: s_mov_b32 s13, s9 |
| ; GFX908-NEXT: s_addc_u32 s9, s5, 0 |
| ; GFX908-NEXT: s_getpc_b64 s[4:5] |
| ; GFX908-NEXT: s_add_u32 s4, s4, func_unknown@gotpcrel32@lo+4 |
| ; GFX908-NEXT: s_addc_u32 s5, s5, func_unknown@gotpcrel32@hi+12 |
| ; GFX908-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 |
| ; GFX908-NEXT: s_mov_b32 s14, s10 |
| ; GFX908-NEXT: s_mov_b64 s[10:11], s[6:7] |
| ; GFX908-NEXT: v_lshlrev_b32_e32 v2, 20, v2 |
| ; GFX908-NEXT: v_lshlrev_b32_e32 v1, 10, v1 |
| ; GFX908-NEXT: s_mov_b64 s[4:5], s[0:1] |
| ; GFX908-NEXT: s_mov_b64 s[6:7], s[2:3] |
| ; GFX908-NEXT: s_mov_b64 s[0:1], s[36:37] |
| ; GFX908-NEXT: v_or3_b32 v31, v0, v1, v2 |
| ; GFX908-NEXT: s_mov_b64 s[2:3], s[38:39] |
| ; GFX908-NEXT: s_mov_b32 s32, 0 |
| ; GFX908-NEXT: ;;#ASMSTART |
| ; GFX908-NEXT: ; def a[0:31] |
| ; GFX908-NEXT: ;;#ASMEND |
| ; GFX908-NEXT: v_accvgpr_read_b32 v43, a3 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v42, a2 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v41, a1 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v40, a0 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v47, a7 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v46, a6 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v45, a5 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v44, a4 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v59, a11 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v58, a10 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v57, a9 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v56, a8 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v63, a15 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v62, a14 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v61, a13 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v60, a12 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v75, a19 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v74, a18 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v73, a17 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v72, a16 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v79, a23 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v78, a22 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v77, a21 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v76, a20 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v91, a27 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v90, a26 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v89, a25 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v88, a24 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v95, a31 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v94, a30 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v93, a29 |
| ; GFX908-NEXT: v_accvgpr_read_b32 v92, a28 |
| ; GFX908-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17] |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[92:95], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[88:91], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[76:79], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[72:75], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[60:63], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[56:59], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[44:47], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: global_store_dwordx4 v[0:1], v[40:43], off |
| ; GFX908-NEXT: s_waitcnt vmcnt(0) |
| ; GFX908-NEXT: s_endpgm |
| bb: |
| ; Inline asm with an "=a" output; the expected output above prints the operand |
| ; as an a[...] register range. |
| %reg = call <32 x float> asm sideeffect "; def $0", "=a"() |
| call void @func_unknown() |
| ; Storing %reg after the call is what keeps the value live across it. |
| store volatile <32 x float> %reg, ptr addrspace(1) poison |
| ret void |
| } |
| |
| attributes #0 = { nounwind noinline "amdgpu-flat-work-group-size"="1,512" } |