| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s |
| ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -enable-var-scope -check-prefix=GFX11 %s |
| |
; i1 passed `inreg`: the expected code masks the incoming SGPR with 1 (s16 on
; gfx900, s0 on gfx1100), moves the result into v0 and stores a single byte.
; The store address is `undef`; presumably the store only exists to give the
; argument a use (NOTE(review): confirm intent).
| define void @void_func_i1_inreg(i1 inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_i1_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_and_b32 s4, s16, 1 |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s4 |
| ; GFX9-NEXT: global_store_byte v[0:1], v0, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_i1_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_and_b32 s0, s0, 1 |
| ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) |
| ; GFX11-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX11-NEXT: global_store_b8 v[0:1], v0, off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store i1 %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
; i8 passed `inreg`: the expected code copies the argument SGPR (s16 on
; gfx900, s0 on gfx1100) into v0 and emits a byte store.
| define void @void_func_i8_inreg(i8 inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_i8_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: global_store_byte v[0:1], v0, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_i8_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX11-NEXT: global_store_b8 v[0:1], v0, off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store i8 %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
; i16 passed `inreg`: the expected code copies the argument SGPR (s16 on
; gfx900, s0 on gfx1100) into v0 and emits a 16-bit store.
| define void @void_func_i16_inreg(i16 inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_i16_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: global_store_short v[0:1], v0, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_i16_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX11-NEXT: global_store_b16 v[0:1], v0, off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store i16 %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
; i32 passed `inreg`: the expected code copies the argument SGPR (s16 on
; gfx900, s0 on gfx1100) into v0 and emits a 32-bit store.
| define void @void_func_i32_inreg(i32 inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_i32_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: global_store_dword v[0:1], v0, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_i32_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v0, off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store i32 %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
; i64 passed `inreg`: the value is read from an SGPR pair (s16/s17 on gfx900,
; s0/s1 on gfx1100), copied into v[0:1] and written with one 64-bit store.
; On gfx1100 the two copies are expected as a single v_dual_mov_b32.
| define void @void_func_i64_inreg(i64 inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_i64_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_i64_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 |
| ; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store i64 %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
; half passed `inreg`: the expected code copies the argument SGPR (s16 on
; gfx900, s0 on gfx1100) into v0 and emits a 16-bit store.
| define void @void_func_f16_inreg(half inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_f16_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: global_store_short v[0:1], v0, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_f16_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX11-NEXT: global_store_b16 v[0:1], v0, off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store half %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
; float passed `inreg`: the expected code copies the argument SGPR (s16 on
; gfx900, s0 on gfx1100) into v0 and emits a 32-bit store.
| define void @void_func_f32_inreg(float inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_f32_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: global_store_dword v[0:1], v0, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_f32_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v0, off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store float %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
; double passed `inreg`: the value is read from an SGPR pair (s16/s17 on
; gfx900, s0/s1 on gfx1100), copied into v[0:1] and written with one 64-bit
; store.
| define void @void_func_f64_inreg(double inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_f64_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_f64_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 |
| ; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store double %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
; <2 x i16> passed `inreg`: the expected code reads a single SGPR (s16 on
; gfx900, s0 on gfx1100) and writes it with one 32-bit store.
| define void @void_func_v2i16_inreg(<2 x i16> inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_v2i16_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: global_store_dword v[0:1], v0, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v2i16_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v0, off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <2 x i16> %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
; <3 x i16> passed `inreg`: two SGPRs are read (s16/s17 on gfx900, s0/s1 on
; gfx1100). The second register is written with a 16-bit store and the first
; with a 32-bit store; on gfx1100 both stores are expected under one s_clause.
| define void @void_func_v3i16_inreg(<3 x i16> inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_v3i16_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s17 |
| ; GFX9-NEXT: global_store_short v[0:1], v0, off |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: global_store_dword v[0:1], v0, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v3i16_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: global_store_b16 v[0:1], v0, off |
| ; GFX11-NEXT: global_store_b32 v[0:1], v1, off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <3 x i16> %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
; <4 x i16> passed `inreg`: the value is read from an SGPR pair (s16/s17 on
; gfx900, s0/s1 on gfx1100), copied into v[0:1] and written with one 64-bit
; store. Only the check prefixes enabled by the RUN lines of this file appear
; below.
| define void @void_func_v4i16_inreg(<4 x i16> inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_v4i16_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v4i16_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 |
| ; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <4 x i16> %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
; <5 x i16> passed `inreg`: three SGPRs are read (s16-s18 on gfx900, s0-s2 on
; gfx1100). The third register is written with a 16-bit store and the first
; two with one 64-bit store.
| define void @void_func_v5i16_inreg(<5 x i16> inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_v5i16_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s18 |
| ; GFX9-NEXT: global_store_short v[0:1], v0, off |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v5i16_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v1, s1 |
| ; GFX11-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: global_store_b16 v[0:1], v2, off |
| ; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <5 x i16> %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
; <8 x i16> passed `inreg`: four SGPRs are read (s16-s19 on gfx900, s0-s3 on
; gfx1100), copied into v[0:3] and written with one 128-bit store.
| define void @void_func_v8i16_inreg(<8 x i16> inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_v8i16_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s18 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s19 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v8i16_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 |
| ; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <8 x i16> %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
; <2 x i32> passed `inreg`: the value is read from an SGPR pair (s16/s17 on
; gfx900, s0/s1 on gfx1100), copied into v[0:1] and written with one 64-bit
; store.
| define void @void_func_v2i32_inreg(<2 x i32> inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_v2i32_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v2i32_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 |
| ; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <2 x i32> %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
; <3 x i32> passed `inreg`: three SGPRs are read (s16-s18 on gfx900, s0-s2 on
; gfx1100), copied into v[0:2] and written with one 96-bit store.
| define void @void_func_v3i32_inreg(<3 x i32> inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_v3i32_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s18 |
| ; GFX9-NEXT: global_store_dwordx3 v[0:1], v[0:2], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v3i32_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 |
| ; GFX11-NEXT: v_mov_b32_e32 v2, s2 |
| ; GFX11-NEXT: global_store_b96 v[0:1], v[0:2], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <3 x i32> %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
; <4 x i32> passed `inreg`: four SGPRs are read (s16-s19 on gfx900, s0-s3 on
; gfx1100), copied into v[0:3] and written with one 128-bit store.
| define void @void_func_v4i32_inreg(<4 x i32> inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_v4i32_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s18 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s19 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v4i32_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 |
| ; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <4 x i32> %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
; <5 x i32> passed `inreg`: five SGPRs are read. gfx900 expects s16-s20;
; gfx1100 expects s0-s3 followed by s16. The last register (s20 / s16) is
; written with a 32-bit store and the other four with one 128-bit store.
| define void @void_func_v5i32_inreg(<5 x i32> inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_v5i32_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s20 |
| ; GFX9-NEXT: global_store_dword v[0:1], v0, off |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s18 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s19 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v5i32_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v4, s16 :: v_dual_mov_b32 v1, s1 |
| ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3 |
| ; GFX11-NEXT: v_mov_b32_e32 v2, s2 |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v4, off |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <5 x i32> %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
; <8 x i32> passed `inreg`: eight SGPRs are read. gfx900 expects s16-s23;
; gfx1100 expects s0-s3 and s16-s19. The value is written with two 128-bit
; stores; gfx900 expects an `s_nop 0` between them, gfx1100 expects both
; stores under one s_clause.
| define void @void_func_v8i32_inreg(<8 x i32> inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_v8i32_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s20 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s21 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s22 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s23 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_nop 0 |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s18 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s19 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v8i32_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17 |
| ; GFX11-NEXT: v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19 |
| ; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1 |
| ; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3 |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <8 x i32> %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
; <16 x i32> passed `inreg`: the value is written with four 128-bit stores.
; gfx900 expects fourteen dwords in s16-s29 and the remaining two to arrive in
; v0/v1 (they are moved to v2/v3 before v0/v1 are overwritten with s28/s29).
; gfx1100 expects all sixteen dwords in SGPRs: s0-s3 and s16-s27.
; Only the check prefixes enabled by the RUN lines of this file appear below.
| define void @void_func_v16i32_inreg(<16 x i32> inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_v16i32_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v3, v1 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, v0 |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s28 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s29 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_nop 0 |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s24 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s25 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s26 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s27 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_nop 0 |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s20 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s21 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s22 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s23 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_nop 0 |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s18 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s19 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v16i32_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25 |
| ; GFX11-NEXT: v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27 |
| ; GFX11-NEXT: v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21 |
| ; GFX11-NEXT: v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23 |
| ; GFX11-NEXT: v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17 |
| ; GFX11-NEXT: v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19 |
| ; GFX11-NEXT: v_dual_mov_b32 v12, s0 :: v_dual_mov_b32 v13, s1 |
| ; GFX11-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v15, s3 |
| ; GFX11-NEXT: s_clause 0x3 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <16 x i32> %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
; <32 x i32> passed `inreg`: the value is written with eight 128-bit stores,
; mixing SGPR and VGPR sources.
; gfx900 expects s16-s29 to be copied into VGPRs and v0-v17 to be stored as
; they arrive (v0/v1 are first moved to v18/v19 so they can be paired with
; s28/s29 in v[16:19]).
; gfx1100 expects s0-s3 and s16-s29 to be copied into VGPRs and v0-v13 to be
; stored as they arrive (v0/v1 are first moved to v14/v15 and paired with
; s28/s29 in v[12:15]).
| define void @void_func_v32i32_inreg(<32 x i32> inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_v32i32_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v19, v1 |
| ; GFX9-NEXT: v_mov_b32_e32 v18, v0 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[14:17], off |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[10:13], off |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[6:9], off |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off |
| ; GFX9-NEXT: v_mov_b32_e32 v16, s28 |
| ; GFX9-NEXT: v_mov_b32_e32 v17, s29 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[16:19], off |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s24 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s25 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s26 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s27 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_nop 0 |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s20 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s21 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s22 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s23 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_nop 0 |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s18 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s19 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v32i32_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v15, v1 :: v_dual_mov_b32 v14, v0 |
| ; GFX11-NEXT: s_clause 0x2 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[10:13], off |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[6:9], off |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off |
| ; GFX11-NEXT: v_dual_mov_b32 v12, s28 :: v_dual_mov_b32 v13, s29 |
| ; GFX11-NEXT: v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25 |
| ; GFX11-NEXT: v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27 |
| ; GFX11-NEXT: v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21 |
| ; GFX11-NEXT: v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23 |
| ; GFX11-NEXT: v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17 |
| ; GFX11-NEXT: v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19 |
| ; GFX11-NEXT: v_dual_mov_b32 v16, s0 :: v_dual_mov_b32 v17, s1 |
| ; GFX11-NEXT: v_dual_mov_b32 v18, s2 :: v_dual_mov_b32 v19, s3 |
| ; GFX11-NEXT: s_clause 0x4 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[16:19], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <32 x i32> %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
; <2 x i64> passed `inreg`: four SGPRs are read (s16-s19 on gfx900, s0-s3 on
; gfx1100), copied into v[0:3] and written with one 128-bit store.
| define void @void_func_v2i64_inreg(<2 x i64> inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_v2i64_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s18 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s19 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v2i64_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 |
| ; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <2 x i64> %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
; <3 x i64> passed `inreg`: six SGPRs are read. gfx900 expects s16-s21;
; gfx1100 expects s0-s3 and s16-s17. The last pair is written with a 64-bit
; store and the first four registers with one 128-bit store.
| define void @void_func_v3i64_inreg(<3 x i64> inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_v3i64_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s20 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s21 |
| ; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s18 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s19 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v3i64_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v4, s16 :: v_dual_mov_b32 v5, s17 |
| ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 |
| ; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: global_store_b64 v[0:1], v[4:5], off |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <3 x i64> %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
; <4 x i64> passed `inreg`: eight SGPRs are read. gfx900 expects s16-s23;
; gfx1100 expects s0-s3 and s16-s19. The value is written with two 128-bit
; stores; gfx900 expects an `s_nop 0` between them.
| define void @void_func_v4i64_inreg(<4 x i64> inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_v4i64_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s20 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s21 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s22 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s23 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_nop 0 |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s18 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s19 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v4i64_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17 |
| ; GFX11-NEXT: v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19 |
| ; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1 |
| ; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3 |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <4 x i64> %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
; <5 x i64> passed `inreg`: ten SGPRs are read. gfx900 expects s16-s25;
; gfx1100 expects s0-s3 and s16-s21. The value is written with two 128-bit
; stores and one 64-bit store (the latter from s24/s25 on gfx900 and s20/s21
; on gfx1100).
| define void @void_func_v5i64_inreg(<5 x i64> inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_v5i64_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s20 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s21 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s22 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s23 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_nop 0 |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s18 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s19 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_nop 0 |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s24 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s25 |
| ; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v5i64_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17 |
| ; GFX11-NEXT: v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19 |
| ; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1 |
| ; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3 |
| ; GFX11-NEXT: v_dual_mov_b32 v8, s20 :: v_dual_mov_b32 v9, s21 |
| ; GFX11-NEXT: s_clause 0x2 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off |
| ; GFX11-NEXT: global_store_b64 v[0:1], v[8:9], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <5 x i64> %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
; <8 x i64> passed `inreg`: the value is written with four 128-bit stores.
; gfx900 expects fourteen dwords in s16-s29 and the remaining two to arrive in
; v0/v1 (they are moved to v2/v3 before v0/v1 are overwritten with s28/s29).
; gfx1100 expects all sixteen dwords in SGPRs: s0-s3 and s16-s27.
| define void @void_func_v8i64_inreg(<8 x i64> inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_v8i64_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v3, v1 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, v0 |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s28 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s29 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_nop 0 |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s24 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s25 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s26 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s27 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_nop 0 |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s20 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s21 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s22 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s23 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_nop 0 |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s18 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s19 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v8i64_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25 |
| ; GFX11-NEXT: v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27 |
| ; GFX11-NEXT: v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21 |
| ; GFX11-NEXT: v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23 |
| ; GFX11-NEXT: v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17 |
| ; GFX11-NEXT: v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19 |
| ; GFX11-NEXT: v_dual_mov_b32 v12, s0 :: v_dual_mov_b32 v13, s1 |
| ; GFX11-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v15, s3 |
| ; GFX11-NEXT: s_clause 0x3 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <8 x i64> %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
; <16 x i64> passed `inreg`: the value is written with eight 128-bit stores,
; mixing SGPR and VGPR sources.
; gfx900 expects s16-s29 to be copied into VGPRs and v0-v17 to be stored as
; they arrive (v0/v1 are first moved to v18/v19 so they can be paired with
; s28/s29 in v[16:19]).
; gfx1100 expects s0-s3 and s16-s29 to be copied into VGPRs and v0-v13 to be
; stored as they arrive (v0/v1 are first moved to v14/v15 and paired with
; s28/s29 in v[12:15]).
| define void @void_func_v16i64_inreg(<16 x i64> inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_v16i64_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v19, v1 |
| ; GFX9-NEXT: v_mov_b32_e32 v18, v0 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[14:17], off |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[10:13], off |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[6:9], off |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off |
| ; GFX9-NEXT: v_mov_b32_e32 v16, s28 |
| ; GFX9-NEXT: v_mov_b32_e32 v17, s29 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[16:19], off |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s24 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s25 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s26 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s27 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_nop 0 |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s20 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s21 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s22 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s23 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_nop 0 |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s18 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s19 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v16i64_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v15, v1 :: v_dual_mov_b32 v14, v0 |
| ; GFX11-NEXT: s_clause 0x2 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[10:13], off |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[6:9], off |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off |
| ; GFX11-NEXT: v_dual_mov_b32 v12, s28 :: v_dual_mov_b32 v13, s29 |
| ; GFX11-NEXT: v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25 |
| ; GFX11-NEXT: v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27 |
| ; GFX11-NEXT: v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21 |
| ; GFX11-NEXT: v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23 |
| ; GFX11-NEXT: v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17 |
| ; GFX11-NEXT: v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19 |
| ; GFX11-NEXT: v_dual_mov_b32 v16, s0 :: v_dual_mov_b32 v17, s1 |
| ; GFX11-NEXT: v_dual_mov_b32 v18, s2 :: v_dual_mov_b32 v19, s3 |
| ; GFX11-NEXT: s_clause 0x4 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[16:19], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <16 x i64> %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
; <2 x half> passed `inreg`: the expected code reads a single SGPR (s16 on
; gfx900, s0 on gfx1100) and writes it with one 32-bit store.
| define void @void_func_v2f16_inreg(<2 x half> inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_v2f16_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: global_store_dword v[0:1], v0, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v2f16_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v0, off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <2 x half> %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
| ; <3 x half> inreg argument stored to an undef addrspace(1) pointer. |
| ; The checks expect two SGPRs (s16-s17 under the GFX9 prefix, s0-s1 under the |
| ; GFX11 prefix). The second register is written with a short/b16 store and the |
| ; first with a dword/b32 store. |
| define void @void_func_v3f16_inreg(<3 x half> inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_v3f16_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s17 |
| ; GFX9-NEXT: global_store_short v[0:1], v0, off |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: global_store_dword v[0:1], v0, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v3f16_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: global_store_b16 v[0:1], v0, off |
| ; GFX11-NEXT: global_store_b32 v[0:1], v1, off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <3 x half> %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
| ; <4 x half> inreg argument stored to an undef addrspace(1) pointer. |
| ; The checks expect two SGPRs (s16-s17 under the GFX9 prefix, s0-s1 under the |
| ; GFX11 prefix), copied to v[0:1] and written with one dwordx2/b64 store. |
| define void @void_func_v4f16_inreg(<4 x half> inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_v4f16_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v4f16_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 |
| ; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <4 x half> %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
| ; <8 x half> inreg argument stored to an undef addrspace(1) pointer. |
| ; The checks expect four SGPRs (s16-s19 under the GFX9 prefix, s0-s3 under the |
| ; GFX11 prefix), copied to v[0:3] and written with one dwordx4/b128 store. |
| define void @void_func_v8f16_inreg(<8 x half> inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_v8f16_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s18 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s19 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v8f16_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 |
| ; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <8 x half> %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
| ; <16 x half> inreg argument stored to an undef addrspace(1) pointer. |
| ; The checks expect eight SGPRs: s16-s23 under the GFX9 prefix, and s0-s3 plus |
| ; s16-s19 under the GFX11 prefix. The value is written with two dwordx4/b128 |
| ; stores; the GFX9 output reuses v[0:3] for both, with an s_nop between them. |
| define void @void_func_v16f16_inreg(<16 x half> inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_v16f16_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s20 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s21 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s22 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s23 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_nop 0 |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s18 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s19 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v16f16_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17 |
| ; GFX11-NEXT: v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19 |
| ; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1 |
| ; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3 |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <16 x half> %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
| ; <2 x float> inreg argument stored to an undef addrspace(1) pointer. |
| ; The checks expect two SGPRs (s16-s17 under the GFX9 prefix, s0-s1 under the |
| ; GFX11 prefix), copied to v[0:1] and written with one dwordx2/b64 store. |
| define void @void_func_v2f32_inreg(<2 x float> inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_v2f32_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v2f32_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 |
| ; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <2 x float> %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
| ; <3 x float> inreg argument stored to an undef addrspace(1) pointer. |
| ; The checks expect three SGPRs (s16-s18 under the GFX9 prefix, s0-s2 under the |
| ; GFX11 prefix), copied to v[0:2] and written with one dwordx3/b96 store. |
| define void @void_func_v3f32_inreg(<3 x float> inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_v3f32_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s18 |
| ; GFX9-NEXT: global_store_dwordx3 v[0:1], v[0:2], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v3f32_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 |
| ; GFX11-NEXT: v_mov_b32_e32 v2, s2 |
| ; GFX11-NEXT: global_store_b96 v[0:1], v[0:2], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <3 x float> %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
| ; <4 x float> inreg argument stored to an undef addrspace(1) pointer. |
| ; The checks expect four SGPRs (s16-s19 under the GFX9 prefix, s0-s3 under the |
| ; GFX11 prefix), copied to v[0:3] and written with one dwordx4/b128 store. |
| define void @void_func_v4f32_inreg(<4 x float> inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_v4f32_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s18 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s19 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v4f32_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 |
| ; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <4 x float> %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
| ; <8 x float> inreg argument stored to an undef addrspace(1) pointer. |
| ; The checks expect eight SGPRs: s16-s23 under the GFX9 prefix, and s0-s3 plus |
| ; s16-s19 under the GFX11 prefix. The value is written with two dwordx4/b128 |
| ; stores; the GFX9 output reuses v[0:3] for both, with an s_nop between them. |
| define void @void_func_v8f32_inreg(<8 x float> inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_v8f32_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s20 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s21 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s22 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s23 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_nop 0 |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s18 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s19 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v8f32_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17 |
| ; GFX11-NEXT: v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19 |
| ; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1 |
| ; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3 |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <8 x float> %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
| ; <16 x float> inreg argument stored to an undef addrspace(1) pointer. |
| ; Under the GFX9 prefix the checks read s16-s29 and take the remaining two |
| ; dwords from v0-v1 (moved to v2-v3 before v0-v1 are overwritten). Under the |
| ; GFX11 prefix all sixteen dwords are read from SGPRs (s0-s3 and s16-s27). |
| ; The value is written with four dwordx4/b128 stores. |
| define void @void_func_v16f32_inreg(<16 x float> inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_v16f32_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v3, v1 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, v0 |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s28 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s29 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_nop 0 |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s24 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s25 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s26 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s27 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_nop 0 |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s20 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s21 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s22 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s23 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_nop 0 |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s18 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s19 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v16f32_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25 |
| ; GFX11-NEXT: v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27 |
| ; GFX11-NEXT: v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21 |
| ; GFX11-NEXT: v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23 |
| ; GFX11-NEXT: v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17 |
| ; GFX11-NEXT: v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19 |
| ; GFX11-NEXT: v_dual_mov_b32 v12, s0 :: v_dual_mov_b32 v13, s1 |
| ; GFX11-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v15, s3 |
| ; GFX11-NEXT: s_clause 0x3 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <16 x float> %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
| ; <2 x double> inreg argument stored to an undef addrspace(1) pointer. |
| ; The checks expect four SGPRs (s16-s19 under the GFX9 prefix, s0-s3 under the |
| ; GFX11 prefix), copied to v[0:3] and written with one dwordx4/b128 store. |
| define void @void_func_v2f64_inreg(<2 x double> inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_v2f64_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s18 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s19 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v2f64_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 |
| ; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <2 x double> %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
| ; <3 x double> inreg argument stored to an undef addrspace(1) pointer. |
| ; The checks expect six SGPRs: s16-s21 under the GFX9 prefix, and s0-s3 plus |
| ; s16-s17 under the GFX11 prefix. The last two dwords are written with a |
| ; dwordx2/b64 store and the first four with a dwordx4/b128 store. |
| define void @void_func_v3f64_inreg(<3 x double> inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_v3f64_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s20 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s21 |
| ; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s18 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s19 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v3f64_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v4, s16 :: v_dual_mov_b32 v5, s17 |
| ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 |
| ; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: global_store_b64 v[0:1], v[4:5], off |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <3 x double> %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
| ; <4 x double> inreg argument stored to an undef addrspace(1) pointer. |
| ; The checks expect eight SGPRs: s16-s23 under the GFX9 prefix, and s0-s3 plus |
| ; s16-s19 under the GFX11 prefix. The value is written with two dwordx4/b128 |
| ; stores; the GFX9 output reuses v[0:3] for both, with an s_nop between them. |
| define void @void_func_v4f64_inreg(<4 x double> inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_v4f64_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s20 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s21 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s22 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s23 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_nop 0 |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s18 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s19 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v4f64_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17 |
| ; GFX11-NEXT: v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19 |
| ; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1 |
| ; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3 |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <4 x double> %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
| ; <8 x double> inreg argument stored to an undef addrspace(1) pointer. |
| ; Under the GFX9 prefix the checks read s16-s29 and take the remaining two |
| ; dwords from v0-v1 (moved to v2-v3 before v0-v1 are overwritten). Under the |
| ; GFX11 prefix all sixteen dwords are read from SGPRs (s0-s3 and s16-s27). |
| ; The value is written with four dwordx4/b128 stores. |
| define void @void_func_v8f64_inreg(<8 x double> inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_v8f64_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v3, v1 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, v0 |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s28 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s29 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_nop 0 |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s24 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s25 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s26 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s27 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_nop 0 |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s20 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s21 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s22 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s23 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_nop 0 |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s18 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s19 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v8f64_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25 |
| ; GFX11-NEXT: v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27 |
| ; GFX11-NEXT: v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21 |
| ; GFX11-NEXT: v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23 |
| ; GFX11-NEXT: v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17 |
| ; GFX11-NEXT: v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19 |
| ; GFX11-NEXT: v_dual_mov_b32 v12, s0 :: v_dual_mov_b32 v13, s1 |
| ; GFX11-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v15, s3 |
| ; GFX11-NEXT: s_clause 0x3 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <8 x double> %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
| ; <16 x double> inreg argument (32 dwords) stored to an undef addrspace(1) |
| ; pointer. The argument is wider than the SGPRs the checks read, so part of it |
| ; is taken from VGPRs: |
| ;   GFX9 prefix  - s16-s29 plus v0-v17 |
| ;   GFX11 prefix - s0-s3 and s16-s29 plus v0-v13 |
| ; v0-v1 are copied aside first because they are later reused as scratch. |
| ; The value is written with eight dwordx4/b128 stores. |
| define void @void_func_v16f64_inreg(<16 x double> inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_v16f64_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v19, v1 |
| ; GFX9-NEXT: v_mov_b32_e32 v18, v0 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[14:17], off |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[10:13], off |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[6:9], off |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off |
| ; GFX9-NEXT: v_mov_b32_e32 v16, s28 |
| ; GFX9-NEXT: v_mov_b32_e32 v17, s29 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[16:19], off |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s24 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s25 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s26 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s27 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_nop 0 |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s20 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s21 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s22 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s23 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_nop 0 |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s18 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s19 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v16f64_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v15, v1 :: v_dual_mov_b32 v14, v0 |
| ; GFX11-NEXT: s_clause 0x2 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[10:13], off |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[6:9], off |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off |
| ; GFX11-NEXT: v_dual_mov_b32 v12, s28 :: v_dual_mov_b32 v13, s29 |
| ; GFX11-NEXT: v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25 |
| ; GFX11-NEXT: v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27 |
| ; GFX11-NEXT: v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21 |
| ; GFX11-NEXT: v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23 |
| ; GFX11-NEXT: v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17 |
| ; GFX11-NEXT: v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19 |
| ; GFX11-NEXT: v_dual_mov_b32 v16, s0 :: v_dual_mov_b32 v17, s1 |
| ; GFX11-NEXT: v_dual_mov_b32 v18, s2 :: v_dual_mov_b32 v19, s3 |
| ; GFX11-NEXT: s_clause 0x4 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[16:19], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <16 x double> %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
| ; <32 x i32> inreg argument followed by four small inreg scalars, each written |
| ; with a volatile store to an undef addrspace(1) pointer; the checks expect a |
| ; wait after every store. |
| ; The vector is read from SGPRs and then VGPRs, and the trailing scalars are |
| ; read from the next VGPRs: |
| ;   GFX9 prefix  - vector in s16-s29 + v0-v17; i1 in v18 (masked with 1), |
| ;                  i8 in v19, i16 in v20, half in v21 |
| ;   GFX11 prefix - vector in s0-s3, s16-s29 + v0-v13; i1 in v14 (masked with 1), |
| ;                  i8 in v15, i16 in v16, half in v17 |
| ; NOTE(review): the function name ends in "f32" but %arg4 is declared as half |
| ; and is written with a 16-bit store - confirm which was intended. |
| define void @void_func_v32i32_i1_i8_i16_f32_inreg(<32 x i32> inreg %arg0, i1 inreg %arg1, i8 inreg %arg2, i16 inreg %arg3, half inreg %arg4) #0 { |
| ; GFX9-LABEL: void_func_v32i32_i1_i8_i16_f32_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v25, v1 |
| ; GFX9-NEXT: v_mov_b32_e32 v24, v0 |
| ; GFX9-NEXT: v_mov_b32_e32 v22, s28 |
| ; GFX9-NEXT: v_mov_b32_e32 v23, s29 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[14:17], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[10:13], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[6:9], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[22:25], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s24 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s25 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s26 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s27 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s20 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s21 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s22 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s23 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s18 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s19 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: v_and_b32_e32 v0, 1, v18 |
| ; GFX9-NEXT: global_store_byte v[0:1], v0, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_byte v[0:1], v19, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_short v[0:1], v20, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_short v[0:1], v21, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v32i32_i1_i8_i16_f32_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v21, v1 :: v_dual_mov_b32 v20, v0 |
| ; GFX11-NEXT: v_dual_mov_b32 v18, s28 :: v_dual_mov_b32 v19, s29 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[10:13], off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[6:9], off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[18:21], off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25 |
| ; GFX11-NEXT: v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27 |
| ; GFX11-NEXT: v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21 |
| ; GFX11-NEXT: v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23 |
| ; GFX11-NEXT: v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17 |
| ; GFX11-NEXT: v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19 |
| ; GFX11-NEXT: v_dual_mov_b32 v18, s0 :: v_dual_mov_b32 v19, s1 |
| ; GFX11-NEXT: v_dual_mov_b32 v20, s2 :: v_dual_mov_b32 v21, s3 |
| ; GFX11-NEXT: v_and_b32_e32 v12, 1, v14 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[18:21], off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b8 v[0:1], v12, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b8 v[0:1], v15, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b16 v[0:1], v16, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b16 v[0:1], v17, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store volatile <32 x i32> %arg0, ptr addrspace(1) undef |
| store volatile i1 %arg1, ptr addrspace(1) undef |
| store volatile i8 %arg2, ptr addrspace(1) undef |
| store volatile i16 %arg3, ptr addrspace(1) undef |
| store volatile half %arg4, ptr addrspace(1) undef |
| ret void |
| } |
| |
| ; <32 x i32> inreg argument followed by two 64-bit inreg vectors, each written |
| ; with a volatile store to an undef addrspace(1) pointer; the checks expect a |
| ; wait after every store. |
| ; The first vector is read from SGPRs and then VGPRs, and the two trailing |
| ; vectors are read from the next VGPR pairs: |
| ;   GFX9 prefix  - %arg0 in s16-s29 + v0-v17; %arg1 in v[18:19]; %arg2 in v[20:21] |
| ;   GFX11 prefix - %arg0 in s0-s3, s16-s29 + v0-v13; %arg1 in v[14:15]; |
| ;                  %arg2 in v[16:17] |
| define void @void_func_v32i32_v2i32_v2f32_inreg(<32 x i32> inreg %arg0, <2 x i32> inreg %arg1, <2 x float> inreg %arg2) #0 { |
| ; GFX9-LABEL: void_func_v32i32_v2i32_v2f32_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v25, v1 |
| ; GFX9-NEXT: v_mov_b32_e32 v24, v0 |
| ; GFX9-NEXT: v_mov_b32_e32 v22, s28 |
| ; GFX9-NEXT: v_mov_b32_e32 v23, s29 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[14:17], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[10:13], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[6:9], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[22:25], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s24 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s25 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s26 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s27 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s20 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s21 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s22 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s23 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s18 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s19 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dwordx2 v[0:1], v[18:19], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dwordx2 v[0:1], v[20:21], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v32i32_v2i32_v2f32_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v21, v1 :: v_dual_mov_b32 v20, v0 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[10:13], off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[6:9], off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: v_dual_mov_b32 v18, s28 :: v_dual_mov_b32 v19, s29 |
| ; GFX11-NEXT: v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25 |
| ; GFX11-NEXT: v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27 |
| ; GFX11-NEXT: v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21 |
| ; GFX11-NEXT: v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23 |
| ; GFX11-NEXT: v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17 |
| ; GFX11-NEXT: v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19 |
| ; GFX11-NEXT: v_dual_mov_b32 v22, s0 :: v_dual_mov_b32 v23, s1 |
| ; GFX11-NEXT: v_dual_mov_b32 v24, s2 :: v_dual_mov_b32 v25, s3 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[18:21], off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[22:25], off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b64 v[0:1], v[14:15], off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b64 v[0:1], v[16:17], off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store volatile <32 x i32> %arg0, ptr addrspace(1) undef |
| store volatile <2 x i32> %arg1, ptr addrspace(1) undef |
| store volatile <2 x float> %arg2, ptr addrspace(1) undef |
| ret void |
| } |
| |
| define void @too_many_args_use_workitem_id_x_inreg( |
| ; GFX9-LABEL: too_many_args_use_workitem_id_x_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v18, s16 |
| ; GFX9-NEXT: global_store_dword v[0:1], v18, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v18, s17 |
| ; GFX9-NEXT: global_store_dword v[0:1], v18, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v18, s18 |
| ; GFX9-NEXT: global_store_dword v[0:1], v18, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v18, s19 |
| ; GFX9-NEXT: global_store_dword v[0:1], v18, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v18, s20 |
| ; GFX9-NEXT: global_store_dword v[0:1], v18, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v18, s21 |
| ; GFX9-NEXT: global_store_dword v[0:1], v18, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v18, s22 |
| ; GFX9-NEXT: global_store_dword v[0:1], v18, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v18, s23 |
| ; GFX9-NEXT: global_store_dword v[0:1], v18, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v18, s24 |
| ; GFX9-NEXT: global_store_dword v[0:1], v18, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v18, s25 |
| ; GFX9-NEXT: global_store_dword v[0:1], v18, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v18, s26 |
| ; GFX9-NEXT: global_store_dword v[0:1], v18, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v18, s27 |
| ; GFX9-NEXT: global_store_dword v[0:1], v18, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v18, s28 |
| ; GFX9-NEXT: global_store_dword v[0:1], v18, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v18, s29 |
| ; GFX9-NEXT: global_store_dword v[0:1], v18, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dword v[0:1], v0, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dword v[0:1], v1, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dword v[0:1], v2, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dword v[0:1], v3, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dword v[0:1], v4, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dword v[0:1], v5, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dword v[0:1], v6, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dword v[0:1], v7, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dword v[0:1], v8, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dword v[0:1], v9, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dword v[0:1], v10, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dword v[0:1], v11, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dword v[0:1], v12, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dword v[0:1], v13, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dword v[0:1], v14, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dword v[0:1], v15, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dword v[0:1], v16, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dword v[0:1], v17, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: too_many_args_use_workitem_id_x_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v14, s0 :: v_dual_mov_b32 v15, s1 |
| ; GFX11-NEXT: v_mov_b32_e32 v16, s2 |
| ; GFX11-NEXT: v_mov_b32_e32 v18, s19 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v14, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v15, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v16, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: v_dual_mov_b32 v14, s3 :: v_dual_mov_b32 v17, s18 |
| ; GFX11-NEXT: v_dual_mov_b32 v15, s16 :: v_dual_mov_b32 v16, s17 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v14, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v15, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v16, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v17, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v18, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: v_dual_mov_b32 v15, s21 :: v_dual_mov_b32 v14, s20 |
| ; GFX11-NEXT: v_dual_mov_b32 v16, s22 :: v_dual_mov_b32 v17, s23 |
| ; GFX11-NEXT: v_mov_b32_e32 v18, s24 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v14, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v15, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v16, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v17, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v18, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: v_dual_mov_b32 v17, s28 :: v_dual_mov_b32 v14, s25 |
| ; GFX11-NEXT: v_dual_mov_b32 v15, s26 :: v_dual_mov_b32 v16, s27 |
| ; GFX11-NEXT: v_mov_b32_e32 v18, s29 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v14, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v15, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v16, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v17, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v18, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v1, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v2, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v3, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v4, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v5, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v6, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v7, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v8, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v9, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v10, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v11, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v12, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v13, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| i32 inreg %arg0, i32 inreg %arg1, i32 inreg %arg2, i32 inreg %arg3, i32 inreg %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 inreg %arg7, |
| i32 inreg %arg8, i32 inreg %arg9, i32 inreg %arg10, i32 inreg %arg11, i32 inreg %arg12, i32 inreg %arg13, i32 inreg %arg14, i32 inreg %arg15, |
| i32 inreg %arg16, i32 inreg %arg17, i32 inreg %arg18, i32 inreg %arg19, i32 inreg %arg20, i32 inreg %arg21, i32 inreg %arg22, i32 inreg %arg23, |
| i32 inreg %arg24, i32 inreg %arg25, i32 inreg %arg26, i32 inreg %arg27, i32 inreg %arg28, i32 inreg %arg29, i32 inreg %arg30, i32 inreg %arg31) { |
| ;%val = call i32 @llvm.amdgcn.workitem.id.x() |
| ;store volatile i32 %val, ptr addrspace(1) undef |
| |
| store volatile i32 %arg0, ptr addrspace(1) undef |
| store volatile i32 %arg1, ptr addrspace(1) undef |
| store volatile i32 %arg2, ptr addrspace(1) undef |
| store volatile i32 %arg3, ptr addrspace(1) undef |
| store volatile i32 %arg4, ptr addrspace(1) undef |
| store volatile i32 %arg5, ptr addrspace(1) undef |
| store volatile i32 %arg6, ptr addrspace(1) undef |
| store volatile i32 %arg7, ptr addrspace(1) undef |
| |
| store volatile i32 %arg8, ptr addrspace(1) undef |
| store volatile i32 %arg9, ptr addrspace(1) undef |
| store volatile i32 %arg10, ptr addrspace(1) undef |
| store volatile i32 %arg11, ptr addrspace(1) undef |
| store volatile i32 %arg12, ptr addrspace(1) undef |
| store volatile i32 %arg13, ptr addrspace(1) undef |
| store volatile i32 %arg14, ptr addrspace(1) undef |
| store volatile i32 %arg15, ptr addrspace(1) undef |
| |
| store volatile i32 %arg16, ptr addrspace(1) undef |
| store volatile i32 %arg17, ptr addrspace(1) undef |
| store volatile i32 %arg18, ptr addrspace(1) undef |
| store volatile i32 %arg19, ptr addrspace(1) undef |
| store volatile i32 %arg20, ptr addrspace(1) undef |
| store volatile i32 %arg21, ptr addrspace(1) undef |
| store volatile i32 %arg22, ptr addrspace(1) undef |
| store volatile i32 %arg23, ptr addrspace(1) undef |
| |
| store volatile i32 %arg24, ptr addrspace(1) undef |
| store volatile i32 %arg25, ptr addrspace(1) undef |
| store volatile i32 %arg26, ptr addrspace(1) undef |
| store volatile i32 %arg27, ptr addrspace(1) undef |
| store volatile i32 %arg28, ptr addrspace(1) undef |
| store volatile i32 %arg29, ptr addrspace(1) undef |
| store volatile i32 %arg30, ptr addrspace(1) undef |
| store volatile i32 %arg31, ptr addrspace(1) undef |
| |
| ret void |
| } |
| |
| ; Stores an inreg i32 argument followed by an inreg <2 x float> argument to an |
| ; undef addrspace(1) pointer. The assertions below expect the three dwords in |
| ; s16-s18 for the gfx900 run and in s0-s2 for the gfx1100 run. |
| define void @void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x float> inreg %arg1) #0 { |
| ; GFX9-LABEL: void_func_i32_v2float_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: global_store_dword v[0:1], v0, off |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s17 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s18 |
| ; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_i32_v2float_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v1, s2 |
| ; GFX11-NEXT: v_mov_b32_e32 v0, s1 |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v2, off |
| ; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store i32 %arg0, ptr addrspace(1) undef |
| store <2 x float> %arg1, ptr addrspace(1) undef |
| ret void |
| } |
| |
| ; Forwards its inreg i32 and inreg <2 x float> arguments, unchanged, to a call. |
| ; The assertions cover the non-leaf prologue/epilogue (v40 spill, writelane |
| ; saves of the old s33 value, s30 and s31) and the GOT-relative load of the |
| ; callee address. |
| ; NOTE(review): the call target is this function itself, not |
| ; @void_func_i32_v2float_inreg; the autogenerated assertions reference the same |
| ; symbol, so changing the callee requires regenerating them. |
| define void @caller_void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x float> inreg %arg1) #0 { |
| ; GFX9-LABEL: caller_void_func_i32_v2float_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_mov_b32 s19, s33 |
| ; GFX9-NEXT: s_mov_b32 s33, s32 |
| ; GFX9-NEXT: s_or_saveexec_b64 s[20:21], -1 |
| ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill |
| ; GFX9-NEXT: s_mov_b64 exec, s[20:21] |
| ; GFX9-NEXT: s_addk_i32 s32, 0x400 |
| ; GFX9-NEXT: s_getpc_b64 s[20:21] |
| ; GFX9-NEXT: s_add_u32 s20, s20, caller_void_func_i32_v2float_inreg@gotpcrel32@lo+4 |
| ; GFX9-NEXT: s_addc_u32 s21, s21, caller_void_func_i32_v2float_inreg@gotpcrel32@hi+12 |
| ; GFX9-NEXT: s_load_dwordx2 s[20:21], s[20:21], 0x0 |
| ; GFX9-NEXT: v_writelane_b32 v40, s19, 2 |
| ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 |
| ; GFX9-NEXT: s_mov_b32 s2, s18 |
| ; GFX9-NEXT: s_mov_b32 s1, s17 |
| ; GFX9-NEXT: s_mov_b32 s0, s16 |
| ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 |
| ; GFX9-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX9-NEXT: s_swappc_b64 s[30:31], s[20:21] |
| ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 |
| ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 |
| ; GFX9-NEXT: s_mov_b32 s32, s33 |
| ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 |
| ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 |
| ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload |
| ; GFX9-NEXT: s_mov_b64 exec, s[6:7] |
| ; GFX9-NEXT: s_mov_b32 s33, s4 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: caller_void_func_i32_v2float_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, s33 |
| ; GFX11-NEXT: s_mov_b32 s33, s32 |
| ; GFX11-NEXT: s_or_saveexec_b32 s16, -1 |
| ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill |
| ; GFX11-NEXT: s_mov_b32 exec_lo, s16 |
| ; GFX11-NEXT: s_add_i32 s32, s32, 16 |
| ; GFX11-NEXT: s_getpc_b64 s[16:17] |
| ; GFX11-NEXT: s_add_u32 s16, s16, caller_void_func_i32_v2float_inreg@gotpcrel32@lo+4 |
| ; GFX11-NEXT: s_addc_u32 s17, s17, caller_void_func_i32_v2float_inreg@gotpcrel32@hi+12 |
| ; GFX11-NEXT: v_writelane_b32 v40, s3, 2 |
| ; GFX11-NEXT: s_load_b64 s[16:17], s[16:17], 0x0 |
| ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 |
| ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: s_swappc_b64 s[30:31], s[16:17] |
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 |
| ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 |
| ; GFX11-NEXT: s_mov_b32 s32, s33 |
| ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 |
| ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 |
| ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload |
| ; GFX11-NEXT: s_mov_b32 exec_lo, s1 |
| ; GFX11-NEXT: s_mov_b32 s33, s0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| call void @caller_void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x float> inreg %arg1) |
| ret void |
| } |
| |
| ; Stores a single inreg bfloat argument to an undef addrspace(1) pointer. |
| ; The assertions expect one 16-bit global store whose data is copied from s16 |
| ; (gfx900 run) or s0 (gfx1100 run). |
| define void @void_func_bf16_inreg(bfloat inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_bf16_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: global_store_short v[0:1], v0, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_bf16_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX11-NEXT: global_store_b16 v[0:1], v0, off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store bfloat %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
| ; Stores an inreg <2 x bfloat> argument to an undef addrspace(1) pointer. |
| ; The assertions expect the whole vector in one SGPR (s16 for the gfx900 run, |
| ; s0 for the gfx1100 run) written with a single 32-bit global store. |
| define void @void_func_v2bf16_inreg(<2 x bfloat> inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_v2bf16_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: global_store_dword v[0:1], v0, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v2bf16_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_mov_b32_e32 v0, s0 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v0, off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <2 x bfloat> %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
| ; Stores an inreg <3 x bfloat> argument to an undef addrspace(1) pointer. |
| ; The assertions expect the vector split over two SGPRs and written as one |
| ; 16-bit store (from s17 / s1) plus one 32-bit store (from s16 / s0). |
| define void @void_func_v3bf16_inreg(<3 x bfloat> inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_v3bf16_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s17 |
| ; GFX9-NEXT: global_store_short v[0:1], v0, off |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: global_store_dword v[0:1], v0, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v3bf16_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: global_store_b16 v[0:1], v0, off |
| ; GFX11-NEXT: global_store_b32 v[0:1], v1, off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <3 x bfloat> %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
| ; Stores an inreg <4 x bfloat> argument to an undef addrspace(1) pointer. |
| ; The assertions expect two SGPRs (s16-s17 for the gfx900 run, s0-s1 for the |
| ; gfx1100 run) copied to VGPRs and written with a single 64-bit global store. |
| define void @void_func_v4bf16_inreg(<4 x bfloat> inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_v4bf16_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v4bf16_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 |
| ; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <4 x bfloat> %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
| ; Stores an inreg <8 x bfloat> argument to an undef addrspace(1) pointer. |
| ; The assertions expect four SGPRs (s16-s19 for the gfx900 run, s0-s3 for the |
| ; gfx1100 run) copied to v0-v3 and written with a single 128-bit global store. |
| define void @void_func_v8bf16_inreg(<8 x bfloat> inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_v8bf16_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s18 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s19 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v8bf16_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 |
| ; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <8 x bfloat> %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
| ; Stores an inreg <16 x bfloat> argument to an undef addrspace(1) pointer. |
| ; The assertions expect eight SGPRs (s16-s23 for the gfx900 run; s0-s3 and |
| ; s16-s19 for the gfx1100 run) written with two 128-bit global stores. |
| define void @void_func_v16bf16_inreg(<16 x bfloat> inreg %arg0) #0 { |
| ; GFX9-LABEL: void_func_v16bf16_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s20 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s21 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s22 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s23 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_nop 0 |
| ; GFX9-NEXT: v_mov_b32_e32 v0, s16 |
| ; GFX9-NEXT: v_mov_b32_e32 v1, s17 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s18 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s19 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v16bf16_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17 |
| ; GFX11-NEXT: v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19 |
| ; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1 |
| ; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3 |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <16 x bfloat> %arg0, ptr addrspace(1) undef |
| ret void |
| } |
| |
| ; Volatile-stores two inreg i32 arguments, in order, through the non-inreg |
| ; %ptr argument. The assertions expect the values in s16-s17 (gfx900 run) or |
| ; s0-s1 (gfx1100 run) and use v[0:1] as the store address. |
| define void @void_func_2_i32_inreg(i32 inreg %arg0, i32 inreg %arg1, ptr addrspace(1) %ptr) { |
| ; GFX9-LABEL: void_func_2_i32_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s16 |
| ; GFX9-NEXT: global_store_dword v[0:1], v2, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s17 |
| ; GFX9-NEXT: global_store_dword v[0:1], v2, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_2_i32_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v2, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v3, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store volatile i32 %arg0, ptr addrspace(1) %ptr |
| store volatile i32 %arg1, ptr addrspace(1) %ptr |
| ret void |
| } |
| |
| ; Volatile-stores two inreg i64 arguments, in order, through the non-inreg |
| ; %ptr argument. The assertions expect the four dwords in s16-s19 (gfx900 run) |
| ; or s0-s3 (gfx1100 run) and use v[0:1] as the store address. |
| define void @void_func_2_i64_inreg(i64 inreg %arg0, i64 inreg %arg1, ptr addrspace(1) %ptr) { |
| ; GFX9-LABEL: void_func_2_i64_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v4, s16 |
| ; GFX9-NEXT: v_mov_b32_e32 v5, s17 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s18 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s19 |
| ; GFX9-NEXT: global_store_dwordx2 v[0:1], v[4:5], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dwordx2 v[0:1], v[2:3], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_2_i64_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v5, s1 |
| ; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v3, s3 |
| ; GFX11-NEXT: global_store_b64 v[0:1], v[4:5], off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store volatile i64 %arg0, ptr addrspace(1) %ptr |
| store volatile i64 %arg1, ptr addrspace(1) %ptr |
| ret void |
| } |
| |
| ; Volatile-stores an inreg i64, an inreg i32 and a second inreg i64, in that |
| ; order, through the non-inreg %ptr argument. The assertions expect the five |
| ; dwords in consecutive SGPRs with no padding after the i32: s16-s20 for the |
| ; gfx900 run, and s0-s3 followed by s16 for the gfx1100 run. |
| define void @void_func_i64_inreg_i32_inreg_i64_inreg(i64 inreg %arg0, i32 inreg %arg1, i64 inreg %arg2, ptr addrspace(1) %ptr) { |
| ; GFX9-LABEL: void_func_i64_inreg_i32_inreg_i64_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v4, s16 |
| ; GFX9-NEXT: v_mov_b32_e32 v5, s17 |
| ; GFX9-NEXT: global_store_dwordx2 v[0:1], v[4:5], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v4, s18 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s19 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s20 |
| ; GFX9-NEXT: global_store_dword v[0:1], v4, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: global_store_dwordx2 v[0:1], v[2:3], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_i64_inreg_i32_inreg_i64_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v2, s3 :: v_dual_mov_b32 v5, s1 |
| ; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v3, s16 |
| ; GFX11-NEXT: v_mov_b32_e32 v6, s2 |
| ; GFX11-NEXT: global_store_b64 v[0:1], v[4:5], off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v6, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store volatile i64 %arg0, ptr addrspace(1) %ptr |
| store volatile i32 %arg1, ptr addrspace(1) %ptr |
| store volatile i64 %arg2, ptr addrspace(1) %ptr |
| ret void |
| } |
| |
| ; Volatile-stores five inreg i32 arguments, in order, through the non-inreg |
| ; %ptr argument. The assertions expect the values in s16-s20 for the gfx900 |
| ; run, and in s0-s3 followed by s16 for the gfx1100 run. |
| define void @void_func_5_i32_inreg(i32 inreg %arg0, i32 inreg %arg1, i32 inreg %arg2, i32 inreg %arg3, i32 inreg %arg4, ptr addrspace(1) %ptr) { |
| ; GFX9-LABEL: void_func_5_i32_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s16 |
| ; GFX9-NEXT: global_store_dword v[0:1], v2, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s17 |
| ; GFX9-NEXT: global_store_dword v[0:1], v2, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s18 |
| ; GFX9-NEXT: global_store_dword v[0:1], v2, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s19 |
| ; GFX9-NEXT: global_store_dword v[0:1], v2, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s20 |
| ; GFX9-NEXT: global_store_dword v[0:1], v2, off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_5_i32_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1 |
| ; GFX11-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3 |
| ; GFX11-NEXT: v_mov_b32_e32 v6, s16 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v2, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v3, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v4, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v5, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v6, off dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store volatile i32 %arg0, ptr addrspace(1) %ptr |
| store volatile i32 %arg1, ptr addrspace(1) %ptr |
| store volatile i32 %arg2, ptr addrspace(1) %ptr |
| store volatile i32 %arg3, ptr addrspace(1) %ptr |
| store volatile i32 %arg4, ptr addrspace(1) %ptr |
| ret void |
| } |
| |
| ; Stores an inreg [5 x i32] aggregate argument through the non-inreg %ptr |
| ; argument. The assertions expect the same SGPR assignment as five separate |
| ; inreg i32 values (s16-s20 for the gfx900 run; s0-s3 then s16 for the gfx1100 |
| ; run), written as one 128-bit store plus one 32-bit store at offset 16. |
| define void @void_func_a5i32_inreg([5 x i32] inreg %arg0, ptr addrspace(1) %ptr) { |
| ; GFX9-LABEL: void_func_a5i32_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s20 |
| ; GFX9-NEXT: global_store_dword v[0:1], v2, off offset:16 |
| ; GFX9-NEXT: v_mov_b32_e32 v5, s19 |
| ; GFX9-NEXT: v_mov_b32_e32 v4, s18 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s17 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s16 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_a5i32_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v6, s16 :: v_dual_mov_b32 v5, s3 |
| ; GFX11-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v3, s1 |
| ; GFX11-NEXT: v_mov_b32_e32 v2, s0 |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v6, off offset:16 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store [5 x i32] %arg0, ptr addrspace(1) %ptr |
| ret void |
| } |
| |
| ; Force all implicit inputs to be required: this external function has no |
| ; visible body and is called from @void_func_a13i32_inreg below. |
| declare void @extern() |
| |
| ; Stores an inreg [13 x i32] aggregate argument through the non-inreg %ptr |
| ; argument and then calls @extern. The assertions expect the 13 dwords in |
| ; s16-s28 for the gfx900 run, and in s0-s3 followed by s16-s24 for the gfx1100 |
| ; run. Because of the call they also cover the non-leaf prologue/epilogue: |
| ; v40 is spilled and used via writelane to save the old s33 value, s30 and s31. |
| define void @void_func_a13i32_inreg([13 x i32] inreg %arg0, ptr addrspace(1) %ptr) { |
| ; GFX9-LABEL: void_func_a13i32_inreg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_mov_b32 s29, s33 |
| ; GFX9-NEXT: s_mov_b32 s33, s32 |
| ; GFX9-NEXT: s_or_saveexec_b64 vcc, -1 |
| ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill |
| ; GFX9-NEXT: s_mov_b64 exec, vcc |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s28 |
| ; GFX9-NEXT: global_store_dword v[0:1], v2, off offset:48 |
| ; GFX9-NEXT: v_mov_b32_e32 v5, s27 |
| ; GFX9-NEXT: v_mov_b32_e32 v4, s26 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s25 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s24 |
| ; GFX9-NEXT: s_addk_i32 s32, 0x400 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:32 |
| ; GFX9-NEXT: v_writelane_b32 v40, s29, 2 |
| ; GFX9-NEXT: v_mov_b32_e32 v5, s23 |
| ; GFX9-NEXT: v_mov_b32_e32 v4, s22 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s21 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s20 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:16 |
| ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 |
| ; GFX9-NEXT: v_mov_b32_e32 v3, s17 |
| ; GFX9-NEXT: v_mov_b32_e32 v2, s16 |
| ; GFX9-NEXT: s_getpc_b64 s[16:17] |
| ; GFX9-NEXT: s_add_u32 s16, s16, extern@gotpcrel32@lo+4 |
| ; GFX9-NEXT: s_addc_u32 s17, s17, extern@gotpcrel32@hi+12 |
| ; GFX9-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0 |
| ; GFX9-NEXT: v_mov_b32_e32 v5, s19 |
| ; GFX9-NEXT: v_mov_b32_e32 v4, s18 |
| ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 |
| ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off |
| ; GFX9-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] |
| ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 |
| ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 |
| ; GFX9-NEXT: s_mov_b32 s32, s33 |
| ; GFX9-NEXT: v_readlane_b32 s4, v40, 2 |
| ; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 |
| ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload |
| ; GFX9-NEXT: s_mov_b64 exec, s[6:7] |
| ; GFX9-NEXT: s_mov_b32 s33, s4 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_a13i32_inreg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s25, s33 |
| ; GFX11-NEXT: s_mov_b32 s33, s32 |
| ; GFX11-NEXT: s_or_saveexec_b32 s26, -1 |
| ; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill |
| ; GFX11-NEXT: s_mov_b32 exec_lo, s26 |
| ; GFX11-NEXT: s_add_i32 s32, s32, 16 |
| ; GFX11-NEXT: v_dual_mov_b32 v4, s22 :: v_dual_mov_b32 v3, s21 |
| ; GFX11-NEXT: v_dual_mov_b32 v2, s20 :: v_dual_mov_b32 v9, s19 |
| ; GFX11-NEXT: s_getpc_b64 s[20:21] |
| ; GFX11-NEXT: s_add_u32 s20, s20, extern@gotpcrel32@lo+4 |
| ; GFX11-NEXT: s_addc_u32 s21, s21, extern@gotpcrel32@hi+12 |
| ; GFX11-NEXT: v_dual_mov_b32 v8, s18 :: v_dual_mov_b32 v7, s17 |
| ; GFX11-NEXT: v_dual_mov_b32 v6, s16 :: v_dual_mov_b32 v13, s3 |
| ; GFX11-NEXT: s_load_b64 s[16:17], s[20:21], 0x0 |
| ; GFX11-NEXT: v_writelane_b32 v40, s25, 2 |
| ; GFX11-NEXT: v_dual_mov_b32 v14, s24 :: v_dual_mov_b32 v5, s23 |
| ; GFX11-NEXT: v_dual_mov_b32 v12, s2 :: v_dual_mov_b32 v11, s1 |
| ; GFX11-NEXT: v_writelane_b32 v40, s30, 0 |
| ; GFX11-NEXT: v_mov_b32_e32 v10, s0 |
| ; GFX11-NEXT: s_clause 0x3 |
| ; GFX11-NEXT: global_store_b32 v[0:1], v14, off offset:48 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off offset:32 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[6:9], off offset:16 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[10:13], off |
| ; GFX11-NEXT: v_writelane_b32 v40, s31, 1 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: s_swappc_b64 s[30:31], s[16:17] |
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-NEXT: v_readlane_b32 s31, v40, 1 |
| ; GFX11-NEXT: v_readlane_b32 s30, v40, 0 |
| ; GFX11-NEXT: s_mov_b32 s32, s33 |
| ; GFX11-NEXT: v_readlane_b32 s0, v40, 2 |
| ; GFX11-NEXT: s_or_saveexec_b32 s1, -1 |
| ; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload |
| ; GFX11-NEXT: s_mov_b32 exec_lo, s1 |
| ; GFX11-NEXT: s_mov_b32 s33, s0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store [13 x i32] %arg0, ptr addrspace(1) %ptr |
| call void @extern() |
| ret void |
| } |
| |
| ; define void @void_func_a14i32_inreg([14 x i32] inreg %arg0, ptr addrspace(1) %ptr) { |
| ; store [14 x i32] %arg0, ptr addrspace(1) %ptr |
| ; call void @extern() |
| ; ret void |
| ; } |
| |
| ; FIXME: |
| ; define void @void_func_a15i32_inreg([15 x i32] inreg %arg0, ptr addrspace(1) %ptr) { |
| ; store [15 x i32] %arg0, ptr addrspace(1) %ptr |
| ; call void @extern() |
| ; ret void |
| ; } |
| |
| ; FIXME: |
| ; define void @void_func_a16i32_inreg([16 x i32] inreg %arg0, ptr addrspace(1) %ptr) { |
| ; store [16 x i32] %arg0, ptr addrspace(1) %ptr |
| ; call void @extern() |
| ; ret void |
| ; } |
| |
| ; Stores an inreg [16 x i32] aggregate argument through the non-inreg %ptr |
| ; argument, with no call in the body. For the gfx900 run the assertions expect |
| ; only 14 dwords in SGPRs (s16-s29); the last two come from v0-v1 and the |
| ; pointer moves to v[2:3]. For the gfx1100 run all 16 dwords are in SGPRs |
| ; (s0-s3 and s16-s27) and the pointer stays in v[0:1]. |
| ; FIXME: Should still fail |
| ; NOTE(review): presumably "fail" means this argument list is expected to be |
| ; rejected rather than compiled as asserted below - confirm with the author. |
| define void @void_func_a16i32_inreg__noimplicit([16 x i32] inreg %arg0, ptr addrspace(1) %ptr) { |
| ; GFX9-LABEL: void_func_a16i32_inreg__noimplicit: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_mov_b32_e32 v7, v1 |
| ; GFX9-NEXT: v_mov_b32_e32 v6, v0 |
| ; GFX9-NEXT: v_mov_b32_e32 v5, s29 |
| ; GFX9-NEXT: v_mov_b32_e32 v4, s28 |
| ; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:48 |
| ; GFX9-NEXT: s_nop 0 |
| ; GFX9-NEXT: v_mov_b32_e32 v7, s27 |
| ; GFX9-NEXT: v_mov_b32_e32 v6, s26 |
| ; GFX9-NEXT: v_mov_b32_e32 v5, s25 |
| ; GFX9-NEXT: v_mov_b32_e32 v4, s24 |
| ; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:32 |
| ; GFX9-NEXT: s_nop 0 |
| ; GFX9-NEXT: v_mov_b32_e32 v7, s23 |
| ; GFX9-NEXT: v_mov_b32_e32 v6, s22 |
| ; GFX9-NEXT: v_mov_b32_e32 v5, s21 |
| ; GFX9-NEXT: v_mov_b32_e32 v4, s20 |
| ; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off offset:16 |
| ; GFX9-NEXT: s_nop 0 |
| ; GFX9-NEXT: v_mov_b32_e32 v7, s19 |
| ; GFX9-NEXT: v_mov_b32_e32 v6, s18 |
| ; GFX9-NEXT: v_mov_b32_e32 v5, s17 |
| ; GFX9-NEXT: v_mov_b32_e32 v4, s16 |
| ; GFX9-NEXT: global_store_dwordx4 v[2:3], v[4:7], off |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_a16i32_inreg__noimplicit: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v5, s27 :: v_dual_mov_b32 v4, s26 |
| ; GFX11-NEXT: v_dual_mov_b32 v3, s25 :: v_dual_mov_b32 v2, s24 |
| ; GFX11-NEXT: v_dual_mov_b32 v9, s23 :: v_dual_mov_b32 v8, s22 |
| ; GFX11-NEXT: v_dual_mov_b32 v7, s21 :: v_dual_mov_b32 v6, s20 |
| ; GFX11-NEXT: v_dual_mov_b32 v13, s19 :: v_dual_mov_b32 v12, s18 |
| ; GFX11-NEXT: v_dual_mov_b32 v11, s17 :: v_dual_mov_b32 v10, s16 |
| ; GFX11-NEXT: v_dual_mov_b32 v17, s3 :: v_dual_mov_b32 v16, s2 |
| ; GFX11-NEXT: v_dual_mov_b32 v15, s1 :: v_dual_mov_b32 v14, s0 |
| ; GFX11-NEXT: s_clause 0x3 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off offset:48 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[6:9], off offset:32 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[10:13], off offset:16 |
| ; GFX11-NEXT: global_store_b128 v[0:1], v[14:17], off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store [16 x i32] %arg0, ptr addrspace(1) %ptr |
| ret void |
| } |
| |
| attributes #0 = { nounwind } |
| attributes #1 = { nounwind noinline } |
| |
| |
| |
| |
| |
| |
| |