| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 |
| ; RUN: llc -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CIGFX89,CI %s |
| ; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CIGFX89,GFX89,VI %s |
| ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CIGFX89,GFX89,GFX9 %s |
| ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s |
| |
| ; Plain i1 argument (no extension attribute) stored to a poison global pointer. |
| ; The checks expect v0 to be masked with 1 before a single byte store on every |
| ; tested subtarget. |
| define void @void_func_i1(i1 %arg0) #0 { |
| ; CIGFX89-LABEL: void_func_i1: |
| ; CIGFX89: ; %bb.0: |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CIGFX89-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; CIGFX89-NEXT: s_mov_b32 s6, -1 |
| ; CIGFX89-NEXT: buffer_store_byte v0, off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_i1: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_store_b8 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store i1 %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; i1 zeroext argument widened to i32 and incremented by 12 before a dword store. |
| ; The checks expect no masking of v0, and expect the add to be emitted as a |
| ; bitwise OR with 12. |
| define void @void_func_i1_zeroext(i1 zeroext %arg0) #0 { |
| ; CIGFX89-LABEL: void_func_i1_zeroext: |
| ; CIGFX89: ; %bb.0: |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CIGFX89-NEXT: v_or_b32_e32 v0, 12, v0 |
| ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; CIGFX89-NEXT: s_mov_b32 s6, -1 |
| ; CIGFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_i1_zeroext: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_or_b32_e32 v0, 12, v0 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %ext = zext i1 %arg0 to i32 |
| %add = add i32 %ext, 12 |
| store i32 %add, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; i1 signext argument sign-extended to i32 and incremented by 12 before a dword |
| ; store. The checks expect v0 to feed the add directly, with no separate |
| ; extension instruction. The add mnemonic differs between subtargets, hence the |
| ; per-subtarget check prefixes. |
| define void @void_func_i1_signext(i1 signext %arg0) #0 { |
| ; CI-LABEL: void_func_i1_signext: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: v_add_i32_e32 v0, vcc, 12, v0 |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: void_func_i1_signext: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_add_u32_e32 v0, vcc, 12, v0 |
| ; VI-NEXT: s_mov_b32 s7, 0xf000 |
| ; VI-NEXT: s_mov_b32 s6, -1 |
| ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: void_func_i1_signext: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_add_u32_e32 v0, 12, v0 |
| ; GFX9-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX9-NEXT: s_mov_b32 s6, -1 |
| ; GFX9-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_i1_signext: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_add_nc_u32_e32 v0, 12, v0 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %ext = sext i1 %arg0 to i32 |
| %add = add i32 %ext, 12 |
| store i32 %add, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; i1 argument used directly as a branch condition. The checks expect v0 to be |
| ; masked with 1 and compared against 1, and the inverted compare result to |
| ; guard the volatile store in %bb1 through the exec mask. The IR block names |
| ; appear in the checked output, so renaming them requires regenerating checks. |
| define void @i1_arg_i1_use(i1 %arg) #0 { |
| ; CIGFX89-LABEL: i1_arg_i1_use: |
| ; CIGFX89: ; %bb.0: ; %bb |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CIGFX89-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; CIGFX89-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 |
| ; CIGFX89-NEXT: s_xor_b64 s[6:7], vcc, -1 |
| ; CIGFX89-NEXT: s_and_saveexec_b64 s[4:5], s[6:7] |
| ; CIGFX89-NEXT: s_cbranch_execz .LBB3_2 |
| ; CIGFX89-NEXT: ; %bb.1: ; %bb1 |
| ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; CIGFX89-NEXT: s_mov_b32 s6, -1 |
| ; CIGFX89-NEXT: v_mov_b32_e32 v0, 0 |
| ; CIGFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: .LBB3_2: ; %bb2 |
| ; CIGFX89-NEXT: s_or_b64 exec, exec, s[4:5] |
| ; CIGFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: i1_arg_i1_use: |
| ; GFX11: ; %bb.0: ; %bb |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) |
| ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 |
| ; GFX11-NEXT: s_xor_b32 s1, vcc_lo, -1 |
| ; GFX11-NEXT: s_and_saveexec_b32 s0, s1 |
| ; GFX11-NEXT: s_cbranch_execz .LBB3_2 |
| ; GFX11-NEXT: ; %bb.1: ; %bb1 |
| ; GFX11-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: .LBB3_2: ; %bb2 |
| ; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| bb: |
| br i1 %arg, label %bb2, label %bb1 |
| |
| bb1: |
| store volatile i32 0, ptr addrspace(1) poison |
| br label %bb2 |
| |
| bb2: |
| ret void |
| } |
| |
| ; Plain i8 argument stored to a poison global pointer. The checks expect a byte |
| ; store straight from v0 with no masking instruction. |
| define void @void_func_i8(i8 %arg0) #0 { |
| ; CIGFX89-LABEL: void_func_i8: |
| ; CIGFX89: ; %bb.0: |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; CIGFX89-NEXT: s_mov_b32 s6, -1 |
| ; CIGFX89-NEXT: buffer_store_byte v0, off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_i8: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_store_b8 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store i8 %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; i8 zeroext argument zero-extended to i32 and incremented by 12 before a dword |
| ; store. The checks expect v0 to feed the add directly, with no separate |
| ; extension or masking instruction. |
| define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 { |
| ; CI-LABEL: void_func_i8_zeroext: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: v_add_i32_e32 v0, vcc, 12, v0 |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: void_func_i8_zeroext: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_add_u32_e32 v0, vcc, 12, v0 |
| ; VI-NEXT: s_mov_b32 s7, 0xf000 |
| ; VI-NEXT: s_mov_b32 s6, -1 |
| ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: void_func_i8_zeroext: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_add_u32_e32 v0, 12, v0 |
| ; GFX9-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX9-NEXT: s_mov_b32 s6, -1 |
| ; GFX9-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_i8_zeroext: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_add_nc_u32_e32 v0, 12, v0 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %ext = zext i8 %arg0 to i32 |
| %add = add i32 %ext, 12 |
| store i32 %add, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; i8 signext argument sign-extended to i32 and incremented by 12 before a dword |
| ; store. The checks expect v0 to feed the add directly, with no separate |
| ; sign-extension instruction. |
| define void @void_func_i8_signext(i8 signext %arg0) #0 { |
| ; CI-LABEL: void_func_i8_signext: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: v_add_i32_e32 v0, vcc, 12, v0 |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: void_func_i8_signext: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_add_u32_e32 v0, vcc, 12, v0 |
| ; VI-NEXT: s_mov_b32 s7, 0xf000 |
| ; VI-NEXT: s_mov_b32 s6, -1 |
| ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: void_func_i8_signext: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_add_u32_e32 v0, 12, v0 |
| ; GFX9-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX9-NEXT: s_mov_b32 s6, -1 |
| ; GFX9-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_i8_signext: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_add_nc_u32_e32 v0, 12, v0 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %ext = sext i8 %arg0 to i32 |
| %add = add i32 %ext, 12 |
| store i32 %add, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Plain i16 argument stored to a poison global pointer. The checks expect a |
| ; 16-bit store straight from v0 with no masking instruction. |
| define void @void_func_i16(i16 %arg0) #0 { |
| ; CIGFX89-LABEL: void_func_i16: |
| ; CIGFX89: ; %bb.0: |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; CIGFX89-NEXT: s_mov_b32 s6, -1 |
| ; CIGFX89-NEXT: buffer_store_short v0, off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_i16: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store i16 %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; i16 zeroext argument zero-extended to i32 and incremented by 12 before a |
| ; dword store. The checks expect v0 to feed the add directly, with no separate |
| ; extension or masking instruction. |
| define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 { |
| ; CI-LABEL: void_func_i16_zeroext: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: v_add_i32_e32 v0, vcc, 12, v0 |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: void_func_i16_zeroext: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_add_u32_e32 v0, vcc, 12, v0 |
| ; VI-NEXT: s_mov_b32 s7, 0xf000 |
| ; VI-NEXT: s_mov_b32 s6, -1 |
| ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: void_func_i16_zeroext: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_add_u32_e32 v0, 12, v0 |
| ; GFX9-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX9-NEXT: s_mov_b32 s6, -1 |
| ; GFX9-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_i16_zeroext: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_add_nc_u32_e32 v0, 12, v0 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %ext = zext i16 %arg0 to i32 |
| %add = add i32 %ext, 12 |
| store i32 %add, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; i16 signext argument sign-extended to i32 and incremented by 12 before a |
| ; dword store. The checks expect v0 to feed the add directly, with no separate |
| ; sign-extension instruction. |
| define void @void_func_i16_signext(i16 signext %arg0) #0 { |
| ; CI-LABEL: void_func_i16_signext: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: v_add_i32_e32 v0, vcc, 12, v0 |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: void_func_i16_signext: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_add_u32_e32 v0, vcc, 12, v0 |
| ; VI-NEXT: s_mov_b32 s7, 0xf000 |
| ; VI-NEXT: s_mov_b32 s6, -1 |
| ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: void_func_i16_signext: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_add_u32_e32 v0, 12, v0 |
| ; GFX9-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX9-NEXT: s_mov_b32 s6, -1 |
| ; GFX9-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_i16_signext: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_add_nc_u32_e32 v0, 12, v0 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %ext = sext i16 %arg0 to i32 |
| %add = add i32 %ext, 12 |
| store i32 %add, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; i32 argument stored to a poison global pointer. The checks expect a single |
| ; dword store straight from v0. |
| define void @void_func_i32(i32 %arg0) #0 { |
| ; CIGFX89-LABEL: void_func_i32: |
| ; CIGFX89: ; %bb.0: |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; CIGFX89-NEXT: s_mov_b32 s6, -1 |
| ; CIGFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_i32: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store i32 %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; i64 argument stored to a poison global pointer. The checks expect the value |
| ; in the register pair v[0:1], written with a single 64-bit store. |
| define void @void_func_i64(i64 %arg0) #0 { |
| ; CIGFX89-LABEL: void_func_i64: |
| ; CIGFX89: ; %bb.0: |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; CIGFX89-NEXT: s_mov_b32 s6, -1 |
| ; CIGFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_i64: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store i64 %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; half argument stored to a poison global pointer. The hawaii checks expect a |
| ; v_cvt_f16_f32 on v0 before the 16-bit store; the fiji, gfx900 and gfx1100 |
| ; checks expect v0 to be stored directly. |
| define void @void_func_f16(half %arg0) #0 { |
| ; CI-LABEL: void_func_f16: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: buffer_store_short v0, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: void_func_f16: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX89-NEXT: s_mov_b32 s6, -1 |
| ; GFX89-NEXT: buffer_store_short v0, off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_f16: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store half %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; float argument stored to a poison global pointer. The checks expect a single |
| ; dword store straight from v0. |
| define void @void_func_f32(float %arg0) #0 { |
| ; CIGFX89-LABEL: void_func_f32: |
| ; CIGFX89: ; %bb.0: |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; CIGFX89-NEXT: s_mov_b32 s6, -1 |
| ; CIGFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_f32: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store float %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; double argument stored to a poison global pointer. The checks expect the |
| ; value in the register pair v[0:1], written with a single 64-bit store. |
| define void @void_func_f64(double %arg0) #0 { |
| ; CIGFX89-LABEL: void_func_f64: |
| ; CIGFX89: ; %bb.0: |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; CIGFX89-NEXT: s_mov_b32 s6, -1 |
| ; CIGFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_f64: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store double %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; <2 x i32> argument stored to a poison global pointer. The checks expect both |
| ; elements in v[0:1], written with a single 64-bit store. |
| define void @void_func_v2i32(<2 x i32> %arg0) #0 { |
| ; CIGFX89-LABEL: void_func_v2i32: |
| ; CIGFX89: ; %bb.0: |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; CIGFX89-NEXT: s_mov_b32 s6, -1 |
| ; CIGFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v2i32: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <2 x i32> %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; <3 x i32> argument stored to a poison global pointer. The checks expect the |
| ; three elements in v[0:2], written with a single 96-bit store. |
| define void @void_func_v3i32(<3 x i32> %arg0) #0 { |
| ; CIGFX89-LABEL: void_func_v3i32: |
| ; CIGFX89: ; %bb.0: |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; CIGFX89-NEXT: s_mov_b32 s6, -1 |
| ; CIGFX89-NEXT: buffer_store_dwordx3 v[0:2], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v3i32: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_store_b96 v[0:2], off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <3 x i32> %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; <4 x i32> argument stored to a poison global pointer. The checks expect the |
| ; four elements in v[0:3], written with a single 128-bit store. |
| define void @void_func_v4i32(<4 x i32> %arg0) #0 { |
| ; CIGFX89-LABEL: void_func_v4i32: |
| ; CIGFX89: ; %bb.0: |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; CIGFX89-NEXT: s_mov_b32 s6, -1 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v4i32: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <4 x i32> %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; <5 x i32> argument stored to a poison global pointer. The checks expect the |
| ; store to be split in two: a dword store of v4 followed by a 128-bit store of |
| ; v[0:3]. |
| define void @void_func_v5i32(<5 x i32> %arg0) #0 { |
| ; CIGFX89-LABEL: void_func_v5i32: |
| ; CIGFX89: ; %bb.0: |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; CIGFX89-NEXT: s_mov_b32 s6, -1 |
| ; CIGFX89-NEXT: buffer_store_dword v4, off, s[4:7], 0 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v5i32: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: buffer_store_b32 v4, off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <5 x i32> %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; <8 x i32> argument stored to a poison global pointer. The checks expect two |
| ; 128-bit stores, v[4:7] first and then v[0:3]. |
| define void @void_func_v8i32(<8 x i32> %arg0) #0 { |
| ; CIGFX89-LABEL: void_func_v8i32: |
| ; CIGFX89: ; %bb.0: |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; CIGFX89-NEXT: s_mov_b32 s6, -1 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v8i32: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <8 x i32> %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; <16 x i32> argument stored to a poison global pointer. The checks expect four |
| ; 128-bit stores covering v[0:15], emitted from the highest register group down |
| ; to the lowest. |
| define void @void_func_v16i32(<16 x i32> %arg0) #0 { |
| ; CIGFX89-LABEL: void_func_v16i32: |
| ; CIGFX89: ; %bb.0: |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; CIGFX89-NEXT: s_mov_b32 s6, -1 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v16i32: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_clause 0x3 |
| ; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <16 x i32> %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; <32 x i32> argument stored to a poison global pointer. The checks expect one |
| ; dword to be loaded from the stack (at s32) into v31 and waited on before the |
| ; v[28:31] store; every other stored register is used without a load. |
| define void @void_func_v32i32(<32 x i32> %arg0) #0 { |
| ; CIGFX89-LABEL: void_func_v32i32: |
| ; CIGFX89: ; %bb.0: |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CIGFX89-NEXT: buffer_load_dword v31, off, s[0:3], s32 |
| ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; CIGFX89-NEXT: s_mov_b32 s6, -1 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(6) |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v32i32: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: scratch_load_b32 v31, off, s32 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_clause 0x3 |
| ; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_clause 0x3 |
| ; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <32 x i32> %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; 1 over register limit |
| ; <33 x i32> argument stored to a poison global pointer. The checks expect two |
| ; dwords to be loaded from the stack, at s32 and at s32 offset:4, each waited on |
| ; before the store that consumes it; the second one is written with its own |
| ; dword store. |
| define void @void_func_v33i32(<33 x i32> %arg0) #0 { |
| ; CI-LABEL: void_func_v33i32: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 |
| ; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 |
| ; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32 |
| ; CI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:4 |
| ; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 |
| ; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 |
| ; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 |
| ; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 |
| ; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(6) |
| ; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(6) |
| ; CI-NEXT: buffer_store_dword v20, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: void_func_v33i32: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: s_mov_b32 s7, 0xf000 |
| ; VI-NEXT: s_mov_b32 s6, -1 |
| ; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 |
| ; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 |
| ; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32 |
| ; VI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:4 |
| ; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 |
| ; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 |
| ; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 |
| ; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 |
| ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(6) |
| ; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(6) |
| ; VI-NEXT: buffer_store_dword v20, off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: void_func_v33i32: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX9-NEXT: s_mov_b32 s6, -1 |
| ; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 |
| ; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 |
| ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 |
| ; GFX9-NEXT: s_nop 0 |
| ; GFX9-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:4 |
| ; GFX9-NEXT: s_nop 0 |
| ; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 |
| ; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 |
| ; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 |
| ; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 |
| ; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(6) |
| ; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(6) |
| ; GFX9-NEXT: buffer_store_dword v20, off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v33i32: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: scratch_load_b32 v31, off, s32 |
| ; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:4 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_clause 0x5 |
| ; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_store_b32 v32, off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <33 x i32> %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; <2 x i64> argument stored to a poison global pointer. The checks expect the |
| ; vector in v[0:3], written with a single 128-bit store. |
| define void @void_func_v2i64(<2 x i64> %arg0) #0 { |
| ; CIGFX89-LABEL: void_func_v2i64: |
| ; CIGFX89: ; %bb.0: |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; CIGFX89-NEXT: s_mov_b32 s6, -1 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v2i64: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <2 x i64> %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; <3 x i64> argument stored to a poison global pointer. The checks expect the |
| ; store to be split in two: a 64-bit store of v[4:5] followed by a 128-bit |
| ; store of v[0:3]. |
| define void @void_func_v3i64(<3 x i64> %arg0) #0 { |
| ; CIGFX89-LABEL: void_func_v3i64: |
| ; CIGFX89: ; %bb.0: |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; CIGFX89-NEXT: s_mov_b32 s6, -1 |
| ; CIGFX89-NEXT: buffer_store_dwordx2 v[4:5], off, s[4:7], 0 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v3i64: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: buffer_store_b64 v[4:5], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <3 x i64> %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; <4 x i64> argument stored to a poison global pointer. The checks expect two |
| ; 128-bit stores, v[4:7] first and then v[0:3]. |
| define void @void_func_v4i64(<4 x i64> %arg0) #0 { |
| ; CIGFX89-LABEL: void_func_v4i64: |
| ; CIGFX89: ; %bb.0: |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; CIGFX89-NEXT: s_mov_b32 s6, -1 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v4i64: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <4 x i64> %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; <5 x i64> argument stored to a poison global pointer. The checks expect three |
| ; stores: 128-bit stores of v[4:7] and v[0:3], then a 64-bit store of v[8:9]. |
| define void @void_func_v5i64(<5 x i64> %arg0) #0 { |
| ; CIGFX89-LABEL: void_func_v5i64: |
| ; CIGFX89: ; %bb.0: |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; CIGFX89-NEXT: s_mov_b32 s6, -1 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; CIGFX89-NEXT: buffer_store_dwordx2 v[8:9], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v5i64: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_clause 0x2 |
| ; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b64 v[8:9], off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <5 x i64> %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
; Stores the <8 x i64> argument %arg0 to a poison addrspace(1) pointer.
; The expected output is four 128-bit stores covering v[0:15], emitted from
; the highest register quad down to the lowest.
| define void @void_func_v8i64(<8 x i64> %arg0) #0 { |
| ; CIGFX89-LABEL: void_func_v8i64: |
| ; CIGFX89: ; %bb.0: |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; CIGFX89-NEXT: s_mov_b32 s6, -1 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v8i64: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_clause 0x3 |
| ; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <8 x i64> %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
; Stores the <16 x i64> argument %arg0 to a poison addrspace(1) pointer.
; The expected output first loads v31 through s32 and then emits eight
; 128-bit stores; the v[28:31] store is placed after a vmcnt wait so the
; loaded v31 is available.
; Presumably v31 is the one dword of the argument that is not passed in a
; register -- confirm against the AMDGPU calling convention.
| define void @void_func_v16i64(<16 x i64> %arg0) #0 { |
| ; CIGFX89-LABEL: void_func_v16i64: |
| ; CIGFX89: ; %bb.0: |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CIGFX89-NEXT: buffer_load_dword v31, off, s[0:3], s32 |
| ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; CIGFX89-NEXT: s_mov_b32 s6, -1 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(6) |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v16i64: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: scratch_load_b32 v31, off, s32 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_clause 0x3 |
| ; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_clause 0x3 |
| ; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <16 x i64> %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
; Stores the <2 x i8> argument %arg0 to a null addrspace(1) pointer.
; The expected output combines v0 (low byte) with v1 shifted left by 8 and
; writes the result with a single 16-bit store; the zero base is set up with
; an s_mov_b64 of 0.
| define void @void_func_v2i8(<2 x i8> %arg0) #0 { |
| ; CI-LABEL: void_func_v2i8: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1 |
| ; CI-NEXT: v_and_b32_e32 v0, 0xff, v0 |
| ; CI-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; CI-NEXT: s_mov_b64 s[4:5], 0 |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: buffer_store_short v0, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: void_func_v2i8: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1 |
| ; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX89-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX89-NEXT: s_mov_b32 s6, -1 |
| ; GFX89-NEXT: buffer_store_short v0, off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v2i8: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1 |
| ; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 |
| ; GFX11-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <2 x i8> %arg0, ptr addrspace(1) null |
| ret void |
| } |
| |
; Stores the <2 x i16> argument %arg0 to a poison addrspace(1) pointer.
; In the hawaii expectations the two halves arrive in v0 and v1 and are
; combined with shift/and/or before a dword store; in the fiji, gfx900 and
; gfx1100 expectations v0 is stored directly with no packing instructions.
| define void @void_func_v2i16(<2 x i16> %arg0) #0 { |
| ; CI-LABEL: void_func_v2i16: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 |
| ; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
| ; CI-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: void_func_v2i16: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX89-NEXT: s_mov_b32 s6, -1 |
| ; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v2i16: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <2 x i16> %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
; Stores the <3 x i8> argument %arg0 to a null addrspace(1) pointer.
; The expected output splits the write in two: a byte store of v2 issued
; after an s_mov_b64 of 2, then a 16-bit store of the packed v0/v1 pair
; issued after an s_mov_b64 of 0.
| define void @void_func_v3i8(<3 x i8> %arg0) #0 { |
| ; CI-LABEL: void_func_v3i8: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1 |
| ; CI-NEXT: v_and_b32_e32 v0, 0xff, v0 |
| ; CI-NEXT: s_mov_b64 s[4:5], 2 |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; CI-NEXT: buffer_store_byte v2, off, s[4:7], 0 |
| ; CI-NEXT: s_mov_b64 s[4:5], 0 |
| ; CI-NEXT: buffer_store_short v0, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: void_func_v3i8: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1 |
| ; GFX89-NEXT: s_mov_b64 s[4:5], 2 |
| ; GFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX89-NEXT: s_mov_b32 s6, -1 |
| ; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX89-NEXT: buffer_store_byte v2, off, s[4:7], 0 |
| ; GFX89-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX89-NEXT: buffer_store_short v0, off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v3i8: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1 |
| ; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 |
| ; GFX11-NEXT: s_mov_b64 s[0:1], 2 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX11-NEXT: buffer_store_b8 v2, off, s[0:3], 0 |
| ; GFX11-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <3 x i8> %arg0, ptr addrspace(1) null |
| ret void |
| } |
| |
; Stores the <4 x i8> argument %arg0 to a null addrspace(1) pointer.
; The expected output packs the four bytes held in v0..v3 into one dword
; (v0/v1 form the low half, v2/v3 the high half) and emits a single dword
; store.
| define void @void_func_v4i8(<4 x i8> %arg0) #0 { |
| ; CI-LABEL: void_func_v4i8: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: v_and_b32_e32 v2, 0xff, v2 |
| ; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1 |
| ; CI-NEXT: v_and_b32_e32 v0, 0xff, v0 |
| ; CI-NEXT: v_lshlrev_b32_e32 v3, 24, v3 |
| ; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 |
| ; CI-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; CI-NEXT: v_or_b32_e32 v2, v3, v2 |
| ; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
| ; CI-NEXT: v_or_b32_e32 v0, v0, v2 |
| ; CI-NEXT: s_mov_b64 s[4:5], 0 |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: void_func_v4i8: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1 |
| ; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v3 |
| ; GFX89-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD |
| ; GFX89-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX89-NEXT: s_mov_b32 s6, -1 |
| ; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v4i8: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1 |
| ; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 |
| ; GFX11-NEXT: v_lshlrev_b16 v3, 8, v3 |
| ; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 |
| ; GFX11-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: v_or_b32_e32 v1, v2, v3 |
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) |
| ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
| ; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 |
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <4 x i8> %arg0, ptr addrspace(1) null |
| ret void |
| } |
| |
; Stores the <5 x i8> argument %arg0 to a null addrspace(1) pointer.
; The expected output splits the write in two: a byte store of v4 issued
; after an s_mov_b64 of 4, then a dword store of the packed v0..v3 bytes
; issued after an s_mov_b64 of 0.
| define void @void_func_v5i8(<5 x i8> %arg0) #0 { |
| ; CI-LABEL: void_func_v5i8: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: v_and_b32_e32 v2, 0xff, v2 |
| ; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1 |
| ; CI-NEXT: v_and_b32_e32 v0, 0xff, v0 |
| ; CI-NEXT: v_lshlrev_b32_e32 v3, 24, v3 |
| ; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 |
| ; CI-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; CI-NEXT: v_or_b32_e32 v2, v3, v2 |
| ; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
| ; CI-NEXT: s_mov_b64 s[4:5], 4 |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: v_or_b32_e32 v0, v0, v2 |
| ; CI-NEXT: buffer_store_byte v4, off, s[4:7], 0 |
| ; CI-NEXT: s_mov_b64 s[4:5], 0 |
| ; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: void_func_v5i8: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1 |
| ; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v3 |
| ; GFX89-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX89-NEXT: s_mov_b64 s[4:5], 4 |
| ; GFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX89-NEXT: s_mov_b32 s6, -1 |
| ; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD |
| ; GFX89-NEXT: buffer_store_byte v4, off, s[4:7], 0 |
| ; GFX89-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v5i8: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1 |
| ; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 |
| ; GFX11-NEXT: v_lshlrev_b16 v3, 8, v3 |
| ; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 |
| ; GFX11-NEXT: s_mov_b64 s[0:1], 4 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: v_or_b32_e32 v1, v2, v3 |
| ; GFX11-NEXT: buffer_store_b8 v4, off, s[0:3], 0 |
| ; GFX11-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
| ; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 |
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <5 x i8> %arg0, ptr addrspace(1) null |
| ret void |
| } |
| |
; Stores the <8 x i8> argument %arg0 to a null addrspace(1) pointer.
; The expected output packs the eight bytes held in v0..v7 into two dwords
; (v0..v3 into the low dword, v4..v7 into the high dword) and emits a single
; 64-bit store.
| define void @void_func_v8i8(<8 x i8> %arg0) #0 { |
| ; CI-LABEL: void_func_v8i8: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: v_and_b32_e32 v6, 0xff, v6 |
| ; CI-NEXT: v_lshlrev_b32_e32 v5, 8, v5 |
| ; CI-NEXT: v_and_b32_e32 v4, 0xff, v4 |
| ; CI-NEXT: v_and_b32_e32 v2, 0xff, v2 |
| ; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1 |
| ; CI-NEXT: v_and_b32_e32 v0, 0xff, v0 |
| ; CI-NEXT: v_lshlrev_b32_e32 v7, 24, v7 |
| ; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v6 |
| ; CI-NEXT: v_or_b32_e32 v4, v4, v5 |
| ; CI-NEXT: v_lshlrev_b32_e32 v3, 24, v3 |
| ; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 |
| ; CI-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; CI-NEXT: v_or_b32_e32 v6, v7, v6 |
| ; CI-NEXT: v_and_b32_e32 v4, 0xffff, v4 |
| ; CI-NEXT: v_or_b32_e32 v2, v3, v2 |
| ; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
| ; CI-NEXT: v_or_b32_e32 v4, v4, v6 |
| ; CI-NEXT: v_or_b32_e32 v3, v0, v2 |
| ; CI-NEXT: s_mov_b64 s[4:5], 0 |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: buffer_store_dwordx2 v[3:4], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: void_func_v8i8: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: v_lshlrev_b16_e32 v5, 8, v5 |
| ; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1 |
| ; GFX89-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX89-NEXT: v_lshlrev_b16_e32 v5, 8, v7 |
| ; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v3 |
| ; GFX89-NEXT: v_or_b32_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX89-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX89-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD |
| ; GFX89-NEXT: v_or_b32_sdwa v3, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD |
| ; GFX89-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX89-NEXT: s_mov_b32 s6, -1 |
| ; GFX89-NEXT: buffer_store_dwordx2 v[3:4], off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v8i8: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_lshlrev_b16 v5, 8, v5 |
| ; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4 |
| ; GFX11-NEXT: v_lshlrev_b16 v7, 8, v7 |
| ; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6 |
| ; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1 |
| ; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 |
| ; GFX11-NEXT: v_lshlrev_b16 v3, 8, v3 |
| ; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 |
| ; GFX11-NEXT: v_or_b32_e32 v4, v4, v5 |
| ; GFX11-NEXT: v_or_b32_e32 v5, v6, v7 |
| ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX11-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX11-NEXT: v_or_b32_e32 v1, v2, v3 |
| ; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v4 |
| ; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v5 |
| ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: v_lshlrev_b32_e32 v4, 16, v1 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: v_or_b32_e32 v1, v2, v3 |
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) |
| ; GFX11-NEXT: v_or_b32_e32 v0, v0, v4 |
| ; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <8 x i8> %arg0, ptr addrspace(1) null |
| ret void |
| } |
| |
; Stores the <16 x i8> argument %arg0 to a null addrspace(1) pointer.
; The expected output packs the sixteen bytes held in v0..v15 into four
; dwords, four bytes per dword, and emits a single 128-bit store.
| define void @void_func_v16i8(<16 x i8> %arg0) #0 { |
| ; CI-LABEL: void_func_v16i8: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: v_and_b32_e32 v14, 0xff, v14 |
| ; CI-NEXT: v_lshlrev_b32_e32 v13, 8, v13 |
| ; CI-NEXT: v_and_b32_e32 v12, 0xff, v12 |
| ; CI-NEXT: v_and_b32_e32 v10, 0xff, v10 |
| ; CI-NEXT: v_lshlrev_b32_e32 v9, 8, v9 |
| ; CI-NEXT: v_and_b32_e32 v8, 0xff, v8 |
| ; CI-NEXT: v_and_b32_e32 v6, 0xff, v6 |
| ; CI-NEXT: v_lshlrev_b32_e32 v5, 8, v5 |
| ; CI-NEXT: v_and_b32_e32 v4, 0xff, v4 |
| ; CI-NEXT: v_and_b32_e32 v2, 0xff, v2 |
| ; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1 |
| ; CI-NEXT: v_and_b32_e32 v0, 0xff, v0 |
| ; CI-NEXT: v_lshlrev_b32_e32 v15, 24, v15 |
| ; CI-NEXT: v_lshlrev_b32_e32 v14, 16, v14 |
| ; CI-NEXT: v_or_b32_e32 v12, v12, v13 |
| ; CI-NEXT: v_lshlrev_b32_e32 v11, 24, v11 |
| ; CI-NEXT: v_lshlrev_b32_e32 v10, 16, v10 |
| ; CI-NEXT: v_or_b32_e32 v8, v8, v9 |
| ; CI-NEXT: v_lshlrev_b32_e32 v7, 24, v7 |
| ; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v6 |
| ; CI-NEXT: v_or_b32_e32 v4, v4, v5 |
| ; CI-NEXT: v_lshlrev_b32_e32 v3, 24, v3 |
| ; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 |
| ; CI-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; CI-NEXT: v_or_b32_e32 v14, v15, v14 |
| ; CI-NEXT: v_and_b32_e32 v12, 0xffff, v12 |
| ; CI-NEXT: v_or_b32_e32 v10, v11, v10 |
| ; CI-NEXT: v_and_b32_e32 v8, 0xffff, v8 |
| ; CI-NEXT: v_or_b32_e32 v6, v7, v6 |
| ; CI-NEXT: v_and_b32_e32 v4, 0xffff, v4 |
| ; CI-NEXT: v_or_b32_e32 v2, v3, v2 |
| ; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
| ; CI-NEXT: v_or_b32_e32 v12, v12, v14 |
| ; CI-NEXT: v_or_b32_e32 v11, v8, v10 |
| ; CI-NEXT: v_or_b32_e32 v10, v4, v6 |
| ; CI-NEXT: v_or_b32_e32 v9, v0, v2 |
| ; CI-NEXT: s_mov_b64 s[4:5], 0 |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: buffer_store_dwordx4 v[9:12], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: void_func_v16i8: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: v_lshlrev_b16_e32 v13, 8, v13 |
| ; GFX89-NEXT: v_lshlrev_b16_e32 v9, 8, v9 |
| ; GFX89-NEXT: v_lshlrev_b16_e32 v5, 8, v5 |
| ; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1 |
| ; GFX89-NEXT: v_or_b32_sdwa v12, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX89-NEXT: v_lshlrev_b16_e32 v13, 8, v15 |
| ; GFX89-NEXT: v_or_b32_sdwa v8, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX89-NEXT: v_lshlrev_b16_e32 v9, 8, v11 |
| ; GFX89-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX89-NEXT: v_lshlrev_b16_e32 v5, 8, v7 |
| ; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v3 |
| ; GFX89-NEXT: v_or_b32_sdwa v13, v14, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX89-NEXT: v_or_b32_sdwa v9, v10, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX89-NEXT: v_or_b32_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX89-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX89-NEXT: v_or_b32_sdwa v12, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD |
| ; GFX89-NEXT: v_or_b32_sdwa v11, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD |
| ; GFX89-NEXT: v_or_b32_sdwa v10, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD |
| ; GFX89-NEXT: v_or_b32_sdwa v9, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD |
| ; GFX89-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX89-NEXT: s_mov_b32 s6, -1 |
| ; GFX89-NEXT: buffer_store_dwordx4 v[9:12], off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v16i8: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_lshlrev_b16 v13, 8, v13 |
| ; GFX11-NEXT: v_and_b32_e32 v12, 0xff, v12 |
| ; GFX11-NEXT: v_lshlrev_b16 v15, 8, v15 |
| ; GFX11-NEXT: v_and_b32_e32 v14, 0xff, v14 |
| ; GFX11-NEXT: v_lshlrev_b16 v9, 8, v9 |
| ; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v8 |
| ; GFX11-NEXT: v_lshlrev_b16 v11, 8, v11 |
| ; GFX11-NEXT: v_and_b32_e32 v10, 0xff, v10 |
| ; GFX11-NEXT: v_lshlrev_b16 v5, 8, v5 |
| ; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4 |
| ; GFX11-NEXT: v_lshlrev_b16 v7, 8, v7 |
| ; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6 |
| ; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1 |
| ; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 |
| ; GFX11-NEXT: v_lshlrev_b16 v3, 8, v3 |
| ; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 |
| ; GFX11-NEXT: v_or_b32_e32 v12, v12, v13 |
| ; GFX11-NEXT: v_or_b32_e32 v13, v14, v15 |
| ; GFX11-NEXT: v_or_b32_e32 v8, v8, v9 |
| ; GFX11-NEXT: v_or_b32_e32 v10, v10, v11 |
| ; GFX11-NEXT: v_or_b32_e32 v4, v4, v5 |
| ; GFX11-NEXT: v_or_b32_e32 v5, v6, v7 |
| ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX11-NEXT: v_or_b32_e32 v1, v2, v3 |
| ; GFX11-NEXT: v_and_b32_e32 v9, 0xffff, v12 |
| ; GFX11-NEXT: v_lshlrev_b32_e32 v12, 16, v13 |
| ; GFX11-NEXT: v_and_b32_e32 v8, 0xffff, v8 |
| ; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v10 |
| ; GFX11-NEXT: v_and_b32_e32 v4, 0xffff, v4 |
| ; GFX11-NEXT: v_lshlrev_b32_e32 v5, 16, v5 |
| ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
| ; GFX11-NEXT: v_lshlrev_b32_e32 v6, 16, v1 |
| ; GFX11-NEXT: v_or_b32_e32 v3, v9, v12 |
| ; GFX11-NEXT: v_or_b32_e32 v2, v8, v2 |
| ; GFX11-NEXT: v_or_b32_e32 v1, v4, v5 |
| ; GFX11-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX11-NEXT: v_or_b32_e32 v0, v0, v6 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <16 x i8> %arg0, ptr addrspace(1) null |
| ret void |
| } |
| |
| define void @void_func_v32i8(<32 x i8> %arg0) #0 { |
| ; CI-LABEL: void_func_v32i8: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: v_lshlrev_b32_e32 v5, 8, v5 |
| ; CI-NEXT: v_and_b32_e32 v4, 0xff, v4 |
| ; CI-NEXT: v_or_b32_e32 v4, v4, v5 |
| ; CI-NEXT: buffer_load_dword v5, off, s[0:3], s32 |
| ; CI-NEXT: v_lshlrev_b32_e32 v9, 8, v9 |
| ; CI-NEXT: v_and_b32_e32 v8, 0xff, v8 |
| ; CI-NEXT: v_lshlrev_b32_e32 v13, 8, v13 |
| ; CI-NEXT: v_and_b32_e32 v12, 0xff, v12 |
| ; CI-NEXT: v_or_b32_e32 v8, v8, v9 |
| ; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1 |
| ; CI-NEXT: v_and_b32_e32 v0, 0xff, v0 |
| ; CI-NEXT: v_and_b32_e32 v9, 0xff, v14 |
| ; CI-NEXT: v_and_b32_e32 v10, 0xff, v10 |
| ; CI-NEXT: v_and_b32_e32 v6, 0xff, v6 |
| ; CI-NEXT: v_or_b32_e32 v12, v12, v13 |
| ; CI-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; CI-NEXT: v_lshlrev_b32_e32 v1, 24, v15 |
| ; CI-NEXT: v_lshlrev_b32_e32 v11, 24, v11 |
| ; CI-NEXT: v_lshlrev_b32_e32 v7, 24, v7 |
| ; CI-NEXT: v_and_b32_e32 v2, 0xff, v2 |
| ; CI-NEXT: v_lshlrev_b32_e32 v9, 16, v9 |
| ; CI-NEXT: v_lshlrev_b32_e32 v10, 16, v10 |
| ; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v6 |
| ; CI-NEXT: v_lshlrev_b32_e32 v3, 24, v3 |
| ; CI-NEXT: v_lshlrev_b32_e32 v13, 8, v29 |
| ; CI-NEXT: v_and_b32_e32 v14, 0xff, v28 |
| ; CI-NEXT: v_and_b32_e32 v26, 0xff, v26 |
| ; CI-NEXT: v_lshlrev_b32_e32 v25, 8, v25 |
| ; CI-NEXT: v_and_b32_e32 v24, 0xff, v24 |
| ; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 |
| ; CI-NEXT: v_or_b32_e32 v1, v1, v9 |
| ; CI-NEXT: v_or_b32_e32 v9, v11, v10 |
| ; CI-NEXT: v_and_b32_e32 v10, 0xffff, v12 |
| ; CI-NEXT: v_or_b32_e32 v6, v7, v6 |
| ; CI-NEXT: v_and_b32_e32 v4, 0xffff, v4 |
| ; CI-NEXT: v_lshlrev_b32_e32 v15, 24, v27 |
| ; CI-NEXT: v_and_b32_e32 v27, 0xff, v30 |
| ; CI-NEXT: v_or_b32_e32 v13, v14, v13 |
| ; CI-NEXT: v_lshlrev_b32_e32 v14, 16, v26 |
| ; CI-NEXT: v_or_b32_e32 v7, v3, v2 |
| ; CI-NEXT: v_or_b32_e32 v3, v10, v1 |
| ; CI-NEXT: v_or_b32_e32 v1, v4, v6 |
| ; CI-NEXT: v_lshlrev_b32_e32 v26, 16, v27 |
| ; CI-NEXT: v_or_b32_e32 v11, v15, v14 |
| ; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
| ; CI-NEXT: v_and_b32_e32 v12, 0xffff, v13 |
| ; CI-NEXT: v_and_b32_e32 v8, 0xffff, v8 |
| ; CI-NEXT: v_or_b32_e32 v0, v0, v7 |
| ; CI-NEXT: v_or_b32_e32 v2, v8, v9 |
| ; CI-NEXT: v_and_b32_e32 v8, 0xff, v20 |
| ; CI-NEXT: v_and_b32_e32 v9, 0xff, v16 |
| ; CI-NEXT: s_mov_b64 s[4:5], 16 |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: v_lshlrev_b32_e32 v4, 24, v5 |
| ; CI-NEXT: v_or_b32_e32 v5, v24, v25 |
| ; CI-NEXT: v_and_b32_e32 v5, 0xffff, v5 |
| ; CI-NEXT: v_or_b32_e32 v4, v4, v26 |
| ; CI-NEXT: v_or_b32_e32 v6, v5, v11 |
| ; CI-NEXT: v_and_b32_e32 v5, 0xff, v22 |
| ; CI-NEXT: v_or_b32_e32 v7, v12, v4 |
| ; CI-NEXT: v_lshlrev_b32_e32 v4, 24, v23 |
| ; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v5 |
| ; CI-NEXT: v_or_b32_e32 v4, v4, v5 |
| ; CI-NEXT: v_lshlrev_b32_e32 v5, 8, v21 |
| ; CI-NEXT: v_or_b32_e32 v5, v8, v5 |
| ; CI-NEXT: v_and_b32_e32 v5, 0xffff, v5 |
| ; CI-NEXT: v_and_b32_e32 v8, 0xff, v18 |
| ; CI-NEXT: v_or_b32_e32 v5, v5, v4 |
| ; CI-NEXT: v_lshlrev_b32_e32 v4, 24, v19 |
| ; CI-NEXT: v_lshlrev_b32_e32 v8, 16, v8 |
| ; CI-NEXT: v_or_b32_e32 v4, v4, v8 |
| ; CI-NEXT: v_lshlrev_b32_e32 v8, 8, v17 |
| ; CI-NEXT: v_or_b32_e32 v8, v9, v8 |
| ; CI-NEXT: v_and_b32_e32 v8, 0xffff, v8 |
| ; CI-NEXT: v_or_b32_e32 v4, v8, v4 |
| ; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 |
| ; CI-NEXT: s_mov_b64 s[4:5], 0 |
| ; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: void_func_v32i8: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: v_lshlrev_b16_e32 v9, 8, v9 |
| ; GFX89-NEXT: v_or_b32_sdwa v8, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX89-NEXT: v_lshlrev_b16_e32 v9, 8, v11 |
| ; GFX89-NEXT: v_or_b32_sdwa v9, v10, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX89-NEXT: buffer_load_ubyte v10, off, s[0:3], s32 |
| ; GFX89-NEXT: v_lshlrev_b16_e32 v13, 8, v13 |
| ; GFX89-NEXT: v_or_b32_sdwa v12, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX89-NEXT: v_lshlrev_b16_e32 v13, 8, v15 |
| ; GFX89-NEXT: v_lshlrev_b16_e32 v5, 8, v5 |
| ; GFX89-NEXT: v_lshlrev_b16_e32 v7, 8, v7 |
| ; GFX89-NEXT: v_lshlrev_b16_e32 v3, 8, v3 |
| ; GFX89-NEXT: v_or_b32_sdwa v13, v14, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1 |
| ; GFX89-NEXT: v_lshlrev_b16_e32 v11, 8, v29 |
| ; GFX89-NEXT: v_lshlrev_b16_e32 v14, 8, v25 |
| ; GFX89-NEXT: v_lshlrev_b16_e32 v15, 8, v27 |
| ; GFX89-NEXT: v_lshlrev_b16_e32 v21, 8, v21 |
| ; GFX89-NEXT: v_lshlrev_b16_e32 v23, 8, v23 |
| ; GFX89-NEXT: v_lshlrev_b16_e32 v17, 8, v17 |
| ; GFX89-NEXT: v_lshlrev_b16_e32 v19, 8, v19 |
| ; GFX89-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX89-NEXT: v_or_b32_sdwa v5, v6, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX89-NEXT: v_or_b32_sdwa v6, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX89-NEXT: v_or_b32_sdwa v2, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD |
| ; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX89-NEXT: v_or_b32_sdwa v7, v28, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX89-NEXT: v_or_b32_sdwa v11, v24, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX89-NEXT: v_or_b32_sdwa v14, v26, v15 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX89-NEXT: v_or_b32_sdwa v15, v20, v21 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX89-NEXT: v_or_b32_sdwa v20, v22, v23 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX89-NEXT: v_or_b32_sdwa v16, v16, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX89-NEXT: v_or_b32_sdwa v17, v18, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX89-NEXT: s_mov_b64 s[4:5], 16 |
| ; GFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX89-NEXT: s_mov_b32 s6, -1 |
| ; GFX89-NEXT: v_or_b32_sdwa v1, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD |
| ; GFX89-NEXT: v_or_b32_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD |
| ; GFX89-NEXT: v_or_b32_sdwa v6, v11, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD |
| ; GFX89-NEXT: v_or_b32_sdwa v5, v15, v20 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD |
| ; GFX89-NEXT: v_or_b32_sdwa v4, v16, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD |
| ; GFX89-NEXT: v_or_b32_sdwa v3, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: v_lshlrev_b16_e32 v8, 8, v10 |
| ; GFX89-NEXT: v_or_b32_sdwa v8, v30, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD |
| ; GFX89-NEXT: v_or_b32_sdwa v7, v7, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD |
| ; GFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 |
| ; GFX89-NEXT: s_mov_b64 s[4:5], 0 |
| ; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v32i8: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: scratch_load_u8 v31, off, s32 |
| ; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1 |
| ; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 |
| ; GFX11-NEXT: v_lshlrev_b16 v3, 8, v3 |
| ; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 |
| ; GFX11-NEXT: v_lshlrev_b16 v9, 8, v9 |
| ; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v8 |
| ; GFX11-NEXT: v_lshlrev_b16 v11, 8, v11 |
| ; GFX11-NEXT: v_and_b32_e32 v10, 0xff, v10 |
| ; GFX11-NEXT: v_lshlrev_b16 v17, 8, v17 |
| ; GFX11-NEXT: v_and_b32_e32 v16, 0xff, v16 |
| ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; GFX11-NEXT: v_or_b32_e32 v1, v2, v3 |
| ; GFX11-NEXT: v_lshlrev_b16 v13, 8, v13 |
| ; GFX11-NEXT: v_and_b32_e32 v12, 0xff, v12 |
| ; GFX11-NEXT: v_lshlrev_b16 v15, 8, v15 |
| ; GFX11-NEXT: v_and_b32_e32 v14, 0xff, v14 |
| ; GFX11-NEXT: v_lshlrev_b16 v5, 8, v5 |
| ; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4 |
| ; GFX11-NEXT: v_lshlrev_b16 v7, 8, v7 |
| ; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6 |
| ; GFX11-NEXT: v_lshlrev_b16 v29, 8, v29 |
| ; GFX11-NEXT: v_and_b32_e32 v28, 0xff, v28 |
| ; GFX11-NEXT: v_and_b32_e32 v30, 0xff, v30 |
| ; GFX11-NEXT: v_lshlrev_b16 v25, 8, v25 |
| ; GFX11-NEXT: v_and_b32_e32 v24, 0xff, v24 |
| ; GFX11-NEXT: v_lshlrev_b16 v27, 8, v27 |
| ; GFX11-NEXT: v_and_b32_e32 v26, 0xff, v26 |
| ; GFX11-NEXT: v_lshlrev_b16 v21, 8, v21 |
| ; GFX11-NEXT: v_and_b32_e32 v20, 0xff, v20 |
| ; GFX11-NEXT: v_lshlrev_b16 v23, 8, v23 |
| ; GFX11-NEXT: v_and_b32_e32 v22, 0xff, v22 |
| ; GFX11-NEXT: v_lshlrev_b16 v19, 8, v19 |
| ; GFX11-NEXT: v_and_b32_e32 v18, 0xff, v18 |
| ; GFX11-NEXT: v_or_b32_e32 v8, v8, v9 |
| ; GFX11-NEXT: v_or_b32_e32 v9, v10, v11 |
| ; GFX11-NEXT: v_or_b32_e32 v11, v16, v17 |
| ; GFX11-NEXT: v_lshlrev_b32_e32 v17, 16, v1 |
| ; GFX11-NEXT: v_or_b32_e32 v12, v12, v13 |
| ; GFX11-NEXT: v_or_b32_e32 v13, v14, v15 |
| ; GFX11-NEXT: v_or_b32_e32 v4, v4, v5 |
| ; GFX11-NEXT: v_or_b32_e32 v5, v6, v7 |
| ; GFX11-NEXT: v_or_b32_e32 v2, v28, v29 |
| ; GFX11-NEXT: v_or_b32_e32 v3, v24, v25 |
| ; GFX11-NEXT: v_or_b32_e32 v6, v26, v27 |
| ; GFX11-NEXT: v_or_b32_e32 v7, v20, v21 |
| ; GFX11-NEXT: v_or_b32_e32 v10, v22, v23 |
| ; GFX11-NEXT: v_or_b32_e32 v14, v18, v19 |
| ; GFX11-NEXT: v_and_b32_e32 v15, 0xffff, v4 |
| ; GFX11-NEXT: v_lshlrev_b32_e32 v16, 16, v5 |
| ; GFX11-NEXT: v_and_b32_e32 v18, 0xffff, v2 |
| ; GFX11-NEXT: v_and_b32_e32 v4, 0xffff, v3 |
| ; GFX11-NEXT: v_lshlrev_b32_e32 v5, 16, v6 |
| ; GFX11-NEXT: v_and_b32_e32 v7, 0xffff, v7 |
| ; GFX11-NEXT: v_lshlrev_b32_e32 v10, 16, v10 |
| ; GFX11-NEXT: v_and_b32_e32 v11, 0xffff, v11 |
| ; GFX11-NEXT: v_lshlrev_b32_e32 v14, 16, v14 |
| ; GFX11-NEXT: v_and_b32_e32 v12, 0xffff, v12 |
| ; GFX11-NEXT: v_lshlrev_b32_e32 v13, 16, v13 |
| ; GFX11-NEXT: v_and_b32_e32 v8, 0xffff, v8 |
| ; GFX11-NEXT: v_lshlrev_b32_e32 v9, 16, v9 |
| ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
| ; GFX11-NEXT: v_or_b32_e32 v6, v4, v5 |
| ; GFX11-NEXT: v_or_b32_e32 v5, v7, v10 |
| ; GFX11-NEXT: v_or_b32_e32 v4, v11, v14 |
| ; GFX11-NEXT: v_or_b32_e32 v3, v12, v13 |
| ; GFX11-NEXT: v_or_b32_e32 v2, v8, v9 |
| ; GFX11-NEXT: v_or_b32_e32 v0, v0, v17 |
| ; GFX11-NEXT: s_mov_b64 s[0:1], 16 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: v_lshlrev_b16 v1, 8, v31 |
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GFX11-NEXT: v_or_b32_e32 v1, v30, v1 |
| ; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 |
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-NEXT: v_or_b32_e32 v7, v18, v1 |
| ; GFX11-NEXT: v_or_b32_e32 v1, v15, v16 |
| ; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 |
| ; GFX11-NEXT: s_mov_b64 s[0:1], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <32 x i8> %arg0, ptr addrspace(1) null |
| ret void |
| } |
| |
| ; Stores a <3 x i16> argument to a poison addrspace(1) pointer. |
| ; The CI checks pack v0/v1 into one dword and store v2 as a short; the GFX89 |
| ; and GFX11 checks store v0 as a dword and v1 as a short with no packing. |
| define void @void_func_v3i16(<3 x i16> %arg0) #0 { |
| ; CI-LABEL: void_func_v3i16: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 |
| ; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; CI-NEXT: buffer_store_short v2, off, s[4:7], 0 |
| ; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: void_func_v3i16: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX89-NEXT: s_mov_b32 s6, -1 |
| ; GFX89-NEXT: buffer_store_short v1, off, s[4:7], 0 |
| ; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v3i16: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: buffer_store_b16 v1, off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <3 x i16> %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Stores a <4 x i16> argument to a poison addrspace(1) pointer. |
| ; The CI checks pack v0-v3 into v[1:2] before a single dwordx2 store; the |
| ; GFX89 and GFX11 checks store v[0:1] directly. |
| define void @void_func_v4i16(<4 x i16> %arg0) #0 { |
| ; CI-LABEL: void_func_v4i16: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 |
| ; CI-NEXT: v_and_b32_e32 v2, 0xffff, v2 |
| ; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 |
| ; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
| ; CI-NEXT: v_or_b32_e32 v2, v2, v3 |
| ; CI-NEXT: v_or_b32_e32 v1, v0, v1 |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: buffer_store_dwordx2 v[1:2], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: void_func_v4i16: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX89-NEXT: s_mov_b32 s6, -1 |
| ; GFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v4i16: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <4 x i16> %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Stores a <5 x i16> argument to a poison addrspace(1) pointer. |
| ; The CI checks pack v0-v3 into v[1:2] and store v4 as a short; the GFX89 and |
| ; GFX11 checks store v[0:1] as 64 bits and v2 as a short. |
| define void @void_func_v5i16(<5 x i16> %arg0) #0 { |
| ; CI-LABEL: void_func_v5i16: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 |
| ; CI-NEXT: v_and_b32_e32 v2, 0xffff, v2 |
| ; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 |
| ; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: v_or_b32_e32 v2, v2, v3 |
| ; CI-NEXT: v_or_b32_e32 v1, v0, v1 |
| ; CI-NEXT: buffer_store_short v4, off, s[4:7], 0 |
| ; CI-NEXT: buffer_store_dwordx2 v[1:2], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: void_func_v5i16: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX89-NEXT: s_mov_b32 s6, -1 |
| ; GFX89-NEXT: buffer_store_short v2, off, s[4:7], 0 |
| ; GFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v5i16: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: buffer_store_b16 v2, off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <5 x i16> %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Stores a <8 x i16> argument to a poison addrspace(1) pointer. |
| ; The CI checks pack v0-v7 pairwise into v[3:6] before one dwordx4 store; the |
| ; GFX89 and GFX11 checks store v[0:3] directly. |
| define void @void_func_v8i16(<8 x i16> %arg0) #0 { |
| ; CI-LABEL: void_func_v8i16: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: v_lshlrev_b32_e32 v7, 16, v7 |
| ; CI-NEXT: v_and_b32_e32 v6, 0xffff, v6 |
| ; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v5 |
| ; CI-NEXT: v_and_b32_e32 v4, 0xffff, v4 |
| ; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 |
| ; CI-NEXT: v_and_b32_e32 v2, 0xffff, v2 |
| ; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 |
| ; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
| ; CI-NEXT: v_or_b32_e32 v6, v6, v7 |
| ; CI-NEXT: v_or_b32_e32 v5, v4, v5 |
| ; CI-NEXT: v_or_b32_e32 v4, v2, v3 |
| ; CI-NEXT: v_or_b32_e32 v3, v0, v1 |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: buffer_store_dwordx4 v[3:6], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: void_func_v8i16: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX89-NEXT: s_mov_b32 s6, -1 |
| ; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v8i16: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <8 x i16> %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Stores a <16 x i16> argument to a poison addrspace(1) pointer. |
| ; The CI checks pack v0-v15 pairwise into v[3:6] and v[11:14] for two dwordx4 |
| ; stores; the GFX89 and GFX11 checks store v[4:7] and v[0:3] directly. |
| define void @void_func_v16i16(<16 x i16> %arg0) #0 { |
| ; CI-LABEL: void_func_v16i16: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v5 |
| ; CI-NEXT: v_and_b32_e32 v4, 0xffff, v4 |
| ; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 |
| ; CI-NEXT: v_and_b32_e32 v2, 0xffff, v2 |
| ; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 |
| ; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
| ; CI-NEXT: v_or_b32_e32 v5, v4, v5 |
| ; CI-NEXT: v_or_b32_e32 v4, v2, v3 |
| ; CI-NEXT: v_or_b32_e32 v3, v0, v1 |
| ; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v15 |
| ; CI-NEXT: v_and_b32_e32 v1, 0xffff, v14 |
| ; CI-NEXT: v_or_b32_e32 v14, v1, v0 |
| ; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v13 |
| ; CI-NEXT: v_and_b32_e32 v1, 0xffff, v12 |
| ; CI-NEXT: v_or_b32_e32 v13, v1, v0 |
| ; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v11 |
| ; CI-NEXT: v_and_b32_e32 v1, 0xffff, v10 |
| ; CI-NEXT: v_or_b32_e32 v12, v1, v0 |
| ; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v9 |
| ; CI-NEXT: v_and_b32_e32 v1, 0xffff, v8 |
| ; CI-NEXT: v_lshlrev_b32_e32 v7, 16, v7 |
| ; CI-NEXT: v_and_b32_e32 v6, 0xffff, v6 |
| ; CI-NEXT: v_or_b32_e32 v11, v1, v0 |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: v_or_b32_e32 v6, v6, v7 |
| ; CI-NEXT: buffer_store_dwordx4 v[11:14], off, s[4:7], 0 |
| ; CI-NEXT: buffer_store_dwordx4 v[3:6], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: void_func_v16i16: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX89-NEXT: s_mov_b32 s6, -1 |
| ; GFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 |
| ; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v16i16: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <16 x i16> %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Adds the two elements of a <2 x i24> argument and stores the i24 sum to a |
| ; poison addrspace(1) pointer. Every checked target adds v0 and v1, then |
| ; stores the low half as a short and the sum shifted right by 16 as a byte. |
| define void @void_func_v2i24(<2 x i24> %arg0) #0 { |
| ; CI-LABEL: void_func_v2i24: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: v_add_i32_e32 v0, vcc, v0, v1 |
| ; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: buffer_store_byte v1, off, s[4:7], 0 |
| ; CI-NEXT: buffer_store_short v0, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: void_func_v2i24: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1 |
| ; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 |
| ; VI-NEXT: s_mov_b32 s7, 0xf000 |
| ; VI-NEXT: s_mov_b32 s6, -1 |
| ; VI-NEXT: buffer_store_byte v1, off, s[4:7], 0 |
| ; VI-NEXT: buffer_store_short v0, off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: void_func_v2i24: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 |
| ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v0 |
| ; GFX9-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX9-NEXT: s_mov_b32 s6, -1 |
| ; GFX9-NEXT: buffer_store_byte v1, off, s[4:7], 0 |
| ; GFX9-NEXT: buffer_store_short v0, off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v2i24: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v1 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0 |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: buffer_store_b8 v1, off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %elt0 = extractelement <2 x i24> %arg0, i32 0 |
| %elt1 = extractelement <2 x i24> %arg0, i32 1 |
| %add = add i24 %elt0, %elt1 |
| store i24 %add, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Stores a <2 x float> argument to a poison addrspace(1) pointer. |
| ; All checked targets emit a single 64-bit store of v[0:1]. |
| define void @void_func_v2f32(<2 x float> %arg0) #0 { |
| ; CIGFX89-LABEL: void_func_v2f32: |
| ; CIGFX89: ; %bb.0: |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; CIGFX89-NEXT: s_mov_b32 s6, -1 |
| ; CIGFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v2f32: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <2 x float> %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Stores a <3 x float> argument to a poison addrspace(1) pointer. |
| ; All checked targets emit a single three-dword (96-bit) store of v[0:2]. |
| define void @void_func_v3f32(<3 x float> %arg0) #0 { |
| ; CIGFX89-LABEL: void_func_v3f32: |
| ; CIGFX89: ; %bb.0: |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; CIGFX89-NEXT: s_mov_b32 s6, -1 |
| ; CIGFX89-NEXT: buffer_store_dwordx3 v[0:2], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v3f32: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_store_b96 v[0:2], off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <3 x float> %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Stores a <4 x float> argument to a poison addrspace(1) pointer. |
| ; All checked targets emit a single 128-bit store of v[0:3]. |
| define void @void_func_v4f32(<4 x float> %arg0) #0 { |
| ; CIGFX89-LABEL: void_func_v4f32: |
| ; CIGFX89: ; %bb.0: |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; CIGFX89-NEXT: s_mov_b32 s6, -1 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v4f32: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <4 x float> %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Stores a <8 x float> argument to a poison addrspace(1) pointer. |
| ; All checked targets emit two 128-bit stores, of v[4:7] and then v[0:3]. |
| define void @void_func_v8f32(<8 x float> %arg0) #0 { |
| ; CIGFX89-LABEL: void_func_v8f32: |
| ; CIGFX89: ; %bb.0: |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; CIGFX89-NEXT: s_mov_b32 s6, -1 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v8f32: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <8 x float> %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Stores a <16 x float> argument to a poison addrspace(1) pointer. |
| ; All checked targets emit four 128-bit stores that together cover v[0:15]. |
| define void @void_func_v16f32(<16 x float> %arg0) #0 { |
| ; CIGFX89-LABEL: void_func_v16f32: |
| ; CIGFX89: ; %bb.0: |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; CIGFX89-NEXT: s_mov_b32 s6, -1 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v16f32: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_clause 0x3 |
| ; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <16 x float> %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Stores a <2 x double> argument to a poison addrspace(1) pointer. |
| ; All checked targets emit a single 128-bit store of v[0:3]. |
| define void @void_func_v2f64(<2 x double> %arg0) #0 { |
| ; CIGFX89-LABEL: void_func_v2f64: |
| ; CIGFX89: ; %bb.0: |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; CIGFX89-NEXT: s_mov_b32 s6, -1 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v2f64: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <2 x double> %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Stores a <3 x double> argument to a poison addrspace(1) pointer. |
| ; All checked targets emit a 64-bit store of v[4:5] followed by a 128-bit |
| ; store of v[0:3]. |
| define void @void_func_v3f64(<3 x double> %arg0) #0 { |
| ; CIGFX89-LABEL: void_func_v3f64: |
| ; CIGFX89: ; %bb.0: |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; CIGFX89-NEXT: s_mov_b32 s6, -1 |
| ; CIGFX89-NEXT: buffer_store_dwordx2 v[4:5], off, s[4:7], 0 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v3f64: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: buffer_store_b64 v[4:5], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <3 x double> %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Stores a <4 x double> argument to a poison addrspace(1) pointer. |
| ; All checked targets emit two 128-bit stores, of v[4:7] and then v[0:3]. |
| define void @void_func_v4f64(<4 x double> %arg0) #0 { |
| ; CIGFX89-LABEL: void_func_v4f64: |
| ; CIGFX89: ; %bb.0: |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; CIGFX89-NEXT: s_mov_b32 s6, -1 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v4f64: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <4 x double> %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Stores a <8 x double> argument to a poison addrspace(1) pointer. |
| ; All checked targets emit four 128-bit stores that together cover v[0:15]. |
| define void @void_func_v8f64(<8 x double> %arg0) #0 { |
| ; CIGFX89-LABEL: void_func_v8f64: |
| ; CIGFX89: ; %bb.0: |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; CIGFX89-NEXT: s_mov_b32 s6, -1 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v8f64: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_clause 0x3 |
| ; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <8 x double> %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Stores a <16 x double> argument to a poison addrspace(1) pointer. |
| ; The checks load v31 from memory addressed via s32 (presumably the stack |
| ; pointer) and wait for that load before storing v[28:31]; the other seven |
| ; 128-bit stores use v[0:27] as they are on entry. |
| define void @void_func_v16f64(<16 x double> %arg0) #0 { |
| ; CIGFX89-LABEL: void_func_v16f64: |
| ; CIGFX89: ; %bb.0: |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CIGFX89-NEXT: buffer_load_dword v31, off, s[0:3], s32 |
| ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; CIGFX89-NEXT: s_mov_b32 s6, -1 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(6) |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v16f64: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: scratch_load_b32 v31, off, s32 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_clause 0x3 |
| ; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_clause 0x3 |
| ; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <16 x double> %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Stores a <2 x half> argument to a poison addrspace(1) pointer. |
| ; The CI checks convert v0 and v1 with v_cvt_f16_f32 and pack them into one |
| ; dword; the GFX89 and GFX11 checks store v0 directly. |
| define void @void_func_v2f16(<2 x half> %arg0) #0 { |
| ; CI-LABEL: void_func_v2f16: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 |
| ; CI-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: void_func_v2f16: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX89-NEXT: s_mov_b32 s6, -1 |
| ; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v2f16: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <2 x half> %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; FIXME: The ABI is different when f16 is legal. |
| ; Stores a <3 x half> argument to a poison addrspace(1) pointer. |
| ; The CI checks convert v0-v2 with v_cvt_f16_f32, pack v0/v1 into one dword and |
| ; store v2 as a short; the GFX89 and GFX11 checks store v0 as a dword and v1 |
| ; as a short with no conversion. |
| define void @void_func_v3f16(<3 x half> %arg0) #0 { |
| ; CI-LABEL: void_func_v3f16: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: v_or_b32_e32 v0, v0, v1 |
| ; CI-NEXT: buffer_store_short v2, off, s[4:7], 0 |
| ; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: void_func_v3f16: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX89-NEXT: s_mov_b32 s6, -1 |
| ; GFX89-NEXT: buffer_store_short v1, off, s[4:7], 0 |
| ; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v3f16: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: buffer_store_b16 v1, off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <3 x half> %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Stores a <4 x half> argument to a poison addrspace(1) pointer. |
| ; The CI checks convert v0-v3 with v_cvt_f16_f32 and pack them into v[0:1]; |
| ; the GFX89 and GFX11 checks store v[0:1] directly. |
| define void @void_func_v4f16(<4 x half> %arg0) #0 { |
| ; CI-LABEL: void_func_v4f16: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 |
| ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 |
| ; CI-NEXT: v_cvt_f16_f32_e32 v4, v1 |
| ; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v3 |
| ; CI-NEXT: v_or_b32_e32 v1, v2, v1 |
| ; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v4 |
| ; CI-NEXT: v_or_b32_e32 v0, v0, v2 |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: void_func_v4f16: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX89-NEXT: s_mov_b32 s6, -1 |
| ; GFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v4f16: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <4 x half> %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Stores a <8 x half> argument to a poison addrspace(1) pointer. |
| ; The CI checks convert v0-v7 with v_cvt_f16_f32 and pack them into v[2:5]; |
| ; the GFX89 and GFX11 checks store v[0:3] directly. |
| define void @void_func_v8f16(<8 x half> %arg0) #0 { |
| ; CI-LABEL: void_func_v8f16: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: v_cvt_f16_f32_e32 v7, v7 |
| ; CI-NEXT: v_cvt_f16_f32_e32 v6, v6 |
| ; CI-NEXT: v_cvt_f16_f32_e32 v8, v5 |
| ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 |
| ; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 |
| ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 |
| ; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v7 |
| ; CI-NEXT: v_or_b32_e32 v5, v6, v5 |
| ; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v8 |
| ; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 |
| ; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 |
| ; CI-NEXT: v_or_b32_e32 v4, v4, v6 |
| ; CI-NEXT: v_or_b32_e32 v3, v2, v3 |
| ; CI-NEXT: v_or_b32_e32 v2, v0, v1 |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: buffer_store_dwordx4 v[2:5], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: void_func_v8f16: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX89-NEXT: s_mov_b32 s6, -1 |
| ; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v8f16: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <8 x half> %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Stores a <16 x half> argument to a poison addrspace(1) pointer. |
| ; The CI checks convert v0-v15 with v_cvt_f16_f32 and pack them into v[2:5] |
| ; and v[10:13] for two dwordx4 stores; the GFX89 and GFX11 checks store |
| ; v[4:7] and v[0:3] directly. |
| define void @void_func_v16f16(<16 x half> %arg0) #0 { |
| ; CI-LABEL: void_func_v16f16: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: v_cvt_f16_f32_e32 v7, v7 |
| ; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 |
| ; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 |
| ; CI-NEXT: v_cvt_f16_f32_e32 v6, v6 |
| ; CI-NEXT: v_cvt_f16_f32_e32 v16, v5 |
| ; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 |
| ; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 |
| ; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 |
| ; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v7 |
| ; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 |
| ; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 |
| ; CI-NEXT: v_or_b32_e32 v5, v6, v5 |
| ; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v16 |
| ; CI-NEXT: v_or_b32_e32 v3, v2, v3 |
| ; CI-NEXT: v_or_b32_e32 v2, v0, v1 |
| ; CI-NEXT: v_cvt_f16_f32_e32 v0, v15 |
| ; CI-NEXT: v_or_b32_e32 v4, v4, v6 |
| ; CI-NEXT: v_cvt_f16_f32_e32 v1, v14 |
| ; CI-NEXT: v_cvt_f16_f32_e32 v6, v13 |
| ; CI-NEXT: v_cvt_f16_f32_e32 v7, v12 |
| ; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 |
| ; CI-NEXT: v_or_b32_e32 v13, v1, v0 |
| ; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v6 |
| ; CI-NEXT: v_or_b32_e32 v12, v7, v0 |
| ; CI-NEXT: v_cvt_f16_f32_e32 v0, v11 |
| ; CI-NEXT: v_cvt_f16_f32_e32 v1, v10 |
| ; CI-NEXT: v_cvt_f16_f32_e32 v6, v9 |
| ; CI-NEXT: v_cvt_f16_f32_e32 v7, v8 |
| ; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 |
| ; CI-NEXT: v_or_b32_e32 v11, v1, v0 |
| ; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v6 |
| ; CI-NEXT: v_or_b32_e32 v10, v7, v0 |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: buffer_store_dwordx4 v[10:13], off, s[4:7], 0 |
| ; CI-NEXT: buffer_store_dwordx4 v[2:5], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: void_func_v16f16: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX89-NEXT: s_mov_b32 s6, -1 |
| ; GFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 |
| ; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v16f16: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <16 x half> %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Make sure there is no alignment requirement for passed vgprs. |
| ; Each argument is written with its own volatile store. The checks use v0 for |
| ; the first i32, the odd-starting pair v[1:2] for the i64, and v3 for the |
| ; last i32, with a wait after every store. |
| define void @void_func_i32_i64_i32(i32 %arg0, i64 %arg1, i32 %arg2) #0 { |
| ; CIGFX89-LABEL: void_func_i32_i64_i32: |
| ; CIGFX89: ; %bb.0: |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; CIGFX89-NEXT: s_mov_b32 s6, -1 |
| ; CIGFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_dwordx2 v[1:2], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_dword v3, off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_i32_i64_i32: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b64 v[1:2], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b32 v3, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store volatile i32 %arg0, ptr addrspace(1) poison |
| store volatile i64 %arg1, ptr addrspace(1) poison |
| store volatile i32 %arg2, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; A single-field struct { i32 } passed by value. The checks expect its only |
| ; field in v0, written out with one dword store. |
| define void @void_func_struct_i32({ i32 } %arg0) #0 { |
| ; CIGFX89-LABEL: void_func_struct_i32: |
| ; CIGFX89: ; %bb.0: |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; CIGFX89-NEXT: s_mov_b32 s6, -1 |
| ; CIGFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_struct_i32: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store { i32 } %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; A struct { i8, i32 } passed by value. The checks expect the i8 field in v0 |
| ; (byte store) and the i32 field in v1 (dword store), one register per field. |
| define void @void_func_struct_i8_i32({ i8, i32 } %arg0) #0 { |
| ; CIGFX89-LABEL: void_func_struct_i8_i32: |
| ; CIGFX89: ; %bb.0: |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; CIGFX89-NEXT: s_mov_b32 s6, -1 |
| ; CIGFX89-NEXT: buffer_store_dword v1, off, s[4:7], 0 |
| ; CIGFX89-NEXT: buffer_store_byte v0, off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_struct_i8_i32: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: buffer_store_b32 v1, off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b8 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store { i8, i32 } %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; A struct { i8, i32 } passed byval through an addrspace(5) pointer. The |
| ; checks expect both fields to be loaded from memory relative to s32 |
| ; (presumably the stack pointer), the i8 at offset 0 and the i32 at offset 4, |
| ; and then stored through the poison global pointer. |
| define void @void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 }) %arg0) #0 { |
| ; CIGFX89-LABEL: void_func_byval_struct_i8_i32: |
| ; CIGFX89: ; %bb.0: |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CIGFX89-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 |
| ; CIGFX89-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 |
| ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; CIGFX89-NEXT: s_mov_b32 s6, -1 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(1) |
| ; CIGFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(1) |
| ; CIGFX89-NEXT: buffer_store_byte v1, off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_byval_struct_i8_i32: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: scratch_load_b32 v0, off, s32 offset:4 |
| ; GFX11-NEXT: scratch_load_u8 v1, off, s32 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_store_b8 v1, off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %arg0.load = load { i8, i32 }, ptr addrspace(5) %arg0 |
| store { i8, i32 } %arg0.load, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Two byval { i8, i32 } arguments followed by an i32. The checks expect the |
| ; first struct at s32 offsets 0 and 4, the second at offsets 8 and 12, and |
| ; %arg2 in v0, where it feeds the LDS store (the LDS address is poison). |
| ; All loads and stores are volatile, so each one is followed by a wait. |
| ; The hawaii and fiji checks also expect m0 to be set to -1 before the LDS |
| ; write, while the gfx900 and gfx1100 checks do not. |
| define void @void_func_byval_struct_i8_i32_x2(ptr addrspace(5) byval({ i8, i32 }) %arg0, ptr addrspace(5) byval({ i8, i32 }) %arg1, i32 %arg2) #0 { |
| ; CI-LABEL: void_func_byval_struct_i8_i32_x2: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 glc |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 glc |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_load_ubyte v3, off, s[0:3], s32 offset:8 glc |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:12 glc |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: s_mov_b32 m0, -1 |
| ; CI-NEXT: buffer_store_dword v2, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_byte v1, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_dword v4, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_byte v3, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: ds_write_b32 v0, v0 |
| ; CI-NEXT: s_waitcnt lgkmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: void_func_byval_struct_i8_i32_x2: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 glc |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 glc |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_load_ubyte v3, off, s[0:3], s32 offset:8 glc |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:12 glc |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: s_mov_b32 s7, 0xf000 |
| ; VI-NEXT: s_mov_b32 s6, -1 |
| ; VI-NEXT: s_mov_b32 m0, -1 |
| ; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_byte v1, off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_dword v4, off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_byte v3, off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: ds_write_b32 v0, v0 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: void_func_byval_struct_i8_i32_x2: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_load_ubyte v3, off, s[0:3], s32 offset:8 glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:12 glc |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX9-NEXT: s_mov_b32 s6, -1 |
| ; GFX9-NEXT: buffer_store_dword v2, off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_byte v1, off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_dword v4, off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_byte v3, off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: ds_write_b32 v0, v0 |
| ; GFX9-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_byval_struct_i8_i32_x2: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: scratch_load_u8 v1, off, s32 glc dlc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: scratch_load_b32 v2, off, s32 offset:4 glc dlc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: scratch_load_u8 v3, off, s32 offset:8 glc dlc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: scratch_load_b32 v4, off, s32 offset:12 glc dlc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_store_b32 v2, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b8 v1, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b32 v4, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b8 v3, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: ds_store_b32 v0, v0 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %arg0.load = load volatile { i8, i32 }, ptr addrspace(5) %arg0 |
| %arg1.load = load volatile { i8, i32 }, ptr addrspace(5) %arg1 |
| store volatile { i8, i32 } %arg0.load, ptr addrspace(1) poison |
| store volatile { i8, i32 } %arg1.load, ptr addrspace(1) poison |
| store volatile i32 %arg2, ptr addrspace(3) poison |
| ret void |
| } |
| |
| ; A byval(i32) argument followed by a byval(i64) argument. The checks expect |
| ; the i32 at s32 offset 0 and the i64 at offset 8 (two dword loads at 8 and |
| ; 12 on the buffer-addressed targets, one 64-bit load on gfx1100). Offset 4 |
| ; is never read, presumably padding that aligns the i64 to 8 bytes. |
| define void @void_func_byval_i32_byval_i64(ptr addrspace(5) byval(i32) %arg0, ptr addrspace(5) byval(i64) %arg1) #0 { |
| ; CIGFX89-LABEL: void_func_byval_i32_byval_i64: |
| ; CIGFX89: ; %bb.0: |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CIGFX89-NEXT: buffer_load_dword v2, off, s[0:3], s32 |
| ; CIGFX89-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8 |
| ; CIGFX89-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:12 |
| ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; CIGFX89-NEXT: s_mov_b32 s6, -1 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(2) |
| ; CIGFX89-NEXT: buffer_store_dword v2, off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(1) |
| ; CIGFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_byval_i32_byval_i64: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: scratch_load_b32 v2, off, s32 |
| ; GFX11-NEXT: scratch_load_b64 v[0:1], off, s32 offset:8 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-NEXT: buffer_store_b32 v2, off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %arg0.load = load i32, ptr addrspace(5) %arg0 |
| %arg1.load = load i64, ptr addrspace(5) %arg1 |
| store i32 %arg0.load, ptr addrspace(1) poison |
| store i64 %arg1.load, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; A <32 x i32> followed by an i32 and an i64. The checks expect elements |
| ; 0-30 of the vector in v0-v30 and everything after that to be loaded |
| ; relative to s32. The last vector element comes from offset 0 (into v31), |
| ; %arg1 from offset 4, and the two halves of %arg2 from offsets 8 and 12. |
| ; All stores are volatile, so each one is followed by a wait. |
| define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0 { |
| ; CIGFX89-LABEL: void_func_v32i32_i32_i64: |
| ; CIGFX89: ; %bb.0: |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CIGFX89-NEXT: buffer_load_dword v31, off, s[0:3], s32 |
| ; CIGFX89-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:12 |
| ; CIGFX89-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8 |
| ; CIGFX89-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:4 |
| ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; CIGFX89-NEXT: s_mov_b32 s6, -1 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(3) |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_dword v34, off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_dwordx2 v[32:33], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v32i32_i32_i64: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_clause 0x3 |
| ; GFX11-NEXT: scratch_load_b32 v31, off, s32 |
| ; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:12 |
| ; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:4 |
| ; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:8 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-NEXT: buffer_store_b32 v34, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_store_b64 v[32:33], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store volatile <32 x i32> %arg0, ptr addrspace(1) poison |
| store volatile i32 %arg1, ptr addrspace(1) poison |
| store volatile i64 %arg2, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; FIXME: Different ext load types on CI vs. VI |
| ; After the <32 x i32>, the checks expect each small argument in its own |
| ; 4-byte slot relative to s32, with the i1 at offset 4, the i8 at 8, the i16 |
| ; at 12, the half at 16 and the bfloat at 20. The hawaii checks load the i8, |
| ; i16, half and bfloat as full dwords; the half is then converted with |
| ; v_cvt_f16_f32 and the bfloat is multiplied by 1.0 and shifted right by 16 |
| ; before its 16-bit store. The other targets use 16-bit loads instead and |
| ; store the loaded values directly. |
| ; NOTE(review) the function name ends in _bf16 but the signature also |
| ; carries a half (%arg4) ahead of the bfloat. |
| define void @void_func_v32i32_i1_i8_i16_bf16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i16 %arg3, half %arg4, bfloat %arg5) #0 { |
| ; CI-LABEL: void_func_v32i32_i1_i8_i16_bf16: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32 |
| ; CI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:20 |
| ; CI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:16 |
| ; CI-NEXT: buffer_load_ubyte v34, off, s[0:3], s32 offset:4 |
| ; CI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:8 |
| ; CI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:12 |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: s_waitcnt vmcnt(5) |
| ; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: v_mul_f32_e32 v12, 1.0, v32 |
| ; CI-NEXT: v_cvt_f16_f32_e32 v13, v33 |
| ; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: v_and_b32_e32 v0, 1, v34 |
| ; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v12 |
| ; CI-NEXT: buffer_store_byte v0, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_byte v35, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_short v36, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_short v13, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_short v1, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: void_func_v32i32_i1_i8_i16_bf16: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: buffer_load_dword v31, off, s[0:3], s32 |
| ; GFX89-NEXT: buffer_load_ubyte v32, off, s[0:3], s32 offset:4 |
| ; GFX89-NEXT: buffer_load_ushort v33, off, s[0:3], s32 offset:8 |
| ; GFX89-NEXT: buffer_load_ushort v34, off, s[0:3], s32 offset:12 |
| ; GFX89-NEXT: buffer_load_ushort v35, off, s[0:3], s32 offset:16 |
| ; GFX89-NEXT: buffer_load_ushort v36, off, s[0:3], s32 offset:20 |
| ; GFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX89-NEXT: s_mov_b32 s6, -1 |
| ; GFX89-NEXT: s_waitcnt vmcnt(5) |
| ; GFX89-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: v_and_b32_e32 v0, 1, v32 |
| ; GFX89-NEXT: buffer_store_byte v0, off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: buffer_store_byte v33, off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: buffer_store_short v34, off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: buffer_store_short v35, off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: buffer_store_short v36, off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v32i32_i1_i8_i16_bf16: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_clause 0x5 |
| ; GFX11-NEXT: scratch_load_b32 v31, off, s32 |
| ; GFX11-NEXT: scratch_load_u8 v32, off, s32 offset:4 |
| ; GFX11-NEXT: scratch_load_u16 v33, off, s32 offset:8 |
| ; GFX11-NEXT: scratch_load_u16 v34, off, s32 offset:12 |
| ; GFX11-NEXT: scratch_load_u16 v35, off, s32 offset:16 |
| ; GFX11-NEXT: scratch_load_u16 v36, off, s32 offset:20 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_waitcnt vmcnt(5) |
| ; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-NEXT: v_and_b32_e32 v16, 1, v32 |
| ; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b8 v16, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-NEXT: buffer_store_b8 v33, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-NEXT: buffer_store_b16 v34, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-NEXT: buffer_store_b16 v35, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_store_b16 v36, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store volatile <32 x i32> %arg0, ptr addrspace(1) poison |
| store volatile i1 %arg1, ptr addrspace(1) poison |
| store volatile i8 %arg2, ptr addrspace(1) poison |
| store volatile i16 %arg3, ptr addrspace(1) poison |
| store volatile half %arg4, ptr addrspace(1) poison |
| store volatile bfloat %arg5, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; A <32 x i32> followed by two 2 x 32-bit vectors. The checks expect the |
| ; last element of %arg0 at s32 offset 0 (loaded into v31), the <2 x i32> |
| ; %arg1 at offsets 4 and 8 (v[32:33]) and the <2 x float> %arg2 at offsets |
| ; 12 and 16 (v[34:35]), each small vector written with one 64-bit store. |
| define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2 x float> %arg2) #0 { |
| ; CIGFX89-LABEL: void_func_v32i32_v2i32_v2f32: |
| ; CIGFX89: ; %bb.0: |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CIGFX89-NEXT: buffer_load_dword v31, off, s[0:3], s32 |
| ; CIGFX89-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8 |
| ; CIGFX89-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4 |
| ; CIGFX89-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:16 |
| ; CIGFX89-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:12 |
| ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; CIGFX89-NEXT: s_mov_b32 s6, -1 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(4) |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_dwordx2 v[32:33], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_dwordx2 v[34:35], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v32i32_v2i32_v2f32: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_clause 0x4 |
| ; GFX11-NEXT: scratch_load_b32 v31, off, s32 |
| ; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:8 |
| ; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:4 |
| ; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:16 |
| ; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:12 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-NEXT: buffer_store_b64 v[32:33], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_store_b64 v[34:35], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store volatile <32 x i32> %arg0, ptr addrspace(1) poison |
| store volatile <2 x i32> %arg1, ptr addrspace(1) poison |
| store volatile <2 x float> %arg2, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; A <32 x i32> followed by four small vectors of 16-bit elements. |
| ; The hawaii checks expect every 16-bit element in its own dword slot |
| ; relative to s32 (ten slots, offsets 4 through 40) and store each element |
| ; with a separate 16-bit store. Half elements are first converted with |
| ; v_cvt_f16_f32, and bfloat elements are multiplied by 1.0 and shifted right |
| ; by 16. |
| ; The other targets expect each 2-element vector packed in one dword |
| ; (offsets 4, 8 and 12) and the <4 x bfloat> in two dwords (offsets 16 and |
| ; 20), stored with dword and 64-bit stores and no conversion. |
| define void @void_func_v32i32_v2i16_v2f16_v2bf16_v4bf16(<32 x i32> %arg0, <2 x i16> %arg1, <2 x half> %arg2, <2 x bfloat> %arg3, <4 x bfloat> %arg4) #0 { |
| ; CI-LABEL: void_func_v32i32_v2i16_v2f16_v2bf16_v4bf16: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32 |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:28 |
| ; CI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:32 |
| ; CI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:36 |
| ; CI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:40 |
| ; CI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:20 |
| ; CI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:24 |
| ; CI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:12 |
| ; CI-NEXT: s_waitcnt vmcnt(7) |
| ; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:16 |
| ; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:8 |
| ; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:4 |
| ; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: v_cvt_f16_f32_e32 v10, v38 |
| ; CI-NEXT: v_mul_f32_e32 v4, 1.0, v32 |
| ; CI-NEXT: v_mul_f32_e32 v5, 1.0, v33 |
| ; CI-NEXT: v_mul_f32_e32 v6, 1.0, v34 |
| ; CI-NEXT: v_mul_f32_e32 v7, 1.0, v35 |
| ; CI-NEXT: v_mul_f32_e32 v8, 1.0, v36 |
| ; CI-NEXT: v_mul_f32_e32 v9, 1.0, v37 |
| ; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_short v16, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_short v17, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: v_cvt_f16_f32_e32 v11, v20 |
| ; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v4 |
| ; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v5 |
| ; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v6 |
| ; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v7 |
| ; CI-NEXT: v_lshrrev_b32_e32 v4, 16, v8 |
| ; CI-NEXT: v_lshrrev_b32_e32 v5, 16, v9 |
| ; CI-NEXT: buffer_store_short v11, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_short v10, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_short v5, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_short v4, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_short v3, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_short v2, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_short v1, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_short v0, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: void_func_v32i32_v2i16_v2f16_v2bf16_v4bf16: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: buffer_load_dword v31, off, s[0:3], s32 |
| ; GFX89-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:20 |
| ; GFX89-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16 |
| ; GFX89-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:4 |
| ; GFX89-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:8 |
| ; GFX89-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:12 |
| ; GFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX89-NEXT: s_mov_b32 s6, -1 |
| ; GFX89-NEXT: s_waitcnt vmcnt(5) |
| ; GFX89-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: buffer_store_dword v34, off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: buffer_store_dword v35, off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: buffer_store_dword v36, off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: buffer_store_dwordx2 v[32:33], off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v32i32_v2i16_v2f16_v2bf16_v4bf16: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_clause 0x5 |
| ; GFX11-NEXT: scratch_load_b32 v31, off, s32 |
| ; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:20 |
| ; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:4 |
| ; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:8 |
| ; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:12 |
| ; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:16 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_waitcnt vmcnt(5) |
| ; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-NEXT: buffer_store_b32 v34, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-NEXT: buffer_store_b32 v35, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-NEXT: buffer_store_b32 v36, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_store_b64 v[32:33], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store volatile <32 x i32> %arg0, ptr addrspace(1) poison |
| store volatile <2 x i16> %arg1, ptr addrspace(1) poison |
| store volatile <2 x half> %arg2, ptr addrspace(1) poison |
| store volatile <2 x bfloat> %arg3, ptr addrspace(1) poison |
| store volatile <4 x bfloat> %arg4, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Passes <32 x i32>, <2 x i64> and <2 x double> to a function and writes each |
| ; argument back with a volatile store. In the expected output v0-v30 are stored |
| ; without being loaded first, v31 is loaded from the stack at s32, and eight |
| ; further dwords are loaded from stack offsets 4 through 32 and then stored as |
| ; two 128-bit vectors. |
| ; The CIGFX89 prefix is shared by the hawaii, fiji and gfx900 RUN lines and the |
| ; GFX11 prefix belongs to the gfx1100 RUN line. |
| ; The check lines are autogenerated, so regenerate them with |
| ; utils/update_llc_test_checks.py instead of editing them by hand. |
| define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2 x double> %arg2) #0 { |
| ; CIGFX89-LABEL: void_func_v32i32_v2i64_v2f64: |
| ; CIGFX89: ; %bb.0: |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CIGFX89-NEXT: buffer_load_dword v31, off, s[0:3], s32 |
| ; CIGFX89-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:20 |
| ; CIGFX89-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:16 |
| ; CIGFX89-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:12 |
| ; CIGFX89-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8 |
| ; CIGFX89-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4 |
| ; CIGFX89-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:32 |
| ; CIGFX89-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:28 |
| ; CIGFX89-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:24 |
| ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; CIGFX89-NEXT: s_mov_b32 s6, -1 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(8) |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v32i32_v2i64_v2f64: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_clause 0x8 |
| ; GFX11-NEXT: scratch_load_b32 v31, off, s32 |
| ; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:32 |
| ; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:28 |
| ; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:24 |
| ; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:16 |
| ; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:12 |
| ; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:8 |
| ; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:4 |
| ; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:20 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_waitcnt vmcnt(8) |
| ; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-NEXT: buffer_store_b128 v[36:39], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_store_b128 v[32:35], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ; One volatile store per argument, in argument order. Presumably volatile so |
| ; that none of the stores to the poison pointer is dropped or merged. |
| store volatile <32 x i32> %arg0, ptr addrspace(1) poison |
| store volatile <2 x i64> %arg1, ptr addrspace(1) poison |
| store volatile <2 x double> %arg2, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Passes <32 x i32>, <4 x i32> and <4 x float> to a function and writes each |
| ; argument back with a volatile store. In the expected output v0-v30 are stored |
| ; without being loaded first, v31 is loaded from the stack at s32, and eight |
| ; further dwords are loaded from stack offsets 4 through 32 into v32-v39 and |
| ; then stored as two 128-bit vectors. |
| ; The CIGFX89 prefix is shared by the hawaii, fiji and gfx900 RUN lines and the |
| ; GFX11 prefix belongs to the gfx1100 RUN line. |
| ; The check lines are autogenerated, so regenerate them with |
| ; utils/update_llc_test_checks.py instead of editing them by hand. |
| define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4 x float> %arg2) #0 { |
| ; CIGFX89-LABEL: void_func_v32i32_v4i32_v4f32: |
| ; CIGFX89: ; %bb.0: |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CIGFX89-NEXT: buffer_load_dword v31, off, s[0:3], s32 |
| ; CIGFX89-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:20 |
| ; CIGFX89-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:16 |
| ; CIGFX89-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:12 |
| ; CIGFX89-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8 |
| ; CIGFX89-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4 |
| ; CIGFX89-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:32 |
| ; CIGFX89-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:28 |
| ; CIGFX89-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:24 |
| ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; CIGFX89-NEXT: s_mov_b32 s6, -1 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(8) |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v32i32_v4i32_v4f32: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_clause 0x8 |
| ; GFX11-NEXT: scratch_load_b32 v31, off, s32 |
| ; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:16 |
| ; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:12 |
| ; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:8 |
| ; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:4 |
| ; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:32 |
| ; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:28 |
| ; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:24 |
| ; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:20 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_waitcnt vmcnt(8) |
| ; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-NEXT: buffer_store_b128 v[32:35], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_store_b128 v[36:39], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ; One volatile store per argument, in argument order. Presumably volatile so |
| ; that none of the stores to the poison pointer is dropped or merged. |
| store volatile <32 x i32> %arg0, ptr addrspace(1) poison |
| store volatile <4 x i32> %arg1, ptr addrspace(1) poison |
| store volatile <4 x float> %arg2, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Passes <32 x i32>, <8 x i32> and <8 x float> to a function and writes each |
| ; argument back with a volatile store. In the expected output v0-v30 are stored |
| ; without being loaded first, v31 is loaded from the stack at s32, and sixteen |
| ; further dwords are loaded from stack offsets 4 through 64 and then stored as |
| ; four 128-bit vectors. |
| ; This function has separate CI, VI and GFX9 check blocks instead of a shared |
| ; prefix. The GFX9 block has an s_nop 0 before the v[8:11] store that the CI and |
| ; VI blocks do not have, which is presumably why no shared prefix was used. |
| ; On the CI, VI and GFX9 targets the expected output reloads v12-v19 from the |
| ; stack after their original contents have been stored. The GFX11 block instead |
| ; issues all seventeen scratch loads up front under one s_clause and uses |
| ; v32-v39 and v48-v55 for the stack values. |
| ; The check lines are autogenerated, so regenerate them with |
| ; utils/update_llc_test_checks.py instead of editing them by hand. |
| define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 x float> %arg2) #0 { |
| ; CI-LABEL: void_func_v32i32_v8i32_v8f32: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32 |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:64 |
| ; CI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:60 |
| ; CI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:56 |
| ; CI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:52 |
| ; CI-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:16 |
| ; CI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:12 |
| ; CI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:8 |
| ; CI-NEXT: s_waitcnt vmcnt(7) |
| ; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:4 |
| ; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32 |
| ; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28 |
| ; CI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24 |
| ; CI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20 |
| ; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:48 |
| ; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:44 |
| ; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:40 |
| ; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:36 |
| ; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: void_func_v32i32_v8i32_v8f32: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32 |
| ; VI-NEXT: s_mov_b32 s7, 0xf000 |
| ; VI-NEXT: s_mov_b32 s6, -1 |
| ; VI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:64 |
| ; VI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:60 |
| ; VI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:56 |
| ; VI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:52 |
| ; VI-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:16 |
| ; VI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:12 |
| ; VI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:8 |
| ; VI-NEXT: s_waitcnt vmcnt(7) |
| ; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:4 |
| ; VI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32 |
| ; VI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28 |
| ; VI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24 |
| ; VI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20 |
| ; VI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:48 |
| ; VI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:44 |
| ; VI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:40 |
| ; VI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:36 |
| ; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: void_func_v32i32_v8i32_v8f32: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 |
| ; GFX9-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX9-NEXT: s_mov_b32 s6, -1 |
| ; GFX9-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:64 |
| ; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:60 |
| ; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:56 |
| ; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:52 |
| ; GFX9-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:16 |
| ; GFX9-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:12 |
| ; GFX9-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:8 |
| ; GFX9-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:4 |
| ; GFX9-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32 |
| ; GFX9-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28 |
| ; GFX9-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24 |
| ; GFX9-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20 |
| ; GFX9-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:48 |
| ; GFX9-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:44 |
| ; GFX9-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:40 |
| ; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:36 |
| ; GFX9-NEXT: s_nop 0 |
| ; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v32i32_v8i32_v8f32: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_clause 0x10 |
| ; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:48 |
| ; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:44 |
| ; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:40 |
| ; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:64 |
| ; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:60 |
| ; GFX11-NEXT: scratch_load_b32 v31, off, s32 |
| ; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:56 |
| ; GFX11-NEXT: scratch_load_b32 v51, off, s32 offset:16 |
| ; GFX11-NEXT: scratch_load_b32 v50, off, s32 offset:12 |
| ; GFX11-NEXT: scratch_load_b32 v49, off, s32 offset:8 |
| ; GFX11-NEXT: scratch_load_b32 v55, off, s32 offset:32 |
| ; GFX11-NEXT: scratch_load_b32 v54, off, s32 offset:28 |
| ; GFX11-NEXT: scratch_load_b32 v53, off, s32 offset:24 |
| ; GFX11-NEXT: scratch_load_b32 v52, off, s32 offset:20 |
| ; GFX11-NEXT: scratch_load_b32 v48, off, s32 offset:4 |
| ; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:52 |
| ; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:36 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_waitcnt vmcnt(11) |
| ; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-NEXT: buffer_store_b128 v[52:55], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-NEXT: buffer_store_b128 v[48:51], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-NEXT: buffer_store_b128 v[36:39], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_store_b128 v[32:35], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ; One volatile store per argument, in argument order. Presumably volatile so |
| ; that none of the stores to the poison pointer is dropped or merged. |
| store volatile <32 x i32> %arg0, ptr addrspace(1) poison |
| store volatile <8 x i32> %arg1, ptr addrspace(1) poison |
| store volatile <8 x float> %arg2, ptr addrspace(1) poison |
| ret void |
| } |
| |
| define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1, <16 x float> %arg2) #0 { |
| ; CI-LABEL: void_func_v32i32_v16i32_v16f32: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32 |
| ; CI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:64 |
| ; CI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:60 |
| ; CI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:56 |
| ; CI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:52 |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:48 |
| ; CI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:44 |
| ; CI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:40 |
| ; CI-NEXT: s_waitcnt vmcnt(7) |
| ; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:36 |
| ; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32 |
| ; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28 |
| ; CI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24 |
| ; CI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20 |
| ; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:16 |
| ; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:12 |
| ; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:8 |
| ; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4 |
| ; CI-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:96 |
| ; CI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:92 |
| ; CI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:88 |
| ; CI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:84 |
| ; CI-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:112 |
| ; CI-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:108 |
| ; CI-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:104 |
| ; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:100 |
| ; CI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:128 |
| ; CI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:124 |
| ; CI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:120 |
| ; CI-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:116 |
| ; CI-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:80 |
| ; CI-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:76 |
| ; CI-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:72 |
| ; CI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:68 |
| ; CI-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: void_func_v32i32_v16i32_v16f32: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32 |
| ; VI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:64 |
| ; VI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:60 |
| ; VI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:56 |
| ; VI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:52 |
| ; VI-NEXT: s_mov_b32 s7, 0xf000 |
| ; VI-NEXT: s_mov_b32 s6, -1 |
| ; VI-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:48 |
| ; VI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:44 |
| ; VI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:40 |
| ; VI-NEXT: s_waitcnt vmcnt(7) |
| ; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:36 |
| ; VI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32 |
| ; VI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28 |
| ; VI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24 |
| ; VI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20 |
| ; VI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:16 |
| ; VI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:12 |
| ; VI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:8 |
| ; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4 |
| ; VI-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:96 |
| ; VI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:92 |
| ; VI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:88 |
| ; VI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:84 |
| ; VI-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:112 |
| ; VI-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:108 |
| ; VI-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:104 |
| ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:100 |
| ; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:128 |
| ; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:124 |
| ; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:120 |
| ; VI-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:116 |
| ; VI-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:80 |
| ; VI-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:76 |
| ; VI-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:72 |
| ; VI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:68 |
| ; VI-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: void_func_v32i32_v16i32_v16f32: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 |
| ; GFX9-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:64 |
| ; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:60 |
| ; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:56 |
| ; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:52 |
| ; GFX9-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX9-NEXT: s_mov_b32 s6, -1 |
| ; GFX9-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:48 |
| ; GFX9-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:44 |
| ; GFX9-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:40 |
| ; GFX9-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:36 |
| ; GFX9-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32 |
| ; GFX9-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28 |
| ; GFX9-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24 |
| ; GFX9-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20 |
| ; GFX9-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:16 |
| ; GFX9-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:12 |
| ; GFX9-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:8 |
| ; GFX9-NEXT: s_nop 0 |
| ; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4 |
| ; GFX9-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:96 |
| ; GFX9-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:92 |
| ; GFX9-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:88 |
| ; GFX9-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:84 |
| ; GFX9-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:112 |
| ; GFX9-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:108 |
| ; GFX9-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:104 |
| ; GFX9-NEXT: s_nop 0 |
| ; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:100 |
| ; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:128 |
| ; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:124 |
| ; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:120 |
| ; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:116 |
| ; GFX9-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:80 |
| ; GFX9-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:76 |
| ; GFX9-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:72 |
| ; GFX9-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:68 |
| ; GFX9-NEXT: s_nop 0 |
| ; GFX9-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v32i32_v16i32_v16f32: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_clause 0x1f |
| ; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:80 |
| ; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:76 |
| ; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:72 |
| ; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:96 |
| ; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:92 |
| ; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:88 |
| ; GFX11-NEXT: scratch_load_b32 v51, off, s32 offset:112 |
| ; GFX11-NEXT: scratch_load_b32 v50, off, s32 offset:108 |
| ; GFX11-NEXT: scratch_load_b32 v49, off, s32 offset:104 |
| ; GFX11-NEXT: scratch_load_b32 v55, off, s32 offset:128 |
| ; GFX11-NEXT: scratch_load_b32 v54, off, s32 offset:124 |
| ; GFX11-NEXT: scratch_load_b32 v53, off, s32 offset:120 |
| ; GFX11-NEXT: scratch_load_b32 v67, off, s32 offset:16 |
| ; GFX11-NEXT: scratch_load_b32 v66, off, s32 offset:12 |
| ; GFX11-NEXT: scratch_load_b32 v65, off, s32 offset:8 |
| ; GFX11-NEXT: scratch_load_b32 v71, off, s32 offset:32 |
| ; GFX11-NEXT: scratch_load_b32 v70, off, s32 offset:28 |
| ; GFX11-NEXT: scratch_load_b32 v31, off, s32 |
| ; GFX11-NEXT: scratch_load_b32 v69, off, s32 offset:24 |
| ; GFX11-NEXT: scratch_load_b32 v83, off, s32 offset:48 |
| ; GFX11-NEXT: scratch_load_b32 v82, off, s32 offset:44 |
| ; GFX11-NEXT: scratch_load_b32 v81, off, s32 offset:40 |
| ; GFX11-NEXT: scratch_load_b32 v87, off, s32 offset:64 |
| ; GFX11-NEXT: scratch_load_b32 v86, off, s32 offset:60 |
| ; GFX11-NEXT: scratch_load_b32 v85, off, s32 offset:56 |
| ; GFX11-NEXT: scratch_load_b32 v84, off, s32 offset:52 |
| ; GFX11-NEXT: scratch_load_b32 v80, off, s32 offset:36 |
| ; GFX11-NEXT: scratch_load_b32 v68, off, s32 offset:20 |
| ; GFX11-NEXT: scratch_load_b32 v64, off, s32 offset:4 |
| ; GFX11-NEXT: scratch_load_b32 v52, off, s32 offset:116 |
| ; GFX11-NEXT: scratch_load_b32 v48, off, s32 offset:100 |
| ; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:84 |
| ; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:68 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_waitcnt vmcnt(15) |
| ; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(7) |
| ; GFX11-NEXT: buffer_store_b128 v[84:87], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(6) |
| ; GFX11-NEXT: buffer_store_b128 v[80:83], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(5) |
| ; GFX11-NEXT: buffer_store_b128 v[68:71], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-NEXT: buffer_store_b128 v[64:67], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-NEXT: buffer_store_b128 v[52:55], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-NEXT: buffer_store_b128 v[48:51], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-NEXT: buffer_store_b128 v[36:39], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_store_b128 v[32:35], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store volatile <32 x i32> %arg0, ptr addrspace(1) poison |
| store volatile <16 x i32> %arg1, ptr addrspace(1) poison |
| store volatile <16 x float> %arg2, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Make sure v3 isn't a wasted register because of 3-element vector (v3) types |
| ; being promoted to 4-element vectors (v4). The three elements of %arg0 and the |
| ; scalar %arg1 are each stored through addrspace(3) with a volatile store; the |
| ; checks expect the stored values in v0, v1, v2 and v3, in that order, so %arg1 |
| ; must occupy v3. |
define void @void_func_v3f32_wasted_reg(<3 x float> %arg0, i32 %arg1) #0 {
| ; CI-LABEL: void_func_v3f32_wasted_reg: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: s_mov_b32 m0, -1 |
| ; CI-NEXT: ds_write_b32 v0, v0 |
| ; CI-NEXT: ds_write_b32 v0, v1 |
| ; CI-NEXT: ds_write_b32 v0, v2 |
| ; CI-NEXT: ds_write_b32 v0, v3 |
| ; CI-NEXT: s_waitcnt lgkmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: void_func_v3f32_wasted_reg: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: s_mov_b32 m0, -1 |
| ; VI-NEXT: ds_write_b32 v0, v0 |
| ; VI-NEXT: ds_write_b32 v0, v1 |
| ; VI-NEXT: ds_write_b32 v0, v2 |
| ; VI-NEXT: ds_write_b32 v0, v3 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: void_func_v3f32_wasted_reg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: ds_write_b32 v0, v0 |
| ; GFX9-NEXT: ds_write_b32 v0, v1 |
| ; GFX9-NEXT: ds_write_b32 v0, v2 |
| ; GFX9-NEXT: ds_write_b32 v0, v3 |
| ; GFX9-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v3f32_wasted_reg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: ds_store_b32 v0, v0 |
| ; GFX11-NEXT: ds_store_b32 v0, v1 |
| ; GFX11-NEXT: ds_store_b32 v0, v2 |
| ; GFX11-NEXT: ds_store_b32 v0, v3 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %arg0.0 = extractelement <3 x float> %arg0, i32 0 |
| %arg0.1 = extractelement <3 x float> %arg0, i32 1 |
| %arg0.2 = extractelement <3 x float> %arg0, i32 2 |
| store volatile float %arg0.0, ptr addrspace(3) poison |
| store volatile float %arg0.1, ptr addrspace(3) poison |
| store volatile float %arg0.2, ptr addrspace(3) poison |
| store volatile i32 %arg1, ptr addrspace(3) poison |
| ret void |
| } |
| |
| ; Integer variant of the wasted-register test: a <3 x i32> argument followed by |
| ; a scalar i32. Each element of %arg0 and then %arg1 is stored through |
| ; addrspace(3) with a volatile store; the checks expect the stored values in |
| ; v0, v1, v2 and v3, in that order, so no register is skipped after the vector. |
| define void @void_func_v3i32_wasted_reg(<3 x i32> %arg0, i32 %arg1) #0 { |
| ; CI-LABEL: void_func_v3i32_wasted_reg: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: s_mov_b32 m0, -1 |
| ; CI-NEXT: ds_write_b32 v0, v0 |
| ; CI-NEXT: ds_write_b32 v0, v1 |
| ; CI-NEXT: ds_write_b32 v0, v2 |
| ; CI-NEXT: ds_write_b32 v0, v3 |
| ; CI-NEXT: s_waitcnt lgkmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: void_func_v3i32_wasted_reg: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: s_mov_b32 m0, -1 |
| ; VI-NEXT: ds_write_b32 v0, v0 |
| ; VI-NEXT: ds_write_b32 v0, v1 |
| ; VI-NEXT: ds_write_b32 v0, v2 |
| ; VI-NEXT: ds_write_b32 v0, v3 |
| ; VI-NEXT: s_waitcnt lgkmcnt(0) |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: void_func_v3i32_wasted_reg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: ds_write_b32 v0, v0 |
| ; GFX9-NEXT: ds_write_b32 v0, v1 |
| ; GFX9-NEXT: ds_write_b32 v0, v2 |
| ; GFX9-NEXT: ds_write_b32 v0, v3 |
| ; GFX9-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v3i32_wasted_reg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: ds_store_b32 v0, v0 |
| ; GFX11-NEXT: ds_store_b32 v0, v1 |
| ; GFX11-NEXT: ds_store_b32 v0, v2 |
| ; GFX11-NEXT: ds_store_b32 v0, v3 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %arg0.0 = extractelement <3 x i32> %arg0, i32 0 |
| %arg0.1 = extractelement <3 x i32> %arg0, i32 1 |
| %arg0.2 = extractelement <3 x i32> %arg0, i32 2 |
| store volatile i32 %arg0.0, ptr addrspace(3) poison |
| store volatile i32 %arg0.1, ptr addrspace(3) poison |
| store volatile i32 %arg0.2, ptr addrspace(3) poison |
| store volatile i32 %arg1, ptr addrspace(3) poison |
| ret void |
| } |
| |
| ; Check there is no crash when a <16 x i8> argument is written with a single |
| ; volatile vector store to addrspace(1). The checks expect sixteen separate |
| ; byte stores, issued from v15 down to v0, each followed by a wait before the |
| ; next store. |
| define void @void_func_volatile_v16i8(<16 x i8> %arg0) #0 { |
| ; CIGFX89-LABEL: void_func_volatile_v16i8: |
| ; CIGFX89: ; %bb.0: |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; CIGFX89-NEXT: s_mov_b32 s6, -1 |
| ; CIGFX89-NEXT: buffer_store_byte v15, off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_byte v14, off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_byte v13, off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_byte v12, off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_byte v11, off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_byte v10, off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_byte v9, off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_byte v8, off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_byte v7, off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_byte v6, off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_byte v5, off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_byte v4, off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_byte v3, off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_byte v2, off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_byte v1, off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: buffer_store_byte v0, off, s[4:7], 0 |
| ; CIGFX89-NEXT: s_waitcnt vmcnt(0) |
| ; CIGFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_volatile_v16i8: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_store_b8 v15, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b8 v14, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b8 v13, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b8 v12, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b8 v11, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b8 v10, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b8 v9, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b8 v8, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b8 v7, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b8 v6, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b8 v5, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b8 v4, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b8 v3, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b8 v2, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b8 v1, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b8 v0, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store volatile <16 x i8> %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Check there is no crash when a <16 x i8> argument follows a <32 x i32> |
| ; argument. The checks expect v0-v30 to be stored directly, while v31 and all |
| ; sixteen bytes of %arg1 are first loaded relative to s32 (offset 0 for v31, |
| ; offsets 4-64 in steps of 4 for the bytes). The CI run loads each byte with a |
| ; dword load; the VI and GFX9 runs use ubyte loads and the GFX11 run uses |
| ; scratch_load_u8. Both vectors are written with volatile stores, and the bytes |
| ; are stored starting from the one loaded at offset 64 down to offset 4. |
| define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 { |
| ; CI-LABEL: void_func_v32i32_v16i8: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32 |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:60 |
| ; CI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:64 |
| ; CI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:48 |
| ; CI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:52 |
| ; CI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:56 |
| ; CI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:36 |
| ; CI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:40 |
| ; CI-NEXT: s_waitcnt vmcnt(7) |
| ; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:28 |
| ; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:32 |
| ; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:20 |
| ; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:24 |
| ; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:16 |
| ; CI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:12 |
| ; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:8 |
| ; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:4 |
| ; CI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:44 |
| ; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_byte v33, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_byte v32, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_byte v36, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_byte v35, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_byte v34, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_byte v20, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_byte v38, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_byte v37, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_byte v17, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_byte v16, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_byte v19, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_byte v18, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_byte v12, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_byte v13, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_byte v14, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: buffer_store_byte v15, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; VI-LABEL: void_func_v32i32_v16i8: |
| ; VI: ; %bb.0: |
| ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32 |
| ; VI-NEXT: s_mov_b32 s7, 0xf000 |
| ; VI-NEXT: s_mov_b32 s6, -1 |
| ; VI-NEXT: buffer_load_ubyte v32, off, s[0:3], s32 offset:60 |
| ; VI-NEXT: buffer_load_ubyte v33, off, s[0:3], s32 offset:64 |
| ; VI-NEXT: buffer_load_ubyte v34, off, s[0:3], s32 offset:48 |
| ; VI-NEXT: buffer_load_ubyte v35, off, s[0:3], s32 offset:52 |
| ; VI-NEXT: buffer_load_ubyte v36, off, s[0:3], s32 offset:56 |
| ; VI-NEXT: buffer_load_ubyte v37, off, s[0:3], s32 offset:36 |
| ; VI-NEXT: buffer_load_ubyte v38, off, s[0:3], s32 offset:40 |
| ; VI-NEXT: s_waitcnt vmcnt(7) |
| ; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_load_ubyte v16, off, s[0:3], s32 offset:28 |
| ; VI-NEXT: buffer_load_ubyte v17, off, s[0:3], s32 offset:32 |
| ; VI-NEXT: buffer_load_ubyte v18, off, s[0:3], s32 offset:20 |
| ; VI-NEXT: buffer_load_ubyte v19, off, s[0:3], s32 offset:24 |
| ; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_load_ubyte v12, off, s[0:3], s32 offset:16 |
| ; VI-NEXT: buffer_load_ubyte v13, off, s[0:3], s32 offset:12 |
| ; VI-NEXT: buffer_load_ubyte v14, off, s[0:3], s32 offset:8 |
| ; VI-NEXT: buffer_load_ubyte v15, off, s[0:3], s32 offset:4 |
| ; VI-NEXT: buffer_load_ubyte v20, off, s[0:3], s32 offset:44 |
| ; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_byte v33, off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_byte v32, off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_byte v36, off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_byte v35, off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_byte v34, off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_byte v20, off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_byte v38, off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_byte v37, off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_byte v17, off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_byte v16, off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_byte v19, off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_byte v18, off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_byte v12, off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_byte v13, off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_byte v14, off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: buffer_store_byte v15, off, s[4:7], 0 |
| ; VI-NEXT: s_waitcnt vmcnt(0) |
| ; VI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: void_func_v32i32_v16i8: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 |
| ; GFX9-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX9-NEXT: s_mov_b32 s6, -1 |
| ; GFX9-NEXT: buffer_load_ubyte v32, off, s[0:3], s32 offset:60 |
| ; GFX9-NEXT: buffer_load_ubyte v33, off, s[0:3], s32 offset:64 |
| ; GFX9-NEXT: buffer_load_ubyte v34, off, s[0:3], s32 offset:48 |
| ; GFX9-NEXT: buffer_load_ubyte v35, off, s[0:3], s32 offset:52 |
| ; GFX9-NEXT: buffer_load_ubyte v36, off, s[0:3], s32 offset:56 |
| ; GFX9-NEXT: buffer_load_ubyte v37, off, s[0:3], s32 offset:36 |
| ; GFX9-NEXT: buffer_load_ubyte v38, off, s[0:3], s32 offset:40 |
| ; GFX9-NEXT: s_waitcnt vmcnt(7) |
| ; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_load_ubyte v16, off, s[0:3], s32 offset:28 |
| ; GFX9-NEXT: buffer_load_ubyte v17, off, s[0:3], s32 offset:32 |
| ; GFX9-NEXT: buffer_load_ubyte v18, off, s[0:3], s32 offset:20 |
| ; GFX9-NEXT: buffer_load_ubyte v19, off, s[0:3], s32 offset:24 |
| ; GFX9-NEXT: buffer_load_ubyte v20, off, s[0:3], s32 offset:44 |
| ; GFX9-NEXT: s_nop 0 |
| ; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_load_ubyte v12, off, s[0:3], s32 offset:16 |
| ; GFX9-NEXT: buffer_load_ubyte v13, off, s[0:3], s32 offset:12 |
| ; GFX9-NEXT: buffer_load_ubyte v14, off, s[0:3], s32 offset:8 |
| ; GFX9-NEXT: buffer_load_ubyte v15, off, s[0:3], s32 offset:4 |
| ; GFX9-NEXT: s_nop 0 |
| ; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_byte v33, off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_byte v32, off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_byte v36, off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_byte v35, off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_byte v34, off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_byte v20, off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_byte v38, off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_byte v37, off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_byte v17, off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_byte v16, off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_byte v19, off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_byte v18, off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_byte v12, off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_byte v13, off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_byte v14, off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: buffer_store_byte v15, off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v32i32_v16i8: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_clause 0x10 |
| ; GFX11-NEXT: scratch_load_b32 v31, off, s32 |
| ; GFX11-NEXT: scratch_load_u8 v32, off, s32 offset:64 |
| ; GFX11-NEXT: scratch_load_u8 v33, off, s32 offset:60 |
| ; GFX11-NEXT: scratch_load_u8 v34, off, s32 offset:56 |
| ; GFX11-NEXT: scratch_load_u8 v35, off, s32 offset:52 |
| ; GFX11-NEXT: scratch_load_u8 v36, off, s32 offset:48 |
| ; GFX11-NEXT: scratch_load_u8 v37, off, s32 offset:44 |
| ; GFX11-NEXT: scratch_load_u8 v38, off, s32 offset:40 |
| ; GFX11-NEXT: scratch_load_u8 v39, off, s32 offset:36 |
| ; GFX11-NEXT: scratch_load_u8 v48, off, s32 offset:32 |
| ; GFX11-NEXT: scratch_load_u8 v49, off, s32 offset:28 |
| ; GFX11-NEXT: scratch_load_u8 v50, off, s32 offset:24 |
| ; GFX11-NEXT: scratch_load_u8 v51, off, s32 offset:20 |
| ; GFX11-NEXT: scratch_load_u8 v52, off, s32 offset:16 |
| ; GFX11-NEXT: scratch_load_u8 v53, off, s32 offset:12 |
| ; GFX11-NEXT: scratch_load_u8 v54, off, s32 offset:8 |
| ; GFX11-NEXT: scratch_load_u8 v55, off, s32 offset:4 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_waitcnt vmcnt(16) |
| ; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(15) |
| ; GFX11-NEXT: buffer_store_b8 v32, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(14) |
| ; GFX11-NEXT: buffer_store_b8 v33, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(13) |
| ; GFX11-NEXT: buffer_store_b8 v34, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(12) |
| ; GFX11-NEXT: buffer_store_b8 v35, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(11) |
| ; GFX11-NEXT: buffer_store_b8 v36, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(10) |
| ; GFX11-NEXT: buffer_store_b8 v37, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(9) |
| ; GFX11-NEXT: buffer_store_b8 v38, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(8) |
| ; GFX11-NEXT: buffer_store_b8 v39, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(7) |
| ; GFX11-NEXT: buffer_store_b8 v48, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(6) |
| ; GFX11-NEXT: buffer_store_b8 v49, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(5) |
| ; GFX11-NEXT: buffer_store_b8 v50, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-NEXT: buffer_store_b8 v51, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-NEXT: buffer_store_b8 v52, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-NEXT: buffer_store_b8 v53, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-NEXT: buffer_store_b8 v54, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_store_b8 v55, off, s[0:3], 0 dlc |
| ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store volatile <32 x i32> %arg0, ptr addrspace(1) poison |
| store volatile <16 x i8> %arg1, ptr addrspace(1) poison |
| ret void |
| } |
| |
| |
| ; A scalar bfloat argument is stored to addrspace(1). The CI checks expect the |
| ; value in v0 to be multiplied by 1.0 (v_mul_f32) and shifted right by 16 bits |
| ; before a 16-bit store; the GFX89 and GFX11 checks expect v0 to be stored |
| ; directly with a 16-bit store. |
| define void @void_func_bf16(bfloat %arg0) #0 { |
| ; CI-LABEL: void_func_bf16: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: buffer_store_short v0, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: void_func_bf16: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX89-NEXT: s_mov_b32 s6, -1 |
| ; GFX89-NEXT: buffer_store_short v0, off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_bf16: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store bfloat %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; A <2 x bfloat> argument is stored to addrspace(1) as one dword. The CI checks |
| ; expect the two elements in v0 and v1, each multiplied by 1.0, then packed |
| ; into v0 with a 16-bit shift and v_alignbit_b32; the GFX89 and GFX11 checks |
| ; expect v0 to be stored directly. |
| define void @void_func_v2bf16(<2 x bfloat> %arg0) #0 { |
| ; CI-LABEL: void_func_v2bf16: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: v_mul_f32_e32 v1, 1.0, v1 |
| ; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 |
| ; CI-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; CI-NEXT: v_alignbit_b32 v0, v1, v0, 16 |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: void_func_v2bf16: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX89-NEXT: s_mov_b32 s6, -1 |
| ; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v2bf16: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <2 x bfloat> %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; A <3 x bfloat> argument is stored to addrspace(1). All targets are expected |
| ; to emit a 16-bit store of v1 followed by a dword store of v0. The CI checks |
| ; additionally expect the elements in v0, v1 and v2 to be multiplied by 1.0 and |
| ; packed (16-bit shift plus v_alignbit_b32) into v0 and v1 first. |
| define void @void_func_v3bf16(<3 x bfloat> %arg0) #0 { |
| ; CI-LABEL: void_func_v3bf16: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: v_mul_f32_e32 v1, 1.0, v1 |
| ; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 |
| ; CI-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; CI-NEXT: v_alignbit_b32 v0, v1, v0, 16 |
| ; CI-NEXT: v_mul_f32_e32 v1, 1.0, v2 |
| ; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: buffer_store_short v1, off, s[4:7], 0 |
| ; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: void_func_v3bf16: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX89-NEXT: s_mov_b32 s6, -1 |
| ; GFX89-NEXT: buffer_store_short v1, off, s[4:7], 0 |
| ; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v3bf16: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: buffer_store_b16 v1, off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <3 x bfloat> %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; A <4 x bfloat> argument is stored to addrspace(1) as one 64-bit store. The CI |
| ; checks expect the four elements in v0-v3, each multiplied by 1.0 and packed |
| ; pairwise (16-bit shift plus v_alignbit_b32) into v[1:2] before the store; the |
| ; GFX89 and GFX11 checks expect v[0:1] to be stored directly. |
| define void @void_func_v4bf16(<4 x bfloat> %arg0) #0 { |
| ; CI-LABEL: void_func_v4bf16: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: v_mul_f32_e32 v3, 1.0, v3 |
| ; CI-NEXT: v_mul_f32_e32 v1, 1.0, v1 |
| ; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v3 |
| ; CI-NEXT: v_mul_f32_e32 v2, 1.0, v2 |
| ; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 |
| ; CI-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; CI-NEXT: v_alignbit_b32 v2, v3, v2, 16 |
| ; CI-NEXT: v_alignbit_b32 v1, v1, v0, 16 |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: buffer_store_dwordx2 v[1:2], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: void_func_v4bf16: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX89-NEXT: s_mov_b32 s6, -1 |
| ; GFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v4bf16: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <4 x bfloat> %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Checks code generation for a void function that takes an <8 x bfloat> |
| ; argument and stores it to a poison addrspace(1) pointer. |
| ; |
| ; Expected output per check prefix (prefixes are set by the RUN lines at the |
| ; top of the file): |
| ;  * CI prefix (hawaii run) - the checks read v0-v7, one register per element. |
| ;    Every element is multiplied by 1.0, the odd elements (v1, v3, v5, v7) are |
| ;    shifted right by 16, and four v_alignbit_b32 instructions merge each |
| ;    odd/even pair into one dword. The packed dwords end up in v[3:6] and are |
| ;    written with a single buffer_store_dwordx4. |
| ;  * GFX89 prefix (fiji and gfx900 runs) - no repacking instructions are |
| ;    expected; v[0:3] is stored directly with buffer_store_dwordx4. |
| ;  * GFX11 prefix (gfx1100 run) - same shape, using buffer_store_b128 and no |
| ;    trailing s_waitcnt before the return. |
| ; |
| ; The assertions below are autogenerated by update_llc_test_checks.py; rerun |
| ; the script instead of editing them by hand. |
| define void @void_func_v8bf16(<8 x bfloat> %arg0) #0 { |
| ; CI-LABEL: void_func_v8bf16: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: v_mul_f32_e32 v7, 1.0, v7 |
| ; CI-NEXT: v_mul_f32_e32 v5, 1.0, v5 |
| ; CI-NEXT: v_mul_f32_e32 v3, 1.0, v3 |
| ; CI-NEXT: v_mul_f32_e32 v1, 1.0, v1 |
| ; CI-NEXT: v_lshrrev_b32_e32 v7, 16, v7 |
| ; CI-NEXT: v_mul_f32_e32 v6, 1.0, v6 |
| ; CI-NEXT: v_lshrrev_b32_e32 v5, 16, v5 |
| ; CI-NEXT: v_mul_f32_e32 v4, 1.0, v4 |
| ; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v3 |
| ; CI-NEXT: v_mul_f32_e32 v2, 1.0, v2 |
| ; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 |
| ; CI-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; CI-NEXT: v_alignbit_b32 v6, v7, v6, 16 |
| ; CI-NEXT: v_alignbit_b32 v5, v5, v4, 16 |
| ; CI-NEXT: v_alignbit_b32 v4, v3, v2, 16 |
| ; CI-NEXT: v_alignbit_b32 v3, v1, v0, 16 |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: buffer_store_dwordx4 v[3:6], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: void_func_v8bf16: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX89-NEXT: s_mov_b32 s6, -1 |
| ; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v8bf16: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <8 x bfloat> %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Checks code generation for a void function that takes a <16 x bfloat> |
| ; argument and stores it to a poison addrspace(1) pointer. |
| ; |
| ; Expected output per check prefix (prefixes are set by the RUN lines at the |
| ; top of the file): |
| ;  * CI prefix (hawaii run) - the checks read v0-v15, one register per |
| ;    element. Every element is multiplied by 1.0, each odd element is shifted |
| ;    right by 16, and eight v_alignbit_b32 instructions merge the odd/even |
| ;    pairs into dwords. The results land in v[3:6] (from inputs v0-v7) and |
| ;    v[11:14] (from inputs v8-v15), with v0/v1 reused as scratch for the upper |
| ;    half. Two buffer_store_dwordx4 instructions are expected, v[11:14] first |
| ;    and v[3:6] second, both with the same descriptor and offset 0. |
| ;  * GFX89 prefix (fiji and gfx900 runs) - no repacking instructions are |
| ;    expected; v[4:7] and then v[0:3] are stored with buffer_store_dwordx4. |
| ;  * GFX11 prefix (gfx1100 run) - an s_clause 0x1 followed by two |
| ;    buffer_store_b128 instructions (v[4:7], then v[0:3]) and no trailing |
| ;    s_waitcnt before the return. |
| ; |
| ; The assertions below are autogenerated by update_llc_test_checks.py; rerun |
| ; the script instead of editing them by hand. |
| define void @void_func_v16bf16(<16 x bfloat> %arg0) #0 { |
| ; CI-LABEL: void_func_v16bf16: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: v_mul_f32_e32 v5, 1.0, v5 |
| ; CI-NEXT: v_mul_f32_e32 v3, 1.0, v3 |
| ; CI-NEXT: v_mul_f32_e32 v1, 1.0, v1 |
| ; CI-NEXT: v_lshrrev_b32_e32 v5, 16, v5 |
| ; CI-NEXT: v_mul_f32_e32 v4, 1.0, v4 |
| ; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v3 |
| ; CI-NEXT: v_mul_f32_e32 v2, 1.0, v2 |
| ; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 |
| ; CI-NEXT: v_mul_f32_e32 v0, 1.0, v0 |
| ; CI-NEXT: v_alignbit_b32 v5, v5, v4, 16 |
| ; CI-NEXT: v_alignbit_b32 v4, v3, v2, 16 |
| ; CI-NEXT: v_alignbit_b32 v3, v1, v0, 16 |
| ; CI-NEXT: v_mul_f32_e32 v0, 1.0, v15 |
| ; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 |
| ; CI-NEXT: v_mul_f32_e32 v1, 1.0, v14 |
| ; CI-NEXT: v_alignbit_b32 v14, v0, v1, 16 |
| ; CI-NEXT: v_mul_f32_e32 v0, 1.0, v13 |
| ; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 |
| ; CI-NEXT: v_mul_f32_e32 v1, 1.0, v12 |
| ; CI-NEXT: v_alignbit_b32 v13, v0, v1, 16 |
| ; CI-NEXT: v_mul_f32_e32 v0, 1.0, v11 |
| ; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 |
| ; CI-NEXT: v_mul_f32_e32 v1, 1.0, v10 |
| ; CI-NEXT: v_alignbit_b32 v12, v0, v1, 16 |
| ; CI-NEXT: v_mul_f32_e32 v0, 1.0, v9 |
| ; CI-NEXT: v_mul_f32_e32 v7, 1.0, v7 |
| ; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 |
| ; CI-NEXT: v_mul_f32_e32 v1, 1.0, v8 |
| ; CI-NEXT: v_lshrrev_b32_e32 v7, 16, v7 |
| ; CI-NEXT: v_mul_f32_e32 v6, 1.0, v6 |
| ; CI-NEXT: v_alignbit_b32 v11, v0, v1, 16 |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: v_alignbit_b32 v6, v7, v6, 16 |
| ; CI-NEXT: buffer_store_dwordx4 v[11:14], off, s[4:7], 0 |
| ; CI-NEXT: buffer_store_dwordx4 v[3:6], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: void_func_v16bf16: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX89-NEXT: s_mov_b32 s6, -1 |
| ; GFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 |
| ; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_v16bf16: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| store <16 x bfloat> %arg0, ptr addrspace(1) poison |
| ret void |
| } |
| |
| ; Attribute group #0 (nounwind), referenced by the test functions defined above. |
| attributes #0 = { nounwind } |