| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 |
| ; RUN: llc -mtriple=amdgcn-- -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX789,CI %s |
| ; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX789,GFX89,GFX8 %s |
| ; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX789,GFX89,GFX9 %s |
| ; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s |
| |
| ; Plain i1 return. The value is loaded from a poison addrspace(1) pointer; the |
| ; expected code is one unsigned byte buffer load into v0 followed directly by |
| ; the vmcnt wait and the return. |
| define i1 @i1_func_void() #0 { |
| ; GFX789-LABEL: i1_func_void: |
| ; GFX789: ; %bb.0: |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX789-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX789-NEXT: s_mov_b32 s6, -1 |
| ; GFX789-NEXT: buffer_load_ubyte v0, off, s[4:7], 0 |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) |
| ; GFX789-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: i1_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_load_u8 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %val = load i1, ptr addrspace(1) poison |
| ret i1 %val |
| } |
| |
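| ; FIXME: Missing and? The expected output below returns the loaded byte for a zeroext i1 without an explicit mask down to bit 0. |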
| ; zeroext i1 return. The expected code is the same as for the unannotated i1 |
| ; return: one unsigned byte buffer load into v0, with no separate masking |
| ; instruction before the return. |
| define zeroext i1 @i1_zeroext_func_void() #0 { |
| ; GFX789-LABEL: i1_zeroext_func_void: |
| ; GFX789: ; %bb.0: |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX789-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX789-NEXT: s_mov_b32 s6, -1 |
| ; GFX789-NEXT: buffer_load_ubyte v0, off, s[4:7], 0 |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) |
| ; GFX789-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: i1_zeroext_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_load_u8 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %val = load i1, ptr addrspace(1) poison |
| ret i1 %val |
| } |
| |
| ; signext i1 return. After the unsigned byte load and its vmcnt wait, the |
| ; expected code applies v_bfe_i32 with offset 0 and width 1 to v0 before |
| ; returning. |
| define signext i1 @i1_signext_func_void() #0 { |
| ; GFX789-LABEL: i1_signext_func_void: |
| ; GFX789: ; %bb.0: |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX789-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX789-NEXT: s_mov_b32 s6, -1 |
| ; GFX789-NEXT: buffer_load_ubyte v0, off, s[4:7], 0 |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) |
| ; GFX789-NEXT: v_bfe_i32 v0, v0, 0, 1 |
| ; GFX789-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: i1_signext_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_load_u8 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: v_bfe_i32 v0, v0, 0, 1 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %val = load i1, ptr addrspace(1) poison |
| ret i1 %val |
| } |
| |
| ; Plain i8 return. The value is loaded from a poison addrspace(1) pointer; the |
| ; expected code is one unsigned byte buffer load into v0. |
| define i8 @i8_func_void() #0 { |
| ; GFX789-LABEL: i8_func_void: |
| ; GFX789: ; %bb.0: |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX789-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX789-NEXT: s_mov_b32 s6, -1 |
| ; GFX789-NEXT: buffer_load_ubyte v0, off, s[4:7], 0 |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) |
| ; GFX789-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: i8_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_load_u8 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %val = load i8, ptr addrspace(1) poison |
| ret i8 %val |
| } |
| |
| ; zeroext i8 return. The expected code is one unsigned byte buffer load |
| ; (buffer_load_ubyte / buffer_load_u8) into v0 with no extra extension |
| ; instruction. |
| define zeroext i8 @i8_zeroext_func_void() #0 { |
| ; GFX789-LABEL: i8_zeroext_func_void: |
| ; GFX789: ; %bb.0: |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX789-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX789-NEXT: s_mov_b32 s6, -1 |
| ; GFX789-NEXT: buffer_load_ubyte v0, off, s[4:7], 0 |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) |
| ; GFX789-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: i8_zeroext_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_load_u8 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %val = load i8, ptr addrspace(1) poison |
| ret i8 %val |
| } |
| |
| ; signext i8 return. The expected code uses the signed byte buffer load |
| ; (buffer_load_sbyte / buffer_load_i8) into v0 with no separate extension |
| ; instruction. |
| define signext i8 @i8_signext_func_void() #0 { |
| ; GFX789-LABEL: i8_signext_func_void: |
| ; GFX789: ; %bb.0: |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX789-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX789-NEXT: s_mov_b32 s6, -1 |
| ; GFX789-NEXT: buffer_load_sbyte v0, off, s[4:7], 0 |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) |
| ; GFX789-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: i8_signext_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_load_i8 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %val = load i8, ptr addrspace(1) poison |
| ret i8 %val |
| } |
| |
| ; Plain i16 return. The value is loaded from a poison addrspace(1) pointer; the |
| ; expected code is one unsigned 16-bit buffer load into v0. |
| define i16 @i16_func_void() #0 { |
| ; GFX789-LABEL: i16_func_void: |
| ; GFX789: ; %bb.0: |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX789-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX789-NEXT: s_mov_b32 s6, -1 |
| ; GFX789-NEXT: buffer_load_ushort v0, off, s[4:7], 0 |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) |
| ; GFX789-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: i16_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_load_u16 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %val = load i16, ptr addrspace(1) poison |
| ret i16 %val |
| } |
| |
| ; zeroext i16 return. The expected code is one unsigned 16-bit buffer load |
| ; (buffer_load_ushort / buffer_load_u16) into v0 with no extra extension |
| ; instruction. |
| define zeroext i16 @i16_zeroext_func_void() #0 { |
| ; GFX789-LABEL: i16_zeroext_func_void: |
| ; GFX789: ; %bb.0: |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX789-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX789-NEXT: s_mov_b32 s6, -1 |
| ; GFX789-NEXT: buffer_load_ushort v0, off, s[4:7], 0 |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) |
| ; GFX789-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: i16_zeroext_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_load_u16 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %val = load i16, ptr addrspace(1) poison |
| ret i16 %val |
| } |
| |
| ; signext i16 return. The expected code uses the signed 16-bit buffer load |
| ; (buffer_load_sshort / buffer_load_i16) into v0 with no separate extension |
| ; instruction. |
| define signext i16 @i16_signext_func_void() #0 { |
| ; GFX789-LABEL: i16_signext_func_void: |
| ; GFX789: ; %bb.0: |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX789-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX789-NEXT: s_mov_b32 s6, -1 |
| ; GFX789-NEXT: buffer_load_sshort v0, off, s[4:7], 0 |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) |
| ; GFX789-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: i16_signext_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_load_i16 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %val = load i16, ptr addrspace(1) poison |
| ret i16 %val |
| } |
| |
| ; i32 return. The value is loaded from a poison addrspace(1) pointer; the |
| ; expected code is one dword buffer load into v0. |
| define i32 @i32_func_void() #0 { |
| ; GFX789-LABEL: i32_func_void: |
| ; GFX789: ; %bb.0: |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX789-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX789-NEXT: s_mov_b32 s6, -1 |
| ; GFX789-NEXT: buffer_load_dword v0, off, s[4:7], 0 |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) |
| ; GFX789-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: i32_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_load_b32 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %val = load i32, ptr addrspace(1) poison |
| ret i32 %val |
| } |
| |
| ; Plain i48 return. The 8-byte-aligned i48 load is expected to be split into a |
| ; dword load into v0 and an unsigned 16-bit load into v1; gfx1100 groups the |
| ; two loads under s_clause. |
| define i48 @i48_func_void() #0 { |
| ; GFX789-LABEL: i48_func_void: |
| ; GFX789: ; %bb.0: |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX789-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX789-NEXT: s_mov_b32 s6, -1 |
| ; GFX789-NEXT: buffer_load_dword v0, off, s[4:7], 0 |
| ; GFX789-NEXT: buffer_load_ushort v1, off, s[4:7], 0 |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) |
| ; GFX789-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: i48_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: buffer_load_b32 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_load_u16 v1, off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %val = load i48, ptr addrspace(1) poison, align 8 |
| ret i48 %val |
| } |
| |
| ; zeroext i48 return. The expected code matches the unannotated i48 case: a |
| ; dword load into v0 and an unsigned 16-bit load into v1, with no extra |
| ; extension instruction. |
| define zeroext i48 @i48_zeroext_func_void() #0 { |
| ; GFX789-LABEL: i48_zeroext_func_void: |
| ; GFX789: ; %bb.0: |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX789-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX789-NEXT: s_mov_b32 s6, -1 |
| ; GFX789-NEXT: buffer_load_dword v0, off, s[4:7], 0 |
| ; GFX789-NEXT: buffer_load_ushort v1, off, s[4:7], 0 |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) |
| ; GFX789-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: i48_zeroext_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: buffer_load_b32 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_load_u16 v1, off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %val = load i48, ptr addrspace(1) poison, align 8 |
| ret i48 %val |
| } |
| |
| ; signext i48 return. The expected code is a dword load into v0 plus a signed |
| ; 16-bit load (buffer_load_sshort / buffer_load_i16) into v1, with no separate |
| ; extension instruction. |
| define signext i48 @i48_signext_func_void() #0 { |
| ; GFX789-LABEL: i48_signext_func_void: |
| ; GFX789: ; %bb.0: |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX789-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX789-NEXT: s_mov_b32 s6, -1 |
| ; GFX789-NEXT: buffer_load_dword v0, off, s[4:7], 0 |
| ; GFX789-NEXT: buffer_load_sshort v1, off, s[4:7], 0 |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) |
| ; GFX789-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: i48_signext_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: buffer_load_b32 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_load_i16 v1, off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %val = load i48, ptr addrspace(1) poison, align 8 |
| ret i48 %val |
| } |
| |
| ; Plain i63 return of the incoming i63 argument (no load, despite the "_void" |
| ; name). The expected body is only the entry s_waitcnt and the return. |
| define i63 @i63_func_void(i63 %val) #0 { |
| ; GFX789-LABEL: i63_func_void: |
| ; GFX789: ; %bb.0: |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX789-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: i63_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ret i63 %val |
| } |
| |
| ; zeroext i63 return of the incoming i63 argument. The expected code ANDs v1 |
| ; with 0x7fffffff before returning; v0 is left untouched. |
| define zeroext i63 @i63_zeroext_func_void(i63 %val) #0 { |
| ; GFX789-LABEL: i63_zeroext_func_void: |
| ; GFX789: ; %bb.0: |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX789-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1 |
| ; GFX789-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: i63_zeroext_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ret i63 %val |
| } |
| |
| ; signext i63 return of the incoming i63 argument. The expected code shifts |
| ; v[0:1] left by 1 and then arithmetic-shifts it right by 1. The hawaii run |
| ; uses v_lshl_b64 / v_ashr_i64, the fiji, gfx900 and gfx1100 runs use the |
| ; "rev" operand-order forms, and gfx1100 also has an s_delay_alu between the |
| ; two shifts. |
| define signext i63 @i63_signext_func_void(i63 %val) #0 { |
| ; CI-LABEL: i63_signext_func_void: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: v_lshl_b64 v[0:1], v[0:1], 1 |
| ; CI-NEXT: v_ashr_i64 v[0:1], v[0:1], 1 |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: i63_signext_func_void: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] |
| ; GFX89-NEXT: v_ashrrev_i64 v[0:1], 1, v[0:1] |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: i63_signext_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] |
| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GFX11-NEXT: v_ashrrev_i64 v[0:1], 1, v[0:1] |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| ret i63 %val |
| } |
| |
| ; i64 return. The value is loaded from a poison addrspace(1) pointer; the |
| ; expected code is one 64-bit buffer load into v[0:1]. |
| define i64 @i64_func_void() #0 { |
| ; GFX789-LABEL: i64_func_void: |
| ; GFX789: ; %bb.0: |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX789-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX789-NEXT: s_mov_b32 s6, -1 |
| ; GFX789-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) |
| ; GFX789-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: i64_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %val = load i64, ptr addrspace(1) poison |
| ret i64 %val |
| } |
| |
| ; i65 return. The load is expected to be split into a 64-bit load into v[0:1] |
| ; and an unsigned byte load into v2; gfx1100 groups the two loads under |
| ; s_clause. |
| define i65 @i65_func_void() #0 { |
| ; GFX789-LABEL: i65_func_void: |
| ; GFX789: ; %bb.0: |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX789-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX789-NEXT: s_mov_b32 s6, -1 |
| ; GFX789-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 |
| ; GFX789-NEXT: buffer_load_ubyte v2, off, s[4:7], 0 |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) |
| ; GFX789-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: i65_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_load_u8 v2, off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %val = load i65, ptr addrspace(1) poison |
| ret i65 %val |
| } |
| |
| ; float return. The value is loaded from a poison addrspace(1) pointer; the |
| ; expected code is one dword buffer load into v0. |
| define float @f32_func_void() #0 { |
| ; GFX789-LABEL: f32_func_void: |
| ; GFX789: ; %bb.0: |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX789-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX789-NEXT: s_mov_b32 s6, -1 |
| ; GFX789-NEXT: buffer_load_dword v0, off, s[4:7], 0 |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) |
| ; GFX789-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: f32_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_load_b32 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %val = load float, ptr addrspace(1) poison |
| ret float %val |
| } |
| |
| ; double return. The value is loaded from a poison addrspace(1) pointer; the |
| ; expected code is one 64-bit buffer load into v[0:1]. |
| define double @f64_func_void() #0 { |
| ; GFX789-LABEL: f64_func_void: |
| ; GFX789: ; %bb.0: |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX789-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX789-NEXT: s_mov_b32 s6, -1 |
| ; GFX789-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) |
| ; GFX789-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: f64_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %val = load double, ptr addrspace(1) poison |
| ret double %val |
| } |
| |
| ; <2 x double> return. The value is loaded from a poison addrspace(1) pointer; |
| ; the expected code is one 128-bit buffer load into v[0:3]. |
| define <2 x double> @v2f64_func_void() #0 { |
| ; GFX789-LABEL: v2f64_func_void: |
| ; GFX789: ; %bb.0: |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX789-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX789-NEXT: s_mov_b32 s6, -1 |
| ; GFX789-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) |
| ; GFX789-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v2f64_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %val = load <2 x double>, ptr addrspace(1) poison |
| ret <2 x double> %val |
| } |
| |
| ; <2 x i32> return. The value is loaded from a poison addrspace(1) pointer; the |
| ; expected code is one 64-bit buffer load into v[0:1]. |
| define <2 x i32> @v2i32_func_void() #0 { |
| ; GFX789-LABEL: v2i32_func_void: |
| ; GFX789: ; %bb.0: |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX789-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX789-NEXT: s_mov_b32 s6, -1 |
| ; GFX789-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) |
| ; GFX789-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v2i32_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %val = load <2 x i32>, ptr addrspace(1) poison |
| ret <2 x i32> %val |
| } |
| |
| ; <3 x i32> return. The value is loaded from a poison addrspace(1) pointer; the |
| ; expected code is one 96-bit buffer load (buffer_load_dwordx3 / |
| ; buffer_load_b96) into v[0:2]. |
| define <3 x i32> @v3i32_func_void() #0 { |
| ; GFX789-LABEL: v3i32_func_void: |
| ; GFX789: ; %bb.0: |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX789-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX789-NEXT: s_mov_b32 s6, -1 |
| ; GFX789-NEXT: buffer_load_dwordx3 v[0:2], off, s[4:7], 0 |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) |
| ; GFX789-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v3i32_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_load_b96 v[0:2], off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %val = load <3 x i32>, ptr addrspace(1) poison |
| ret <3 x i32> %val |
| } |
| |
| ; <4 x i32> return. The value is loaded from a poison addrspace(1) pointer; the |
| ; expected code is one 128-bit buffer load into v[0:3]. |
| define <4 x i32> @v4i32_func_void() #0 { |
| ; GFX789-LABEL: v4i32_func_void: |
| ; GFX789: ; %bb.0: |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX789-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX789-NEXT: s_mov_b32 s6, -1 |
| ; GFX789-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) |
| ; GFX789-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v4i32_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %val = load <4 x i32>, ptr addrspace(1) poison |
| ret <4 x i32> %val |
| } |
| |
| ; <5 x i32> return from a volatile load. The expected code is a dword load |
| ; into v4 followed by a 128-bit load into v[0:3]. Each load carries glc (glc |
| ; dlc on gfx1100) and is followed by its own s_waitcnt vmcnt(0). |
| define <5 x i32> @v5i32_func_void() #0 { |
| ; GFX789-LABEL: v5i32_func_void: |
| ; GFX789: ; %bb.0: |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX789-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX789-NEXT: s_mov_b32 s6, -1 |
| ; GFX789-NEXT: buffer_load_dword v4, off, s[4:7], 0 glc |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) |
| ; GFX789-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 glc |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) |
| ; GFX789-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v5i32_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_load_b32 v4, off, s[0:3], 0 glc dlc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 glc dlc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %val = load volatile <5 x i32>, ptr addrspace(1) poison |
| ret <5 x i32> %val |
| } |
| |
| ; <8 x i32> return. The source pointer is itself loaded (volatile) from a |
| ; poison addrspace(4) pointer, which shows up as a scalar load. The vector is |
| ; then expected to be read with two 128-bit buffer loads at offsets 0 and 16, |
| ; filling v[0:7]. |
| define <8 x i32> @v8i32_func_void() #0 { |
| ; GFX789-LABEL: v8i32_func_void: |
| ; GFX789: ; %bb.0: |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX789-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GFX789-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX789-NEXT: s_mov_b32 s6, -1 |
| ; GFX789-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX789-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 |
| ; GFX789-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16 |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) |
| ; GFX789-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v8i32_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_load_b128 v[4:7], off, s[0:3], 0 offset:16 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %ptr = load volatile ptr addrspace(1), ptr addrspace(4) poison |
| %val = load <8 x i32>, ptr addrspace(1) %ptr |
| ret <8 x i32> %val |
| } |
| |
| ; <16 x i32> return. The source pointer is loaded (volatile) from a poison |
| ; addrspace(4) pointer. The vector is then expected to be read with four |
| ; 128-bit buffer loads at offsets 0, 16, 32 and 48, filling v[0:15]. |
| define <16 x i32> @v16i32_func_void() #0 { |
| ; GFX789-LABEL: v16i32_func_void: |
| ; GFX789: ; %bb.0: |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX789-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GFX789-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX789-NEXT: s_mov_b32 s6, -1 |
| ; GFX789-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX789-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 |
| ; GFX789-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16 |
| ; GFX789-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32 |
| ; GFX789-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48 |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) |
| ; GFX789-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v16i32_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: s_clause 0x3 |
| ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_load_b128 v[4:7], off, s[0:3], 0 offset:16 |
| ; GFX11-NEXT: buffer_load_b128 v[8:11], off, s[0:3], 0 offset:32 |
| ; GFX11-NEXT: buffer_load_b128 v[12:15], off, s[0:3], 0 offset:48 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %ptr = load volatile ptr addrspace(1), ptr addrspace(4) poison |
| %val = load <16 x i32>, ptr addrspace(1) %ptr |
| ret <16 x i32> %val |
| } |
| |
| ; <32 x i32> return. The source pointer is loaded (volatile) from a poison |
| ; addrspace(4) pointer. The vector is then expected to be read with eight |
| ; 128-bit buffer loads at offsets 0 through 112, filling v[0:31]. |
| define <32 x i32> @v32i32_func_void() #0 { |
| ; GFX789-LABEL: v32i32_func_void: |
| ; GFX789: ; %bb.0: |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX789-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GFX789-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX789-NEXT: s_mov_b32 s6, -1 |
| ; GFX789-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX789-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 |
| ; GFX789-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16 |
| ; GFX789-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32 |
| ; GFX789-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48 |
| ; GFX789-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64 |
| ; GFX789-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80 |
| ; GFX789-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96 |
| ; GFX789-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112 |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) |
| ; GFX789-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v32i32_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: s_clause 0x7 |
| ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_load_b128 v[4:7], off, s[0:3], 0 offset:16 |
| ; GFX11-NEXT: buffer_load_b128 v[8:11], off, s[0:3], 0 offset:32 |
| ; GFX11-NEXT: buffer_load_b128 v[12:15], off, s[0:3], 0 offset:48 |
| ; GFX11-NEXT: buffer_load_b128 v[16:19], off, s[0:3], 0 offset:64 |
| ; GFX11-NEXT: buffer_load_b128 v[20:23], off, s[0:3], 0 offset:80 |
| ; GFX11-NEXT: buffer_load_b128 v[24:27], off, s[0:3], 0 offset:96 |
| ; GFX11-NEXT: buffer_load_b128 v[28:31], off, s[0:3], 0 offset:112 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %ptr = load volatile ptr addrspace(1), ptr addrspace(4) poison |
| %val = load <32 x i32>, ptr addrspace(1) %ptr |
| ret <32 x i32> %val |
| } |
| |
| ; <2 x i64> return. The value is loaded from a poison addrspace(1) pointer; the |
| ; expected code is one 128-bit buffer load into v[0:3]. |
| define <2 x i64> @v2i64_func_void() #0 { |
| ; GFX789-LABEL: v2i64_func_void: |
| ; GFX789: ; %bb.0: |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX789-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX789-NEXT: s_mov_b32 s6, -1 |
| ; GFX789-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) |
| ; GFX789-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v2i64_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %val = load <2 x i64>, ptr addrspace(1) poison |
| ret <2 x i64> %val |
| } |
| |
| ; <3 x i64> return. The source pointer is loaded (volatile) from a poison |
| ; addrspace(4) pointer. The vector is then expected to be read with a 128-bit |
| ; load into v[0:3] and a 64-bit load at offset 16 into v[4:5]. |
| define <3 x i64> @v3i64_func_void() #0 { |
| ; GFX789-LABEL: v3i64_func_void: |
| ; GFX789: ; %bb.0: |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX789-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GFX789-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX789-NEXT: s_mov_b32 s6, -1 |
| ; GFX789-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX789-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 |
| ; GFX789-NEXT: buffer_load_dwordx2 v[4:5], off, s[4:7], 0 offset:16 |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) |
| ; GFX789-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v3i64_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_load_b64 v[4:5], off, s[0:3], 0 offset:16 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %ptr = load volatile ptr addrspace(1), ptr addrspace(4) poison |
| %val = load <3 x i64>, ptr addrspace(1) %ptr |
| ret <3 x i64> %val |
| } |
| |
| ; <4 x i64> return. The source pointer is loaded (volatile) from a poison |
| ; addrspace(4) pointer. The vector is then expected to be read with two |
| ; 128-bit buffer loads at offsets 0 and 16, filling v[0:7]. |
| define <4 x i64> @v4i64_func_void() #0 { |
| ; GFX789-LABEL: v4i64_func_void: |
| ; GFX789: ; %bb.0: |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX789-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GFX789-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX789-NEXT: s_mov_b32 s6, -1 |
| ; GFX789-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX789-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 |
| ; GFX789-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16 |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) |
| ; GFX789-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v4i64_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_load_b128 v[4:7], off, s[0:3], 0 offset:16 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %ptr = load volatile ptr addrspace(1), ptr addrspace(4) poison |
| %val = load <4 x i64>, ptr addrspace(1) %ptr |
| ret <4 x i64> %val |
| } |
| |
| ; <5 x i64> return. The source pointer is loaded (volatile) from a poison |
| ; addrspace(4) pointer. The vector is then expected to be read with two |
| ; 128-bit loads at offsets 0 and 16 into v[0:7], plus a 64-bit load at offset |
| ; 32 into v[8:9]. |
| define <5 x i64> @v5i64_func_void() #0 { |
| ; GFX789-LABEL: v5i64_func_void: |
| ; GFX789: ; %bb.0: |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX789-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GFX789-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX789-NEXT: s_mov_b32 s6, -1 |
| ; GFX789-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX789-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 |
| ; GFX789-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16 |
| ; GFX789-NEXT: buffer_load_dwordx2 v[8:9], off, s[4:7], 0 offset:32 |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) |
| ; GFX789-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v5i64_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: s_clause 0x2 |
| ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_load_b128 v[4:7], off, s[0:3], 0 offset:16 |
| ; GFX11-NEXT: buffer_load_b64 v[8:9], off, s[0:3], 0 offset:32 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %ptr = load volatile ptr addrspace(1), ptr addrspace(4) poison |
| %val = load <5 x i64>, ptr addrspace(1) %ptr |
| ret <5 x i64> %val |
| } |
| |
| ; <8 x i64> return. The source pointer is loaded (volatile) from a poison |
| ; addrspace(4) pointer. The vector is then expected to be read with four |
| ; 128-bit buffer loads at offsets 0, 16, 32 and 48, filling v[0:15]. |
| define <8 x i64> @v8i64_func_void() #0 { |
| ; GFX789-LABEL: v8i64_func_void: |
| ; GFX789: ; %bb.0: |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX789-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GFX789-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX789-NEXT: s_mov_b32 s6, -1 |
| ; GFX789-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX789-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 |
| ; GFX789-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16 |
| ; GFX789-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32 |
| ; GFX789-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48 |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) |
| ; GFX789-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v8i64_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: s_clause 0x3 |
| ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_load_b128 v[4:7], off, s[0:3], 0 offset:16 |
| ; GFX11-NEXT: buffer_load_b128 v[8:11], off, s[0:3], 0 offset:32 |
| ; GFX11-NEXT: buffer_load_b128 v[12:15], off, s[0:3], 0 offset:48 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %ptr = load volatile ptr addrspace(1), ptr addrspace(4) poison |
| %val = load <8 x i64>, ptr addrspace(1) %ptr |
| ret <8 x i64> %val |
| } |
| |
| ; <16 x i64> return. The source pointer is loaded (volatile) from a poison |
| ; addrspace(4) pointer. The vector is then expected to be read with eight |
| ; 128-bit buffer loads at offsets 0 through 112, filling v[0:31]. |
| define <16 x i64> @v16i64_func_void() #0 { |
| ; GFX789-LABEL: v16i64_func_void: |
| ; GFX789: ; %bb.0: |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX789-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GFX789-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX789-NEXT: s_mov_b32 s6, -1 |
| ; GFX789-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX789-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 |
| ; GFX789-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16 |
| ; GFX789-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32 |
| ; GFX789-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48 |
| ; GFX789-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64 |
| ; GFX789-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80 |
| ; GFX789-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96 |
| ; GFX789-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112 |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) |
| ; GFX789-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v16i64_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: s_clause 0x7 |
| ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_load_b128 v[4:7], off, s[0:3], 0 offset:16 |
| ; GFX11-NEXT: buffer_load_b128 v[8:11], off, s[0:3], 0 offset:32 |
| ; GFX11-NEXT: buffer_load_b128 v[12:15], off, s[0:3], 0 offset:48 |
| ; GFX11-NEXT: buffer_load_b128 v[16:19], off, s[0:3], 0 offset:64 |
| ; GFX11-NEXT: buffer_load_b128 v[20:23], off, s[0:3], 0 offset:80 |
| ; GFX11-NEXT: buffer_load_b128 v[24:27], off, s[0:3], 0 offset:96 |
| ; GFX11-NEXT: buffer_load_b128 v[28:31], off, s[0:3], 0 offset:112 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %ptr = load volatile ptr addrspace(1), ptr addrspace(4) poison |
| %val = load <16 x i64>, ptr addrspace(1) %ptr |
| ret <16 x i64> %val |
| } |
| |
| ; Returns <2 x i16> loaded directly from a poison addrspace(1) address. |
| ; The CI checks expect a 16-bit right shift of the loaded dword into v1 after the load; |
| ; the GFX89 and GFX11 checks expect only the single dword load into v0. |
| define <2 x i16> @v2i16_func_void() #0 { |
| ; CI-LABEL: v2i16_func_void: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: buffer_load_dword v0, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: v2i16_func_void: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX89-NEXT: s_mov_b32 s6, -1 |
| ; GFX89-NEXT: buffer_load_dword v0, off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v2i16_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_load_b32 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %val = load <2 x i16>, ptr addrspace(1) poison |
| ret <2 x i16> %val |
| } |
| |
| ; Returns <3 x i16> loaded directly from a poison addrspace(1) address. |
| ; The CI checks load 8 bytes into v[2:3] and then write v0, v1 and v2 with one |
| ; v_alignbit_b32 and two moves; the GFX89 and GFX11 checks expect only the 8-byte |
| ; load into v[0:1]. |
| define <3 x i16> @v3i16_func_void() #0 { |
| ; CI-LABEL: v3i16_func_void: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: buffer_load_dwordx2 v[2:3], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: v_alignbit_b32 v1, v3, v2, 16 |
| ; CI-NEXT: v_mov_b32_e32 v0, v2 |
| ; CI-NEXT: v_mov_b32_e32 v2, v3 |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: v3i16_func_void: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX89-NEXT: s_mov_b32 s6, -1 |
| ; GFX89-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v3i16_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %val = load <3 x i16>, ptr addrspace(1) poison |
| ret <3 x i16> %val |
| } |
| |
| ; Returns <4 x i16> loaded directly from a poison addrspace(1) address. |
| ; The CI checks follow the 8-byte load with two 16-bit right shifts and two moves that |
| ; end up writing v1, v2 and v3 (v4 is used as a temporary); the GFX89 and GFX11 checks |
| ; expect only the 8-byte load into v[0:1]. |
| define <4 x i16> @v4i16_func_void() #0 { |
| ; CI-LABEL: v4i16_func_void: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: v_lshrrev_b32_e32 v4, 16, v0 |
| ; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v1 |
| ; CI-NEXT: v_mov_b32_e32 v2, v1 |
| ; CI-NEXT: v_mov_b32_e32 v1, v4 |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: v4i16_func_void: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX89-NEXT: s_mov_b32 s6, -1 |
| ; GFX89-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v4i16_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %val = load <4 x i16>, ptr addrspace(1) poison |
| ret <4 x i16> %val |
| } |
| |
| ; Returns <4 x half> loaded directly from a poison addrspace(1) address. |
| ; The CI checks load 8 bytes into v[3:4] and then use two 16-bit right shifts and four |
| ; v_cvt_f32_f16 conversions to write v0 through v3; the GFX89 and GFX11 checks expect |
| ; only the 8-byte load into v[0:1]. |
| define <4 x half> @v4f16_func_void() #0 { |
| ; CI-LABEL: v4f16_func_void: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: buffer_load_dwordx2 v[3:4], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: v_cvt_f32_f16_e32 v0, v3 |
| ; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v3 |
| ; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v4 |
| ; CI-NEXT: v_cvt_f32_f16_e32 v2, v4 |
| ; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 |
| ; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: v4f16_func_void: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX89-NEXT: s_mov_b32 s6, -1 |
| ; GFX89-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v4f16_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %val = load <4 x half>, ptr addrspace(1) poison |
| ret <4 x half> %val |
| } |
| |
| ; FIXME: Mixing buffer and global |
| ; FIXME: Should not scalarize |
| ; Returns <5 x i16> loaded through a pointer that is itself read with a volatile load. |
| ; The CI checks expect an 8-byte load plus a separate signed 16-bit load at offset 8 |
| ; into v4, followed by v_alignbit/shift/move fix-ups; the GFX89 and GFX11 checks expect |
| ; one 16-byte load into v[0:3]. |
| define <5 x i16> @v5i16_func_void() #0 { |
| ; CI-LABEL: v5i16_func_void: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: s_waitcnt lgkmcnt(0) |
| ; CI-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 |
| ; CI-NEXT: buffer_load_sshort v4, off, s[4:7], 0 offset:8 |
| ; CI-NEXT: s_waitcnt vmcnt(1) |
| ; CI-NEXT: v_alignbit_b32 v5, v1, v0, 16 |
| ; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v1 |
| ; CI-NEXT: v_mov_b32_e32 v2, v1 |
| ; CI-NEXT: v_mov_b32_e32 v1, v5 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: v5i16_func_void: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX89-NEXT: s_mov_b32 s6, -1 |
| ; GFX89-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX89-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v5i16_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %ptr = load volatile ptr addrspace(1), ptr addrspace(4) poison |
| %val = load <5 x i16>, ptr addrspace(1) %ptr |
| ret <5 x i16> %val |
| } |
| |
| ; Returns <8 x i16> loaded through a pointer that is itself read with a volatile load. |
| ; The CI checks load 16 bytes into v[8:11], then four 16-bit right shifts write v1, v3, |
| ; v5, v7 and four moves write v0, v2, v4, v6; the GFX89 and GFX11 checks expect only |
| ; one 16-byte load into v[0:3]. |
| define <8 x i16> @v8i16_func_void() #0 { |
| ; CI-LABEL: v8i16_func_void: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: s_waitcnt lgkmcnt(0) |
| ; CI-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v8 |
| ; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v9 |
| ; CI-NEXT: v_lshrrev_b32_e32 v5, 16, v10 |
| ; CI-NEXT: v_lshrrev_b32_e32 v7, 16, v11 |
| ; CI-NEXT: v_mov_b32_e32 v0, v8 |
| ; CI-NEXT: v_mov_b32_e32 v2, v9 |
| ; CI-NEXT: v_mov_b32_e32 v4, v10 |
| ; CI-NEXT: v_mov_b32_e32 v6, v11 |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: v8i16_func_void: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX89-NEXT: s_mov_b32 s6, -1 |
| ; GFX89-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX89-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v8i16_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %ptr = load volatile ptr addrspace(1), ptr addrspace(4) poison |
| %val = load <8 x i16>, ptr addrspace(1) %ptr |
| ret <8 x i16> %val |
| } |
| |
| ; Returns <16 x i16> loaded through a pointer that is itself read with a volatile load. |
| ; The CI checks load two 16-byte pieces into v[22:25] and v[18:21], then 16-bit right |
| ; shifts write the odd registers v1..v15 and moves write the even registers v0..v14; |
| ; the GFX89 and GFX11 checks expect only two 16-byte loads into v[0:7]. |
| define <16 x i16> @v16i16_func_void() #0 { |
| ; CI-LABEL: v16i16_func_void: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: s_waitcnt lgkmcnt(0) |
| ; CI-NEXT: buffer_load_dwordx4 v[22:25], off, s[4:7], 0 |
| ; CI-NEXT: buffer_load_dwordx4 v[18:21], off, s[4:7], 0 offset:16 |
| ; CI-NEXT: s_waitcnt vmcnt(1) |
| ; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v22 |
| ; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v23 |
| ; CI-NEXT: v_lshrrev_b32_e32 v5, 16, v24 |
| ; CI-NEXT: v_lshrrev_b32_e32 v7, 16, v25 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: v_lshrrev_b32_e32 v9, 16, v18 |
| ; CI-NEXT: v_lshrrev_b32_e32 v11, 16, v19 |
| ; CI-NEXT: v_lshrrev_b32_e32 v13, 16, v20 |
| ; CI-NEXT: v_lshrrev_b32_e32 v15, 16, v21 |
| ; CI-NEXT: v_mov_b32_e32 v0, v22 |
| ; CI-NEXT: v_mov_b32_e32 v2, v23 |
| ; CI-NEXT: v_mov_b32_e32 v4, v24 |
| ; CI-NEXT: v_mov_b32_e32 v6, v25 |
| ; CI-NEXT: v_mov_b32_e32 v8, v18 |
| ; CI-NEXT: v_mov_b32_e32 v10, v19 |
| ; CI-NEXT: v_mov_b32_e32 v12, v20 |
| ; CI-NEXT: v_mov_b32_e32 v14, v21 |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: v16i16_func_void: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX89-NEXT: s_mov_b32 s6, -1 |
| ; GFX89-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX89-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 |
| ; GFX89-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v16i16_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_load_b128 v[4:7], off, s[0:3], 0 offset:16 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %ptr = load volatile ptr addrspace(1), ptr addrspace(4) poison |
| %val = load <16 x i16>, ptr addrspace(1) %ptr |
| ret <16 x i16> %val |
| } |
| |
| ; FIXME: Should pack |
| ; Returns <16 x i8> loaded through a pointer that is itself read with a volatile load. |
| ; All four run lines expect one 16-byte load into v[0:3] followed by right shifts of |
| ; 8, 16 and 24 bits and moves that spread the result over v0 through v15 (v16..v18 are |
| ; used as temporaries); the gfx1100 checks expect two pairs of those moves to be |
| ; emitted as v_dual_mov_b32. |
| define <16 x i8> @v16i8_func_void() #0 { |
| ; GFX789-LABEL: v16i8_func_void: |
| ; GFX789: ; %bb.0: |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX789-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GFX789-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX789-NEXT: s_mov_b32 s6, -1 |
| ; GFX789-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX789-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) |
| ; GFX789-NEXT: v_lshrrev_b32_e32 v16, 8, v0 |
| ; GFX789-NEXT: v_lshrrev_b32_e32 v17, 16, v0 |
| ; GFX789-NEXT: v_lshrrev_b32_e32 v18, 24, v0 |
| ; GFX789-NEXT: v_lshrrev_b32_e32 v5, 8, v1 |
| ; GFX789-NEXT: v_lshrrev_b32_e32 v6, 16, v1 |
| ; GFX789-NEXT: v_lshrrev_b32_e32 v7, 24, v1 |
| ; GFX789-NEXT: v_lshrrev_b32_e32 v9, 8, v2 |
| ; GFX789-NEXT: v_lshrrev_b32_e32 v10, 16, v2 |
| ; GFX789-NEXT: v_lshrrev_b32_e32 v11, 24, v2 |
| ; GFX789-NEXT: v_lshrrev_b32_e32 v13, 8, v3 |
| ; GFX789-NEXT: v_lshrrev_b32_e32 v14, 16, v3 |
| ; GFX789-NEXT: v_lshrrev_b32_e32 v15, 24, v3 |
| ; GFX789-NEXT: v_mov_b32_e32 v4, v1 |
| ; GFX789-NEXT: v_mov_b32_e32 v8, v2 |
| ; GFX789-NEXT: v_mov_b32_e32 v12, v3 |
| ; GFX789-NEXT: v_mov_b32_e32 v1, v16 |
| ; GFX789-NEXT: v_mov_b32_e32 v2, v17 |
| ; GFX789-NEXT: v_mov_b32_e32 v3, v18 |
| ; GFX789-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v16i8_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: v_lshrrev_b32_e32 v16, 8, v0 |
| ; GFX11-NEXT: v_lshrrev_b32_e32 v17, 16, v0 |
| ; GFX11-NEXT: v_lshrrev_b32_e32 v18, 24, v0 |
| ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 8, v1 |
| ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v1 |
| ; GFX11-NEXT: v_lshrrev_b32_e32 v7, 24, v1 |
| ; GFX11-NEXT: v_lshrrev_b32_e32 v9, 8, v2 |
| ; GFX11-NEXT: v_lshrrev_b32_e32 v10, 16, v2 |
| ; GFX11-NEXT: v_lshrrev_b32_e32 v11, 24, v2 |
| ; GFX11-NEXT: v_lshrrev_b32_e32 v13, 8, v3 |
| ; GFX11-NEXT: v_lshrrev_b32_e32 v14, 16, v3 |
| ; GFX11-NEXT: v_lshrrev_b32_e32 v15, 24, v3 |
| ; GFX11-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v1, v16 |
| ; GFX11-NEXT: v_mov_b32_e32 v8, v2 |
| ; GFX11-NEXT: v_dual_mov_b32 v12, v3 :: v_dual_mov_b32 v3, v18 |
| ; GFX11-NEXT: v_mov_b32_e32 v2, v17 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %ptr = load volatile ptr addrspace(1), ptr addrspace(4) poison |
| %val = load <16 x i8>, ptr addrspace(1) %ptr |
| ret <16 x i8> %val |
| } |
| |
| ; FIXME: Should pack |
| ; Returns <4 x i8> loaded through a pointer that is itself read with a volatile load. |
| ; All four run lines expect one dword load into v0 followed by right shifts of 8, 16 |
| ; and 24 bits into v1, v2 and v3; only the order of the shifts differs between the |
| ; GFX789 and GFX11 checks. |
| define <4 x i8> @v4i8_func_void() #0 { |
| ; GFX789-LABEL: v4i8_func_void: |
| ; GFX789: ; %bb.0: |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX789-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GFX789-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX789-NEXT: s_mov_b32 s6, -1 |
| ; GFX789-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX789-NEXT: buffer_load_dword v0, off, s[4:7], 0 |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) |
| ; GFX789-NEXT: v_lshrrev_b32_e32 v2, 16, v0 |
| ; GFX789-NEXT: v_lshrrev_b32_e32 v1, 8, v0 |
| ; GFX789-NEXT: v_lshrrev_b32_e32 v3, 24, v0 |
| ; GFX789-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v4i8_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: buffer_load_b32 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 8, v0 |
| ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0 |
| ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 24, v0 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %ptr = load volatile ptr addrspace(1), ptr addrspace(4) poison |
| %val = load <4 x i8>, ptr addrspace(1) %ptr |
| ret <4 x i8> %val |
| } |
| |
| ; Returns a { i8, i32 } aggregate loaded directly from a poison addrspace(1) address. |
| ; All four run lines expect an unsigned byte load into v0 and a dword load into v1, |
| ; neither carrying an offset operand; the gfx1100 checks additionally expect an |
| ; s_clause 0x1 in front of the two loads. |
| define {i8, i32} @struct_i8_i32_func_void() #0 { |
| ; GFX789-LABEL: struct_i8_i32_func_void: |
| ; GFX789: ; %bb.0: |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX789-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX789-NEXT: s_mov_b32 s6, -1 |
| ; GFX789-NEXT: buffer_load_ubyte v0, off, s[4:7], 0 |
| ; GFX789-NEXT: buffer_load_dword v1, off, s[4:7], 0 |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) |
| ; GFX789-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: struct_i8_i32_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: buffer_load_u8 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_load_b32 v1, off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %val = load { i8, i32 }, ptr addrspace(1) poison |
| ret { i8, i32 } %val |
| } |
| |
| ; Writes a { i8, i32 } result through the sret pointer %arg0 (addrspace(5)) instead of |
| ; returning it: an i8 and an i32 are read with volatile loads from a poison |
| ; addrspace(1) address and stored to fields 0 and 1 of the struct. |
| ; The GFX789 checks expect buffer stores addressed by v0 with offen (the dword at |
| ; offset:4); the gfx1100 checks expect scratch_store_b8 / scratch_store_b32 instead. |
| define void @void_func_sret_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }) %arg0) #0 { |
| ; GFX789-LABEL: void_func_sret_struct_i8_i32: |
| ; GFX789: ; %bb.0: |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX789-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX789-NEXT: s_mov_b32 s6, -1 |
| ; GFX789-NEXT: buffer_load_ubyte v1, off, s[4:7], 0 glc |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) |
| ; GFX789-NEXT: buffer_load_dword v2, off, s[4:7], 0 glc |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) |
| ; GFX789-NEXT: buffer_store_byte v1, v0, s[0:3], 0 offen |
| ; GFX789-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 |
| ; GFX789-NEXT: s_waitcnt vmcnt(0) |
| ; GFX789-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_sret_struct_i8_i32: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_load_u8 v1, off, s[0:3], 0 glc dlc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: buffer_load_b32 v2, off, s[0:3], 0 glc dlc |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_clause 0x1 |
| ; GFX11-NEXT: scratch_store_b8 v0, v1, off |
| ; GFX11-NEXT: scratch_store_b32 v0, v2, off offset:4 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %val0 = load volatile i8, ptr addrspace(1) poison |
| %val1 = load volatile i32, ptr addrspace(1) poison |
| %gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %arg0, i32 0, i32 0 |
| %gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %arg0, i32 0, i32 1 |
| store i8 %val0, ptr addrspace(5) %gep0 |
| store i32 %val1, ptr addrspace(5) %gep1 |
| ret void |
| } |
| |
| ; FIXME: Should be able to fold offsets in all of these pre-gfx9. Call |
| ; lowering introduces an extra CopyToReg/CopyFromReg obscuring the |
| ; AssertZext inserted. Not using it introduces the spills. |
| define <33 x i32> @v33i32_func_void() #0 { |
| ; CI-LABEL: v33i32_func_void: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: v_add_i32_e32 v34, vcc, 0x80, v0 |
| ; CI-NEXT: s_waitcnt lgkmcnt(0) |
| ; CI-NEXT: buffer_load_dword v33, off, s[4:7], 0 offset:128 |
| ; CI-NEXT: buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:112 |
| ; CI-NEXT: buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:96 |
| ; CI-NEXT: buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:80 |
| ; CI-NEXT: buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:64 |
| ; CI-NEXT: buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:48 |
| ; CI-NEXT: buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:32 |
| ; CI-NEXT: buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:16 |
| ; CI-NEXT: buffer_load_dwordx4 v[29:32], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(8) |
| ; CI-NEXT: buffer_store_dword v33, v34, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v33, vcc, 0x7c, v0 |
| ; CI-NEXT: s_waitcnt vmcnt(8) |
| ; CI-NEXT: buffer_store_dword v4, v33, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v4, vcc, 0x78, v0 |
| ; CI-NEXT: buffer_store_dword v3, v4, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v3, vcc, 0x74, v0 |
| ; CI-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v2, vcc, 0x70, v0 |
| ; CI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v1, vcc, 0x6c, v0 |
| ; CI-NEXT: v_add_i32_e32 v2, vcc, 0x68, v0 |
| ; CI-NEXT: v_add_i32_e32 v3, vcc, 0x64, v0 |
| ; CI-NEXT: s_waitcnt vmcnt(11) |
| ; CI-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v1, vcc, 0x60, v0 |
| ; CI-NEXT: buffer_store_dword v7, v2, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v2, vcc, 0x5c, v0 |
| ; CI-NEXT: buffer_store_dword v6, v3, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v3, vcc, 0x58, v0 |
| ; CI-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v1, vcc, 0x54, v0 |
| ; CI-NEXT: v_add_i32_e32 v4, vcc, 0x50, v0 |
| ; CI-NEXT: v_add_i32_e32 v5, vcc, 0x4c, v0 |
| ; CI-NEXT: s_waitcnt vmcnt(14) |
| ; CI-NEXT: buffer_store_dword v12, v2, s[0:3], 0 offen |
| ; CI-NEXT: buffer_store_dword v11, v3, s[0:3], 0 offen |
| ; CI-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v1, vcc, 56, v0 |
| ; CI-NEXT: v_add_i32_e32 v6, vcc, 0x48, v0 |
| ; CI-NEXT: v_add_i32_e32 v7, vcc, 0x44, v0 |
| ; CI-NEXT: v_add_i32_e32 v2, vcc, 64, v0 |
| ; CI-NEXT: v_add_i32_e32 v3, vcc, 60, v0 |
| ; CI-NEXT: buffer_store_dword v9, v4, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v4, vcc, 52, v0 |
| ; CI-NEXT: v_add_i32_e32 v8, vcc, 48, v0 |
| ; CI-NEXT: v_add_i32_e32 v9, vcc, 44, v0 |
| ; CI-NEXT: v_add_i32_e32 v10, vcc, 40, v0 |
| ; CI-NEXT: v_add_i32_e32 v11, vcc, 36, v0 |
| ; CI-NEXT: s_waitcnt vmcnt(14) |
| ; CI-NEXT: buffer_store_dword v16, v5, s[0:3], 0 offen |
| ; CI-NEXT: buffer_store_dword v15, v6, s[0:3], 0 offen |
| ; CI-NEXT: buffer_store_dword v14, v7, s[0:3], 0 offen |
| ; CI-NEXT: buffer_store_dword v13, v2, s[0:3], 0 offen |
| ; CI-NEXT: buffer_store_dword v20, v3, s[0:3], 0 offen |
| ; CI-NEXT: buffer_store_dword v19, v1, s[0:3], 0 offen |
| ; CI-NEXT: buffer_store_dword v18, v4, s[0:3], 0 offen |
| ; CI-NEXT: buffer_store_dword v17, v8, s[0:3], 0 offen |
| ; CI-NEXT: buffer_store_dword v24, v9, s[0:3], 0 offen |
| ; CI-NEXT: buffer_store_dword v23, v10, s[0:3], 0 offen |
| ; CI-NEXT: buffer_store_dword v22, v11, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v1, vcc, 32, v0 |
| ; CI-NEXT: buffer_store_dword v21, v1, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v1, vcc, 28, v0 |
| ; CI-NEXT: buffer_store_dword v28, v1, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v1, vcc, 24, v0 |
| ; CI-NEXT: buffer_store_dword v27, v1, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v1, vcc, 20, v0 |
| ; CI-NEXT: buffer_store_dword v26, v1, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v1, vcc, 16, v0 |
| ; CI-NEXT: buffer_store_dword v25, v1, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v1, vcc, 12, v0 |
| ; CI-NEXT: s_waitcnt vmcnt(14) |
| ; CI-NEXT: buffer_store_dword v32, v1, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v1, vcc, 8, v0 |
| ; CI-NEXT: buffer_store_dword v31, v1, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v1, vcc, 4, v0 |
| ; CI-NEXT: buffer_store_dword v30, v1, s[0:3], 0 offen |
| ; CI-NEXT: buffer_store_dword v29, v0, s[0:3], 0 offen |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: v33i32_func_void: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GFX8-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX8-NEXT: s_mov_b32 s6, -1 |
| ; GFX8-NEXT: v_add_u32_e32 v34, vcc, 0x80, v0 |
| ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX8-NEXT: buffer_load_dword v33, off, s[4:7], 0 offset:128 |
| ; GFX8-NEXT: buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:112 |
| ; GFX8-NEXT: buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:96 |
| ; GFX8-NEXT: buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:80 |
| ; GFX8-NEXT: buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:64 |
| ; GFX8-NEXT: buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:48 |
| ; GFX8-NEXT: buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:32 |
| ; GFX8-NEXT: buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:16 |
| ; GFX8-NEXT: buffer_load_dwordx4 v[29:32], off, s[4:7], 0 |
| ; GFX8-NEXT: s_waitcnt vmcnt(8) |
| ; GFX8-NEXT: buffer_store_dword v33, v34, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v33, vcc, 0x7c, v0 |
| ; GFX8-NEXT: s_waitcnt vmcnt(8) |
| ; GFX8-NEXT: buffer_store_dword v4, v33, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0x78, v0 |
| ; GFX8-NEXT: buffer_store_dword v3, v4, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0x74, v0 |
| ; GFX8-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0x70, v0 |
| ; GFX8-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x6c, v0 |
| ; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0x68, v0 |
| ; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0x64, v0 |
| ; GFX8-NEXT: s_waitcnt vmcnt(11) |
| ; GFX8-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x60, v0 |
| ; GFX8-NEXT: buffer_store_dword v7, v2, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0x5c, v0 |
| ; GFX8-NEXT: buffer_store_dword v6, v3, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0x58, v0 |
| ; GFX8-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x54, v0 |
| ; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0x50, v0 |
| ; GFX8-NEXT: v_add_u32_e32 v5, vcc, 0x4c, v0 |
| ; GFX8-NEXT: s_waitcnt vmcnt(14) |
| ; GFX8-NEXT: buffer_store_dword v12, v2, s[0:3], 0 offen |
| ; GFX8-NEXT: buffer_store_dword v11, v3, s[0:3], 0 offen |
| ; GFX8-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 56, v0 |
| ; GFX8-NEXT: v_add_u32_e32 v6, vcc, 0x48, v0 |
| ; GFX8-NEXT: v_add_u32_e32 v7, vcc, 0x44, v0 |
| ; GFX8-NEXT: v_add_u32_e32 v2, vcc, 64, v0 |
| ; GFX8-NEXT: v_add_u32_e32 v3, vcc, 60, v0 |
| ; GFX8-NEXT: buffer_store_dword v9, v4, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v4, vcc, 52, v0 |
| ; GFX8-NEXT: v_add_u32_e32 v8, vcc, 48, v0 |
| ; GFX8-NEXT: v_add_u32_e32 v9, vcc, 44, v0 |
| ; GFX8-NEXT: v_add_u32_e32 v10, vcc, 40, v0 |
| ; GFX8-NEXT: v_add_u32_e32 v11, vcc, 36, v0 |
| ; GFX8-NEXT: s_waitcnt vmcnt(14) |
| ; GFX8-NEXT: buffer_store_dword v16, v5, s[0:3], 0 offen |
| ; GFX8-NEXT: buffer_store_dword v15, v6, s[0:3], 0 offen |
| ; GFX8-NEXT: buffer_store_dword v14, v7, s[0:3], 0 offen |
| ; GFX8-NEXT: buffer_store_dword v13, v2, s[0:3], 0 offen |
| ; GFX8-NEXT: buffer_store_dword v20, v3, s[0:3], 0 offen |
| ; GFX8-NEXT: buffer_store_dword v19, v1, s[0:3], 0 offen |
| ; GFX8-NEXT: buffer_store_dword v18, v4, s[0:3], 0 offen |
| ; GFX8-NEXT: buffer_store_dword v17, v8, s[0:3], 0 offen |
| ; GFX8-NEXT: buffer_store_dword v24, v9, s[0:3], 0 offen |
| ; GFX8-NEXT: buffer_store_dword v23, v10, s[0:3], 0 offen |
| ; GFX8-NEXT: buffer_store_dword v22, v11, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 32, v0 |
| ; GFX8-NEXT: buffer_store_dword v21, v1, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 28, v0 |
| ; GFX8-NEXT: buffer_store_dword v28, v1, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 24, v0 |
| ; GFX8-NEXT: buffer_store_dword v27, v1, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 20, v0 |
| ; GFX8-NEXT: buffer_store_dword v26, v1, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 16, v0 |
| ; GFX8-NEXT: buffer_store_dword v25, v1, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 12, v0 |
| ; GFX8-NEXT: s_waitcnt vmcnt(14) |
| ; GFX8-NEXT: buffer_store_dword v32, v1, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 8, v0 |
| ; GFX8-NEXT: buffer_store_dword v31, v1, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 4, v0 |
| ; GFX8-NEXT: buffer_store_dword v30, v1, s[0:3], 0 offen |
| ; GFX8-NEXT: buffer_store_dword v29, v0, s[0:3], 0 offen |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: v33i32_func_void: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GFX9-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX9-NEXT: s_mov_b32 s6, -1 |
| ; GFX9-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX9-NEXT: buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:112 |
| ; GFX9-NEXT: buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:96 |
| ; GFX9-NEXT: buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:80 |
| ; GFX9-NEXT: buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:64 |
| ; GFX9-NEXT: buffer_load_dword v33, off, s[4:7], 0 offset:128 |
| ; GFX9-NEXT: buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:48 |
| ; GFX9-NEXT: buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:32 |
| ; GFX9-NEXT: buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:16 |
| ; GFX9-NEXT: buffer_load_dwordx4 v[29:32], off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(8) |
| ; GFX9-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:124 |
| ; GFX9-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:120 |
| ; GFX9-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:116 |
| ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:112 |
| ; GFX9-NEXT: s_waitcnt vmcnt(11) |
| ; GFX9-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:108 |
| ; GFX9-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:104 |
| ; GFX9-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:100 |
| ; GFX9-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:96 |
| ; GFX9-NEXT: s_waitcnt vmcnt(14) |
| ; GFX9-NEXT: buffer_store_dword v12, v0, s[0:3], 0 offen offset:92 |
| ; GFX9-NEXT: buffer_store_dword v11, v0, s[0:3], 0 offen offset:88 |
| ; GFX9-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:84 |
| ; GFX9-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:80 |
| ; GFX9-NEXT: s_waitcnt vmcnt(17) |
| ; GFX9-NEXT: buffer_store_dword v16, v0, s[0:3], 0 offen offset:76 |
| ; GFX9-NEXT: buffer_store_dword v15, v0, s[0:3], 0 offen offset:72 |
| ; GFX9-NEXT: buffer_store_dword v14, v0, s[0:3], 0 offen offset:68 |
| ; GFX9-NEXT: buffer_store_dword v13, v0, s[0:3], 0 offen offset:64 |
| ; GFX9-NEXT: s_waitcnt vmcnt(20) |
| ; GFX9-NEXT: buffer_store_dword v33, v0, s[0:3], 0 offen offset:128 |
| ; GFX9-NEXT: s_waitcnt vmcnt(20) |
| ; GFX9-NEXT: buffer_store_dword v20, v0, s[0:3], 0 offen offset:60 |
| ; GFX9-NEXT: buffer_store_dword v19, v0, s[0:3], 0 offen offset:56 |
| ; GFX9-NEXT: buffer_store_dword v18, v0, s[0:3], 0 offen offset:52 |
| ; GFX9-NEXT: buffer_store_dword v17, v0, s[0:3], 0 offen offset:48 |
| ; GFX9-NEXT: s_waitcnt vmcnt(23) |
| ; GFX9-NEXT: buffer_store_dword v24, v0, s[0:3], 0 offen offset:44 |
| ; GFX9-NEXT: buffer_store_dword v23, v0, s[0:3], 0 offen offset:40 |
| ; GFX9-NEXT: buffer_store_dword v22, v0, s[0:3], 0 offen offset:36 |
| ; GFX9-NEXT: buffer_store_dword v21, v0, s[0:3], 0 offen offset:32 |
| ; GFX9-NEXT: s_waitcnt vmcnt(26) |
| ; GFX9-NEXT: buffer_store_dword v28, v0, s[0:3], 0 offen offset:28 |
| ; GFX9-NEXT: buffer_store_dword v27, v0, s[0:3], 0 offen offset:24 |
| ; GFX9-NEXT: buffer_store_dword v26, v0, s[0:3], 0 offen offset:20 |
| ; GFX9-NEXT: buffer_store_dword v25, v0, s[0:3], 0 offen offset:16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(29) |
| ; GFX9-NEXT: buffer_store_dword v32, v0, s[0:3], 0 offen offset:12 |
| ; GFX9-NEXT: buffer_store_dword v31, v0, s[0:3], 0 offen offset:8 |
| ; GFX9-NEXT: buffer_store_dword v30, v0, s[0:3], 0 offen offset:4 |
| ; GFX9-NEXT: buffer_store_dword v29, v0, s[0:3], 0 offen |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v33i32_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: s_clause 0x8 |
| ; GFX11-NEXT: buffer_load_b128 v[1:4], off, s[0:3], 0 offset:112 |
| ; GFX11-NEXT: buffer_load_b128 v[5:8], off, s[0:3], 0 offset:96 |
| ; GFX11-NEXT: buffer_load_b128 v[9:12], off, s[0:3], 0 offset:80 |
| ; GFX11-NEXT: buffer_load_b128 v[13:16], off, s[0:3], 0 offset:64 |
| ; GFX11-NEXT: buffer_load_b128 v[17:20], off, s[0:3], 0 offset:48 |
| ; GFX11-NEXT: buffer_load_b128 v[21:24], off, s[0:3], 0 offset:32 |
| ; GFX11-NEXT: buffer_load_b128 v[25:28], off, s[0:3], 0 offset:16 |
| ; GFX11-NEXT: buffer_load_b128 v[29:32], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_load_b32 v33, off, s[0:3], 0 offset:128 |
| ; GFX11-NEXT: s_waitcnt vmcnt(8) |
| ; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:112 |
| ; GFX11-NEXT: s_waitcnt vmcnt(7) |
| ; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:96 |
| ; GFX11-NEXT: s_waitcnt vmcnt(6) |
| ; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:80 |
| ; GFX11-NEXT: s_waitcnt vmcnt(5) |
| ; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:64 |
| ; GFX11-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-NEXT: scratch_store_b128 v0, v[17:20], off offset:48 |
| ; GFX11-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-NEXT: scratch_store_b128 v0, v[21:24], off offset:32 |
| ; GFX11-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-NEXT: scratch_store_b128 v0, v[25:28], off offset:16 |
| ; GFX11-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-NEXT: scratch_store_b128 v0, v[29:32], off |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: scratch_store_b32 v0, v33, off offset:128 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %ptr = load volatile ptr addrspace(1), ptr addrspace(4) poison |
| %val = load <33 x i32>, ptr addrspace(1) %ptr |
| ret <33 x i32> %val |
| } |
| |
| ; Test: return of a { <32 x i32>, i32 } aggregate (33 dwords in total). |
| ; The source pointer is read with a volatile load from a poison addrspace(4) |
| ; pointer, and the aggregate is then loaded through that addrspace(1) pointer. |
| ; In the expected output every dword of the result is stored to memory that is |
| ; addressed relative to v0 (presumably a caller-provided return slot; confirm |
| ; against the AMDGPU calling convention). The vector member covers byte offsets |
| ; 0-124 and the trailing i32 member is written at byte offset 128. |
| ; The hawaii and fiji runs compute each store address with its own v_add, the |
| ; gfx900 run folds the offset into the store instruction, and the gfx1100 run |
| ; uses 128-bit scratch stores plus one 32-bit scratch store. |
| ; The assertions below are autogenerated (see the NOTE at the top of the file); |
| ; regenerate them with utils/update_llc_test_checks.py instead of hand-editing. |
| define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 { |
| ; CI-LABEL: struct_v32i32_i32_func_void: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: v_add_i32_e32 v34, vcc, 0x80, v0 |
| ; CI-NEXT: s_waitcnt lgkmcnt(0) |
| ; CI-NEXT: buffer_load_dword v33, off, s[4:7], 0 offset:128 |
| ; CI-NEXT: buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:112 |
| ; CI-NEXT: buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:96 |
| ; CI-NEXT: buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:80 |
| ; CI-NEXT: buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:64 |
| ; CI-NEXT: buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:48 |
| ; CI-NEXT: buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:32 |
| ; CI-NEXT: buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:16 |
| ; CI-NEXT: buffer_load_dwordx4 v[29:32], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(8) |
| ; CI-NEXT: buffer_store_dword v33, v34, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v33, vcc, 0x7c, v0 |
| ; CI-NEXT: s_waitcnt vmcnt(8) |
| ; CI-NEXT: buffer_store_dword v4, v33, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v4, vcc, 0x78, v0 |
| ; CI-NEXT: buffer_store_dword v3, v4, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v3, vcc, 0x74, v0 |
| ; CI-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v2, vcc, 0x70, v0 |
| ; CI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v1, vcc, 0x6c, v0 |
| ; CI-NEXT: v_add_i32_e32 v2, vcc, 0x68, v0 |
| ; CI-NEXT: v_add_i32_e32 v3, vcc, 0x64, v0 |
| ; CI-NEXT: s_waitcnt vmcnt(11) |
| ; CI-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v1, vcc, 0x60, v0 |
| ; CI-NEXT: buffer_store_dword v7, v2, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v2, vcc, 0x5c, v0 |
| ; CI-NEXT: buffer_store_dword v6, v3, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v3, vcc, 0x58, v0 |
| ; CI-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v1, vcc, 0x54, v0 |
| ; CI-NEXT: v_add_i32_e32 v4, vcc, 0x50, v0 |
| ; CI-NEXT: v_add_i32_e32 v5, vcc, 0x4c, v0 |
| ; CI-NEXT: s_waitcnt vmcnt(14) |
| ; CI-NEXT: buffer_store_dword v12, v2, s[0:3], 0 offen |
| ; CI-NEXT: buffer_store_dword v11, v3, s[0:3], 0 offen |
| ; CI-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v1, vcc, 56, v0 |
| ; CI-NEXT: v_add_i32_e32 v6, vcc, 0x48, v0 |
| ; CI-NEXT: v_add_i32_e32 v7, vcc, 0x44, v0 |
| ; CI-NEXT: v_add_i32_e32 v2, vcc, 64, v0 |
| ; CI-NEXT: v_add_i32_e32 v3, vcc, 60, v0 |
| ; CI-NEXT: buffer_store_dword v9, v4, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v4, vcc, 52, v0 |
| ; CI-NEXT: v_add_i32_e32 v8, vcc, 48, v0 |
| ; CI-NEXT: v_add_i32_e32 v9, vcc, 44, v0 |
| ; CI-NEXT: v_add_i32_e32 v10, vcc, 40, v0 |
| ; CI-NEXT: v_add_i32_e32 v11, vcc, 36, v0 |
| ; CI-NEXT: s_waitcnt vmcnt(14) |
| ; CI-NEXT: buffer_store_dword v16, v5, s[0:3], 0 offen |
| ; CI-NEXT: buffer_store_dword v15, v6, s[0:3], 0 offen |
| ; CI-NEXT: buffer_store_dword v14, v7, s[0:3], 0 offen |
| ; CI-NEXT: buffer_store_dword v13, v2, s[0:3], 0 offen |
| ; CI-NEXT: buffer_store_dword v20, v3, s[0:3], 0 offen |
| ; CI-NEXT: buffer_store_dword v19, v1, s[0:3], 0 offen |
| ; CI-NEXT: buffer_store_dword v18, v4, s[0:3], 0 offen |
| ; CI-NEXT: buffer_store_dword v17, v8, s[0:3], 0 offen |
| ; CI-NEXT: buffer_store_dword v24, v9, s[0:3], 0 offen |
| ; CI-NEXT: buffer_store_dword v23, v10, s[0:3], 0 offen |
| ; CI-NEXT: buffer_store_dword v22, v11, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v1, vcc, 32, v0 |
| ; CI-NEXT: buffer_store_dword v21, v1, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v1, vcc, 28, v0 |
| ; CI-NEXT: buffer_store_dword v28, v1, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v1, vcc, 24, v0 |
| ; CI-NEXT: buffer_store_dword v27, v1, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v1, vcc, 20, v0 |
| ; CI-NEXT: buffer_store_dword v26, v1, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v1, vcc, 16, v0 |
| ; CI-NEXT: buffer_store_dword v25, v1, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v1, vcc, 12, v0 |
| ; CI-NEXT: s_waitcnt vmcnt(14) |
| ; CI-NEXT: buffer_store_dword v32, v1, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v1, vcc, 8, v0 |
| ; CI-NEXT: buffer_store_dword v31, v1, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v1, vcc, 4, v0 |
| ; CI-NEXT: buffer_store_dword v30, v1, s[0:3], 0 offen |
| ; CI-NEXT: buffer_store_dword v29, v0, s[0:3], 0 offen |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: struct_v32i32_i32_func_void: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GFX8-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX8-NEXT: s_mov_b32 s6, -1 |
| ; GFX8-NEXT: v_add_u32_e32 v34, vcc, 0x80, v0 |
| ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX8-NEXT: buffer_load_dword v33, off, s[4:7], 0 offset:128 |
| ; GFX8-NEXT: buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:112 |
| ; GFX8-NEXT: buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:96 |
| ; GFX8-NEXT: buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:80 |
| ; GFX8-NEXT: buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:64 |
| ; GFX8-NEXT: buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:48 |
| ; GFX8-NEXT: buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:32 |
| ; GFX8-NEXT: buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:16 |
| ; GFX8-NEXT: buffer_load_dwordx4 v[29:32], off, s[4:7], 0 |
| ; GFX8-NEXT: s_waitcnt vmcnt(8) |
| ; GFX8-NEXT: buffer_store_dword v33, v34, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v33, vcc, 0x7c, v0 |
| ; GFX8-NEXT: s_waitcnt vmcnt(8) |
| ; GFX8-NEXT: buffer_store_dword v4, v33, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0x78, v0 |
| ; GFX8-NEXT: buffer_store_dword v3, v4, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0x74, v0 |
| ; GFX8-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0x70, v0 |
| ; GFX8-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x6c, v0 |
| ; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0x68, v0 |
| ; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0x64, v0 |
| ; GFX8-NEXT: s_waitcnt vmcnt(11) |
| ; GFX8-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x60, v0 |
| ; GFX8-NEXT: buffer_store_dword v7, v2, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0x5c, v0 |
| ; GFX8-NEXT: buffer_store_dword v6, v3, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0x58, v0 |
| ; GFX8-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x54, v0 |
| ; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0x50, v0 |
| ; GFX8-NEXT: v_add_u32_e32 v5, vcc, 0x4c, v0 |
| ; GFX8-NEXT: s_waitcnt vmcnt(14) |
| ; GFX8-NEXT: buffer_store_dword v12, v2, s[0:3], 0 offen |
| ; GFX8-NEXT: buffer_store_dword v11, v3, s[0:3], 0 offen |
| ; GFX8-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 56, v0 |
| ; GFX8-NEXT: v_add_u32_e32 v6, vcc, 0x48, v0 |
| ; GFX8-NEXT: v_add_u32_e32 v7, vcc, 0x44, v0 |
| ; GFX8-NEXT: v_add_u32_e32 v2, vcc, 64, v0 |
| ; GFX8-NEXT: v_add_u32_e32 v3, vcc, 60, v0 |
| ; GFX8-NEXT: buffer_store_dword v9, v4, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v4, vcc, 52, v0 |
| ; GFX8-NEXT: v_add_u32_e32 v8, vcc, 48, v0 |
| ; GFX8-NEXT: v_add_u32_e32 v9, vcc, 44, v0 |
| ; GFX8-NEXT: v_add_u32_e32 v10, vcc, 40, v0 |
| ; GFX8-NEXT: v_add_u32_e32 v11, vcc, 36, v0 |
| ; GFX8-NEXT: s_waitcnt vmcnt(14) |
| ; GFX8-NEXT: buffer_store_dword v16, v5, s[0:3], 0 offen |
| ; GFX8-NEXT: buffer_store_dword v15, v6, s[0:3], 0 offen |
| ; GFX8-NEXT: buffer_store_dword v14, v7, s[0:3], 0 offen |
| ; GFX8-NEXT: buffer_store_dword v13, v2, s[0:3], 0 offen |
| ; GFX8-NEXT: buffer_store_dword v20, v3, s[0:3], 0 offen |
| ; GFX8-NEXT: buffer_store_dword v19, v1, s[0:3], 0 offen |
| ; GFX8-NEXT: buffer_store_dword v18, v4, s[0:3], 0 offen |
| ; GFX8-NEXT: buffer_store_dword v17, v8, s[0:3], 0 offen |
| ; GFX8-NEXT: buffer_store_dword v24, v9, s[0:3], 0 offen |
| ; GFX8-NEXT: buffer_store_dword v23, v10, s[0:3], 0 offen |
| ; GFX8-NEXT: buffer_store_dword v22, v11, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 32, v0 |
| ; GFX8-NEXT: buffer_store_dword v21, v1, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 28, v0 |
| ; GFX8-NEXT: buffer_store_dword v28, v1, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 24, v0 |
| ; GFX8-NEXT: buffer_store_dword v27, v1, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 20, v0 |
| ; GFX8-NEXT: buffer_store_dword v26, v1, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 16, v0 |
| ; GFX8-NEXT: buffer_store_dword v25, v1, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 12, v0 |
| ; GFX8-NEXT: s_waitcnt vmcnt(14) |
| ; GFX8-NEXT: buffer_store_dword v32, v1, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 8, v0 |
| ; GFX8-NEXT: buffer_store_dword v31, v1, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 4, v0 |
| ; GFX8-NEXT: buffer_store_dword v30, v1, s[0:3], 0 offen |
| ; GFX8-NEXT: buffer_store_dword v29, v0, s[0:3], 0 offen |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: struct_v32i32_i32_func_void: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GFX9-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX9-NEXT: s_mov_b32 s6, -1 |
| ; GFX9-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX9-NEXT: buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:112 |
| ; GFX9-NEXT: buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:96 |
| ; GFX9-NEXT: buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:80 |
| ; GFX9-NEXT: buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:64 |
| ; GFX9-NEXT: buffer_load_dword v33, off, s[4:7], 0 offset:128 |
| ; GFX9-NEXT: buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:48 |
| ; GFX9-NEXT: buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:32 |
| ; GFX9-NEXT: buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:16 |
| ; GFX9-NEXT: buffer_load_dwordx4 v[29:32], off, s[4:7], 0 |
| ; GFX9-NEXT: s_waitcnt vmcnt(8) |
| ; GFX9-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:124 |
| ; GFX9-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:120 |
| ; GFX9-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:116 |
| ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:112 |
| ; GFX9-NEXT: s_waitcnt vmcnt(11) |
| ; GFX9-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:108 |
| ; GFX9-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:104 |
| ; GFX9-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:100 |
| ; GFX9-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:96 |
| ; GFX9-NEXT: s_waitcnt vmcnt(14) |
| ; GFX9-NEXT: buffer_store_dword v12, v0, s[0:3], 0 offen offset:92 |
| ; GFX9-NEXT: buffer_store_dword v11, v0, s[0:3], 0 offen offset:88 |
| ; GFX9-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:84 |
| ; GFX9-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:80 |
| ; GFX9-NEXT: s_waitcnt vmcnt(17) |
| ; GFX9-NEXT: buffer_store_dword v16, v0, s[0:3], 0 offen offset:76 |
| ; GFX9-NEXT: buffer_store_dword v15, v0, s[0:3], 0 offen offset:72 |
| ; GFX9-NEXT: buffer_store_dword v14, v0, s[0:3], 0 offen offset:68 |
| ; GFX9-NEXT: buffer_store_dword v13, v0, s[0:3], 0 offen offset:64 |
| ; GFX9-NEXT: s_waitcnt vmcnt(20) |
| ; GFX9-NEXT: buffer_store_dword v33, v0, s[0:3], 0 offen offset:128 |
| ; GFX9-NEXT: s_waitcnt vmcnt(20) |
| ; GFX9-NEXT: buffer_store_dword v20, v0, s[0:3], 0 offen offset:60 |
| ; GFX9-NEXT: buffer_store_dword v19, v0, s[0:3], 0 offen offset:56 |
| ; GFX9-NEXT: buffer_store_dword v18, v0, s[0:3], 0 offen offset:52 |
| ; GFX9-NEXT: buffer_store_dword v17, v0, s[0:3], 0 offen offset:48 |
| ; GFX9-NEXT: s_waitcnt vmcnt(23) |
| ; GFX9-NEXT: buffer_store_dword v24, v0, s[0:3], 0 offen offset:44 |
| ; GFX9-NEXT: buffer_store_dword v23, v0, s[0:3], 0 offen offset:40 |
| ; GFX9-NEXT: buffer_store_dword v22, v0, s[0:3], 0 offen offset:36 |
| ; GFX9-NEXT: buffer_store_dword v21, v0, s[0:3], 0 offen offset:32 |
| ; GFX9-NEXT: s_waitcnt vmcnt(26) |
| ; GFX9-NEXT: buffer_store_dword v28, v0, s[0:3], 0 offen offset:28 |
| ; GFX9-NEXT: buffer_store_dword v27, v0, s[0:3], 0 offen offset:24 |
| ; GFX9-NEXT: buffer_store_dword v26, v0, s[0:3], 0 offen offset:20 |
| ; GFX9-NEXT: buffer_store_dword v25, v0, s[0:3], 0 offen offset:16 |
| ; GFX9-NEXT: s_waitcnt vmcnt(29) |
| ; GFX9-NEXT: buffer_store_dword v32, v0, s[0:3], 0 offen offset:12 |
| ; GFX9-NEXT: buffer_store_dword v31, v0, s[0:3], 0 offen offset:8 |
| ; GFX9-NEXT: buffer_store_dword v30, v0, s[0:3], 0 offen offset:4 |
| ; GFX9-NEXT: buffer_store_dword v29, v0, s[0:3], 0 offen |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: struct_v32i32_i32_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: s_clause 0x8 |
| ; GFX11-NEXT: buffer_load_b128 v[1:4], off, s[0:3], 0 offset:112 |
| ; GFX11-NEXT: buffer_load_b128 v[5:8], off, s[0:3], 0 offset:96 |
| ; GFX11-NEXT: buffer_load_b128 v[9:12], off, s[0:3], 0 offset:80 |
| ; GFX11-NEXT: buffer_load_b128 v[13:16], off, s[0:3], 0 offset:64 |
| ; GFX11-NEXT: buffer_load_b128 v[17:20], off, s[0:3], 0 offset:48 |
| ; GFX11-NEXT: buffer_load_b128 v[21:24], off, s[0:3], 0 offset:32 |
| ; GFX11-NEXT: buffer_load_b128 v[25:28], off, s[0:3], 0 offset:16 |
| ; GFX11-NEXT: buffer_load_b128 v[29:32], off, s[0:3], 0 |
| ; GFX11-NEXT: buffer_load_b32 v33, off, s[0:3], 0 offset:128 |
| ; GFX11-NEXT: s_waitcnt vmcnt(8) |
| ; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:112 |
| ; GFX11-NEXT: s_waitcnt vmcnt(7) |
| ; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:96 |
| ; GFX11-NEXT: s_waitcnt vmcnt(6) |
| ; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:80 |
| ; GFX11-NEXT: s_waitcnt vmcnt(5) |
| ; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:64 |
| ; GFX11-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-NEXT: scratch_store_b128 v0, v[17:20], off offset:48 |
| ; GFX11-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-NEXT: scratch_store_b128 v0, v[21:24], off offset:32 |
| ; GFX11-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-NEXT: scratch_store_b128 v0, v[25:28], off offset:16 |
| ; GFX11-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-NEXT: scratch_store_b128 v0, v[29:32], off |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: scratch_store_b32 v0, v33, off offset:128 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %ptr = load volatile ptr addrspace(1), ptr addrspace(4) poison |
| %val = load { <32 x i32>, i32 }, ptr addrspace(1) %ptr |
| ret { <32 x i32>, i32 }%val |
| } |
| |
| ; Test: return of a { i32, <32 x i32> } aggregate, i.e. the scalar member comes |
| ; before the vector member. |
| ; The source pointer is read with a volatile load from a poison addrspace(4) |
| ; pointer, and the aggregate is then loaded through that addrspace(1) pointer. |
| ; In the expected output every dword of the result is stored to memory that is |
| ; addressed relative to v0 (presumably a caller-provided return slot; confirm |
| ; against the AMDGPU calling convention). The i32 member is at byte offset 0 and |
| ; the vector member covers byte offsets 128-252; bytes 4-127 are neither loaded |
| ; nor stored (presumably padding from the vector member's alignment; confirm |
| ; against the target data layout). |
| ; The hawaii and fiji runs compute each store address with its own v_add, the |
| ; gfx900 run folds the offset into the store instruction, and the gfx1100 run |
| ; uses 128-bit scratch stores plus one 32-bit scratch store. |
| ; The assertions below are autogenerated (see the NOTE at the top of the file); |
| ; regenerate them with utils/update_llc_test_checks.py instead of hand-editing. |
| define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 { |
| ; CI-LABEL: struct_i32_v32i32_func_void: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: s_waitcnt lgkmcnt(0) |
| ; CI-NEXT: buffer_load_dword v33, off, s[4:7], 0 |
| ; CI-NEXT: buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:240 |
| ; CI-NEXT: buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:224 |
| ; CI-NEXT: buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:208 |
| ; CI-NEXT: buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:192 |
| ; CI-NEXT: buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:176 |
| ; CI-NEXT: buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:160 |
| ; CI-NEXT: buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:144 |
| ; CI-NEXT: buffer_load_dwordx4 v[29:32], off, s[4:7], 0 offset:128 |
| ; CI-NEXT: s_waitcnt vmcnt(8) |
| ; CI-NEXT: buffer_store_dword v33, v0, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v33, vcc, 0xfc, v0 |
| ; CI-NEXT: s_waitcnt vmcnt(8) |
| ; CI-NEXT: buffer_store_dword v4, v33, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v4, vcc, 0xf8, v0 |
| ; CI-NEXT: buffer_store_dword v3, v4, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v3, vcc, 0xf4, v0 |
| ; CI-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v2, vcc, 0xf0, v0 |
| ; CI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v1, vcc, 0xec, v0 |
| ; CI-NEXT: v_add_i32_e32 v2, vcc, 0xe8, v0 |
| ; CI-NEXT: v_add_i32_e32 v3, vcc, 0xe4, v0 |
| ; CI-NEXT: s_waitcnt vmcnt(11) |
| ; CI-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v1, vcc, 0xe0, v0 |
| ; CI-NEXT: buffer_store_dword v7, v2, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v2, vcc, 0xdc, v0 |
| ; CI-NEXT: buffer_store_dword v6, v3, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v3, vcc, 0xd8, v0 |
| ; CI-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v1, vcc, 0xd4, v0 |
| ; CI-NEXT: v_add_i32_e32 v4, vcc, 0xd0, v0 |
| ; CI-NEXT: v_add_i32_e32 v5, vcc, 0xcc, v0 |
| ; CI-NEXT: v_add_i32_e32 v6, vcc, 0xc8, v0 |
| ; CI-NEXT: s_waitcnt vmcnt(14) |
| ; CI-NEXT: buffer_store_dword v12, v2, s[0:3], 0 offen |
| ; CI-NEXT: buffer_store_dword v11, v3, s[0:3], 0 offen |
| ; CI-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v1, vcc, 0xb8, v0 |
| ; CI-NEXT: v_add_i32_e32 v7, vcc, 0xc4, v0 |
| ; CI-NEXT: v_add_i32_e32 v2, vcc, 0xc0, v0 |
| ; CI-NEXT: v_add_i32_e32 v3, vcc, 0xbc, v0 |
| ; CI-NEXT: buffer_store_dword v9, v4, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v4, vcc, 0xb4, v0 |
| ; CI-NEXT: v_add_i32_e32 v8, vcc, 0xb0, v0 |
| ; CI-NEXT: v_add_i32_e32 v9, vcc, 0xac, v0 |
| ; CI-NEXT: v_add_i32_e32 v10, vcc, 0xa8, v0 |
| ; CI-NEXT: v_add_i32_e32 v11, vcc, 0xa4, v0 |
| ; CI-NEXT: s_waitcnt vmcnt(14) |
| ; CI-NEXT: buffer_store_dword v16, v5, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v5, vcc, 0xa0, v0 |
| ; CI-NEXT: buffer_store_dword v15, v6, s[0:3], 0 offen |
| ; CI-NEXT: buffer_store_dword v14, v7, s[0:3], 0 offen |
| ; CI-NEXT: buffer_store_dword v13, v2, s[0:3], 0 offen |
| ; CI-NEXT: buffer_store_dword v20, v3, s[0:3], 0 offen |
| ; CI-NEXT: buffer_store_dword v19, v1, s[0:3], 0 offen |
| ; CI-NEXT: buffer_store_dword v18, v4, s[0:3], 0 offen |
| ; CI-NEXT: buffer_store_dword v17, v8, s[0:3], 0 offen |
| ; CI-NEXT: buffer_store_dword v24, v9, s[0:3], 0 offen |
| ; CI-NEXT: buffer_store_dword v23, v10, s[0:3], 0 offen |
| ; CI-NEXT: buffer_store_dword v22, v11, s[0:3], 0 offen |
| ; CI-NEXT: buffer_store_dword v21, v5, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v1, vcc, 0x9c, v0 |
| ; CI-NEXT: buffer_store_dword v28, v1, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v1, vcc, 0x98, v0 |
| ; CI-NEXT: buffer_store_dword v27, v1, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v1, vcc, 0x94, v0 |
| ; CI-NEXT: buffer_store_dword v26, v1, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v1, vcc, 0x90, v0 |
| ; CI-NEXT: buffer_store_dword v25, v1, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v1, vcc, 0x8c, v0 |
| ; CI-NEXT: s_waitcnt vmcnt(14) |
| ; CI-NEXT: buffer_store_dword v32, v1, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v1, vcc, 0x88, v0 |
| ; CI-NEXT: buffer_store_dword v31, v1, s[0:3], 0 offen |
| ; CI-NEXT: v_add_i32_e32 v1, vcc, 0x84, v0 |
| ; CI-NEXT: v_add_i32_e32 v0, vcc, 0x80, v0 |
| ; CI-NEXT: buffer_store_dword v30, v1, s[0:3], 0 offen |
| ; CI-NEXT: buffer_store_dword v29, v0, s[0:3], 0 offen |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: struct_i32_v32i32_func_void: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GFX8-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX8-NEXT: s_mov_b32 s6, -1 |
| ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX8-NEXT: buffer_load_dword v33, off, s[4:7], 0 |
| ; GFX8-NEXT: buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:240 |
| ; GFX8-NEXT: buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:224 |
| ; GFX8-NEXT: buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:208 |
| ; GFX8-NEXT: buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:192 |
| ; GFX8-NEXT: buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:176 |
| ; GFX8-NEXT: buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:160 |
| ; GFX8-NEXT: buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:144 |
| ; GFX8-NEXT: buffer_load_dwordx4 v[29:32], off, s[4:7], 0 offset:128 |
| ; GFX8-NEXT: s_waitcnt vmcnt(8) |
| ; GFX8-NEXT: buffer_store_dword v33, v0, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v33, vcc, 0xfc, v0 |
| ; GFX8-NEXT: s_waitcnt vmcnt(8) |
| ; GFX8-NEXT: buffer_store_dword v4, v33, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0xf8, v0 |
| ; GFX8-NEXT: buffer_store_dword v3, v4, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0xf4, v0 |
| ; GFX8-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0xf0, v0 |
| ; GFX8-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0xec, v0 |
| ; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0xe8, v0 |
| ; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0xe4, v0 |
| ; GFX8-NEXT: s_waitcnt vmcnt(11) |
| ; GFX8-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0xe0, v0 |
| ; GFX8-NEXT: buffer_store_dword v7, v2, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0xdc, v0 |
| ; GFX8-NEXT: buffer_store_dword v6, v3, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0xd8, v0 |
| ; GFX8-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0xd4, v0 |
| ; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0xd0, v0 |
| ; GFX8-NEXT: v_add_u32_e32 v5, vcc, 0xcc, v0 |
| ; GFX8-NEXT: v_add_u32_e32 v6, vcc, 0xc8, v0 |
| ; GFX8-NEXT: s_waitcnt vmcnt(14) |
| ; GFX8-NEXT: buffer_store_dword v12, v2, s[0:3], 0 offen |
| ; GFX8-NEXT: buffer_store_dword v11, v3, s[0:3], 0 offen |
| ; GFX8-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0xb8, v0 |
| ; GFX8-NEXT: v_add_u32_e32 v7, vcc, 0xc4, v0 |
| ; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0xc0, v0 |
| ; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0xbc, v0 |
| ; GFX8-NEXT: buffer_store_dword v9, v4, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0xb4, v0 |
| ; GFX8-NEXT: v_add_u32_e32 v8, vcc, 0xb0, v0 |
| ; GFX8-NEXT: v_add_u32_e32 v9, vcc, 0xac, v0 |
| ; GFX8-NEXT: v_add_u32_e32 v10, vcc, 0xa8, v0 |
| ; GFX8-NEXT: v_add_u32_e32 v11, vcc, 0xa4, v0 |
| ; GFX8-NEXT: s_waitcnt vmcnt(14) |
| ; GFX8-NEXT: buffer_store_dword v16, v5, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v5, vcc, 0xa0, v0 |
| ; GFX8-NEXT: buffer_store_dword v15, v6, s[0:3], 0 offen |
| ; GFX8-NEXT: buffer_store_dword v14, v7, s[0:3], 0 offen |
| ; GFX8-NEXT: buffer_store_dword v13, v2, s[0:3], 0 offen |
| ; GFX8-NEXT: buffer_store_dword v20, v3, s[0:3], 0 offen |
| ; GFX8-NEXT: buffer_store_dword v19, v1, s[0:3], 0 offen |
| ; GFX8-NEXT: buffer_store_dword v18, v4, s[0:3], 0 offen |
| ; GFX8-NEXT: buffer_store_dword v17, v8, s[0:3], 0 offen |
| ; GFX8-NEXT: buffer_store_dword v24, v9, s[0:3], 0 offen |
| ; GFX8-NEXT: buffer_store_dword v23, v10, s[0:3], 0 offen |
| ; GFX8-NEXT: buffer_store_dword v22, v11, s[0:3], 0 offen |
| ; GFX8-NEXT: buffer_store_dword v21, v5, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x9c, v0 |
| ; GFX8-NEXT: buffer_store_dword v28, v1, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x98, v0 |
| ; GFX8-NEXT: buffer_store_dword v27, v1, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x94, v0 |
| ; GFX8-NEXT: buffer_store_dword v26, v1, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x90, v0 |
| ; GFX8-NEXT: buffer_store_dword v25, v1, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x8c, v0 |
| ; GFX8-NEXT: s_waitcnt vmcnt(14) |
| ; GFX8-NEXT: buffer_store_dword v32, v1, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x88, v0 |
| ; GFX8-NEXT: buffer_store_dword v31, v1, s[0:3], 0 offen |
| ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x84, v0 |
| ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0x80, v0 |
| ; GFX8-NEXT: buffer_store_dword v30, v1, s[0:3], 0 offen |
| ; GFX8-NEXT: buffer_store_dword v29, v0, s[0:3], 0 offen |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: struct_i32_v32i32_func_void: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 |
| ; GFX9-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX9-NEXT: s_mov_b32 s6, -1 |
| ; GFX9-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX9-NEXT: buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:240 |
| ; GFX9-NEXT: buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:224 |
| ; GFX9-NEXT: buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:208 |
| ; GFX9-NEXT: buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:192 |
| ; GFX9-NEXT: buffer_load_dword v33, off, s[4:7], 0 |
| ; GFX9-NEXT: buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:176 |
| ; GFX9-NEXT: buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:160 |
| ; GFX9-NEXT: buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:144 |
| ; GFX9-NEXT: buffer_load_dwordx4 v[29:32], off, s[4:7], 0 offset:128 |
| ; GFX9-NEXT: s_waitcnt vmcnt(8) |
| ; GFX9-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:252 |
| ; GFX9-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:248 |
| ; GFX9-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:244 |
| ; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:240 |
| ; GFX9-NEXT: s_waitcnt vmcnt(11) |
| ; GFX9-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:236 |
| ; GFX9-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:232 |
| ; GFX9-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:228 |
| ; GFX9-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:224 |
| ; GFX9-NEXT: s_waitcnt vmcnt(14) |
| ; GFX9-NEXT: buffer_store_dword v12, v0, s[0:3], 0 offen offset:220 |
| ; GFX9-NEXT: buffer_store_dword v11, v0, s[0:3], 0 offen offset:216 |
| ; GFX9-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:212 |
| ; GFX9-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:208 |
| ; GFX9-NEXT: s_waitcnt vmcnt(17) |
| ; GFX9-NEXT: buffer_store_dword v16, v0, s[0:3], 0 offen offset:204 |
| ; GFX9-NEXT: buffer_store_dword v15, v0, s[0:3], 0 offen offset:200 |
| ; GFX9-NEXT: buffer_store_dword v14, v0, s[0:3], 0 offen offset:196 |
| ; GFX9-NEXT: buffer_store_dword v13, v0, s[0:3], 0 offen offset:192 |
| ; GFX9-NEXT: s_waitcnt vmcnt(20) |
| ; GFX9-NEXT: buffer_store_dword v33, v0, s[0:3], 0 offen |
| ; GFX9-NEXT: s_waitcnt vmcnt(20) |
| ; GFX9-NEXT: buffer_store_dword v20, v0, s[0:3], 0 offen offset:188 |
| ; GFX9-NEXT: buffer_store_dword v19, v0, s[0:3], 0 offen offset:184 |
| ; GFX9-NEXT: buffer_store_dword v18, v0, s[0:3], 0 offen offset:180 |
| ; GFX9-NEXT: buffer_store_dword v17, v0, s[0:3], 0 offen offset:176 |
| ; GFX9-NEXT: s_waitcnt vmcnt(23) |
| ; GFX9-NEXT: buffer_store_dword v24, v0, s[0:3], 0 offen offset:172 |
| ; GFX9-NEXT: buffer_store_dword v23, v0, s[0:3], 0 offen offset:168 |
| ; GFX9-NEXT: buffer_store_dword v22, v0, s[0:3], 0 offen offset:164 |
| ; GFX9-NEXT: buffer_store_dword v21, v0, s[0:3], 0 offen offset:160 |
| ; GFX9-NEXT: s_waitcnt vmcnt(26) |
| ; GFX9-NEXT: buffer_store_dword v28, v0, s[0:3], 0 offen offset:156 |
| ; GFX9-NEXT: buffer_store_dword v27, v0, s[0:3], 0 offen offset:152 |
| ; GFX9-NEXT: buffer_store_dword v26, v0, s[0:3], 0 offen offset:148 |
| ; GFX9-NEXT: buffer_store_dword v25, v0, s[0:3], 0 offen offset:144 |
| ; GFX9-NEXT: s_waitcnt vmcnt(29) |
| ; GFX9-NEXT: buffer_store_dword v32, v0, s[0:3], 0 offen offset:140 |
| ; GFX9-NEXT: buffer_store_dword v31, v0, s[0:3], 0 offen offset:136 |
| ; GFX9-NEXT: buffer_store_dword v30, v0, s[0:3], 0 offen offset:132 |
| ; GFX9-NEXT: buffer_store_dword v29, v0, s[0:3], 0 offen offset:128 |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: struct_i32_v32i32_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: s_clause 0x8 |
| ; GFX11-NEXT: buffer_load_b128 v[1:4], off, s[0:3], 0 offset:240 |
| ; GFX11-NEXT: buffer_load_b128 v[5:8], off, s[0:3], 0 offset:224 |
| ; GFX11-NEXT: buffer_load_b128 v[9:12], off, s[0:3], 0 offset:208 |
| ; GFX11-NEXT: buffer_load_b128 v[13:16], off, s[0:3], 0 offset:192 |
| ; GFX11-NEXT: buffer_load_b128 v[17:20], off, s[0:3], 0 offset:176 |
| ; GFX11-NEXT: buffer_load_b128 v[21:24], off, s[0:3], 0 offset:160 |
| ; GFX11-NEXT: buffer_load_b128 v[25:28], off, s[0:3], 0 offset:144 |
| ; GFX11-NEXT: buffer_load_b128 v[29:32], off, s[0:3], 0 offset:128 |
| ; GFX11-NEXT: buffer_load_b32 v33, off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(8) |
| ; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:240 |
| ; GFX11-NEXT: s_waitcnt vmcnt(7) |
| ; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:224 |
| ; GFX11-NEXT: s_waitcnt vmcnt(6) |
| ; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:208 |
| ; GFX11-NEXT: s_waitcnt vmcnt(5) |
| ; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:192 |
| ; GFX11-NEXT: s_waitcnt vmcnt(4) |
| ; GFX11-NEXT: scratch_store_b128 v0, v[17:20], off offset:176 |
| ; GFX11-NEXT: s_waitcnt vmcnt(3) |
| ; GFX11-NEXT: scratch_store_b128 v0, v[21:24], off offset:160 |
| ; GFX11-NEXT: s_waitcnt vmcnt(2) |
| ; GFX11-NEXT: scratch_store_b128 v0, v[25:28], off offset:144 |
| ; GFX11-NEXT: s_waitcnt vmcnt(1) |
| ; GFX11-NEXT: scratch_store_b128 v0, v[29:32], off offset:128 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: scratch_store_b32 v0, v33, off |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %ptr = load volatile ptr addrspace(1), ptr addrspace(4) poison |
| %val = load { i32, <32 x i32> }, ptr addrspace(1) %ptr |
| ret { i32, <32 x i32> }%val |
| } |
| |
| ; Make sure the last struct component is returned in v3, not v4. |
| ; The <3 x i32> member is assembled from three volatile addrspace(3) loads and |
| ; the trailing i32 comes from a fourth one. All four run lines expect the |
| ; loaded values in v0, v1, v2 and v3, i.e. no VGPR is skipped after the |
| ; 3-element vector. |
| define { <3 x i32>, i32 } @v3i32_struct_func_void_wasted_reg() #0 { |
| ; CI-LABEL: v3i32_struct_func_void_wasted_reg: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: s_mov_b32 m0, -1 |
| ; CI-NEXT: ds_read_b32 v0, v0 |
| ; CI-NEXT: s_waitcnt lgkmcnt(0) |
| ; CI-NEXT: ds_read_b32 v1, v0 |
| ; CI-NEXT: ds_read_b32 v2, v0 |
| ; CI-NEXT: ds_read_b32 v3, v0 |
| ; CI-NEXT: s_waitcnt lgkmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: v3i32_struct_func_void_wasted_reg: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: s_mov_b32 m0, -1 |
| ; GFX8-NEXT: ds_read_b32 v0, v0 |
| ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX8-NEXT: ds_read_b32 v1, v0 |
| ; GFX8-NEXT: ds_read_b32 v2, v0 |
| ; GFX8-NEXT: ds_read_b32 v3, v0 |
| ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: v3i32_struct_func_void_wasted_reg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: ds_read_b32 v0, v0 |
| ; GFX9-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX9-NEXT: ds_read_b32 v1, v0 |
| ; GFX9-NEXT: ds_read_b32 v2, v0 |
| ; GFX9-NEXT: ds_read_b32 v3, v0 |
| ; GFX9-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v3i32_struct_func_void_wasted_reg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: ds_load_b32 v0, v0 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: ds_load_b32 v1, v0 |
| ; GFX11-NEXT: ds_load_b32 v2, v0 |
| ; GFX11-NEXT: ds_load_b32 v3, v0 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %load0 = load volatile i32, ptr addrspace(3) poison |
| %load1 = load volatile i32, ptr addrspace(3) poison |
| %load2 = load volatile i32, ptr addrspace(3) poison |
| %load3 = load volatile i32, ptr addrspace(3) poison |
| |
| ; Pack the first three loads into the vector, then build the returned struct |
| ; with the fourth load as its scalar member. |
| %insert.0 = insertelement <3 x i32> poison, i32 %load0, i32 0 |
| %insert.1 = insertelement <3 x i32> %insert.0, i32 %load1, i32 1 |
| %insert.2 = insertelement <3 x i32> %insert.1, i32 %load2, i32 2 |
| %insert.3 = insertvalue { <3 x i32>, i32 } poison, <3 x i32> %insert.2, 0 |
| %insert.4 = insertvalue { <3 x i32>, i32 } %insert.3, i32 %load3, 1 |
| ret { <3 x i32>, i32 } %insert.4 |
| } |
| |
| ; Float-vector counterpart of v3i32_struct_func_void_wasted_reg: returns |
| ; { <3 x float>, i32 } built from three volatile float loads and one volatile |
| ; i32 load, all from addrspace(3). The expected output again places the four |
| ; values in v0, v1, v2 and v3 for every run line. |
| define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 { |
| ; CI-LABEL: v3f32_struct_func_void_wasted_reg: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: s_mov_b32 m0, -1 |
| ; CI-NEXT: ds_read_b32 v0, v0 |
| ; CI-NEXT: s_waitcnt lgkmcnt(0) |
| ; CI-NEXT: ds_read_b32 v1, v0 |
| ; CI-NEXT: ds_read_b32 v2, v0 |
| ; CI-NEXT: ds_read_b32 v3, v0 |
| ; CI-NEXT: s_waitcnt lgkmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: v3f32_struct_func_void_wasted_reg: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: s_mov_b32 m0, -1 |
| ; GFX8-NEXT: ds_read_b32 v0, v0 |
| ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX8-NEXT: ds_read_b32 v1, v0 |
| ; GFX8-NEXT: ds_read_b32 v2, v0 |
| ; GFX8-NEXT: ds_read_b32 v3, v0 |
| ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: v3f32_struct_func_void_wasted_reg: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: ds_read_b32 v0, v0 |
| ; GFX9-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX9-NEXT: ds_read_b32 v1, v0 |
| ; GFX9-NEXT: ds_read_b32 v2, v0 |
| ; GFX9-NEXT: ds_read_b32 v3, v0 |
| ; GFX9-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v3f32_struct_func_void_wasted_reg: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: ds_load_b32 v0, v0 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: ds_load_b32 v1, v0 |
| ; GFX11-NEXT: ds_load_b32 v2, v0 |
| ; GFX11-NEXT: ds_load_b32 v3, v0 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %load0 = load volatile float, ptr addrspace(3) poison |
| %load1 = load volatile float, ptr addrspace(3) poison |
| %load2 = load volatile float, ptr addrspace(3) poison |
| %load3 = load volatile i32, ptr addrspace(3) poison |
| |
| ; Pack the three float loads into the vector, then build the returned struct |
| ; with the i32 load as its scalar member. |
| %insert.0 = insertelement <3 x float> poison, float %load0, i32 0 |
| %insert.1 = insertelement <3 x float> %insert.0, float %load1, i32 1 |
| %insert.2 = insertelement <3 x float> %insert.1, float %load2, i32 2 |
| %insert.3 = insertvalue { <3 x float>, i32 } poison, <3 x float> %insert.2, 0 |
| %insert.4 = insertvalue { <3 x float>, i32 } %insert.3, i32 %load3, 1 |
| ret { <3 x float>, i32 } %insert.4 |
| } |
| |
| ; Converts the addrspace(5) sret pointer to an i32 and stores it shifted right |
| ; by 16, 17 and 18 with three volatile addrspace(3) stores. The expected code |
| ; for the CI, GFX8 and GFX9 runs keeps only the shift by 16 and stores a |
| ; constant 0 for the other two; the GFX11 run keeps the shifts by 16 and 17 and |
| ; stores a constant 0 only for the shift by 18. |
| ; NOTE(review): presumably the folded shifts read only pointer bits that the |
| ; backend treats as known zero, and that bound is one bit higher on the GFX11 |
| ; configuration -- confirm against the target's known-bits handling. |
| define void @void_func_sret_max_known_zero_bits(ptr addrspace(5) sret(i8) %arg0) #0 { |
| ; CI-LABEL: void_func_sret_max_known_zero_bits: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 |
| ; CI-NEXT: s_mov_b32 m0, -1 |
| ; CI-NEXT: ds_write_b32 v0, v0 |
| ; CI-NEXT: v_mov_b32_e32 v0, 0 |
| ; CI-NEXT: ds_write_b32 v0, v0 |
| ; CI-NEXT: ds_write_b32 v0, v0 |
| ; CI-NEXT: s_waitcnt lgkmcnt(0) |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX8-LABEL: void_func_sret_max_known_zero_bits: |
| ; GFX8: ; %bb.0: |
| ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 |
| ; GFX8-NEXT: s_mov_b32 m0, -1 |
| ; GFX8-NEXT: ds_write_b32 v0, v0 |
| ; GFX8-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX8-NEXT: ds_write_b32 v0, v0 |
| ; GFX8-NEXT: ds_write_b32 v0, v0 |
| ; GFX8-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX8-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX9-LABEL: void_func_sret_max_known_zero_bits: |
| ; GFX9: ; %bb.0: |
| ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 |
| ; GFX9-NEXT: ds_write_b32 v0, v0 |
| ; GFX9-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX9-NEXT: ds_write_b32 v0, v0 |
| ; GFX9-NEXT: ds_write_b32 v0, v0 |
| ; GFX9-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX9-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: void_func_sret_max_known_zero_bits: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0 |
| ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 17, v0 |
| ; GFX11-NEXT: v_mov_b32_e32 v2, 0 |
| ; GFX11-NEXT: ds_store_b32 v0, v1 |
| ; GFX11-NEXT: ds_store_b32 v0, v0 |
| ; GFX11-NEXT: ds_store_b32 v0, v2 |
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %arg0.int = ptrtoint ptr addrspace(5) %arg0 to i32 |
| |
| ; Three adjacent shift amounts probe where the known-zero folding starts. |
| %lshr0 = lshr i32 %arg0.int, 16 |
| %lshr1 = lshr i32 %arg0.int, 17 |
| %lshr2 = lshr i32 %arg0.int, 18 |
| |
| ; Volatile stores keep all three values observable in the output. |
| store volatile i32 %lshr0, ptr addrspace(3) poison |
| store volatile i32 %lshr1, ptr addrspace(3) poison |
| store volatile i32 %lshr2, ptr addrspace(3) poison |
| ret void |
| } |
| |
| ; Returns a single bfloat loaded from a poison addrspace(1) pointer. |
| ; The CI run expects the loaded 16 bits to be shifted left by 16 in v0 before |
| ; the return; the GFX8/GFX9 and GFX11 runs return the 16-bit load result in v0 |
| ; with no further instruction. |
| define bfloat @bf16_func_void() #0 { |
| ; CI-LABEL: bf16_func_void: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: buffer_load_ushort v0, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: bf16_func_void: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX89-NEXT: s_mov_b32 s6, -1 |
| ; GFX89-NEXT: buffer_load_ushort v0, off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: bf16_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_load_u16 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %val = load bfloat, ptr addrspace(1) poison |
| ret bfloat %val |
| } |
| |
| ; Returns <2 x bfloat> loaded from a poison addrspace(1) pointer. |
| ; The CI run expects one dword load followed by a shift and a mask so that each |
| ; element sits in the upper 16 bits of its own register (v0, v1). The GFX8/GFX9 |
| ; and GFX11 runs return the loaded dword in v0 as-is. |
| define <2 x bfloat> @v2bf16_func_void() #0 { |
| ; CI-LABEL: v2bf16_func_void: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: buffer_load_dword v1, off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v1 |
| ; CI-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: v2bf16_func_void: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX89-NEXT: s_mov_b32 s6, -1 |
| ; GFX89-NEXT: buffer_load_dword v0, off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v2bf16_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_load_b32 v0, off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %val = load <2 x bfloat>, ptr addrspace(1) poison |
| ret <2 x bfloat> %val |
| } |
| |
| ; Returns <3 x bfloat> loaded from a poison addrspace(1) pointer. |
| ; The CI run expects a two-dword load that is split into three registers |
| ; (v0, v1, v2), each element in the upper 16 bits. The GFX8/GFX9 and GFX11 runs |
| ; return the two loaded dwords in v[0:1] unchanged. |
| define <3 x bfloat> @v3bf16_func_void() #0 { |
| ; CI-LABEL: v3bf16_func_void: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: buffer_load_dwordx2 v[1:2], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v1 |
| ; CI-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 |
| ; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: v3bf16_func_void: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX89-NEXT: s_mov_b32 s6, -1 |
| ; GFX89-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v3bf16_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %val = load <3 x bfloat>, ptr addrspace(1) poison |
| ret <3 x bfloat> %val |
| } |
| |
| ; Returns <4 x bfloat> loaded from a poison addrspace(1) pointer. |
| ; The CI run expects a two-dword load expanded with shift/mask pairs into four |
| ; registers (v0..v3), one element per register in the upper 16 bits. The |
| ; GFX8/GFX9 and GFX11 runs return the two loaded dwords in v[0:1] unchanged. |
| define <4 x bfloat> @v4bf16_func_void() #0 { |
| ; CI-LABEL: v4bf16_func_void: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: buffer_load_dwordx2 v[2:3], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v2 |
| ; CI-NEXT: v_and_b32_e32 v1, 0xffff0000, v2 |
| ; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v3 |
| ; CI-NEXT: v_and_b32_e32 v3, 0xffff0000, v3 |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: v4bf16_func_void: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX89-NEXT: s_mov_b32 s6, -1 |
| ; GFX89-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v4bf16_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %val = load <4 x bfloat>, ptr addrspace(1) poison |
| ret <4 x bfloat> %val |
| } |
| |
| ; Returns <6 x bfloat> loaded from a poison addrspace(1) pointer. |
| ; The CI run expects a three-dword load expanded with shift/mask pairs into six |
| ; registers (v0..v5), one element per register in the upper 16 bits. The |
| ; GFX8/GFX9 and GFX11 runs return the three loaded dwords in v[0:2] unchanged. |
| define <6 x bfloat> @v6bf16_func_void() #0 { |
| ; CI-LABEL: v6bf16_func_void: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: buffer_load_dwordx3 v[3:5], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v3 |
| ; CI-NEXT: v_and_b32_e32 v1, 0xffff0000, v3 |
| ; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v4 |
| ; CI-NEXT: v_and_b32_e32 v3, 0xffff0000, v4 |
| ; CI-NEXT: v_lshlrev_b32_e32 v4, 16, v5 |
| ; CI-NEXT: v_and_b32_e32 v5, 0xffff0000, v5 |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: v6bf16_func_void: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX89-NEXT: s_mov_b32 s6, -1 |
| ; GFX89-NEXT: buffer_load_dwordx3 v[0:2], off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v6bf16_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_load_b96 v[0:2], off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %val = load <6 x bfloat>, ptr addrspace(1) poison |
| ret <6 x bfloat> %val |
| } |
| |
| ; Returns <8 x bfloat> loaded from a poison addrspace(1) pointer. |
| ; The CI run expects a four-dword load expanded with shift/mask pairs into eight |
| ; registers (v0..v7), one element per register in the upper 16 bits. The |
| ; GFX8/GFX9 and GFX11 runs return the four loaded dwords in v[0:3] unchanged. |
| define <8 x bfloat> @v8bf16_func_void() #0 { |
| ; CI-LABEL: v8bf16_func_void: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v4 |
| ; CI-NEXT: v_and_b32_e32 v1, 0xffff0000, v4 |
| ; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v5 |
| ; CI-NEXT: v_and_b32_e32 v3, 0xffff0000, v5 |
| ; CI-NEXT: v_lshlrev_b32_e32 v4, 16, v6 |
| ; CI-NEXT: v_and_b32_e32 v5, 0xffff0000, v6 |
| ; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v7 |
| ; CI-NEXT: v_and_b32_e32 v7, 0xffff0000, v7 |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: v8bf16_func_void: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX89-NEXT: s_mov_b32 s6, -1 |
| ; GFX89-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v8bf16_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %val = load <8 x bfloat>, ptr addrspace(1) poison |
| ret <8 x bfloat> %val |
| } |
| |
| ; Returns <16 x bfloat> loaded from a poison addrspace(1) pointer. |
| ; The CI run expects a single four-dword load expanded into v0..v7 and then |
| ; copied into v8..v15. The GFX8/GFX9 and GFX11 runs expect a single four-dword |
| ; load into v[0:3] that is then copied into v4..v7. |
| ; NOTE(review): only one 16-byte load is emitted for a 32-byte value and its |
| ; result is duplicated; presumably this follows from the poison address -- |
| ; confirm before relying on it. |
| define <16 x bfloat> @v16bf16_func_void() #0 { |
| ; CI-LABEL: v16bf16_func_void: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v4 |
| ; CI-NEXT: v_and_b32_e32 v1, 0xffff0000, v4 |
| ; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v5 |
| ; CI-NEXT: v_and_b32_e32 v3, 0xffff0000, v5 |
| ; CI-NEXT: v_lshlrev_b32_e32 v4, 16, v6 |
| ; CI-NEXT: v_and_b32_e32 v5, 0xffff0000, v6 |
| ; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v7 |
| ; CI-NEXT: v_and_b32_e32 v7, 0xffff0000, v7 |
| ; CI-NEXT: v_mov_b32_e32 v8, v0 |
| ; CI-NEXT: v_mov_b32_e32 v9, v1 |
| ; CI-NEXT: v_mov_b32_e32 v10, v2 |
| ; CI-NEXT: v_mov_b32_e32 v11, v3 |
| ; CI-NEXT: v_mov_b32_e32 v12, v4 |
| ; CI-NEXT: v_mov_b32_e32 v13, v5 |
| ; CI-NEXT: v_mov_b32_e32 v14, v6 |
| ; CI-NEXT: v_mov_b32_e32 v15, v7 |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: v16bf16_func_void: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX89-NEXT: s_mov_b32 s6, -1 |
| ; GFX89-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: v_mov_b32_e32 v4, v0 |
| ; GFX89-NEXT: v_mov_b32_e32 v5, v1 |
| ; GFX89-NEXT: v_mov_b32_e32 v6, v2 |
| ; GFX89-NEXT: v_mov_b32_e32 v7, v3 |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v16bf16_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v4, v0 :: v_dual_mov_b32 v5, v1 |
| ; GFX11-NEXT: v_dual_mov_b32 v6, v2 :: v_dual_mov_b32 v7, v3 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %val = load <16 x bfloat>, ptr addrspace(1) poison |
| ret <16 x bfloat> %val |
| } |
| |
| ; Returns <32 x bfloat> loaded from a poison addrspace(1) pointer. |
| ; The CI run expects a single four-dword load expanded into v0..v7 and then |
| ; copied so that v8..v31 repeat those eight values. The GFX8/GFX9 and GFX11 |
| ; runs expect a single four-dword load into v[0:3] that is copied so that |
| ; v4..v15 repeat those four values. |
| ; NOTE(review): only one 16-byte load is emitted for a 64-byte value and its |
| ; result is duplicated; presumably this follows from the poison address -- |
| ; confirm before relying on it. |
| define <32 x bfloat> @v32bf16_func_void() #0 { |
| ; CI-LABEL: v32bf16_func_void: |
| ; CI: ; %bb.0: |
| ; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CI-NEXT: s_mov_b32 s7, 0xf000 |
| ; CI-NEXT: s_mov_b32 s6, -1 |
| ; CI-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 |
| ; CI-NEXT: s_waitcnt vmcnt(0) |
| ; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v4 |
| ; CI-NEXT: v_and_b32_e32 v1, 0xffff0000, v4 |
| ; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v5 |
| ; CI-NEXT: v_and_b32_e32 v3, 0xffff0000, v5 |
| ; CI-NEXT: v_lshlrev_b32_e32 v4, 16, v6 |
| ; CI-NEXT: v_and_b32_e32 v5, 0xffff0000, v6 |
| ; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v7 |
| ; CI-NEXT: v_and_b32_e32 v7, 0xffff0000, v7 |
| ; CI-NEXT: v_mov_b32_e32 v8, v0 |
| ; CI-NEXT: v_mov_b32_e32 v9, v1 |
| ; CI-NEXT: v_mov_b32_e32 v10, v2 |
| ; CI-NEXT: v_mov_b32_e32 v11, v3 |
| ; CI-NEXT: v_mov_b32_e32 v16, v0 |
| ; CI-NEXT: v_mov_b32_e32 v17, v1 |
| ; CI-NEXT: v_mov_b32_e32 v18, v2 |
| ; CI-NEXT: v_mov_b32_e32 v19, v3 |
| ; CI-NEXT: v_mov_b32_e32 v24, v0 |
| ; CI-NEXT: v_mov_b32_e32 v25, v1 |
| ; CI-NEXT: v_mov_b32_e32 v26, v2 |
| ; CI-NEXT: v_mov_b32_e32 v27, v3 |
| ; CI-NEXT: v_mov_b32_e32 v12, v4 |
| ; CI-NEXT: v_mov_b32_e32 v20, v4 |
| ; CI-NEXT: v_mov_b32_e32 v28, v4 |
| ; CI-NEXT: v_mov_b32_e32 v13, v5 |
| ; CI-NEXT: v_mov_b32_e32 v21, v5 |
| ; CI-NEXT: v_mov_b32_e32 v29, v5 |
| ; CI-NEXT: v_mov_b32_e32 v14, v6 |
| ; CI-NEXT: v_mov_b32_e32 v22, v6 |
| ; CI-NEXT: v_mov_b32_e32 v30, v6 |
| ; CI-NEXT: v_mov_b32_e32 v15, v7 |
| ; CI-NEXT: v_mov_b32_e32 v23, v7 |
| ; CI-NEXT: v_mov_b32_e32 v31, v7 |
| ; CI-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX89-LABEL: v32bf16_func_void: |
| ; GFX89: ; %bb.0: |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX89-NEXT: s_mov_b32 s7, 0xf000 |
| ; GFX89-NEXT: s_mov_b32 s6, -1 |
| ; GFX89-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 |
| ; GFX89-NEXT: s_waitcnt vmcnt(0) |
| ; GFX89-NEXT: v_mov_b32_e32 v4, v0 |
| ; GFX89-NEXT: v_mov_b32_e32 v5, v1 |
| ; GFX89-NEXT: v_mov_b32_e32 v6, v2 |
| ; GFX89-NEXT: v_mov_b32_e32 v7, v3 |
| ; GFX89-NEXT: v_mov_b32_e32 v8, v0 |
| ; GFX89-NEXT: v_mov_b32_e32 v9, v1 |
| ; GFX89-NEXT: v_mov_b32_e32 v10, v2 |
| ; GFX89-NEXT: v_mov_b32_e32 v11, v3 |
| ; GFX89-NEXT: v_mov_b32_e32 v12, v0 |
| ; GFX89-NEXT: v_mov_b32_e32 v13, v1 |
| ; GFX89-NEXT: v_mov_b32_e32 v14, v2 |
| ; GFX89-NEXT: v_mov_b32_e32 v15, v3 |
| ; GFX89-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX11-LABEL: v32bf16_func_void: |
| ; GFX11: ; %bb.0: |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX11-NEXT: s_mov_b32 s3, 0x31016000 |
| ; GFX11-NEXT: s_mov_b32 s2, -1 |
| ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 |
| ; GFX11-NEXT: s_waitcnt vmcnt(0) |
| ; GFX11-NEXT: v_dual_mov_b32 v4, v0 :: v_dual_mov_b32 v5, v1 |
| ; GFX11-NEXT: v_dual_mov_b32 v6, v2 :: v_dual_mov_b32 v7, v3 |
| ; GFX11-NEXT: v_dual_mov_b32 v8, v0 :: v_dual_mov_b32 v9, v1 |
| ; GFX11-NEXT: v_dual_mov_b32 v10, v2 :: v_dual_mov_b32 v11, v3 |
| ; GFX11-NEXT: v_dual_mov_b32 v12, v0 :: v_dual_mov_b32 v13, v1 |
| ; GFX11-NEXT: v_dual_mov_b32 v14, v2 :: v_dual_mov_b32 v15, v3 |
| ; GFX11-NEXT: s_setpc_b64 s[30:31] |
| %val = load <32 x bfloat>, ptr addrspace(1) poison |
| ret <32 x bfloat> %val |
| } |
| |
| ; Attribute group referenced as #0 by the function definitions in this file. |
| attributes #0 = { nounwind } |