; blob: 22bfd0b12096e4904b8818a0001370528b2f2cb2 [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx940 < %s | FileCheck %s -check-prefix=GFX940
; Test flat scratch SVS addressing mode with various combinations of alignment
; of soffset, voffset and inst_offset.
; Intrinsic used by every kernel in this file as the source of the per-workitem
; (vector) part of the scratch address: its result is scaled by 1, 2 or 4.
declare i32 @llvm.amdgcn.workitem.id.x()
; soff1_voff1: scalar offset = %soff * 1, vector offset = workitem id * 1, so
; neither offset gets any extra alignment from its multiply.
; The address is alloca + scalar offset + vector offset, followed by volatile
; byte stores at constant offsets 1, 2 and 4.
; Expected code: every scratch_store_byte keeps the three parts separate, with
; v0 holding the vector offset, s0 the scalar offset and the constant in the
; instruction's offset field. No scaling instruction is expected for either
; offset. The "s_add_i32 s0, s0, 4" is presumably the alloca's frame offset
; (TODO confirm).
define amdgpu_kernel void @soff1_voff1(i32 %soff) {
; GFX940-LABEL: soff1_voff1:
; GFX940: ; %bb.0: ; %bb
; GFX940-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-NEXT: v_mov_b32_e32 v1, 1
; GFX940-NEXT: v_mov_b32_e32 v2, 2
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NEXT: s_add_i32 s0, s0, 4
; GFX940-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: v_mov_b32_e32 v1, 4
; GFX940-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_endpgm
bb:
; Scalar part of the address: the kernel argument scaled by 1.
%soff1 = mul i32 %soff, 1
; 64-byte, 4-byte-aligned buffer in addrspace(5).
%a = alloca i8, i32 64, align 4, addrspace(5)
%as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff1
; Vector part of the address: the workitem id scaled by 1.
%voff = call i32 @llvm.amdgcn.workitem.id.x()
%voff1 = mul i32 %voff, 1
%asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff1
; Constant offsets 1, 2 and 4 on top of the combined pointer. The stores are
; volatile so they cannot be merged or dropped.
%p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
store volatile i8 1, i8 addrspace(5)* %p1
%p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
store volatile i8 2, i8 addrspace(5)* %p2
%p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
store volatile i8 4, i8 addrspace(5)* %p4
ret void
}
; soff1_voff2: scalar offset = %soff * 1, vector offset = workitem id * 2, so
; only the vector offset is a known multiple of 2.
; The address is alloca + scalar offset + vector offset, followed by volatile
; byte stores at constant offsets 1, 2 and 4.
; Expected code: v0 is scaled with "v_mul_u32_u24_e32 v0, 2, v0" and then used
; as the vector offset; s0 is the scalar offset and the constant stays in the
; instruction's offset field. The "s_add_i32 s0, s0, 4" is presumably the
; alloca's frame offset (TODO confirm).
define amdgpu_kernel void @soff1_voff2(i32 %soff) {
; GFX940-LABEL: soff1_voff2:
; GFX940: ; %bb.0: ; %bb
; GFX940-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-NEXT: v_mov_b32_e32 v1, 1
; GFX940-NEXT: v_mul_u32_u24_e32 v0, 2, v0
; GFX940-NEXT: v_mov_b32_e32 v2, 2
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NEXT: s_add_i32 s0, s0, 4
; GFX940-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: v_mov_b32_e32 v1, 4
; GFX940-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_endpgm
bb:
; Scalar part of the address: the kernel argument scaled by 1.
%soff1 = mul i32 %soff, 1
; 64-byte, 4-byte-aligned buffer in addrspace(5).
%a = alloca i8, i32 64, align 4, addrspace(5)
%as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff1
; Vector part of the address: the workitem id scaled by 2.
%voff = call i32 @llvm.amdgcn.workitem.id.x()
%voff2 = mul i32 %voff, 2
%asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff2
; Constant offsets 1, 2 and 4 on top of the combined pointer. The stores are
; volatile so they cannot be merged or dropped.
%p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
store volatile i8 1, i8 addrspace(5)* %p1
%p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
store volatile i8 2, i8 addrspace(5)* %p2
%p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
store volatile i8 4, i8 addrspace(5)* %p4
ret void
}
; soff1_voff4: scalar offset = %soff * 1, vector offset = workitem id * 4, so
; only the vector offset is a known multiple of 4.
; The address is alloca + scalar offset + vector offset, followed by volatile
; byte stores at constant offsets 1, 2 and 4.
; Expected code: v0 is scaled with "v_mul_u32_u24_e32 v0, 4, v0" and then used
; as the vector offset; s0 is the scalar offset and the constant stays in the
; instruction's offset field. The "s_add_i32 s0, s0, 4" is presumably the
; alloca's frame offset (TODO confirm).
define amdgpu_kernel void @soff1_voff4(i32 %soff) {
; GFX940-LABEL: soff1_voff4:
; GFX940: ; %bb.0: ; %bb
; GFX940-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-NEXT: v_mov_b32_e32 v1, 1
; GFX940-NEXT: v_mul_u32_u24_e32 v0, 4, v0
; GFX940-NEXT: v_mov_b32_e32 v2, 2
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NEXT: s_add_i32 s0, s0, 4
; GFX940-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: v_mov_b32_e32 v1, 4
; GFX940-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_endpgm
bb:
; Scalar part of the address: the kernel argument scaled by 1.
%soff1 = mul i32 %soff, 1
; 64-byte, 4-byte-aligned buffer in addrspace(5).
%a = alloca i8, i32 64, align 4, addrspace(5)
%as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff1
; Vector part of the address: the workitem id scaled by 4.
%voff = call i32 @llvm.amdgcn.workitem.id.x()
%voff4 = mul i32 %voff, 4
%asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff4
; Constant offsets 1, 2 and 4 on top of the combined pointer. The stores are
; volatile so they cannot be merged or dropped.
%p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
store volatile i8 1, i8 addrspace(5)* %p1
%p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
store volatile i8 2, i8 addrspace(5)* %p2
%p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
store volatile i8 4, i8 addrspace(5)* %p4
ret void
}
; soff2_voff1: scalar offset = %soff * 2, vector offset = workitem id * 1, so
; only the scalar offset is a known multiple of 2.
; The address is alloca + scalar offset + vector offset, followed by volatile
; byte stores at constant offsets 1, 2 and 4.
; Expected code: the scalar multiply becomes "s_lshl_b32 s0, s0, 1"; v0 is used
; unscaled as the vector offset and the constant stays in the instruction's
; offset field. The "s_add_i32 s0, s0, 4" is presumably the alloca's frame
; offset (TODO confirm).
define amdgpu_kernel void @soff2_voff1(i32 %soff) {
; GFX940-LABEL: soff2_voff1:
; GFX940: ; %bb.0: ; %bb
; GFX940-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-NEXT: v_mov_b32_e32 v1, 1
; GFX940-NEXT: v_mov_b32_e32 v2, 2
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NEXT: s_lshl_b32 s0, s0, 1
; GFX940-NEXT: s_add_i32 s0, s0, 4
; GFX940-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: v_mov_b32_e32 v1, 4
; GFX940-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_endpgm
bb:
; Scalar part of the address: the kernel argument scaled by 2.
%soff2 = mul i32 %soff, 2
; 64-byte, 4-byte-aligned buffer in addrspace(5).
%a = alloca i8, i32 64, align 4, addrspace(5)
%as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff2
; Vector part of the address: the workitem id scaled by 1.
%voff = call i32 @llvm.amdgcn.workitem.id.x()
%voff1 = mul i32 %voff, 1
%asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff1
; Constant offsets 1, 2 and 4 on top of the combined pointer. The stores are
; volatile so they cannot be merged or dropped.
%p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
store volatile i8 1, i8 addrspace(5)* %p1
%p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
store volatile i8 2, i8 addrspace(5)* %p2
%p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
store volatile i8 4, i8 addrspace(5)* %p4
ret void
}
; soff2_voff2: scalar offset = %soff * 2, vector offset = workitem id * 2, so
; both offsets are known multiples of 2.
; The address is alloca + scalar offset + vector offset, followed by volatile
; byte stores at constant offsets 1, 2 and 4.
; Expected code: the scalar multiply becomes "s_lshl_b32 s0, s0, 1" and the
; vector multiply "v_mul_u32_u24_e32 v0, 2, v0"; the constant stays in the
; instruction's offset field. The "s_add_i32 s0, s0, 4" is presumably the
; alloca's frame offset (TODO confirm).
define amdgpu_kernel void @soff2_voff2(i32 %soff) {
; GFX940-LABEL: soff2_voff2:
; GFX940: ; %bb.0: ; %bb
; GFX940-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-NEXT: v_mov_b32_e32 v1, 1
; GFX940-NEXT: v_mul_u32_u24_e32 v0, 2, v0
; GFX940-NEXT: v_mov_b32_e32 v2, 2
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NEXT: s_lshl_b32 s0, s0, 1
; GFX940-NEXT: s_add_i32 s0, s0, 4
; GFX940-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: v_mov_b32_e32 v1, 4
; GFX940-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_endpgm
bb:
; Scalar part of the address: the kernel argument scaled by 2.
%soff2 = mul i32 %soff, 2
; 64-byte, 4-byte-aligned buffer in addrspace(5).
%a = alloca i8, i32 64, align 4, addrspace(5)
%as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff2
; Vector part of the address: the workitem id scaled by 2.
%voff = call i32 @llvm.amdgcn.workitem.id.x()
%voff2 = mul i32 %voff, 2
%asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff2
; Constant offsets 1, 2 and 4 on top of the combined pointer. The stores are
; volatile so they cannot be merged or dropped.
%p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
store volatile i8 1, i8 addrspace(5)* %p1
%p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
store volatile i8 2, i8 addrspace(5)* %p2
%p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
store volatile i8 4, i8 addrspace(5)* %p4
ret void
}
; soff2_voff4: scalar offset = %soff * 2, vector offset = workitem id * 4, so
; the scalar offset is a known multiple of 2 and the vector offset of 4.
; The address is alloca + scalar offset + vector offset, followed by volatile
; byte stores at constant offsets 1, 2 and 4.
; Expected code: the scalar multiply becomes "s_lshl_b32 s0, s0, 1" and the
; vector multiply "v_mul_u32_u24_e32 v0, 4, v0"; the constant stays in the
; instruction's offset field. The "s_add_i32 s0, s0, 4" is presumably the
; alloca's frame offset (TODO confirm).
define amdgpu_kernel void @soff2_voff4(i32 %soff) {
; GFX940-LABEL: soff2_voff4:
; GFX940: ; %bb.0: ; %bb
; GFX940-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-NEXT: v_mov_b32_e32 v1, 1
; GFX940-NEXT: v_mul_u32_u24_e32 v0, 4, v0
; GFX940-NEXT: v_mov_b32_e32 v2, 2
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NEXT: s_lshl_b32 s0, s0, 1
; GFX940-NEXT: s_add_i32 s0, s0, 4
; GFX940-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: v_mov_b32_e32 v1, 4
; GFX940-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_endpgm
bb:
; Scalar part of the address: the kernel argument scaled by 2.
%soff2 = mul i32 %soff, 2
; 64-byte, 4-byte-aligned buffer in addrspace(5).
%a = alloca i8, i32 64, align 4, addrspace(5)
%as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff2
; Vector part of the address: the workitem id scaled by 4.
%voff = call i32 @llvm.amdgcn.workitem.id.x()
%voff4 = mul i32 %voff, 4
%asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff4
; Constant offsets 1, 2 and 4 on top of the combined pointer. The stores are
; volatile so they cannot be merged or dropped.
%p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
store volatile i8 1, i8 addrspace(5)* %p1
%p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
store volatile i8 2, i8 addrspace(5)* %p2
%p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
store volatile i8 4, i8 addrspace(5)* %p4
ret void
}
; soff4_voff1: scalar offset = %soff * 4, vector offset = workitem id * 1, so
; only the scalar offset is a known multiple of 4.
; The address is alloca + scalar offset + vector offset, followed by volatile
; byte stores at constant offsets 1, 2 and 4.
; Expected code: the scalar multiply becomes "s_lshl_b32 s0, s0, 2"; v0 is used
; unscaled as the vector offset and the constant stays in the instruction's
; offset field. The "s_add_i32 s0, s0, 4" is presumably the alloca's frame
; offset (TODO confirm).
define amdgpu_kernel void @soff4_voff1(i32 %soff) {
; GFX940-LABEL: soff4_voff1:
; GFX940: ; %bb.0: ; %bb
; GFX940-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-NEXT: v_mov_b32_e32 v1, 1
; GFX940-NEXT: v_mov_b32_e32 v2, 2
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NEXT: s_lshl_b32 s0, s0, 2
; GFX940-NEXT: s_add_i32 s0, s0, 4
; GFX940-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: v_mov_b32_e32 v1, 4
; GFX940-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_endpgm
bb:
; Scalar part of the address: the kernel argument scaled by 4.
%soff4 = mul i32 %soff, 4
; 64-byte, 4-byte-aligned buffer in addrspace(5).
%a = alloca i8, i32 64, align 4, addrspace(5)
%as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff4
; Vector part of the address: the workitem id scaled by 1.
%voff = call i32 @llvm.amdgcn.workitem.id.x()
%voff1 = mul i32 %voff, 1
%asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff1
; Constant offsets 1, 2 and 4 on top of the combined pointer. The stores are
; volatile so they cannot be merged or dropped.
%p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
store volatile i8 1, i8 addrspace(5)* %p1
%p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
store volatile i8 2, i8 addrspace(5)* %p2
%p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
store volatile i8 4, i8 addrspace(5)* %p4
ret void
}
; soff4_voff2: scalar offset = %soff * 4, vector offset = workitem id * 2, so
; the scalar offset is a known multiple of 4 and the vector offset of 2.
; The address is alloca + scalar offset + vector offset, followed by volatile
; byte stores at constant offsets 1, 2 and 4.
; Expected code: the scalar multiply becomes "s_lshl_b32 s0, s0, 2" and the
; vector multiply "v_mul_u32_u24_e32 v0, 2, v0"; the constant stays in the
; instruction's offset field. The "s_add_i32 s0, s0, 4" is presumably the
; alloca's frame offset (TODO confirm).
define amdgpu_kernel void @soff4_voff2(i32 %soff) {
; GFX940-LABEL: soff4_voff2:
; GFX940: ; %bb.0: ; %bb
; GFX940-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-NEXT: v_mov_b32_e32 v1, 1
; GFX940-NEXT: v_mul_u32_u24_e32 v0, 2, v0
; GFX940-NEXT: v_mov_b32_e32 v2, 2
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NEXT: s_lshl_b32 s0, s0, 2
; GFX940-NEXT: s_add_i32 s0, s0, 4
; GFX940-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: v_mov_b32_e32 v1, 4
; GFX940-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_endpgm
bb:
; Scalar part of the address: the kernel argument scaled by 4.
%soff4 = mul i32 %soff, 4
; 64-byte, 4-byte-aligned buffer in addrspace(5).
%a = alloca i8, i32 64, align 4, addrspace(5)
%as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff4
; Vector part of the address: the workitem id scaled by 2.
%voff = call i32 @llvm.amdgcn.workitem.id.x()
%voff2 = mul i32 %voff, 2
%asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff2
; Constant offsets 1, 2 and 4 on top of the combined pointer. The stores are
; volatile so they cannot be merged or dropped.
%p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
store volatile i8 1, i8 addrspace(5)* %p1
%p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
store volatile i8 2, i8 addrspace(5)* %p2
%p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
store volatile i8 4, i8 addrspace(5)* %p4
ret void
}
; soff4_voff4: scalar offset = %soff * 4, vector offset = workitem id * 4, so
; both offsets are known multiples of 4.
; The address is alloca + scalar offset + vector offset, followed by volatile
; byte stores at constant offsets 1, 2 and 4.
; Expected code: the scalar multiply becomes "s_lshl_b32 s0, s0, 2" and the
; vector multiply "v_mul_u32_u24_e32 v0, 4, v0"; the constant stays in the
; instruction's offset field. The "s_add_i32 s0, s0, 4" is presumably the
; alloca's frame offset (TODO confirm).
define amdgpu_kernel void @soff4_voff4(i32 %soff) {
; GFX940-LABEL: soff4_voff4:
; GFX940: ; %bb.0: ; %bb
; GFX940-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-NEXT: v_mov_b32_e32 v1, 1
; GFX940-NEXT: v_mul_u32_u24_e32 v0, 4, v0
; GFX940-NEXT: v_mov_b32_e32 v2, 2
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NEXT: s_lshl_b32 s0, s0, 2
; GFX940-NEXT: s_add_i32 s0, s0, 4
; GFX940-NEXT: scratch_store_byte v0, v1, s0 offset:1 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: scratch_store_byte v0, v2, s0 offset:2 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: v_mov_b32_e32 v1, 4
; GFX940-NEXT: scratch_store_byte v0, v1, s0 offset:4 sc0 sc1
; GFX940-NEXT: s_waitcnt vmcnt(0)
; GFX940-NEXT: s_endpgm
bb:
; Scalar part of the address: the kernel argument scaled by 4.
%soff4 = mul i32 %soff, 4
; 64-byte, 4-byte-aligned buffer in addrspace(5).
%a = alloca i8, i32 64, align 4, addrspace(5)
%as = getelementptr i8, i8 addrspace(5)* %a, i32 %soff4
; Vector part of the address: the workitem id scaled by 4.
%voff = call i32 @llvm.amdgcn.workitem.id.x()
%voff4 = mul i32 %voff, 4
%asv = getelementptr i8, i8 addrspace(5)* %as, i32 %voff4
; Constant offsets 1, 2 and 4 on top of the combined pointer. The stores are
; volatile so they cannot be merged or dropped.
%p1 = getelementptr i8, i8 addrspace(5)* %asv, i32 1
store volatile i8 1, i8 addrspace(5)* %p1
%p2 = getelementptr i8, i8 addrspace(5)* %asv, i32 2
store volatile i8 2, i8 addrspace(5)* %p2
%p4 = getelementptr i8, i8 addrspace(5)* %asv, i32 4
store volatile i8 4, i8 addrspace(5)* %p4
ret void
}