; Source blob: c6028497c941f628b4fa3494bf261349545ed675
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s
; Issues three async raw-buffer-to-LDS loads using the <4 x i32> form of the
; buffer resource. The first two loads are each followed by an asyncmark; the
; third is followed by wait.asyncmark(1) and then an ordinary LDS load whose
; result is returned.
;
; %rsrc and %lds are both inreg; the expected assembly below uses s[16:19] for
; the resource and s20 for the LDS address (copied to m0 and to v0).
; The point of interest in the expected assembly is the partial
; `s_waitcnt vmcnt(2)` emitted after the wait marker, ahead of the ds_read.
define float @raw.buffer.load(<4 x i32> inreg %rsrc, ptr addrspace(3) inreg %lds) {
; CHECK-LABEL: raw.buffer.load:
; CHECK: ; %bb.0: ; %main_body
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_mov_b32 m0, s20
; CHECK-NEXT: v_mov_b32_e32 v0, s20
; CHECK-NEXT: buffer_load_dword off, s[16:19], 0 lds
; CHECK-NEXT: ; asyncmark
; CHECK-NEXT: buffer_load_dword off, s[16:19], 0 offset:4 glc lds
; CHECK-NEXT: ; asyncmark
; CHECK-NEXT: buffer_load_dword off, s[16:19], 0 offset:8 slc lds
; CHECK-NEXT: ; wait_asyncmark(1)
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: ds_read_b32 v0, v0
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
main_body:
; Operands after %lds: i32 4 goes with the dword-sized load in the expected
; assembly; the last two operands show up there as the immediate offset
; (0/4/8) and the cache-policy modifier (0 -> none, 1 -> glc, 2 -> slc).
; NOTE(review): the two zero operands in between are presumably voffset and
; soffset (`off` / `0` in the output) -- confirm against the intrinsic definition.
call void @llvm.amdgcn.raw.buffer.load.async.lds(<4 x i32> %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 0, i32 0)
call void @llvm.amdgcn.asyncmark()
call void @llvm.amdgcn.raw.buffer.load.async.lds(<4 x i32> %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 4, i32 1)
call void @llvm.amdgcn.asyncmark()
call void @llvm.amdgcn.raw.buffer.load.async.lds(<4 x i32> %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 8, i32 2)
; NOTE(review): presumably allows one marked group to remain outstanding; the
; expected assembly lowers this to vmcnt(2), leaving the last two loads in flight.
call void @llvm.amdgcn.wait.asyncmark(i16 1)
; Plain (non-async) read of the same LDS address; its value is the return value.
%res = load float, ptr addrspace(3) %lds
ret float %res
}
; Issues three async raw-buffer-to-LDS loads using the `ptr addrspace(8)` form
; of the buffer resource. The first two loads are each followed by an
; asyncmark; the third is followed by wait.asyncmark(1) and then an ordinary
; LDS load whose result is returned.
;
; %rsrc and %lds are both inreg; the expected assembly below uses s[16:19] for
; the resource and s20 for the LDS address (copied to m0 and to v0).
; The expected assembly contains a partial `s_waitcnt vmcnt(2)` after the wait
; marker, ahead of the ds_read.
define float @raw.ptr.buffer.load(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds) {
; CHECK-LABEL: raw.ptr.buffer.load:
; CHECK: ; %bb.0: ; %main_body
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_mov_b32 m0, s20
; CHECK-NEXT: v_mov_b32_e32 v0, s20
; CHECK-NEXT: buffer_load_dword off, s[16:19], 0 lds
; CHECK-NEXT: ; asyncmark
; CHECK-NEXT: buffer_load_dword off, s[16:19], 0 offset:4 glc lds
; CHECK-NEXT: ; asyncmark
; CHECK-NEXT: buffer_load_dword off, s[16:19], 0 offset:8 slc lds
; CHECK-NEXT: ; wait_asyncmark(1)
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: ds_read_b32 v0, v0
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
main_body:
; Operands after %lds: i32 4 goes with the dword-sized load in the expected
; assembly; the last two operands show up there as the immediate offset
; (0/4/8) and the cache-policy modifier (0 -> none, 1 -> glc, 2 -> slc).
; NOTE(review): the two zero operands in between are presumably voffset and
; soffset (`off` / `0` in the output) -- confirm against the intrinsic definition.
call void @llvm.amdgcn.raw.ptr.buffer.load.async.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 0, i32 0)
call void @llvm.amdgcn.asyncmark()
call void @llvm.amdgcn.raw.ptr.buffer.load.async.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 4, i32 1)
call void @llvm.amdgcn.asyncmark()
call void @llvm.amdgcn.raw.ptr.buffer.load.async.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 8, i32 2)
; NOTE(review): presumably allows one marked group to remain outstanding; the
; expected assembly lowers this to vmcnt(2), leaving the last two loads in flight.
call void @llvm.amdgcn.wait.asyncmark(i16 1)
; Plain (non-async) read of the same LDS address; its value is the return value.
%res = load float, ptr addrspace(3) %lds
ret float %res
}
; Issues three async struct-buffer-to-LDS loads using the <4 x i32> form of
; the buffer resource. The first two loads are each followed by an asyncmark;
; the third is followed by wait.asyncmark(1) and then an ordinary LDS load
; whose result is returned.
;
; %rsrc and %lds are both inreg; the expected assembly below uses s[16:19] for
; the resource and s20 for the LDS address. v0 first holds the constant 8 used
; by the `idxen` loads and is then overwritten with s20 as the ds_read address.
; The expected assembly contains a partial `s_waitcnt vmcnt(2)` after the wait
; marker, ahead of the ds_read.
define float @struct.buffer.load(<4 x i32> inreg %rsrc, ptr addrspace(3) inreg %lds) {
; CHECK-LABEL: struct.buffer.load:
; CHECK: ; %bb.0: ; %main_body
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_mov_b32 m0, s20
; CHECK-NEXT: v_mov_b32_e32 v0, 8
; CHECK-NEXT: buffer_load_dword v0, s[16:19], 0 idxen lds
; CHECK-NEXT: ; asyncmark
; CHECK-NEXT: buffer_load_dword v0, s[16:19], 0 idxen offset:4 glc lds
; CHECK-NEXT: ; asyncmark
; CHECK-NEXT: buffer_load_dword v0, s[16:19], 0 idxen offset:8 slc lds
; CHECK-NEXT: v_mov_b32_e32 v0, s20
; CHECK-NEXT: ; wait_asyncmark(1)
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: ds_read_b32 v0, v0
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
main_body:
; Operands after %lds: i32 4 goes with the dword-sized load in the expected
; assembly; i32 8 is materialised into v0 and used with `idxen` (presumably
; the vindex operand -- confirm against the intrinsic definition); the last two
; operands show up as the immediate offset (0/4/8) and the cache-policy
; modifier (0 -> none, 1 -> glc, 2 -> slc).
call void @llvm.amdgcn.struct.buffer.load.async.lds(<4 x i32> %rsrc, ptr addrspace(3) %lds, i32 4, i32 8, i32 0, i32 0, i32 0, i32 0)
call void @llvm.amdgcn.asyncmark()
call void @llvm.amdgcn.struct.buffer.load.async.lds(<4 x i32> %rsrc, ptr addrspace(3) %lds, i32 4, i32 8, i32 0, i32 0, i32 4, i32 1)
call void @llvm.amdgcn.asyncmark()
call void @llvm.amdgcn.struct.buffer.load.async.lds(<4 x i32> %rsrc, ptr addrspace(3) %lds, i32 4, i32 8, i32 0, i32 0, i32 8, i32 2)
; NOTE(review): presumably allows one marked group to remain outstanding; the
; expected assembly lowers this to vmcnt(2), leaving the last two loads in flight.
call void @llvm.amdgcn.wait.asyncmark(i16 1)
; Plain (non-async) read of the same LDS address; its value is the return value.
%res = load float, ptr addrspace(3) %lds
ret float %res
}
; Issues three async struct-buffer-to-LDS loads using the `ptr addrspace(8)`
; form of the buffer resource. The first two loads are each followed by an
; asyncmark; the third is followed by wait.asyncmark(1) and then an ordinary
; LDS load whose result is returned.
;
; %rsrc and %lds are both inreg; the expected assembly below uses s[16:19] for
; the resource and s20 for the LDS address. v0 first holds the constant 8 used
; by the `idxen` loads and is then overwritten with s20 as the ds_read address.
; The expected assembly contains a partial `s_waitcnt vmcnt(2)` after the wait
; marker, ahead of the ds_read.
define float @struct.ptr.buffer.load(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds) {
; CHECK-LABEL: struct.ptr.buffer.load:
; CHECK: ; %bb.0: ; %main_body
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_mov_b32 m0, s20
; CHECK-NEXT: v_mov_b32_e32 v0, 8
; CHECK-NEXT: buffer_load_dword v0, s[16:19], 0 idxen lds
; CHECK-NEXT: ; asyncmark
; CHECK-NEXT: buffer_load_dword v0, s[16:19], 0 idxen offset:4 glc lds
; CHECK-NEXT: ; asyncmark
; CHECK-NEXT: buffer_load_dword v0, s[16:19], 0 idxen offset:8 slc lds
; CHECK-NEXT: v_mov_b32_e32 v0, s20
; CHECK-NEXT: ; wait_asyncmark(1)
; CHECK-NEXT: s_waitcnt vmcnt(2)
; CHECK-NEXT: ds_read_b32 v0, v0
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
main_body:
; Operands after %lds: i32 4 goes with the dword-sized load in the expected
; assembly; i32 8 is materialised into v0 and used with `idxen` (presumably
; the vindex operand -- confirm against the intrinsic definition); the last two
; operands show up as the immediate offset (0/4/8) and the cache-policy
; modifier (0 -> none, 1 -> glc, 2 -> slc).
call void @llvm.amdgcn.struct.ptr.buffer.load.async.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 8, i32 0, i32 0, i32 0, i32 0)
call void @llvm.amdgcn.asyncmark()
call void @llvm.amdgcn.struct.ptr.buffer.load.async.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 8, i32 0, i32 0, i32 4, i32 1)
call void @llvm.amdgcn.asyncmark()
call void @llvm.amdgcn.struct.ptr.buffer.load.async.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 8, i32 0, i32 0, i32 8, i32 2)
; NOTE(review): presumably allows one marked group to remain outstanding; the
; expected assembly lowers this to vmcnt(2), leaving the last two loads in flight.
call void @llvm.amdgcn.wait.asyncmark(i16 1)
; Plain (non-async) read of the same LDS address; its value is the return value.
%res = load float, ptr addrspace(3) %lds
ret float %res
}