| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s |
| ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s |
| |
; raw.buffer.load: issues three llvm.amdgcn.raw.buffer.load.async.lds calls on a
; <4 x i32> resource, with an llvm.amdgcn.asyncmark after the first and after the
; second call, then llvm.amdgcn.wait.asyncmark(1), and finally reads a float back
; from %lds and returns it.
;
; The three calls differ only in their last two operands: (0, 0), (4, 1), (8, 2).
; The autogenerated assertions expect these to appear as no modifier,
; "offset:4 glc" and "offset:8 slc" respectively.
;
; The assertions also expect "s_waitcnt vmcnt(2)" directly after the
; wait_asyncmark(1) comment and before ds_read_b32.
; NOTE(review): presumably this means only the load in front of the first mark
; must have completed before the LDS read -- confirm against the asyncmark docs.
| define float @raw.buffer.load(<4 x i32> inreg %rsrc, ptr addrspace(3) inreg %lds) { |
| ; CHECK-LABEL: raw.buffer.load: |
| ; CHECK: ; %bb.0: ; %main_body |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_mov_b32 m0, s20 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, s20 |
| ; CHECK-NEXT: buffer_load_dword off, s[16:19], 0 lds |
| ; CHECK-NEXT: ; asyncmark |
| ; CHECK-NEXT: buffer_load_dword off, s[16:19], 0 offset:4 glc lds |
| ; CHECK-NEXT: ; asyncmark |
| ; CHECK-NEXT: buffer_load_dword off, s[16:19], 0 offset:8 slc lds |
| ; CHECK-NEXT: ; wait_asyncmark(1) |
| ; CHECK-NEXT: s_waitcnt vmcnt(2) |
| ; CHECK-NEXT: ds_read_b32 v0, v0 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| main_body: |
| call void @llvm.amdgcn.raw.buffer.load.async.lds(<4 x i32> %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 0, i32 0) |
| call void @llvm.amdgcn.asyncmark() |
| call void @llvm.amdgcn.raw.buffer.load.async.lds(<4 x i32> %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 4, i32 1) |
| call void @llvm.amdgcn.asyncmark() |
; The third load is deliberately not followed by an asyncmark before the wait.
| call void @llvm.amdgcn.raw.buffer.load.async.lds(<4 x i32> %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 8, i32 2) |
| call void @llvm.amdgcn.wait.asyncmark(i16 1) |
; Ordinary LDS read of the same pointer the async loads were given.
| %res = load float, ptr addrspace(3) %lds |
| ret float %res |
| } |
| |
; raw.ptr.buffer.load: issues three llvm.amdgcn.raw.ptr.buffer.load.async.lds
; calls on a ptr addrspace(8) resource, with an llvm.amdgcn.asyncmark after the
; first and after the second call, then llvm.amdgcn.wait.asyncmark(1), and
; finally reads a float back from %lds and returns it.
;
; The three calls differ only in their last two operands: (0, 0), (4, 1), (8, 2).
; The autogenerated assertions expect these to appear as no modifier,
; "offset:4 glc" and "offset:8 slc" respectively.
;
; The assertions also expect "s_waitcnt vmcnt(2)" directly after the
; wait_asyncmark(1) comment and before ds_read_b32.
; NOTE(review): presumably this means only the load in front of the first mark
; must have completed before the LDS read -- confirm against the asyncmark docs.
| define float @raw.ptr.buffer.load(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds) { |
| ; CHECK-LABEL: raw.ptr.buffer.load: |
| ; CHECK: ; %bb.0: ; %main_body |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_mov_b32 m0, s20 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, s20 |
| ; CHECK-NEXT: buffer_load_dword off, s[16:19], 0 lds |
| ; CHECK-NEXT: ; asyncmark |
| ; CHECK-NEXT: buffer_load_dword off, s[16:19], 0 offset:4 glc lds |
| ; CHECK-NEXT: ; asyncmark |
| ; CHECK-NEXT: buffer_load_dword off, s[16:19], 0 offset:8 slc lds |
| ; CHECK-NEXT: ; wait_asyncmark(1) |
| ; CHECK-NEXT: s_waitcnt vmcnt(2) |
| ; CHECK-NEXT: ds_read_b32 v0, v0 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| main_body: |
| call void @llvm.amdgcn.raw.ptr.buffer.load.async.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 0, i32 0) |
| call void @llvm.amdgcn.asyncmark() |
| call void @llvm.amdgcn.raw.ptr.buffer.load.async.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 4, i32 1) |
| call void @llvm.amdgcn.asyncmark() |
; The third load is deliberately not followed by an asyncmark before the wait.
| call void @llvm.amdgcn.raw.ptr.buffer.load.async.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 8, i32 2) |
| call void @llvm.amdgcn.wait.asyncmark(i16 1) |
; Ordinary LDS read of the same pointer the async loads were given.
| %res = load float, ptr addrspace(3) %lds |
| ret float %res |
| } |
| |
; struct.buffer.load: issues three llvm.amdgcn.struct.buffer.load.async.lds calls
; on a <4 x i32> resource, with an llvm.amdgcn.asyncmark after the first and
; after the second call, then llvm.amdgcn.wait.asyncmark(1), and finally reads a
; float back from %lds and returns it.
;
; Every call passes the constant i32 8 right after the size operand; the
; autogenerated assertions expect it to be materialized with
; "v_mov_b32_e32 v0, 8" and used through "idxen" addressing.
; NOTE(review): presumably this operand is the struct index (vindex) -- confirm
; against the intrinsic definition.
;
; The three calls differ only in their last two operands: (0, 0), (4, 1), (8, 2).
; The assertions expect these to appear as no modifier, "offset:4 glc" and
; "offset:8 slc" respectively.
;
; The assertions also expect "s_waitcnt vmcnt(2)" directly after the
; wait_asyncmark(1) comment and before ds_read_b32.
; NOTE(review): presumably this means only the load in front of the first mark
; must have completed before the LDS read -- confirm against the asyncmark docs.
| define float @struct.buffer.load(<4 x i32> inreg %rsrc, ptr addrspace(3) inreg %lds) { |
| ; CHECK-LABEL: struct.buffer.load: |
| ; CHECK: ; %bb.0: ; %main_body |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_mov_b32 m0, s20 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 8 |
| ; CHECK-NEXT: buffer_load_dword v0, s[16:19], 0 idxen lds |
| ; CHECK-NEXT: ; asyncmark |
| ; CHECK-NEXT: buffer_load_dword v0, s[16:19], 0 idxen offset:4 glc lds |
| ; CHECK-NEXT: ; asyncmark |
| ; CHECK-NEXT: buffer_load_dword v0, s[16:19], 0 idxen offset:8 slc lds |
| ; CHECK-NEXT: v_mov_b32_e32 v0, s20 |
| ; CHECK-NEXT: ; wait_asyncmark(1) |
| ; CHECK-NEXT: s_waitcnt vmcnt(2) |
| ; CHECK-NEXT: ds_read_b32 v0, v0 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| main_body: |
| call void @llvm.amdgcn.struct.buffer.load.async.lds(<4 x i32> %rsrc, ptr addrspace(3) %lds, i32 4, i32 8, i32 0, i32 0, i32 0, i32 0) |
| call void @llvm.amdgcn.asyncmark() |
| call void @llvm.amdgcn.struct.buffer.load.async.lds(<4 x i32> %rsrc, ptr addrspace(3) %lds, i32 4, i32 8, i32 0, i32 0, i32 4, i32 1) |
| call void @llvm.amdgcn.asyncmark() |
; The third load is deliberately not followed by an asyncmark before the wait.
| call void @llvm.amdgcn.struct.buffer.load.async.lds(<4 x i32> %rsrc, ptr addrspace(3) %lds, i32 4, i32 8, i32 0, i32 0, i32 8, i32 2) |
| call void @llvm.amdgcn.wait.asyncmark(i16 1) |
; Ordinary LDS read of the same pointer the async loads were given.
| %res = load float, ptr addrspace(3) %lds |
| ret float %res |
| } |
| |
; struct.ptr.buffer.load: issues three llvm.amdgcn.struct.ptr.buffer.load.async.lds
; calls on a ptr addrspace(8) resource, with an llvm.amdgcn.asyncmark after the
; first and after the second call, then llvm.amdgcn.wait.asyncmark(1), and
; finally reads a float back from %lds and returns it.
;
; Every call passes the constant i32 8 right after the size operand; the
; autogenerated assertions expect it to be materialized with
; "v_mov_b32_e32 v0, 8" and used through "idxen" addressing.
; NOTE(review): presumably this operand is the struct index (vindex) -- confirm
; against the intrinsic definition.
;
; The three calls differ only in their last two operands: (0, 0), (4, 1), (8, 2).
; The assertions expect these to appear as no modifier, "offset:4 glc" and
; "offset:8 slc" respectively.
;
; The assertions also expect "s_waitcnt vmcnt(2)" directly after the
; wait_asyncmark(1) comment and before ds_read_b32.
; NOTE(review): presumably this means only the load in front of the first mark
; must have completed before the LDS read -- confirm against the asyncmark docs.
| define float @struct.ptr.buffer.load(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds) { |
| ; CHECK-LABEL: struct.ptr.buffer.load: |
| ; CHECK: ; %bb.0: ; %main_body |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_mov_b32 m0, s20 |
| ; CHECK-NEXT: v_mov_b32_e32 v0, 8 |
| ; CHECK-NEXT: buffer_load_dword v0, s[16:19], 0 idxen lds |
| ; CHECK-NEXT: ; asyncmark |
| ; CHECK-NEXT: buffer_load_dword v0, s[16:19], 0 idxen offset:4 glc lds |
| ; CHECK-NEXT: ; asyncmark |
| ; CHECK-NEXT: buffer_load_dword v0, s[16:19], 0 idxen offset:8 slc lds |
| ; CHECK-NEXT: v_mov_b32_e32 v0, s20 |
| ; CHECK-NEXT: ; wait_asyncmark(1) |
| ; CHECK-NEXT: s_waitcnt vmcnt(2) |
| ; CHECK-NEXT: ds_read_b32 v0, v0 |
| ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| ; CHECK-NEXT: s_setpc_b64 s[30:31] |
| main_body: |
| call void @llvm.amdgcn.struct.ptr.buffer.load.async.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 8, i32 0, i32 0, i32 0, i32 0) |
| call void @llvm.amdgcn.asyncmark() |
| call void @llvm.amdgcn.struct.ptr.buffer.load.async.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 8, i32 0, i32 0, i32 4, i32 1) |
| call void @llvm.amdgcn.asyncmark() |
; The third load is deliberately not followed by an asyncmark before the wait.
| call void @llvm.amdgcn.struct.ptr.buffer.load.async.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 8, i32 0, i32 0, i32 8, i32 2) |
| call void @llvm.amdgcn.wait.asyncmark(i16 1) |
; Ordinary LDS read of the same pointer the async loads were given.
| %res = load float, ptr addrspace(3) %lds |
| ret float %res |
| } |