| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s --check-prefix=GFX90A |
| ; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck %s --check-prefix=GFX90A |
| ; RUN: llc -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck %s --check-prefix=GFX942 |
| ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s --check-prefix=GFX10 |
| ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck %s --check-prefix=GFX942-GISEL |
| |
| ;; Note: load.to.lds is a wrapper intrinsic that is lowered to the global or |
| ;; buffer load-to-LDS operation, depending on the address space of the source pointer. |
| ;; This is a bare-bones test to ensure that it lowers to the correct instructions. |
| |
| ;; Intrinsic under test, declared once per source address space used below |
| ;; (.p1 = addrspace(1) source, .p7 = addrspace(7) source). |
| ;; Operands, in order: source pointer, LDS (addrspace(3)) destination pointer, |
| ;; size, offset, aux. The calls in this file pass size = 4, 2 or 1 and the |
| ;; assertions expect dword, ushort and ubyte loads respectively, so size is in |
| ;; bytes; offset appears as the instruction's offset modifier, and aux = 1 |
| ;; appears as glc (sc0 in the gfx942 runs). |
| declare void @llvm.amdgcn.load.to.lds.p1(ptr addrspace(1) nocapture %gptr, ptr addrspace(3) nocapture %lptr, i32 %size, i32 %offset, i32 %aux) |
| declare void @llvm.amdgcn.load.to.lds.p7(ptr addrspace(7) nocapture %gptr, ptr addrspace(3) nocapture %lptr, i32 %size, i32 %offset, i32 %aux) |
| |
| ;; 4-byte load-to-LDS from an addrspace(1) pointer (size = 4, offset = 16, aux = 1). |
| ;; Expected code: the inreg LDS pointer (s0) is copied into m0, the source |
| ;; address stays in v[0:1], and a single dword load-to-LDS is emitted with |
| ;; offset:16 and glc (sc0 in the gfx942 runs). The gfx942 runs spell the opcode |
| ;; global_load_lds_dword; the other runs use global_load_dword with an lds modifier. |
| ;; An s_nop 0 follows the m0 write in every run except gfx1010. |
| define amdgpu_ps void @global_load_lds_dword_vaddr_saddr(ptr addrspace(1) nocapture %gptr, ptr addrspace(3) nocapture inreg %lptr) { |
| ; GFX90A-LABEL: global_load_lds_dword_vaddr_saddr: |
| ; GFX90A: ; %bb.0: ; %main_body |
| ; GFX90A-NEXT: s_mov_b32 m0, s0 |
| ; GFX90A-NEXT: s_nop 0 |
| ; GFX90A-NEXT: global_load_dword v[0:1], off offset:16 glc lds |
| ; GFX90A-NEXT: s_endpgm |
| ; |
| ; GFX942-LABEL: global_load_lds_dword_vaddr_saddr: |
| ; GFX942: ; %bb.0: ; %main_body |
| ; GFX942-NEXT: s_mov_b32 m0, s0 |
| ; GFX942-NEXT: s_nop 0 |
| ; GFX942-NEXT: global_load_lds_dword v[0:1], off offset:16 sc0 |
| ; GFX942-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_load_lds_dword_vaddr_saddr: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: s_mov_b32 m0, s0 |
| ; GFX10-NEXT: global_load_dword v[0:1], off offset:16 glc lds |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX942-GISEL-LABEL: global_load_lds_dword_vaddr_saddr: |
| ; GFX942-GISEL: ; %bb.0: ; %main_body |
| ; GFX942-GISEL-NEXT: s_mov_b32 m0, s0 |
| ; GFX942-GISEL-NEXT: s_nop 0 |
| ; GFX942-GISEL-NEXT: global_load_lds_dword v[0:1], off offset:16 sc0 |
| ; GFX942-GISEL-NEXT: s_endpgm |
| main_body: |
| call void @llvm.amdgcn.load.to.lds.p1(ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 4, i32 16, i32 1) |
| ret void |
| } |
| |
| ;; 4-byte load-to-LDS from an addrspace(7) pointer (size = 4, offset = 16, aux = 1). |
| ;; %gptr and %lptr are inreg; %off is not. Expected code: s4 is added to v0 |
| ;; (presumably s4 is the offset part of the inreg addrspace(7) pointer and v0 |
| ;; holds %off), s5 is copied into m0, and a dword buffer load with the lds |
| ;; modifier is emitted using s[0:3] as the resource, offen, offset:16 and |
| ;; glc (sc0 in the gfx942 runs). |
| define amdgpu_ps void @buffer_load_lds_dword_vaddr_saddr(ptr addrspace(7) nocapture inreg %gptr, i32 %off, ptr addrspace(3) nocapture inreg %lptr) { |
| ; GFX90A-LABEL: buffer_load_lds_dword_vaddr_saddr: |
| ; GFX90A: ; %bb.0: ; %main_body |
| ; GFX90A-NEXT: v_add_u32_e32 v0, s4, v0 |
| ; GFX90A-NEXT: s_mov_b32 m0, s5 |
| ; GFX90A-NEXT: s_nop 0 |
| ; GFX90A-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:16 glc lds |
| ; GFX90A-NEXT: s_endpgm |
| ; |
| ; GFX942-LABEL: buffer_load_lds_dword_vaddr_saddr: |
| ; GFX942: ; %bb.0: ; %main_body |
| ; GFX942-NEXT: v_add_u32_e32 v0, s4, v0 |
| ; GFX942-NEXT: s_mov_b32 m0, s5 |
| ; GFX942-NEXT: s_nop 0 |
| ; GFX942-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:16 sc0 lds |
| ; GFX942-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: buffer_load_lds_dword_vaddr_saddr: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: v_add_nc_u32_e32 v0, s4, v0 |
| ; GFX10-NEXT: s_mov_b32 m0, s5 |
| ; GFX10-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:16 glc lds |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX942-GISEL-LABEL: buffer_load_lds_dword_vaddr_saddr: |
| ; GFX942-GISEL: ; %bb.0: ; %main_body |
| ; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s4, v0 |
| ; GFX942-GISEL-NEXT: s_mov_b32 m0, s5 |
| ; GFX942-GISEL-NEXT: s_nop 0 |
| ; GFX942-GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:16 sc0 lds |
| ; GFX942-GISEL-NEXT: s_endpgm |
| main_body: |
| ;; Advance the source pointer by %off bytes (i8 GEP) before the load. |
| %gptr.off = getelementptr i8, ptr addrspace(7) %gptr, i32 %off |
| call void @llvm.amdgcn.load.to.lds.p7(ptr addrspace(7) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 16, i32 1) |
| ret void |
| } |
| |
| ;; 2-byte load-to-LDS from an addrspace(1) pointer (size = 2, offset = 16, aux = 1). |
| ;; Expected code: the inreg LDS pointer (s0) is copied into m0 and a single |
| ;; ushort load-to-LDS from v[0:1] is emitted with offset:16 and glc (sc0 in |
| ;; the gfx942 runs, which also spell the opcode global_load_lds_ushort). |
| define amdgpu_ps void @global_load_lds_ushort_vaddr_saddr(ptr addrspace(1) nocapture %gptr, ptr addrspace(3) nocapture inreg %lptr) { |
| ; GFX90A-LABEL: global_load_lds_ushort_vaddr_saddr: |
| ; GFX90A: ; %bb.0: ; %main_body |
| ; GFX90A-NEXT: s_mov_b32 m0, s0 |
| ; GFX90A-NEXT: s_nop 0 |
| ; GFX90A-NEXT: global_load_ushort v[0:1], off offset:16 glc lds |
| ; GFX90A-NEXT: s_endpgm |
| ; |
| ; GFX942-LABEL: global_load_lds_ushort_vaddr_saddr: |
| ; GFX942: ; %bb.0: ; %main_body |
| ; GFX942-NEXT: s_mov_b32 m0, s0 |
| ; GFX942-NEXT: s_nop 0 |
| ; GFX942-NEXT: global_load_lds_ushort v[0:1], off offset:16 sc0 |
| ; GFX942-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_load_lds_ushort_vaddr_saddr: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: s_mov_b32 m0, s0 |
| ; GFX10-NEXT: global_load_ushort v[0:1], off offset:16 glc lds |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX942-GISEL-LABEL: global_load_lds_ushort_vaddr_saddr: |
| ; GFX942-GISEL: ; %bb.0: ; %main_body |
| ; GFX942-GISEL-NEXT: s_mov_b32 m0, s0 |
| ; GFX942-GISEL-NEXT: s_nop 0 |
| ; GFX942-GISEL-NEXT: global_load_lds_ushort v[0:1], off offset:16 sc0 |
| ; GFX942-GISEL-NEXT: s_endpgm |
| main_body: |
| call void @llvm.amdgcn.load.to.lds.p1(ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 2, i32 16, i32 1) |
| ret void |
| } |
| |
| ;; 2-byte load-to-LDS from an addrspace(7) pointer (size = 2, offset = 16, aux = 1). |
| ;; Expected code: s4 is added to v0 (presumably the pointer's offset part plus |
| ;; %off), s5 is copied into m0, and a ushort buffer load with the lds modifier |
| ;; is emitted using s[0:3] as the resource, offen, offset:16 and glc (sc0 in |
| ;; the gfx942 runs). |
| define amdgpu_ps void @buffer_load_lds_ushort_vaddr_saddr(ptr addrspace(7) nocapture inreg %gptr, i32 %off, ptr addrspace(3) nocapture inreg %lptr) { |
| ; GFX90A-LABEL: buffer_load_lds_ushort_vaddr_saddr: |
| ; GFX90A: ; %bb.0: ; %main_body |
| ; GFX90A-NEXT: v_add_u32_e32 v0, s4, v0 |
| ; GFX90A-NEXT: s_mov_b32 m0, s5 |
| ; GFX90A-NEXT: s_nop 0 |
| ; GFX90A-NEXT: buffer_load_ushort v0, s[0:3], 0 offen offset:16 glc lds |
| ; GFX90A-NEXT: s_endpgm |
| ; |
| ; GFX942-LABEL: buffer_load_lds_ushort_vaddr_saddr: |
| ; GFX942: ; %bb.0: ; %main_body |
| ; GFX942-NEXT: v_add_u32_e32 v0, s4, v0 |
| ; GFX942-NEXT: s_mov_b32 m0, s5 |
| ; GFX942-NEXT: s_nop 0 |
| ; GFX942-NEXT: buffer_load_ushort v0, s[0:3], 0 offen offset:16 sc0 lds |
| ; GFX942-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: buffer_load_lds_ushort_vaddr_saddr: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: v_add_nc_u32_e32 v0, s4, v0 |
| ; GFX10-NEXT: s_mov_b32 m0, s5 |
| ; GFX10-NEXT: buffer_load_ushort v0, s[0:3], 0 offen offset:16 glc lds |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX942-GISEL-LABEL: buffer_load_lds_ushort_vaddr_saddr: |
| ; GFX942-GISEL: ; %bb.0: ; %main_body |
| ; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s4, v0 |
| ; GFX942-GISEL-NEXT: s_mov_b32 m0, s5 |
| ; GFX942-GISEL-NEXT: s_nop 0 |
| ; GFX942-GISEL-NEXT: buffer_load_ushort v0, s[0:3], 0 offen offset:16 sc0 lds |
| ; GFX942-GISEL-NEXT: s_endpgm |
| main_body: |
| ;; Advance the source pointer by %off bytes (i8 GEP) before the load. |
| %gptr.off = getelementptr i8, ptr addrspace(7) %gptr, i32 %off |
| call void @llvm.amdgcn.load.to.lds.p7(ptr addrspace(7) %gptr.off, ptr addrspace(3) %lptr, i32 2, i32 16, i32 1) |
| ret void |
| } |
| |
| ;; 1-byte load-to-LDS from an addrspace(1) pointer (size = 1, offset = 16, aux = 1). |
| ;; Expected code: the inreg LDS pointer (s0) is copied into m0 and a single |
| ;; ubyte load-to-LDS from v[0:1] is emitted with offset:16 and glc (sc0 in |
| ;; the gfx942 runs, which also spell the opcode global_load_lds_ubyte). |
| define amdgpu_ps void @global_load_lds_ubyte_vaddr_saddr(ptr addrspace(1) nocapture %gptr, ptr addrspace(3) nocapture inreg %lptr) { |
| ; GFX90A-LABEL: global_load_lds_ubyte_vaddr_saddr: |
| ; GFX90A: ; %bb.0: ; %main_body |
| ; GFX90A-NEXT: s_mov_b32 m0, s0 |
| ; GFX90A-NEXT: s_nop 0 |
| ; GFX90A-NEXT: global_load_ubyte v[0:1], off offset:16 glc lds |
| ; GFX90A-NEXT: s_endpgm |
| ; |
| ; GFX942-LABEL: global_load_lds_ubyte_vaddr_saddr: |
| ; GFX942: ; %bb.0: ; %main_body |
| ; GFX942-NEXT: s_mov_b32 m0, s0 |
| ; GFX942-NEXT: s_nop 0 |
| ; GFX942-NEXT: global_load_lds_ubyte v[0:1], off offset:16 sc0 |
| ; GFX942-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_load_lds_ubyte_vaddr_saddr: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: s_mov_b32 m0, s0 |
| ; GFX10-NEXT: global_load_ubyte v[0:1], off offset:16 glc lds |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX942-GISEL-LABEL: global_load_lds_ubyte_vaddr_saddr: |
| ; GFX942-GISEL: ; %bb.0: ; %main_body |
| ; GFX942-GISEL-NEXT: s_mov_b32 m0, s0 |
| ; GFX942-GISEL-NEXT: s_nop 0 |
| ; GFX942-GISEL-NEXT: global_load_lds_ubyte v[0:1], off offset:16 sc0 |
| ; GFX942-GISEL-NEXT: s_endpgm |
| main_body: |
| call void @llvm.amdgcn.load.to.lds.p1(ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 1, i32 16, i32 1) |
| ret void |
| } |
| |
| ;; 1-byte load-to-LDS from an addrspace(7) pointer (size = 1, offset = 16, aux = 1). |
| ;; Expected code: s4 is added to v0 (presumably the pointer's offset part plus |
| ;; %off), s5 is copied into m0, and a ubyte buffer load with the lds modifier |
| ;; is emitted using s[0:3] as the resource, offen, offset:16 and glc (sc0 in |
| ;; the gfx942 runs). |
| define amdgpu_ps void @buffer_load_lds_ubyte_vaddr_saddr(ptr addrspace(7) nocapture inreg %gptr, i32 %off, ptr addrspace(3) nocapture inreg %lptr) { |
| ; GFX90A-LABEL: buffer_load_lds_ubyte_vaddr_saddr: |
| ; GFX90A: ; %bb.0: ; %main_body |
| ; GFX90A-NEXT: v_add_u32_e32 v0, s4, v0 |
| ; GFX90A-NEXT: s_mov_b32 m0, s5 |
| ; GFX90A-NEXT: s_nop 0 |
| ; GFX90A-NEXT: buffer_load_ubyte v0, s[0:3], 0 offen offset:16 glc lds |
| ; GFX90A-NEXT: s_endpgm |
| ; |
| ; GFX942-LABEL: buffer_load_lds_ubyte_vaddr_saddr: |
| ; GFX942: ; %bb.0: ; %main_body |
| ; GFX942-NEXT: v_add_u32_e32 v0, s4, v0 |
| ; GFX942-NEXT: s_mov_b32 m0, s5 |
| ; GFX942-NEXT: s_nop 0 |
| ; GFX942-NEXT: buffer_load_ubyte v0, s[0:3], 0 offen offset:16 sc0 lds |
| ; GFX942-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: buffer_load_lds_ubyte_vaddr_saddr: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: v_add_nc_u32_e32 v0, s4, v0 |
| ; GFX10-NEXT: s_mov_b32 m0, s5 |
| ; GFX10-NEXT: buffer_load_ubyte v0, s[0:3], 0 offen offset:16 glc lds |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX942-GISEL-LABEL: buffer_load_lds_ubyte_vaddr_saddr: |
| ; GFX942-GISEL: ; %bb.0: ; %main_body |
| ; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s4, v0 |
| ; GFX942-GISEL-NEXT: s_mov_b32 m0, s5 |
| ; GFX942-GISEL-NEXT: s_nop 0 |
| ; GFX942-GISEL-NEXT: buffer_load_ubyte v0, s[0:3], 0 offen offset:16 sc0 lds |
| ; GFX942-GISEL-NEXT: s_endpgm |
| main_body: |
| ;; Advance the source pointer by %off bytes (i8 GEP) before the load. |
| %gptr.off = getelementptr i8, ptr addrspace(7) %gptr, i32 %off |
| call void @llvm.amdgcn.load.to.lds.p7(ptr addrspace(7) %gptr.off, ptr addrspace(3) %lptr, i32 1, i32 16, i32 1) |
| ret void |
| } |
| |