| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX900 |
| ; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX90A |
| ; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX942 |
| ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX10 |
| ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX900-GISEL |
| |
| ; Intrinsic exercised by every function in this file. Operands, in order: an |
| ; addrspace(1) pointer (%gptr), an addrspace(3) pointer (%lptr), and three i32 |
| ; values named %size, %offset and %aux. All calls below pass literal constants |
| ; for the three i32 operands. |
| declare void @llvm.amdgcn.global.load.lds(ptr addrspace(1) nocapture %gptr, ptr addrspace(3) nocapture %lptr, i32 %size, i32 %offset, i32 %aux) |
| |
| ; Dword load with %gptr passed as an ordinary (non-inreg) argument. |
| ; The call uses size 4, offset 16 and aux 1. The expected output addresses the |
| ; load through the register pair v[0:1] with "off" in the scalar-base slot and |
| ; offset:16. Aux 1 is printed as "glc" for gfx900, gfx90a and gfx1010, and as |
| ; "sc0" for gfx942, where the mnemonic is global_load_lds_dword instead of |
| ; global_load_dword with a trailing "lds". In every run v2 is copied into m0 |
| ; via v_readfirstlane_b32 + s_mov_b32 (presumably v2 holds %lptr -- confirm |
| ; against the calling convention). The gfx1010 checks have no "s_nop 0" |
| ; between the m0 write and the load. |
| define amdgpu_ps void @global_load_lds_dword_vaddr(ptr addrspace(1) nocapture %gptr, ptr addrspace(3) nocapture %lptr) { |
| ; GFX900-LABEL: global_load_lds_dword_vaddr: |
| ; GFX900: ; %bb.0: ; %main_body |
| ; GFX900-NEXT: v_readfirstlane_b32 s0, v2 |
| ; GFX900-NEXT: s_mov_b32 m0, s0 |
| ; GFX900-NEXT: s_nop 0 |
| ; GFX900-NEXT: global_load_dword v[0:1], off offset:16 glc lds |
| ; GFX900-NEXT: s_endpgm |
| ; |
| ; GFX90A-LABEL: global_load_lds_dword_vaddr: |
| ; GFX90A: ; %bb.0: ; %main_body |
| ; GFX90A-NEXT: v_readfirstlane_b32 s0, v2 |
| ; GFX90A-NEXT: s_mov_b32 m0, s0 |
| ; GFX90A-NEXT: s_nop 0 |
| ; GFX90A-NEXT: global_load_dword v[0:1], off offset:16 glc lds |
| ; GFX90A-NEXT: s_endpgm |
| ; |
| ; GFX942-LABEL: global_load_lds_dword_vaddr: |
| ; GFX942: ; %bb.0: ; %main_body |
| ; GFX942-NEXT: v_readfirstlane_b32 s0, v2 |
| ; GFX942-NEXT: s_mov_b32 m0, s0 |
| ; GFX942-NEXT: s_nop 0 |
| ; GFX942-NEXT: global_load_lds_dword v[0:1], off offset:16 sc0 |
| ; GFX942-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_load_lds_dword_vaddr: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: v_readfirstlane_b32 s0, v2 |
| ; GFX10-NEXT: s_mov_b32 m0, s0 |
| ; GFX10-NEXT: global_load_dword v[0:1], off offset:16 glc lds |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX900-GISEL-LABEL: global_load_lds_dword_vaddr: |
| ; GFX900-GISEL: ; %bb.0: ; %main_body |
| ; GFX900-GISEL-NEXT: v_readfirstlane_b32 s0, v2 |
| ; GFX900-GISEL-NEXT: s_mov_b32 m0, s0 |
| ; GFX900-GISEL-NEXT: s_nop 0 |
| ; GFX900-GISEL-NEXT: global_load_dword v[0:1], off offset:16 glc lds |
| ; GFX900-GISEL-NEXT: s_endpgm |
| main_body: |
| call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 4, i32 16, i32 1) |
| ret void |
| } |
| |
| ; Dword load with %gptr marked inreg. |
| ; The call uses size 4, offset 32 and aux 2. The expected output uses s[0:1] |
| ; as the base together with a VGPR that is first set to 0 by v_mov_b32, plus |
| ; offset:32. Aux 2 is printed as "slc" for gfx900, gfx90a and gfx1010, and as |
| ; "nt" for gfx942. The zeroed VGPR differs between runs: v1 for the default |
| ; selector on gfx900/gfx90a/gfx942, v0 for gfx1010 and for the -global-isel |
| ; gfx900 run. |
| define amdgpu_ps void @global_load_lds_dword_saddr(ptr addrspace(1) nocapture inreg %gptr, ptr addrspace(3) nocapture %lptr) { |
| ; GFX900-LABEL: global_load_lds_dword_saddr: |
| ; GFX900: ; %bb.0: ; %main_body |
| ; GFX900-NEXT: v_readfirstlane_b32 s2, v0 |
| ; GFX900-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX900-NEXT: s_mov_b32 m0, s2 |
| ; GFX900-NEXT: s_nop 0 |
| ; GFX900-NEXT: global_load_dword v1, s[0:1] offset:32 slc lds |
| ; GFX900-NEXT: s_endpgm |
| ; |
| ; GFX90A-LABEL: global_load_lds_dword_saddr: |
| ; GFX90A: ; %bb.0: ; %main_body |
| ; GFX90A-NEXT: v_readfirstlane_b32 s2, v0 |
| ; GFX90A-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX90A-NEXT: s_mov_b32 m0, s2 |
| ; GFX90A-NEXT: s_nop 0 |
| ; GFX90A-NEXT: global_load_dword v1, s[0:1] offset:32 slc lds |
| ; GFX90A-NEXT: s_endpgm |
| ; |
| ; GFX942-LABEL: global_load_lds_dword_saddr: |
| ; GFX942: ; %bb.0: ; %main_body |
| ; GFX942-NEXT: v_readfirstlane_b32 s2, v0 |
| ; GFX942-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX942-NEXT: s_mov_b32 m0, s2 |
| ; GFX942-NEXT: s_nop 0 |
| ; GFX942-NEXT: global_load_lds_dword v1, s[0:1] offset:32 nt |
| ; GFX942-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_load_lds_dword_saddr: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: v_readfirstlane_b32 s2, v0 |
| ; GFX10-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX10-NEXT: s_mov_b32 m0, s2 |
| ; GFX10-NEXT: global_load_dword v0, s[0:1] offset:32 slc lds |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX900-GISEL-LABEL: global_load_lds_dword_saddr: |
| ; GFX900-GISEL: ; %bb.0: ; %main_body |
| ; GFX900-GISEL-NEXT: v_readfirstlane_b32 s2, v0 |
| ; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0 |
| ; GFX900-GISEL-NEXT: s_mov_b32 m0, s2 |
| ; GFX900-GISEL-NEXT: s_nop 0 |
| ; GFX900-GISEL-NEXT: global_load_dword v0, s[0:1] offset:32 slc lds |
| ; GFX900-GISEL-NEXT: s_endpgm |
| main_body: |
| call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 4, i32 32, i32 2) |
| ret void |
| } |
| |
| ; Dword load whose address is an inreg %gptr plus a zero-extended i32 |
| ; %voffset. The call uses size 4, offset 48 and aux 16. The expected output |
| ; folds the address into a single "v1, s[0:1] offset:48" operand group with no |
| ; separate add and no zeroing v_mov_b32 (v1 presumably carries %voffset -- |
| ; confirm against the calling convention). Aux 16 is printed as "scc" for |
| ; gfx90a and "sc1" for gfx942; the gfx900 and gfx1010 checks show no cache |
| ; modifier for it. |
| define amdgpu_ps void @global_load_lds_dword_saddr_and_vaddr(ptr addrspace(1) nocapture inreg %gptr, ptr addrspace(3) nocapture %lptr, i32 %voffset) { |
| ; GFX900-LABEL: global_load_lds_dword_saddr_and_vaddr: |
| ; GFX900: ; %bb.0: ; %main_body |
| ; GFX900-NEXT: v_readfirstlane_b32 s2, v0 |
| ; GFX900-NEXT: s_mov_b32 m0, s2 |
| ; GFX900-NEXT: s_nop 0 |
| ; GFX900-NEXT: global_load_dword v1, s[0:1] offset:48 lds |
| ; GFX900-NEXT: s_endpgm |
| ; |
| ; GFX90A-LABEL: global_load_lds_dword_saddr_and_vaddr: |
| ; GFX90A: ; %bb.0: ; %main_body |
| ; GFX90A-NEXT: v_readfirstlane_b32 s2, v0 |
| ; GFX90A-NEXT: s_mov_b32 m0, s2 |
| ; GFX90A-NEXT: s_nop 0 |
| ; GFX90A-NEXT: global_load_dword v1, s[0:1] offset:48 scc lds |
| ; GFX90A-NEXT: s_endpgm |
| ; |
| ; GFX942-LABEL: global_load_lds_dword_saddr_and_vaddr: |
| ; GFX942: ; %bb.0: ; %main_body |
| ; GFX942-NEXT: v_readfirstlane_b32 s2, v0 |
| ; GFX942-NEXT: s_mov_b32 m0, s2 |
| ; GFX942-NEXT: s_nop 0 |
| ; GFX942-NEXT: global_load_lds_dword v1, s[0:1] offset:48 sc1 |
| ; GFX942-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_load_lds_dword_saddr_and_vaddr: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: v_readfirstlane_b32 s2, v0 |
| ; GFX10-NEXT: s_mov_b32 m0, s2 |
| ; GFX10-NEXT: global_load_dword v1, s[0:1] offset:48 lds |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX900-GISEL-LABEL: global_load_lds_dword_saddr_and_vaddr: |
| ; GFX900-GISEL: ; %bb.0: ; %main_body |
| ; GFX900-GISEL-NEXT: v_readfirstlane_b32 s2, v0 |
| ; GFX900-GISEL-NEXT: s_mov_b32 m0, s2 |
| ; GFX900-GISEL-NEXT: s_nop 0 |
| ; GFX900-GISEL-NEXT: global_load_dword v1, s[0:1] offset:48 lds |
| ; GFX900-GISEL-NEXT: s_endpgm |
| main_body: |
| ; Widen the 32-bit offset with zext, then index the inreg base pointer by it |
| ; in bytes (i8 GEP). |
| %voffset.64 = zext i32 %voffset to i64 |
| %gep = getelementptr i8, ptr addrspace(1) %gptr, i64 %voffset.64 |
| call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gep, ptr addrspace(3) %lptr, i32 4, i32 48, i32 16) |
| ret void |
| } |
| |
| ; 2-byte load with %gptr passed as an ordinary (non-inreg) argument. |
| ; The call uses size 2, offset 0 and aux 4. The expected output is a ushort |
| ; load (global_load_ushort ... lds, or global_load_lds_ushort on gfx942) |
| ; addressed through v[0:1] with "off" and no offset field. Aux 4 is printed as |
| ; "dlc" only in the gfx1010 checks; the other runs show no cache modifier. |
| define amdgpu_ps void @global_load_lds_ushort_vaddr(ptr addrspace(1) nocapture %gptr, ptr addrspace(3) nocapture %lptr) { |
| ; GFX900-LABEL: global_load_lds_ushort_vaddr: |
| ; GFX900: ; %bb.0: ; %main_body |
| ; GFX900-NEXT: v_readfirstlane_b32 s0, v2 |
| ; GFX900-NEXT: s_mov_b32 m0, s0 |
| ; GFX900-NEXT: s_nop 0 |
| ; GFX900-NEXT: global_load_ushort v[0:1], off lds |
| ; GFX900-NEXT: s_endpgm |
| ; |
| ; GFX90A-LABEL: global_load_lds_ushort_vaddr: |
| ; GFX90A: ; %bb.0: ; %main_body |
| ; GFX90A-NEXT: v_readfirstlane_b32 s0, v2 |
| ; GFX90A-NEXT: s_mov_b32 m0, s0 |
| ; GFX90A-NEXT: s_nop 0 |
| ; GFX90A-NEXT: global_load_ushort v[0:1], off lds |
| ; GFX90A-NEXT: s_endpgm |
| ; |
| ; GFX942-LABEL: global_load_lds_ushort_vaddr: |
| ; GFX942: ; %bb.0: ; %main_body |
| ; GFX942-NEXT: v_readfirstlane_b32 s0, v2 |
| ; GFX942-NEXT: s_mov_b32 m0, s0 |
| ; GFX942-NEXT: s_nop 0 |
| ; GFX942-NEXT: global_load_lds_ushort v[0:1], off |
| ; GFX942-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_load_lds_ushort_vaddr: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: v_readfirstlane_b32 s0, v2 |
| ; GFX10-NEXT: s_mov_b32 m0, s0 |
| ; GFX10-NEXT: global_load_ushort v[0:1], off dlc lds |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX900-GISEL-LABEL: global_load_lds_ushort_vaddr: |
| ; GFX900-GISEL: ; %bb.0: ; %main_body |
| ; GFX900-GISEL-NEXT: v_readfirstlane_b32 s0, v2 |
| ; GFX900-GISEL-NEXT: s_mov_b32 m0, s0 |
| ; GFX900-GISEL-NEXT: s_nop 0 |
| ; GFX900-GISEL-NEXT: global_load_ushort v[0:1], off lds |
| ; GFX900-GISEL-NEXT: s_endpgm |
| main_body: |
| call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 2, i32 0, i32 4) |
| ret void |
| } |
| |
| ; 1-byte load with %gptr passed as an ordinary (non-inreg) argument. |
| ; The call uses size 1, offset 0 and aux 0. The expected output is a ubyte |
| ; load (global_load_ubyte ... lds, or global_load_lds_ubyte on gfx942) |
| ; addressed through v[0:1] with "off", with no offset field and no cache |
| ; modifier in any run. |
| define amdgpu_ps void @global_load_lds_ubyte_vaddr(ptr addrspace(1) nocapture %gptr, ptr addrspace(3) nocapture %lptr) { |
| ; GFX900-LABEL: global_load_lds_ubyte_vaddr: |
| ; GFX900: ; %bb.0: ; %main_body |
| ; GFX900-NEXT: v_readfirstlane_b32 s0, v2 |
| ; GFX900-NEXT: s_mov_b32 m0, s0 |
| ; GFX900-NEXT: s_nop 0 |
| ; GFX900-NEXT: global_load_ubyte v[0:1], off lds |
| ; GFX900-NEXT: s_endpgm |
| ; |
| ; GFX90A-LABEL: global_load_lds_ubyte_vaddr: |
| ; GFX90A: ; %bb.0: ; %main_body |
| ; GFX90A-NEXT: v_readfirstlane_b32 s0, v2 |
| ; GFX90A-NEXT: s_mov_b32 m0, s0 |
| ; GFX90A-NEXT: s_nop 0 |
| ; GFX90A-NEXT: global_load_ubyte v[0:1], off lds |
| ; GFX90A-NEXT: s_endpgm |
| ; |
| ; GFX942-LABEL: global_load_lds_ubyte_vaddr: |
| ; GFX942: ; %bb.0: ; %main_body |
| ; GFX942-NEXT: v_readfirstlane_b32 s0, v2 |
| ; GFX942-NEXT: s_mov_b32 m0, s0 |
| ; GFX942-NEXT: s_nop 0 |
| ; GFX942-NEXT: global_load_lds_ubyte v[0:1], off |
| ; GFX942-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_load_lds_ubyte_vaddr: |
| ; GFX10: ; %bb.0: ; %main_body |
| ; GFX10-NEXT: v_readfirstlane_b32 s0, v2 |
| ; GFX10-NEXT: s_mov_b32 m0, s0 |
| ; GFX10-NEXT: global_load_ubyte v[0:1], off lds |
| ; GFX10-NEXT: s_endpgm |
| ; |
| ; GFX900-GISEL-LABEL: global_load_lds_ubyte_vaddr: |
| ; GFX900-GISEL: ; %bb.0: ; %main_body |
| ; GFX900-GISEL-NEXT: v_readfirstlane_b32 s0, v2 |
| ; GFX900-GISEL-NEXT: s_mov_b32 m0, s0 |
| ; GFX900-GISEL-NEXT: s_nop 0 |
| ; GFX900-GISEL-NEXT: global_load_ubyte v[0:1], off lds |
| ; GFX900-GISEL-NEXT: s_endpgm |
| main_body: |
| call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 1, i32 0, i32 0) |
| ret void |
| } |