| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: llc -mtriple=amdgcn-- -mcpu=gfx90a -global-isel=0 < %s | FileCheck --check-prefixes=GFX90A %s |
| ; RUN: llc -mtriple=amdgcn-- -mcpu=gfx942 -global-isel=0 < %s | FileCheck --check-prefixes=GFX942 %s |
| ; RUN: llc -mtriple=amdgcn-- -mcpu=gfx942 -global-isel=1 < %s | FileCheck --check-prefixes=GFX942-GISEL %s |
| ; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1010 -global-isel=0 < %s | FileCheck --check-prefixes=GFX10 %s |
| |
| ; Two llvm.amdgcn.global.load.lds calls on the same address (%gptr + %off). |
| ; The first call passes 2147483648 (only bit 31 set) as its last operand; the |
| ; second passes 0 there and 512 as the immediate offset. |
| ; The checks expect cache-policy bits only on the first load (glc on GFX90A, |
| ; sc0 sc1 on GFX942, glc dlc on GFX10), followed directly by s_waitcnt vmcnt(0). |
| define amdgpu_ps void @global_load_lds_dword_volatile(ptr addrspace(1) inreg %gptr, i64 %off, ptr addrspace(3) inreg %lptr) { |
| ; GFX90A-LABEL: global_load_lds_dword_volatile: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: v_mov_b32_e32 v2, s1 |
| ; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, s0, v0 |
| ; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc |
| ; GFX90A-NEXT: s_mov_b32 m0, s2 |
| ; GFX90A-NEXT: s_nop 0 |
| ; GFX90A-NEXT: global_load_dword v[0:1], off glc lds |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_load_dword v[0:1], off offset:512 lds |
| ; GFX90A-NEXT: s_endpgm |
| ; |
| ; GFX942-LABEL: global_load_lds_dword_volatile: |
| ; GFX942: ; %bb.0: |
| ; GFX942-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1] |
| ; GFX942-NEXT: s_mov_b32 m0, s2 |
| ; GFX942-NEXT: s_nop 0 |
| ; GFX942-NEXT: global_load_lds_dword v[0:1], off sc0 sc1 |
| ; GFX942-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-NEXT: global_load_lds_dword v[0:1], off offset:512 |
| ; GFX942-NEXT: s_endpgm |
| ; |
| ; GFX942-GISEL-LABEL: global_load_lds_dword_volatile: |
| ; GFX942-GISEL: ; %bb.0: |
| ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1] |
| ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0 |
| ; GFX942-GISEL-NEXT: s_mov_b32 m0, s2 |
| ; GFX942-GISEL-NEXT: s_nop 0 |
| ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc |
| ; GFX942-GISEL-NEXT: global_load_lds_dword v[0:1], off sc0 sc1 |
| ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-GISEL-NEXT: global_load_lds_dword v[0:1], off offset:512 |
| ; GFX942-GISEL-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_load_lds_dword_volatile: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, s0, v0 |
| ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, s1, v1, vcc_lo |
| ; GFX10-NEXT: s_mov_b32 m0, s2 |
| ; GFX10-NEXT: global_load_dword v[0:1], off glc dlc lds |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_load_dword v[0:1], off offset:512 lds |
| ; GFX10-NEXT: s_endpgm |
| %gptr.off = getelementptr i8, ptr addrspace(1) %gptr, i64 %off |
| call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 0, i32 2147483648) |
| call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 512, i32 0) |
| ret void |
| } |
| |
| ; Two llvm.amdgcn.global.load.lds calls on the same address (%gptr + %off). |
| ; Only the first call carries the !nontemporal !0 attachment; both pass 0 as |
| ; their last operand, and the second uses an immediate offset of 512. |
| ; The checks expect glc slc (GFX90A), nt (GFX942) or slc (GFX10) on the first |
| ; load only, with no s_waitcnt between the two loads. |
| define amdgpu_ps void @global_load_lds_dword_nontemporal(ptr addrspace(1) inreg %gptr, i64 %off, ptr addrspace(3) inreg %lptr) { |
| ; GFX90A-LABEL: global_load_lds_dword_nontemporal: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: v_mov_b32_e32 v2, s1 |
| ; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, s0, v0 |
| ; GFX90A-NEXT: s_mov_b32 m0, s2 |
| ; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc |
| ; GFX90A-NEXT: global_load_dword v[0:1], off glc slc lds |
| ; GFX90A-NEXT: global_load_dword v[0:1], off offset:512 lds |
| ; GFX90A-NEXT: s_endpgm |
| ; |
| ; GFX942-LABEL: global_load_lds_dword_nontemporal: |
| ; GFX942: ; %bb.0: |
| ; GFX942-NEXT: s_mov_b32 m0, s2 |
| ; GFX942-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1] |
| ; GFX942-NEXT: global_load_lds_dword v[0:1], off nt |
| ; GFX942-NEXT: global_load_lds_dword v[0:1], off offset:512 |
| ; GFX942-NEXT: s_endpgm |
| ; |
| ; GFX942-GISEL-LABEL: global_load_lds_dword_nontemporal: |
| ; GFX942-GISEL: ; %bb.0: |
| ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1] |
| ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0 |
| ; GFX942-GISEL-NEXT: s_mov_b32 m0, s2 |
| ; GFX942-GISEL-NEXT: s_nop 0 |
| ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc |
| ; GFX942-GISEL-NEXT: global_load_lds_dword v[0:1], off nt |
| ; GFX942-GISEL-NEXT: global_load_lds_dword v[0:1], off offset:512 |
| ; GFX942-GISEL-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_load_lds_dword_nontemporal: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, s0, v0 |
| ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, s1, v1, vcc_lo |
| ; GFX10-NEXT: s_mov_b32 m0, s2 |
| ; GFX10-NEXT: global_load_dword v[0:1], off slc lds |
| ; GFX10-NEXT: global_load_dword v[0:1], off offset:512 lds |
| ; GFX10-NEXT: s_endpgm |
| %gptr.off = getelementptr i8, ptr addrspace(1) %gptr, i64 %off |
| call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 0, i32 0), !nontemporal !0 |
| call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 512, i32 0) |
| ret void |
| } |
| |
| ; Two llvm.amdgcn.global.load.lds calls on the same address (%gptr + %off). |
| ; The first call combines both markers: last operand 2147483648 (bit 31 set) |
| ; and the !nontemporal !0 attachment. The second call has neither. |
| ; The checks expect glc (GFX90A), sc0 sc1 (GFX942) or glc dlc (GFX10) on the |
| ; first load followed by s_waitcnt vmcnt(0); no slc or nt bit is expected. |
| define amdgpu_ps void @global_load_lds_dword_volatile_nontemporal(ptr addrspace(1) inreg %gptr, i64 %off, ptr addrspace(3) inreg %lptr) { |
| ; GFX90A-LABEL: global_load_lds_dword_volatile_nontemporal: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: v_mov_b32_e32 v2, s1 |
| ; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, s0, v0 |
| ; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc |
| ; GFX90A-NEXT: s_mov_b32 m0, s2 |
| ; GFX90A-NEXT: s_nop 0 |
| ; GFX90A-NEXT: global_load_dword v[0:1], off glc lds |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_load_dword v[0:1], off offset:512 lds |
| ; GFX90A-NEXT: s_endpgm |
| ; |
| ; GFX942-LABEL: global_load_lds_dword_volatile_nontemporal: |
| ; GFX942: ; %bb.0: |
| ; GFX942-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1] |
| ; GFX942-NEXT: s_mov_b32 m0, s2 |
| ; GFX942-NEXT: s_nop 0 |
| ; GFX942-NEXT: global_load_lds_dword v[0:1], off sc0 sc1 |
| ; GFX942-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-NEXT: global_load_lds_dword v[0:1], off offset:512 |
| ; GFX942-NEXT: s_endpgm |
| ; |
| ; GFX942-GISEL-LABEL: global_load_lds_dword_volatile_nontemporal: |
| ; GFX942-GISEL: ; %bb.0: |
| ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1] |
| ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0 |
| ; GFX942-GISEL-NEXT: s_mov_b32 m0, s2 |
| ; GFX942-GISEL-NEXT: s_nop 0 |
| ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc |
| ; GFX942-GISEL-NEXT: global_load_lds_dword v[0:1], off sc0 sc1 |
| ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-GISEL-NEXT: global_load_lds_dword v[0:1], off offset:512 |
| ; GFX942-GISEL-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: global_load_lds_dword_volatile_nontemporal: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, s0, v0 |
| ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, s1, v1, vcc_lo |
| ; GFX10-NEXT: s_mov_b32 m0, s2 |
| ; GFX10-NEXT: global_load_dword v[0:1], off glc dlc lds |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_load_dword v[0:1], off offset:512 lds |
| ; GFX10-NEXT: s_endpgm |
| %gptr.off = getelementptr i8, ptr addrspace(1) %gptr, i64 %off |
| call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 0, i32 2147483648), !nontemporal !0 |
| call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 512, i32 0) |
| ret void |
| } |
| |
| ; Two llvm.amdgcn.load.to.lds.p1 calls on the same addrspace(1) address |
| ; (%gptr + %off). The first passes 2147483648 (only bit 31 set) as its last |
| ; operand; the second passes 0 there and 512 as the immediate offset. |
| ; The checks expect global loads with glc (GFX90A), sc0 sc1 (GFX942) or |
| ; glc dlc (GFX10) on the first load, followed directly by s_waitcnt vmcnt(0). |
| define amdgpu_ps void @load_to_lds_p1_dword_volatile(ptr addrspace(1) inreg %gptr, i64 %off, ptr addrspace(3) inreg %lptr) { |
| ; GFX90A-LABEL: load_to_lds_p1_dword_volatile: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: v_mov_b32_e32 v2, s1 |
| ; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, s0, v0 |
| ; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc |
| ; GFX90A-NEXT: s_mov_b32 m0, s2 |
| ; GFX90A-NEXT: s_nop 0 |
| ; GFX90A-NEXT: global_load_dword v[0:1], off glc lds |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_load_dword v[0:1], off offset:512 lds |
| ; GFX90A-NEXT: s_endpgm |
| ; |
| ; GFX942-LABEL: load_to_lds_p1_dword_volatile: |
| ; GFX942: ; %bb.0: |
| ; GFX942-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1] |
| ; GFX942-NEXT: s_mov_b32 m0, s2 |
| ; GFX942-NEXT: s_nop 0 |
| ; GFX942-NEXT: global_load_lds_dword v[0:1], off sc0 sc1 |
| ; GFX942-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-NEXT: global_load_lds_dword v[0:1], off offset:512 |
| ; GFX942-NEXT: s_endpgm |
| ; |
| ; GFX942-GISEL-LABEL: load_to_lds_p1_dword_volatile: |
| ; GFX942-GISEL: ; %bb.0: |
| ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1] |
| ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0 |
| ; GFX942-GISEL-NEXT: s_mov_b32 m0, s2 |
| ; GFX942-GISEL-NEXT: s_nop 0 |
| ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc |
| ; GFX942-GISEL-NEXT: global_load_lds_dword v[0:1], off sc0 sc1 |
| ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-GISEL-NEXT: global_load_lds_dword v[0:1], off offset:512 |
| ; GFX942-GISEL-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: load_to_lds_p1_dword_volatile: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, s0, v0 |
| ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, s1, v1, vcc_lo |
| ; GFX10-NEXT: s_mov_b32 m0, s2 |
| ; GFX10-NEXT: global_load_dword v[0:1], off glc dlc lds |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_load_dword v[0:1], off offset:512 lds |
| ; GFX10-NEXT: s_endpgm |
| %gptr.off = getelementptr i8, ptr addrspace(1) %gptr, i64 %off |
| call void @llvm.amdgcn.load.to.lds.p1(ptr addrspace(1) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 0, i32 2147483648) |
| call void @llvm.amdgcn.load.to.lds.p1(ptr addrspace(1) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 512, i32 0) |
| ret void |
| } |
| |
| ; Two llvm.amdgcn.load.to.lds.p1 calls on the same addrspace(1) address |
| ; (%gptr + %off). Only the first call carries the !nontemporal !0 attachment; |
| ; both pass 0 as their last operand, and the second uses offset 512. |
| ; The checks expect glc slc (GFX90A), nt (GFX942) or slc (GFX10) on the first |
| ; load only, with no s_waitcnt between the two loads. |
| define amdgpu_ps void @load_to_lds_p1_dword_nontemporal(ptr addrspace(1) inreg %gptr, i64 %off, ptr addrspace(3) inreg %lptr) { |
| ; GFX90A-LABEL: load_to_lds_p1_dword_nontemporal: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: v_mov_b32_e32 v2, s1 |
| ; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, s0, v0 |
| ; GFX90A-NEXT: s_mov_b32 m0, s2 |
| ; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc |
| ; GFX90A-NEXT: global_load_dword v[0:1], off glc slc lds |
| ; GFX90A-NEXT: global_load_dword v[0:1], off offset:512 lds |
| ; GFX90A-NEXT: s_endpgm |
| ; |
| ; GFX942-LABEL: load_to_lds_p1_dword_nontemporal: |
| ; GFX942: ; %bb.0: |
| ; GFX942-NEXT: s_mov_b32 m0, s2 |
| ; GFX942-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1] |
| ; GFX942-NEXT: global_load_lds_dword v[0:1], off nt |
| ; GFX942-NEXT: global_load_lds_dword v[0:1], off offset:512 |
| ; GFX942-NEXT: s_endpgm |
| ; |
| ; GFX942-GISEL-LABEL: load_to_lds_p1_dword_nontemporal: |
| ; GFX942-GISEL: ; %bb.0: |
| ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1] |
| ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0 |
| ; GFX942-GISEL-NEXT: s_mov_b32 m0, s2 |
| ; GFX942-GISEL-NEXT: s_nop 0 |
| ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc |
| ; GFX942-GISEL-NEXT: global_load_lds_dword v[0:1], off nt |
| ; GFX942-GISEL-NEXT: global_load_lds_dword v[0:1], off offset:512 |
| ; GFX942-GISEL-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: load_to_lds_p1_dword_nontemporal: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, s0, v0 |
| ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, s1, v1, vcc_lo |
| ; GFX10-NEXT: s_mov_b32 m0, s2 |
| ; GFX10-NEXT: global_load_dword v[0:1], off slc lds |
| ; GFX10-NEXT: global_load_dword v[0:1], off offset:512 lds |
| ; GFX10-NEXT: s_endpgm |
| %gptr.off = getelementptr i8, ptr addrspace(1) %gptr, i64 %off |
| call void @llvm.amdgcn.load.to.lds.p1(ptr addrspace(1) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 0, i32 0), !nontemporal !0 |
| call void @llvm.amdgcn.load.to.lds.p1(ptr addrspace(1) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 512, i32 0) |
| ret void |
| } |
| |
| ; Two llvm.amdgcn.load.to.lds.p1 calls on the same addrspace(1) address |
| ; (%gptr + %off). The first call combines both markers: last operand |
| ; 2147483648 (bit 31 set) and the !nontemporal !0 attachment. |
| ; The checks expect glc (GFX90A), sc0 sc1 (GFX942) or glc dlc (GFX10) on the |
| ; first load followed by s_waitcnt vmcnt(0); no slc or nt bit is expected. |
| define amdgpu_ps void @load_to_lds_p1_dword_volatile_nontemporal(ptr addrspace(1) inreg %gptr, i64 %off, ptr addrspace(3) inreg %lptr) { |
| ; GFX90A-LABEL: load_to_lds_p1_dword_volatile_nontemporal: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: v_mov_b32_e32 v2, s1 |
| ; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, s0, v0 |
| ; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc |
| ; GFX90A-NEXT: s_mov_b32 m0, s2 |
| ; GFX90A-NEXT: s_nop 0 |
| ; GFX90A-NEXT: global_load_dword v[0:1], off glc lds |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: global_load_dword v[0:1], off offset:512 lds |
| ; GFX90A-NEXT: s_endpgm |
| ; |
| ; GFX942-LABEL: load_to_lds_p1_dword_volatile_nontemporal: |
| ; GFX942: ; %bb.0: |
| ; GFX942-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1] |
| ; GFX942-NEXT: s_mov_b32 m0, s2 |
| ; GFX942-NEXT: s_nop 0 |
| ; GFX942-NEXT: global_load_lds_dword v[0:1], off sc0 sc1 |
| ; GFX942-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-NEXT: global_load_lds_dword v[0:1], off offset:512 |
| ; GFX942-NEXT: s_endpgm |
| ; |
| ; GFX942-GISEL-LABEL: load_to_lds_p1_dword_volatile_nontemporal: |
| ; GFX942-GISEL: ; %bb.0: |
| ; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1] |
| ; GFX942-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0 |
| ; GFX942-GISEL-NEXT: s_mov_b32 m0, s2 |
| ; GFX942-GISEL-NEXT: s_nop 0 |
| ; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc |
| ; GFX942-GISEL-NEXT: global_load_lds_dword v[0:1], off sc0 sc1 |
| ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-GISEL-NEXT: global_load_lds_dword v[0:1], off offset:512 |
| ; GFX942-GISEL-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: load_to_lds_p1_dword_volatile_nontemporal: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, s0, v0 |
| ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, s1, v1, vcc_lo |
| ; GFX10-NEXT: s_mov_b32 m0, s2 |
| ; GFX10-NEXT: global_load_dword v[0:1], off glc dlc lds |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: global_load_dword v[0:1], off offset:512 lds |
| ; GFX10-NEXT: s_endpgm |
| %gptr.off = getelementptr i8, ptr addrspace(1) %gptr, i64 %off |
| call void @llvm.amdgcn.load.to.lds.p1(ptr addrspace(1) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 0, i32 2147483648), !nontemporal !0 |
| call void @llvm.amdgcn.load.to.lds.p1(ptr addrspace(1) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 512, i32 0) |
| ret void |
| } |
| |
| ; Two llvm.amdgcn.load.to.lds.p7 calls on the same addrspace(7) address |
| ; (%gptr + i32 %off). The first passes 2147483648 (only bit 31 set) as its |
| ; last operand; the second passes 0 there and 512 as the immediate offset. |
| ; The checks expect buffer_load_dword ... offen lds, with glc (GFX90A), |
| ; sc0 sc1 (GFX942) or glc dlc (GFX10) on the first load, then s_waitcnt vmcnt(0). |
| define amdgpu_ps void @load_to_lds_p7_dword_volatile(ptr addrspace(7) inreg %gptr, i32 %off, ptr addrspace(3) inreg %lptr) { |
| ; GFX90A-LABEL: load_to_lds_p7_dword_volatile: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: v_add_u32_e32 v0, s4, v0 |
| ; GFX90A-NEXT: s_mov_b32 m0, s5 |
| ; GFX90A-NEXT: s_nop 0 |
| ; GFX90A-NEXT: buffer_load_dword v0, s[0:3], 0 offen glc lds |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds |
| ; GFX90A-NEXT: s_endpgm |
| ; |
| ; GFX942-LABEL: load_to_lds_p7_dword_volatile: |
| ; GFX942: ; %bb.0: |
| ; GFX942-NEXT: v_add_u32_e32 v0, s4, v0 |
| ; GFX942-NEXT: s_mov_b32 m0, s5 |
| ; GFX942-NEXT: s_nop 0 |
| ; GFX942-NEXT: buffer_load_dword v0, s[0:3], 0 offen sc0 sc1 lds |
| ; GFX942-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds |
| ; GFX942-NEXT: s_endpgm |
| ; |
| ; GFX942-GISEL-LABEL: load_to_lds_p7_dword_volatile: |
| ; GFX942-GISEL: ; %bb.0: |
| ; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s4, v0 |
| ; GFX942-GISEL-NEXT: s_mov_b32 m0, s5 |
| ; GFX942-GISEL-NEXT: s_nop 0 |
| ; GFX942-GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 offen sc0 sc1 lds |
| ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds |
| ; GFX942-GISEL-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: load_to_lds_p7_dword_volatile: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: v_add_nc_u32_e32 v0, s4, v0 |
| ; GFX10-NEXT: s_mov_b32 m0, s5 |
| ; GFX10-NEXT: buffer_load_dword v0, s[0:3], 0 offen glc dlc lds |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds |
| ; GFX10-NEXT: s_endpgm |
| %gptr.off = getelementptr i8, ptr addrspace(7) %gptr, i32 %off |
| call void @llvm.amdgcn.load.to.lds.p7(ptr addrspace(7) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 0, i32 2147483648) |
| call void @llvm.amdgcn.load.to.lds.p7(ptr addrspace(7) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 512, i32 0) |
| ret void |
| } |
| |
| ; Two llvm.amdgcn.load.to.lds.p7 calls on the same addrspace(7) address |
| ; (%gptr + i32 %off). Only the first call carries the !nontemporal !0 |
| ; attachment; both pass 0 as their last operand, and the second uses offset 512. |
| ; The checks expect buffer_load_dword ... offen lds, with glc slc (GFX90A), |
| ; nt (GFX942) or slc (GFX10) on the first load only and no s_waitcnt between loads. |
| define amdgpu_ps void @load_to_lds_p7_dword_nontemporal(ptr addrspace(7) inreg %gptr, i32 %off, ptr addrspace(3) inreg %lptr) { |
| ; GFX90A-LABEL: load_to_lds_p7_dword_nontemporal: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: s_mov_b32 m0, s5 |
| ; GFX90A-NEXT: v_add_u32_e32 v0, s4, v0 |
| ; GFX90A-NEXT: buffer_load_dword v0, s[0:3], 0 offen glc slc lds |
| ; GFX90A-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds |
| ; GFX90A-NEXT: s_endpgm |
| ; |
| ; GFX942-LABEL: load_to_lds_p7_dword_nontemporal: |
| ; GFX942: ; %bb.0: |
| ; GFX942-NEXT: s_mov_b32 m0, s5 |
| ; GFX942-NEXT: v_add_u32_e32 v0, s4, v0 |
| ; GFX942-NEXT: buffer_load_dword v0, s[0:3], 0 offen nt lds |
| ; GFX942-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds |
| ; GFX942-NEXT: s_endpgm |
| ; |
| ; GFX942-GISEL-LABEL: load_to_lds_p7_dword_nontemporal: |
| ; GFX942-GISEL: ; %bb.0: |
| ; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s4, v0 |
| ; GFX942-GISEL-NEXT: s_mov_b32 m0, s5 |
| ; GFX942-GISEL-NEXT: s_nop 0 |
| ; GFX942-GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 offen nt lds |
| ; GFX942-GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds |
| ; GFX942-GISEL-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: load_to_lds_p7_dword_nontemporal: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: v_add_nc_u32_e32 v0, s4, v0 |
| ; GFX10-NEXT: s_mov_b32 m0, s5 |
| ; GFX10-NEXT: buffer_load_dword v0, s[0:3], 0 offen slc lds |
| ; GFX10-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds |
| ; GFX10-NEXT: s_endpgm |
| %gptr.off = getelementptr i8, ptr addrspace(7) %gptr, i32 %off |
| call void @llvm.amdgcn.load.to.lds.p7(ptr addrspace(7) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 0, i32 0), !nontemporal !0 |
| call void @llvm.amdgcn.load.to.lds.p7(ptr addrspace(7) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 512, i32 0) |
| ret void |
| } |
| |
| ; Two llvm.amdgcn.load.to.lds.p7 calls on the same addrspace(7) address |
| ; (%gptr + i32 %off). The first call combines both markers: last operand |
| ; 2147483648 (bit 31 set) and the !nontemporal !0 attachment. |
| ; The checks expect glc (GFX90A), sc0 sc1 (GFX942) or glc dlc (GFX10) on the |
| ; first buffer load followed by s_waitcnt vmcnt(0); no slc or nt bit is expected. |
| define amdgpu_ps void @load_to_lds_p7_dword_volatile_nontemporal(ptr addrspace(7) inreg %gptr, i32 %off, ptr addrspace(3) inreg %lptr) { |
| ; GFX90A-LABEL: load_to_lds_p7_dword_volatile_nontemporal: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: v_add_u32_e32 v0, s4, v0 |
| ; GFX90A-NEXT: s_mov_b32 m0, s5 |
| ; GFX90A-NEXT: s_nop 0 |
| ; GFX90A-NEXT: buffer_load_dword v0, s[0:3], 0 offen glc lds |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds |
| ; GFX90A-NEXT: s_endpgm |
| ; |
| ; GFX942-LABEL: load_to_lds_p7_dword_volatile_nontemporal: |
| ; GFX942: ; %bb.0: |
| ; GFX942-NEXT: v_add_u32_e32 v0, s4, v0 |
| ; GFX942-NEXT: s_mov_b32 m0, s5 |
| ; GFX942-NEXT: s_nop 0 |
| ; GFX942-NEXT: buffer_load_dword v0, s[0:3], 0 offen sc0 sc1 lds |
| ; GFX942-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds |
| ; GFX942-NEXT: s_endpgm |
| ; |
| ; GFX942-GISEL-LABEL: load_to_lds_p7_dword_volatile_nontemporal: |
| ; GFX942-GISEL: ; %bb.0: |
| ; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s4, v0 |
| ; GFX942-GISEL-NEXT: s_mov_b32 m0, s5 |
| ; GFX942-GISEL-NEXT: s_nop 0 |
| ; GFX942-GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 offen sc0 sc1 lds |
| ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds |
| ; GFX942-GISEL-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: load_to_lds_p7_dword_volatile_nontemporal: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: v_add_nc_u32_e32 v0, s4, v0 |
| ; GFX10-NEXT: s_mov_b32 m0, s5 |
| ; GFX10-NEXT: buffer_load_dword v0, s[0:3], 0 offen glc dlc lds |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds |
| ; GFX10-NEXT: s_endpgm |
| %gptr.off = getelementptr i8, ptr addrspace(7) %gptr, i32 %off |
| call void @llvm.amdgcn.load.to.lds.p7(ptr addrspace(7) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 0, i32 2147483648), !nontemporal !0 |
| call void @llvm.amdgcn.load.to.lds.p7(ptr addrspace(7) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 512, i32 0) |
| ret void |
| } |
| |
| ; Two llvm.amdgcn.raw.ptr.buffer.load.lds calls on the same addrspace(8) |
| ; resource, with %off passed directly as an operand (no getelementptr). |
| ; The first passes 2147483648 (only bit 31 set) as its last operand; the |
| ; second passes 0 there and 512 as the operand before it. |
| ; The checks expect buffer_load_dword ... offen lds, with glc (GFX90A), |
| ; sc0 sc1 (GFX942) or glc dlc (GFX10) on the first load, then s_waitcnt vmcnt(0). |
| define amdgpu_ps void @raw_ptr_buffer_load_lds_dword_volatile(ptr addrspace(8) inreg %gptr, i32 %off, ptr addrspace(3) inreg %lptr) { |
| ; GFX90A-LABEL: raw_ptr_buffer_load_lds_dword_volatile: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: s_mov_b32 m0, s4 |
| ; GFX90A-NEXT: s_nop 0 |
| ; GFX90A-NEXT: buffer_load_dword v0, s[0:3], 0 offen glc lds |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds |
| ; GFX90A-NEXT: s_endpgm |
| ; |
| ; GFX942-LABEL: raw_ptr_buffer_load_lds_dword_volatile: |
| ; GFX942: ; %bb.0: |
| ; GFX942-NEXT: s_mov_b32 m0, s4 |
| ; GFX942-NEXT: s_nop 0 |
| ; GFX942-NEXT: buffer_load_dword v0, s[0:3], 0 offen sc0 sc1 lds |
| ; GFX942-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds |
| ; GFX942-NEXT: s_endpgm |
| ; |
| ; GFX942-GISEL-LABEL: raw_ptr_buffer_load_lds_dword_volatile: |
| ; GFX942-GISEL: ; %bb.0: |
| ; GFX942-GISEL-NEXT: s_mov_b32 m0, s4 |
| ; GFX942-GISEL-NEXT: s_nop 0 |
| ; GFX942-GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 offen sc0 sc1 lds |
| ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds |
| ; GFX942-GISEL-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: raw_ptr_buffer_load_lds_dword_volatile: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_mov_b32 m0, s4 |
| ; GFX10-NEXT: buffer_load_dword v0, s[0:3], 0 offen glc dlc lds |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds |
| ; GFX10-NEXT: s_endpgm |
| call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %gptr, ptr addrspace(3) %lptr, i32 4, i32 %off, i32 0, i32 0, i32 2147483648) |
| call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %gptr, ptr addrspace(3) %lptr, i32 4, i32 %off, i32 0, i32 512, i32 0) |
| ret void |
| } |
| |
| ; Two llvm.amdgcn.raw.ptr.buffer.load.lds calls on the same addrspace(8) |
| ; resource, with %off passed directly as an operand (no getelementptr). |
| ; Only the first call carries the !nontemporal !0 attachment; both pass 0 as |
| ; their last operand, and the second passes 512 as the operand before it. |
| ; The checks expect glc slc (GFX90A), nt (GFX942) or slc (GFX10) on the first |
| ; buffer load only, with no s_waitcnt between the two loads. |
| define amdgpu_ps void @raw_ptr_buffer_load_lds_dword_nontemporal(ptr addrspace(8) inreg %gptr, i32 %off, ptr addrspace(3) inreg %lptr) { |
| ; GFX90A-LABEL: raw_ptr_buffer_load_lds_dword_nontemporal: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: s_mov_b32 m0, s4 |
| ; GFX90A-NEXT: s_nop 0 |
| ; GFX90A-NEXT: buffer_load_dword v0, s[0:3], 0 offen glc slc lds |
| ; GFX90A-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds |
| ; GFX90A-NEXT: s_endpgm |
| ; |
| ; GFX942-LABEL: raw_ptr_buffer_load_lds_dword_nontemporal: |
| ; GFX942: ; %bb.0: |
| ; GFX942-NEXT: s_mov_b32 m0, s4 |
| ; GFX942-NEXT: s_nop 0 |
| ; GFX942-NEXT: buffer_load_dword v0, s[0:3], 0 offen nt lds |
| ; GFX942-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds |
| ; GFX942-NEXT: s_endpgm |
| ; |
| ; GFX942-GISEL-LABEL: raw_ptr_buffer_load_lds_dword_nontemporal: |
| ; GFX942-GISEL: ; %bb.0: |
| ; GFX942-GISEL-NEXT: s_mov_b32 m0, s4 |
| ; GFX942-GISEL-NEXT: s_nop 0 |
| ; GFX942-GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 offen nt lds |
| ; GFX942-GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds |
| ; GFX942-GISEL-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: raw_ptr_buffer_load_lds_dword_nontemporal: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_mov_b32 m0, s4 |
| ; GFX10-NEXT: buffer_load_dword v0, s[0:3], 0 offen slc lds |
| ; GFX10-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds |
| ; GFX10-NEXT: s_endpgm |
| call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %gptr, ptr addrspace(3) %lptr, i32 4, i32 %off, i32 0, i32 0, i32 0), !nontemporal !0 |
| call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %gptr, ptr addrspace(3) %lptr, i32 4, i32 %off, i32 0, i32 512, i32 0) |
| ret void |
| } |
| |
| ; Two llvm.amdgcn.raw.ptr.buffer.load.lds calls on the same addrspace(8) |
| ; resource, with %off passed directly as an operand (no getelementptr). |
| ; The first call combines both markers: last operand 2147483648 (bit 31 set) |
| ; and the !nontemporal !0 attachment. The second call has neither. |
| ; The checks expect glc (GFX90A), sc0 sc1 (GFX942) or glc dlc (GFX10) on the |
| ; first buffer load followed by s_waitcnt vmcnt(0); no slc or nt bit is expected. |
| define amdgpu_ps void @raw_ptr_buffer_load_lds_dword_volatile_nontemporal(ptr addrspace(8) inreg %gptr, i32 %off, ptr addrspace(3) inreg %lptr) { |
| ; GFX90A-LABEL: raw_ptr_buffer_load_lds_dword_volatile_nontemporal: |
| ; GFX90A: ; %bb.0: |
| ; GFX90A-NEXT: s_mov_b32 m0, s4 |
| ; GFX90A-NEXT: s_nop 0 |
| ; GFX90A-NEXT: buffer_load_dword v0, s[0:3], 0 offen glc lds |
| ; GFX90A-NEXT: s_waitcnt vmcnt(0) |
| ; GFX90A-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds |
| ; GFX90A-NEXT: s_endpgm |
| ; |
| ; GFX942-LABEL: raw_ptr_buffer_load_lds_dword_volatile_nontemporal: |
| ; GFX942: ; %bb.0: |
| ; GFX942-NEXT: s_mov_b32 m0, s4 |
| ; GFX942-NEXT: s_nop 0 |
| ; GFX942-NEXT: buffer_load_dword v0, s[0:3], 0 offen sc0 sc1 lds |
| ; GFX942-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds |
| ; GFX942-NEXT: s_endpgm |
| ; |
| ; GFX942-GISEL-LABEL: raw_ptr_buffer_load_lds_dword_volatile_nontemporal: |
| ; GFX942-GISEL: ; %bb.0: |
| ; GFX942-GISEL-NEXT: s_mov_b32 m0, s4 |
| ; GFX942-GISEL-NEXT: s_nop 0 |
| ; GFX942-GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 offen sc0 sc1 lds |
| ; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0) |
| ; GFX942-GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds |
| ; GFX942-GISEL-NEXT: s_endpgm |
| ; |
| ; GFX10-LABEL: raw_ptr_buffer_load_lds_dword_volatile_nontemporal: |
| ; GFX10: ; %bb.0: |
| ; GFX10-NEXT: s_mov_b32 m0, s4 |
| ; GFX10-NEXT: buffer_load_dword v0, s[0:3], 0 offen glc dlc lds |
| ; GFX10-NEXT: s_waitcnt vmcnt(0) |
| ; GFX10-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds |
| ; GFX10-NEXT: s_endpgm |
| call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %gptr, ptr addrspace(3) %lptr, i32 4, i32 %off, i32 0, i32 0, i32 2147483648), !nontemporal !0 |
| call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %gptr, ptr addrspace(3) %lptr, i32 4, i32 %off, i32 0, i32 512, i32 0) |
| ret void |
| } |
| |
| ; Metadata node referenced by the !nontemporal attachments on the calls above. |
| !0 = !{i32 1} |