; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx90a -global-isel=0 < %s | FileCheck --check-prefixes=GFX90A %s
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx942 -global-isel=0 < %s | FileCheck --check-prefixes=GFX942 %s
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx942 -global-isel=1 < %s | FileCheck --check-prefixes=GFX942-GISEL %s
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1010 -global-isel=0 < %s | FileCheck --check-prefixes=GFX10 %s
; Two back-to-back llvm.amdgcn.global.load.lds calls (size operand 4) reading
; from %gptr + %off into LDS pointer %lptr. The first call passes
; 2147483648 (0x80000000, bit 31 only) as its last operand; the second passes
; immediate offset 512 and a last operand of 0.
; Expected output: the first load carries cache-policy bits (glc on gfx90a,
; sc0 sc1 on gfx942, glc dlc on gfx1010) and is followed by s_waitcnt vmcnt(0);
; the second load carries only offset:512.
; NOTE(review): bit 31 of the last operand is presumably the "volatile" flag,
; going by the test name - confirm against the intrinsic definition.
define amdgpu_ps void @global_load_lds_dword_volatile(ptr addrspace(1) inreg %gptr, i64 %off, ptr addrspace(3) inreg %lptr) {
; GFX90A-LABEL: global_load_lds_dword_volatile:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: v_mov_b32_e32 v2, s1
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, s0, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX90A-NEXT: s_mov_b32 m0, s2
; GFX90A-NEXT: s_nop 0
; GFX90A-NEXT: global_load_dword v[0:1], off glc lds
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_load_dword v[0:1], off offset:512 lds
; GFX90A-NEXT: s_endpgm
;
; GFX942-LABEL: global_load_lds_dword_volatile:
; GFX942: ; %bb.0:
; GFX942-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
; GFX942-NEXT: s_mov_b32 m0, s2
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: global_load_lds_dword v[0:1], off sc0 sc1
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: global_load_lds_dword v[0:1], off offset:512
; GFX942-NEXT: s_endpgm
;
; GFX942-GISEL-LABEL: global_load_lds_dword_volatile:
; GFX942-GISEL: ; %bb.0:
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
; GFX942-GISEL-NEXT: s_mov_b32 m0, s2
; GFX942-GISEL-NEXT: s_nop 0
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
; GFX942-GISEL-NEXT: global_load_lds_dword v[0:1], off sc0 sc1
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-GISEL-NEXT: global_load_lds_dword v[0:1], off offset:512
; GFX942-GISEL-NEXT: s_endpgm
;
; GFX10-LABEL: global_load_lds_dword_volatile:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, s0, v0
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, s1, v1, vcc_lo
; GFX10-NEXT: s_mov_b32 m0, s2
; GFX10-NEXT: global_load_dword v[0:1], off glc dlc lds
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_load_dword v[0:1], off offset:512 lds
; GFX10-NEXT: s_endpgm
%gptr.off = getelementptr i8, ptr addrspace(1) %gptr, i64 %off
call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 0, i32 2147483648)
call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 512, i32 0)
ret void
}
; Two back-to-back llvm.amdgcn.global.load.lds calls (size operand 4) reading
; from %gptr + %off into LDS pointer %lptr. The first call has a last operand
; of 0 but carries !nontemporal metadata; the second passes immediate offset
; 512 with no metadata.
; Expected output: the first load carries glc slc on gfx90a, nt on gfx942 and
; slc on gfx1010; no s_waitcnt is expected between the two loads.
define amdgpu_ps void @global_load_lds_dword_nontemporal(ptr addrspace(1) inreg %gptr, i64 %off, ptr addrspace(3) inreg %lptr) {
; GFX90A-LABEL: global_load_lds_dword_nontemporal:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: v_mov_b32_e32 v2, s1
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, s0, v0
; GFX90A-NEXT: s_mov_b32 m0, s2
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX90A-NEXT: global_load_dword v[0:1], off glc slc lds
; GFX90A-NEXT: global_load_dword v[0:1], off offset:512 lds
; GFX90A-NEXT: s_endpgm
;
; GFX942-LABEL: global_load_lds_dword_nontemporal:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_mov_b32 m0, s2
; GFX942-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
; GFX942-NEXT: global_load_lds_dword v[0:1], off nt
; GFX942-NEXT: global_load_lds_dword v[0:1], off offset:512
; GFX942-NEXT: s_endpgm
;
; GFX942-GISEL-LABEL: global_load_lds_dword_nontemporal:
; GFX942-GISEL: ; %bb.0:
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
; GFX942-GISEL-NEXT: s_mov_b32 m0, s2
; GFX942-GISEL-NEXT: s_nop 0
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
; GFX942-GISEL-NEXT: global_load_lds_dword v[0:1], off nt
; GFX942-GISEL-NEXT: global_load_lds_dword v[0:1], off offset:512
; GFX942-GISEL-NEXT: s_endpgm
;
; GFX10-LABEL: global_load_lds_dword_nontemporal:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, s0, v0
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, s1, v1, vcc_lo
; GFX10-NEXT: s_mov_b32 m0, s2
; GFX10-NEXT: global_load_dword v[0:1], off slc lds
; GFX10-NEXT: global_load_dword v[0:1], off offset:512 lds
; GFX10-NEXT: s_endpgm
%gptr.off = getelementptr i8, ptr addrspace(1) %gptr, i64 %off
call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 0, i32 0), !nontemporal !0
call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 512, i32 0)
ret void
}
; Two back-to-back llvm.amdgcn.global.load.lds calls (size operand 4) reading
; from %gptr + %off into LDS pointer %lptr. The first call combines a last
; operand of 2147483648 (0x80000000, bit 31 only) with !nontemporal metadata;
; the second passes immediate offset 512 and a last operand of 0.
; Expected output: the first load carries glc on gfx90a, sc0 sc1 on gfx942 and
; glc dlc on gfx1010, followed by s_waitcnt vmcnt(0); no slc or nt bit is
; expected on it despite the !nontemporal attachment.
; NOTE(review): bit 31 of the last operand is presumably the "volatile" flag,
; going by the test name - confirm against the intrinsic definition.
define amdgpu_ps void @global_load_lds_dword_volatile_nontemporal(ptr addrspace(1) inreg %gptr, i64 %off, ptr addrspace(3) inreg %lptr) {
; GFX90A-LABEL: global_load_lds_dword_volatile_nontemporal:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: v_mov_b32_e32 v2, s1
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, s0, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX90A-NEXT: s_mov_b32 m0, s2
; GFX90A-NEXT: s_nop 0
; GFX90A-NEXT: global_load_dword v[0:1], off glc lds
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_load_dword v[0:1], off offset:512 lds
; GFX90A-NEXT: s_endpgm
;
; GFX942-LABEL: global_load_lds_dword_volatile_nontemporal:
; GFX942: ; %bb.0:
; GFX942-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
; GFX942-NEXT: s_mov_b32 m0, s2
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: global_load_lds_dword v[0:1], off sc0 sc1
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: global_load_lds_dword v[0:1], off offset:512
; GFX942-NEXT: s_endpgm
;
; GFX942-GISEL-LABEL: global_load_lds_dword_volatile_nontemporal:
; GFX942-GISEL: ; %bb.0:
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
; GFX942-GISEL-NEXT: s_mov_b32 m0, s2
; GFX942-GISEL-NEXT: s_nop 0
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
; GFX942-GISEL-NEXT: global_load_lds_dword v[0:1], off sc0 sc1
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-GISEL-NEXT: global_load_lds_dword v[0:1], off offset:512
; GFX942-GISEL-NEXT: s_endpgm
;
; GFX10-LABEL: global_load_lds_dword_volatile_nontemporal:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, s0, v0
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, s1, v1, vcc_lo
; GFX10-NEXT: s_mov_b32 m0, s2
; GFX10-NEXT: global_load_dword v[0:1], off glc dlc lds
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_load_dword v[0:1], off offset:512 lds
; GFX10-NEXT: s_endpgm
%gptr.off = getelementptr i8, ptr addrspace(1) %gptr, i64 %off
call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 0, i32 2147483648), !nontemporal !0
call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 512, i32 0)
ret void
}
; Two back-to-back llvm.amdgcn.load.to.lds.p1 calls (size operand 4) reading
; from the addrspace(1) pointer %gptr + %off into LDS pointer %lptr. The first
; call passes 2147483648 (0x80000000, bit 31 only) as its last operand; the
; second passes immediate offset 512 and a last operand of 0.
; Expected output: global loads to LDS where the first carries glc on gfx90a,
; sc0 sc1 on gfx942 and glc dlc on gfx1010 and is followed by
; s_waitcnt vmcnt(0); the second carries only offset:512.
; NOTE(review): bit 31 of the last operand is presumably the "volatile" flag,
; going by the test name - confirm against the intrinsic definition.
define amdgpu_ps void @load_to_lds_p1_dword_volatile(ptr addrspace(1) inreg %gptr, i64 %off, ptr addrspace(3) inreg %lptr) {
; GFX90A-LABEL: load_to_lds_p1_dword_volatile:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: v_mov_b32_e32 v2, s1
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, s0, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX90A-NEXT: s_mov_b32 m0, s2
; GFX90A-NEXT: s_nop 0
; GFX90A-NEXT: global_load_dword v[0:1], off glc lds
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_load_dword v[0:1], off offset:512 lds
; GFX90A-NEXT: s_endpgm
;
; GFX942-LABEL: load_to_lds_p1_dword_volatile:
; GFX942: ; %bb.0:
; GFX942-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
; GFX942-NEXT: s_mov_b32 m0, s2
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: global_load_lds_dword v[0:1], off sc0 sc1
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: global_load_lds_dword v[0:1], off offset:512
; GFX942-NEXT: s_endpgm
;
; GFX942-GISEL-LABEL: load_to_lds_p1_dword_volatile:
; GFX942-GISEL: ; %bb.0:
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
; GFX942-GISEL-NEXT: s_mov_b32 m0, s2
; GFX942-GISEL-NEXT: s_nop 0
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
; GFX942-GISEL-NEXT: global_load_lds_dword v[0:1], off sc0 sc1
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-GISEL-NEXT: global_load_lds_dword v[0:1], off offset:512
; GFX942-GISEL-NEXT: s_endpgm
;
; GFX10-LABEL: load_to_lds_p1_dword_volatile:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, s0, v0
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, s1, v1, vcc_lo
; GFX10-NEXT: s_mov_b32 m0, s2
; GFX10-NEXT: global_load_dword v[0:1], off glc dlc lds
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_load_dword v[0:1], off offset:512 lds
; GFX10-NEXT: s_endpgm
%gptr.off = getelementptr i8, ptr addrspace(1) %gptr, i64 %off
call void @llvm.amdgcn.load.to.lds.p1(ptr addrspace(1) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 0, i32 2147483648)
call void @llvm.amdgcn.load.to.lds.p1(ptr addrspace(1) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 512, i32 0)
ret void
}
; Two back-to-back llvm.amdgcn.load.to.lds.p1 calls (size operand 4) reading
; from the addrspace(1) pointer %gptr + %off into LDS pointer %lptr. The first
; call has a last operand of 0 but carries !nontemporal metadata; the second
; passes immediate offset 512 with no metadata.
; Expected output: the first load carries glc slc on gfx90a, nt on gfx942 and
; slc on gfx1010; no s_waitcnt is expected between the two loads.
define amdgpu_ps void @load_to_lds_p1_dword_nontemporal(ptr addrspace(1) inreg %gptr, i64 %off, ptr addrspace(3) inreg %lptr) {
; GFX90A-LABEL: load_to_lds_p1_dword_nontemporal:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: v_mov_b32_e32 v2, s1
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, s0, v0
; GFX90A-NEXT: s_mov_b32 m0, s2
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX90A-NEXT: global_load_dword v[0:1], off glc slc lds
; GFX90A-NEXT: global_load_dword v[0:1], off offset:512 lds
; GFX90A-NEXT: s_endpgm
;
; GFX942-LABEL: load_to_lds_p1_dword_nontemporal:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_mov_b32 m0, s2
; GFX942-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
; GFX942-NEXT: global_load_lds_dword v[0:1], off nt
; GFX942-NEXT: global_load_lds_dword v[0:1], off offset:512
; GFX942-NEXT: s_endpgm
;
; GFX942-GISEL-LABEL: load_to_lds_p1_dword_nontemporal:
; GFX942-GISEL: ; %bb.0:
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
; GFX942-GISEL-NEXT: s_mov_b32 m0, s2
; GFX942-GISEL-NEXT: s_nop 0
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
; GFX942-GISEL-NEXT: global_load_lds_dword v[0:1], off nt
; GFX942-GISEL-NEXT: global_load_lds_dword v[0:1], off offset:512
; GFX942-GISEL-NEXT: s_endpgm
;
; GFX10-LABEL: load_to_lds_p1_dword_nontemporal:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, s0, v0
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, s1, v1, vcc_lo
; GFX10-NEXT: s_mov_b32 m0, s2
; GFX10-NEXT: global_load_dword v[0:1], off slc lds
; GFX10-NEXT: global_load_dword v[0:1], off offset:512 lds
; GFX10-NEXT: s_endpgm
%gptr.off = getelementptr i8, ptr addrspace(1) %gptr, i64 %off
call void @llvm.amdgcn.load.to.lds.p1(ptr addrspace(1) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 0, i32 0), !nontemporal !0
call void @llvm.amdgcn.load.to.lds.p1(ptr addrspace(1) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 512, i32 0)
ret void
}
; Two back-to-back llvm.amdgcn.load.to.lds.p1 calls (size operand 4) reading
; from the addrspace(1) pointer %gptr + %off into LDS pointer %lptr. The first
; call combines a last operand of 2147483648 (0x80000000, bit 31 only) with
; !nontemporal metadata; the second passes immediate offset 512 and a last
; operand of 0.
; Expected output: the first load carries glc on gfx90a, sc0 sc1 on gfx942 and
; glc dlc on gfx1010, followed by s_waitcnt vmcnt(0); no slc or nt bit is
; expected on it despite the !nontemporal attachment.
; NOTE(review): bit 31 of the last operand is presumably the "volatile" flag,
; going by the test name - confirm against the intrinsic definition.
define amdgpu_ps void @load_to_lds_p1_dword_volatile_nontemporal(ptr addrspace(1) inreg %gptr, i64 %off, ptr addrspace(3) inreg %lptr) {
; GFX90A-LABEL: load_to_lds_p1_dword_volatile_nontemporal:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: v_mov_b32_e32 v2, s1
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, s0, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
; GFX90A-NEXT: s_mov_b32 m0, s2
; GFX90A-NEXT: s_nop 0
; GFX90A-NEXT: global_load_dword v[0:1], off glc lds
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: global_load_dword v[0:1], off offset:512 lds
; GFX90A-NEXT: s_endpgm
;
; GFX942-LABEL: load_to_lds_p1_dword_volatile_nontemporal:
; GFX942: ; %bb.0:
; GFX942-NEXT: v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
; GFX942-NEXT: s_mov_b32 m0, s2
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: global_load_lds_dword v[0:1], off sc0 sc1
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: global_load_lds_dword v[0:1], off offset:512
; GFX942-NEXT: s_endpgm
;
; GFX942-GISEL-LABEL: load_to_lds_p1_dword_volatile_nontemporal:
; GFX942-GISEL: ; %bb.0:
; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
; GFX942-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
; GFX942-GISEL-NEXT: s_mov_b32 m0, s2
; GFX942-GISEL-NEXT: s_nop 0
; GFX942-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
; GFX942-GISEL-NEXT: global_load_lds_dword v[0:1], off sc0 sc1
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-GISEL-NEXT: global_load_lds_dword v[0:1], off offset:512
; GFX942-GISEL-NEXT: s_endpgm
;
; GFX10-LABEL: load_to_lds_p1_dword_volatile_nontemporal:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, s0, v0
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, s1, v1, vcc_lo
; GFX10-NEXT: s_mov_b32 m0, s2
; GFX10-NEXT: global_load_dword v[0:1], off glc dlc lds
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_load_dword v[0:1], off offset:512 lds
; GFX10-NEXT: s_endpgm
%gptr.off = getelementptr i8, ptr addrspace(1) %gptr, i64 %off
call void @llvm.amdgcn.load.to.lds.p1(ptr addrspace(1) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 0, i32 2147483648), !nontemporal !0
call void @llvm.amdgcn.load.to.lds.p1(ptr addrspace(1) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 512, i32 0)
ret void
}
; Two back-to-back llvm.amdgcn.load.to.lds.p7 calls (size operand 4) reading
; from the addrspace(7) pointer %gptr + %off (i32 offset) into LDS pointer
; %lptr. The first call passes 2147483648 (0x80000000, bit 31 only) as its
; last operand; the second passes immediate offset 512 and a last operand of 0.
; Expected output: buffer_load_dword ... offen ... lds instructions using
; s[0:3] and a VGPR offset computed as s4 + v0. The first load carries glc on
; gfx90a, sc0 sc1 on gfx942 and glc dlc on gfx1010 and is followed by
; s_waitcnt vmcnt(0); the second carries only offset:512.
; NOTE(review): bit 31 of the last operand is presumably the "volatile" flag,
; going by the test name - confirm against the intrinsic definition.
define amdgpu_ps void @load_to_lds_p7_dword_volatile(ptr addrspace(7) inreg %gptr, i32 %off, ptr addrspace(3) inreg %lptr) {
; GFX90A-LABEL: load_to_lds_p7_dword_volatile:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: v_add_u32_e32 v0, s4, v0
; GFX90A-NEXT: s_mov_b32 m0, s5
; GFX90A-NEXT: s_nop 0
; GFX90A-NEXT: buffer_load_dword v0, s[0:3], 0 offen glc lds
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds
; GFX90A-NEXT: s_endpgm
;
; GFX942-LABEL: load_to_lds_p7_dword_volatile:
; GFX942: ; %bb.0:
; GFX942-NEXT: v_add_u32_e32 v0, s4, v0
; GFX942-NEXT: s_mov_b32 m0, s5
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: buffer_load_dword v0, s[0:3], 0 offen sc0 sc1 lds
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds
; GFX942-NEXT: s_endpgm
;
; GFX942-GISEL-LABEL: load_to_lds_p7_dword_volatile:
; GFX942-GISEL: ; %bb.0:
; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s4, v0
; GFX942-GISEL-NEXT: s_mov_b32 m0, s5
; GFX942-GISEL-NEXT: s_nop 0
; GFX942-GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 offen sc0 sc1 lds
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds
; GFX942-GISEL-NEXT: s_endpgm
;
; GFX10-LABEL: load_to_lds_p7_dword_volatile:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_add_nc_u32_e32 v0, s4, v0
; GFX10-NEXT: s_mov_b32 m0, s5
; GFX10-NEXT: buffer_load_dword v0, s[0:3], 0 offen glc dlc lds
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds
; GFX10-NEXT: s_endpgm
%gptr.off = getelementptr i8, ptr addrspace(7) %gptr, i32 %off
call void @llvm.amdgcn.load.to.lds.p7(ptr addrspace(7) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 0, i32 2147483648)
call void @llvm.amdgcn.load.to.lds.p7(ptr addrspace(7) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 512, i32 0)
ret void
}
; Two back-to-back llvm.amdgcn.load.to.lds.p7 calls (size operand 4) reading
; from the addrspace(7) pointer %gptr + %off (i32 offset) into LDS pointer
; %lptr. The first call has a last operand of 0 but carries !nontemporal
; metadata; the second passes immediate offset 512 with no metadata.
; Expected output: buffer_load_dword ... offen ... lds instructions where the
; first carries glc slc on gfx90a, nt on gfx942 and slc on gfx1010; no
; s_waitcnt is expected between the two loads.
define amdgpu_ps void @load_to_lds_p7_dword_nontemporal(ptr addrspace(7) inreg %gptr, i32 %off, ptr addrspace(3) inreg %lptr) {
; GFX90A-LABEL: load_to_lds_p7_dword_nontemporal:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_mov_b32 m0, s5
; GFX90A-NEXT: v_add_u32_e32 v0, s4, v0
; GFX90A-NEXT: buffer_load_dword v0, s[0:3], 0 offen glc slc lds
; GFX90A-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds
; GFX90A-NEXT: s_endpgm
;
; GFX942-LABEL: load_to_lds_p7_dword_nontemporal:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_mov_b32 m0, s5
; GFX942-NEXT: v_add_u32_e32 v0, s4, v0
; GFX942-NEXT: buffer_load_dword v0, s[0:3], 0 offen nt lds
; GFX942-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds
; GFX942-NEXT: s_endpgm
;
; GFX942-GISEL-LABEL: load_to_lds_p7_dword_nontemporal:
; GFX942-GISEL: ; %bb.0:
; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s4, v0
; GFX942-GISEL-NEXT: s_mov_b32 m0, s5
; GFX942-GISEL-NEXT: s_nop 0
; GFX942-GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 offen nt lds
; GFX942-GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds
; GFX942-GISEL-NEXT: s_endpgm
;
; GFX10-LABEL: load_to_lds_p7_dword_nontemporal:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_add_nc_u32_e32 v0, s4, v0
; GFX10-NEXT: s_mov_b32 m0, s5
; GFX10-NEXT: buffer_load_dword v0, s[0:3], 0 offen slc lds
; GFX10-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds
; GFX10-NEXT: s_endpgm
%gptr.off = getelementptr i8, ptr addrspace(7) %gptr, i32 %off
call void @llvm.amdgcn.load.to.lds.p7(ptr addrspace(7) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 0, i32 0), !nontemporal !0
call void @llvm.amdgcn.load.to.lds.p7(ptr addrspace(7) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 512, i32 0)
ret void
}
; Two back-to-back llvm.amdgcn.load.to.lds.p7 calls (size operand 4) reading
; from the addrspace(7) pointer %gptr + %off (i32 offset) into LDS pointer
; %lptr. The first call combines a last operand of 2147483648 (0x80000000,
; bit 31 only) with !nontemporal metadata; the second passes immediate offset
; 512 and a last operand of 0.
; Expected output: the first buffer load carries glc on gfx90a, sc0 sc1 on
; gfx942 and glc dlc on gfx1010, followed by s_waitcnt vmcnt(0); no slc or nt
; bit is expected on it despite the !nontemporal attachment.
; NOTE(review): bit 31 of the last operand is presumably the "volatile" flag,
; going by the test name - confirm against the intrinsic definition.
define amdgpu_ps void @load_to_lds_p7_dword_volatile_nontemporal(ptr addrspace(7) inreg %gptr, i32 %off, ptr addrspace(3) inreg %lptr) {
; GFX90A-LABEL: load_to_lds_p7_dword_volatile_nontemporal:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: v_add_u32_e32 v0, s4, v0
; GFX90A-NEXT: s_mov_b32 m0, s5
; GFX90A-NEXT: s_nop 0
; GFX90A-NEXT: buffer_load_dword v0, s[0:3], 0 offen glc lds
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds
; GFX90A-NEXT: s_endpgm
;
; GFX942-LABEL: load_to_lds_p7_dword_volatile_nontemporal:
; GFX942: ; %bb.0:
; GFX942-NEXT: v_add_u32_e32 v0, s4, v0
; GFX942-NEXT: s_mov_b32 m0, s5
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: buffer_load_dword v0, s[0:3], 0 offen sc0 sc1 lds
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds
; GFX942-NEXT: s_endpgm
;
; GFX942-GISEL-LABEL: load_to_lds_p7_dword_volatile_nontemporal:
; GFX942-GISEL: ; %bb.0:
; GFX942-GISEL-NEXT: v_add_u32_e32 v0, s4, v0
; GFX942-GISEL-NEXT: s_mov_b32 m0, s5
; GFX942-GISEL-NEXT: s_nop 0
; GFX942-GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 offen sc0 sc1 lds
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds
; GFX942-GISEL-NEXT: s_endpgm
;
; GFX10-LABEL: load_to_lds_p7_dword_volatile_nontemporal:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_add_nc_u32_e32 v0, s4, v0
; GFX10-NEXT: s_mov_b32 m0, s5
; GFX10-NEXT: buffer_load_dword v0, s[0:3], 0 offen glc dlc lds
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds
; GFX10-NEXT: s_endpgm
%gptr.off = getelementptr i8, ptr addrspace(7) %gptr, i32 %off
call void @llvm.amdgcn.load.to.lds.p7(ptr addrspace(7) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 0, i32 2147483648), !nontemporal !0
call void @llvm.amdgcn.load.to.lds.p7(ptr addrspace(7) %gptr.off, ptr addrspace(3) %lptr, i32 4, i32 512, i32 0)
ret void
}
; Two back-to-back llvm.amdgcn.raw.ptr.buffer.load.lds calls (size operand 4)
; on the addrspace(8) value %gptr, writing to LDS pointer %lptr, with %off
; passed directly as the fourth operand (it appears as the offen VGPR v0 in the
; expected output). The first call passes 2147483648 (0x80000000, bit 31 only)
; as its last operand; the second passes 512 as its sixth operand (offset:512
; in the expected output) and a last operand of 0.
; Expected output: the first buffer load carries glc on gfx90a, sc0 sc1 on
; gfx942 and glc dlc on gfx1010 and is followed by s_waitcnt vmcnt(0); the
; second carries only offset:512.
; NOTE(review): bit 31 of the last operand is presumably the "volatile" flag,
; going by the test name - confirm against the intrinsic definition.
define amdgpu_ps void @raw_ptr_buffer_load_lds_dword_volatile(ptr addrspace(8) inreg %gptr, i32 %off, ptr addrspace(3) inreg %lptr) {
; GFX90A-LABEL: raw_ptr_buffer_load_lds_dword_volatile:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_mov_b32 m0, s4
; GFX90A-NEXT: s_nop 0
; GFX90A-NEXT: buffer_load_dword v0, s[0:3], 0 offen glc lds
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds
; GFX90A-NEXT: s_endpgm
;
; GFX942-LABEL: raw_ptr_buffer_load_lds_dword_volatile:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_mov_b32 m0, s4
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: buffer_load_dword v0, s[0:3], 0 offen sc0 sc1 lds
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds
; GFX942-NEXT: s_endpgm
;
; GFX942-GISEL-LABEL: raw_ptr_buffer_load_lds_dword_volatile:
; GFX942-GISEL: ; %bb.0:
; GFX942-GISEL-NEXT: s_mov_b32 m0, s4
; GFX942-GISEL-NEXT: s_nop 0
; GFX942-GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 offen sc0 sc1 lds
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds
; GFX942-GISEL-NEXT: s_endpgm
;
; GFX10-LABEL: raw_ptr_buffer_load_lds_dword_volatile:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_mov_b32 m0, s4
; GFX10-NEXT: buffer_load_dword v0, s[0:3], 0 offen glc dlc lds
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %gptr, ptr addrspace(3) %lptr, i32 4, i32 %off, i32 0, i32 0, i32 2147483648)
call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %gptr, ptr addrspace(3) %lptr, i32 4, i32 %off, i32 0, i32 512, i32 0)
ret void
}
; Two back-to-back llvm.amdgcn.raw.ptr.buffer.load.lds calls (size operand 4)
; on the addrspace(8) value %gptr, writing to LDS pointer %lptr, with %off
; passed directly as the fourth operand (it appears as the offen VGPR v0 in the
; expected output). The first call has a last operand of 0 but carries
; !nontemporal metadata; the second passes 512 as its sixth operand
; (offset:512 in the expected output) with no metadata.
; Expected output: the first buffer load carries glc slc on gfx90a, nt on
; gfx942 and slc on gfx1010; no s_waitcnt is expected between the two loads.
define amdgpu_ps void @raw_ptr_buffer_load_lds_dword_nontemporal(ptr addrspace(8) inreg %gptr, i32 %off, ptr addrspace(3) inreg %lptr) {
; GFX90A-LABEL: raw_ptr_buffer_load_lds_dword_nontemporal:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_mov_b32 m0, s4
; GFX90A-NEXT: s_nop 0
; GFX90A-NEXT: buffer_load_dword v0, s[0:3], 0 offen glc slc lds
; GFX90A-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds
; GFX90A-NEXT: s_endpgm
;
; GFX942-LABEL: raw_ptr_buffer_load_lds_dword_nontemporal:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_mov_b32 m0, s4
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: buffer_load_dword v0, s[0:3], 0 offen nt lds
; GFX942-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds
; GFX942-NEXT: s_endpgm
;
; GFX942-GISEL-LABEL: raw_ptr_buffer_load_lds_dword_nontemporal:
; GFX942-GISEL: ; %bb.0:
; GFX942-GISEL-NEXT: s_mov_b32 m0, s4
; GFX942-GISEL-NEXT: s_nop 0
; GFX942-GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 offen nt lds
; GFX942-GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds
; GFX942-GISEL-NEXT: s_endpgm
;
; GFX10-LABEL: raw_ptr_buffer_load_lds_dword_nontemporal:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_mov_b32 m0, s4
; GFX10-NEXT: buffer_load_dword v0, s[0:3], 0 offen slc lds
; GFX10-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %gptr, ptr addrspace(3) %lptr, i32 4, i32 %off, i32 0, i32 0, i32 0), !nontemporal !0
call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %gptr, ptr addrspace(3) %lptr, i32 4, i32 %off, i32 0, i32 512, i32 0)
ret void
}
; Two back-to-back llvm.amdgcn.raw.ptr.buffer.load.lds calls (size operand 4)
; on the addrspace(8) value %gptr, writing to LDS pointer %lptr, with %off
; passed directly as the fourth operand (it appears as the offen VGPR v0 in the
; expected output). The first call combines a last operand of 2147483648
; (0x80000000, bit 31 only) with !nontemporal metadata; the second passes 512
; as its sixth operand (offset:512 in the expected output) and a last operand
; of 0.
; Expected output: the first buffer load carries glc on gfx90a, sc0 sc1 on
; gfx942 and glc dlc on gfx1010, followed by s_waitcnt vmcnt(0); no slc or nt
; bit is expected on it despite the !nontemporal attachment.
; NOTE(review): bit 31 of the last operand is presumably the "volatile" flag,
; going by the test name - confirm against the intrinsic definition.
define amdgpu_ps void @raw_ptr_buffer_load_lds_dword_volatile_nontemporal(ptr addrspace(8) inreg %gptr, i32 %off, ptr addrspace(3) inreg %lptr) {
; GFX90A-LABEL: raw_ptr_buffer_load_lds_dword_volatile_nontemporal:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_mov_b32 m0, s4
; GFX90A-NEXT: s_nop 0
; GFX90A-NEXT: buffer_load_dword v0, s[0:3], 0 offen glc lds
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds
; GFX90A-NEXT: s_endpgm
;
; GFX942-LABEL: raw_ptr_buffer_load_lds_dword_volatile_nontemporal:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_mov_b32 m0, s4
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: buffer_load_dword v0, s[0:3], 0 offen sc0 sc1 lds
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds
; GFX942-NEXT: s_endpgm
;
; GFX942-GISEL-LABEL: raw_ptr_buffer_load_lds_dword_volatile_nontemporal:
; GFX942-GISEL: ; %bb.0:
; GFX942-GISEL-NEXT: s_mov_b32 m0, s4
; GFX942-GISEL-NEXT: s_nop 0
; GFX942-GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 offen sc0 sc1 lds
; GFX942-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX942-GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds
; GFX942-GISEL-NEXT: s_endpgm
;
; GFX10-LABEL: raw_ptr_buffer_load_lds_dword_volatile_nontemporal:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_mov_b32 m0, s4
; GFX10-NEXT: buffer_load_dword v0, s[0:3], 0 offen glc dlc lds
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: buffer_load_dword v0, s[0:3], 0 offen offset:512 lds
; GFX10-NEXT: s_endpgm
call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %gptr, ptr addrspace(3) %lptr, i32 4, i32 %off, i32 0, i32 0, i32 2147483648), !nontemporal !0
call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %gptr, ptr addrspace(3) %lptr, i32 4, i32 %off, i32 0, i32 512, i32 0)
ret void
}
; Metadata node referenced by the !nontemporal attachments on the calls above.
!0 = !{i32 1}