| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefix=GCN %s |
| ; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefix=GCN %s |
| |
; Intrinsic under test. Operands: a global (addrspace(1)) pointer and an i32
; immediate. The functions in this file pass 0, 8, 9, 18 and 27 for the i32
; operand; the non-zero values show up as th:/scope: modifiers in the expected
; assembly.
declare void @llvm.amdgcn.global.prefetch(ptr addrspace(1), i32)
| |
; Baseline case: the pointer is an ordinary (non-inreg) argument and the i32
; operand is 0. The expected prefetch addresses memory through the register
; pair v[0:1] with "off" and carries no th:/scope: modifier.
define amdgpu_ps void @global_prefetch(ptr addrspace(1) %ptr) {
; GCN-LABEL: global_prefetch:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
; GCN-NEXT: global_prefetch_b8 v[0:1], off
; GCN-NEXT: s_endpgm
entry:
  tail call void @llvm.amdgcn.global.prefetch(ptr addrspace(1) %ptr, i32 0)
  ret void
}
| |
; Same as the baseline case, but the pointer argument is marked inreg. The
; expected output moves 0 into v0 and addresses the prefetch as
; "v0, s[0:1]".
define amdgpu_ps void @global_prefetch_sgpr(ptr addrspace(1) inreg %ptr) {
; GCN-LABEL: global_prefetch_sgpr:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: global_prefetch_b8 v0, s[0:1]
; GCN-NEXT: s_endpgm
entry:
  tail call void @llvm.amdgcn.global.prefetch(ptr addrspace(1) %ptr, i32 0)
  ret void
}
| |
; Prefetch from a constant displacement off a non-inreg pointer. The address
; is 128 i32 elements past %ptr (128 * 4 = 512 bytes); the expected output
; carries that as "offset:512" on the prefetch instruction.
define amdgpu_ps void @global_prefetch_offset(ptr addrspace(1) %ptr) {
; GCN-LABEL: global_prefetch_offset:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
; GCN-NEXT: global_prefetch_b8 v[0:1], off offset:512
; GCN-NEXT: s_endpgm
entry:
  %addr = getelementptr inbounds i32, ptr addrspace(1) %ptr, i32 128
  tail call void @llvm.amdgcn.global.prefetch(ptr addrspace(1) %addr, i32 0)
  ret void
}
| |
; inreg base pointer plus a variable i32 byte offset (i8 getelementptr). The
; expected output uses the offset register and the base pair directly as
; "v0, s[0:1]", with no separate address arithmetic.
define amdgpu_ps void @global_prefetch_sgpr_voffset(ptr addrspace(1) inreg %ptr, i32 %offset) {
; GCN-LABEL: global_prefetch_sgpr_voffset:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
; GCN-NEXT: global_prefetch_b8 v0, s[0:1]
; GCN-NEXT: s_endpgm
entry:
  %addr = getelementptr i8, ptr addrspace(1) %ptr, i32 %offset
  tail call void @llvm.amdgcn.global.prefetch(ptr addrspace(1) %addr, i32 0)
  ret void
}
| |
; inreg base pointer plus a variable i32 byte offset plus a constant 128
; bytes. The expected output keeps the "v0, s[0:1]" addressing form and
; carries the constant as "offset:128".
define amdgpu_ps void @global_prefetch_sgpr_voffset_offset(ptr addrspace(1) inreg %ptr, i32 %offset) {
; GCN-LABEL: global_prefetch_sgpr_voffset_offset:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
; GCN-NEXT: global_prefetch_b8 v0, s[0:1] offset:128
; GCN-NEXT: s_endpgm
entry:
  %var.addr = getelementptr i8, ptr addrspace(1) %ptr, i32 %offset
  %addr = getelementptr i8, ptr addrspace(1) %var.addr, i32 128
  tail call void @llvm.amdgcn.global.prefetch(ptr addrspace(1) %addr, i32 0)
  ret void
}
| |
; i32 operand 8 on a non-inreg pointer. The expected prefetch carries
; "scope:SCOPE_SE" and no th: modifier.
define amdgpu_ps void @global_prefetch_se(ptr addrspace(1) %ptr) {
; GCN-LABEL: global_prefetch_se:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
; GCN-NEXT: global_prefetch_b8 v[0:1], off scope:SCOPE_SE
; GCN-NEXT: s_endpgm
entry:
  tail call void @llvm.amdgcn.global.prefetch(ptr addrspace(1) %ptr, i32 8)
  ret void
}
| |
; i32 operand 9 on a non-inreg pointer. The expected prefetch carries
; "th:TH_LOAD_NT scope:SCOPE_SE".
define amdgpu_ps void @global_prefetch_se_nt(ptr addrspace(1) %ptr) {
; GCN-LABEL: global_prefetch_se_nt:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
; GCN-NEXT: global_prefetch_b8 v[0:1], off th:TH_LOAD_NT scope:SCOPE_SE
; GCN-NEXT: s_endpgm
entry:
  tail call void @llvm.amdgcn.global.prefetch(ptr addrspace(1) %ptr, i32 9)
  ret void
}
| |
; i32 operand 18 on a non-inreg pointer. The expected prefetch carries
; "th:TH_LOAD_HT scope:SCOPE_DEV".
define amdgpu_ps void @global_prefetch_dev_ht(ptr addrspace(1) %ptr) {
; GCN-LABEL: global_prefetch_dev_ht:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
; GCN-NEXT: global_prefetch_b8 v[0:1], off th:TH_LOAD_HT scope:SCOPE_DEV
; GCN-NEXT: s_endpgm
entry:
  tail call void @llvm.amdgcn.global.prefetch(ptr addrspace(1) %ptr, i32 18)
  ret void
}
| |
; i32 operand 27 on a non-inreg pointer. The expected prefetch carries
; "th:TH_LOAD_BYPASS scope:SCOPE_SYS".
; NOTE(review): the function name says "lu" while the expected modifier reads
; TH_LOAD_BYPASS - presumably that is how this th value is printed together
; with SCOPE_SYS; confirm against the ISA documentation.
define amdgpu_ps void @global_prefetch_sys_lu(ptr addrspace(1) %ptr) {
; GCN-LABEL: global_prefetch_sys_lu:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
; GCN-NEXT: global_prefetch_b8 v[0:1], off th:TH_LOAD_BYPASS scope:SCOPE_SYS
; GCN-NEXT: s_endpgm
entry:
  tail call void @llvm.amdgcn.global.prefetch(ptr addrspace(1) %ptr, i32 27)
  ret void
}
| |
; Prefetch and then load a float from the same address: element %idx
; (sign-extended to i64) of the float array at the inreg pointer %p.
; In the expected output the prefetch goes through a 64-bit address built
; with v_ashrrev_i32 + v_lshl_add_u64 (v[2:3], "off"), while the load uses
; "v0, s[0:1]" with the scale_offset modifier.
define amdgpu_ps float @global_prefetch_and_load_b32_idxprom(ptr addrspace(1) align 4 inreg %p, i32 %idx) {
; GCN-LABEL: global_prefetch_and_load_b32_idxprom:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0
; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GCN-NEXT: v_lshl_add_u64 v[2:3], v[0:1], 2, s[0:1]
; GCN-NEXT: global_prefetch_b8 v[2:3], off
; GCN-NEXT: global_load_b32 v0, v0, s[0:1] scale_offset
; GCN-NEXT: s_wait_loadcnt 0x0
; GCN-NEXT: ; return to shader part epilog
entry:
  %idx.ext = sext i32 %idx to i64
  %elt.addr = getelementptr inbounds float, ptr addrspace(1) %p, i64 %idx.ext
  tail call void @llvm.amdgcn.global.prefetch(ptr addrspace(1) %elt.addr, i32 0)
  %val = load float, ptr addrspace(1) %elt.addr, align 4
  ret float %val
}