| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 |
| ; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GCN,SDAG %s |
| ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GCN,GISEL %s |
| |
| ; Baseline case. A float load whose address is a GEP over float indexed by a |
| ; sign-extended i32. The expected output is a single global_load_b32 that uses |
| ; v0 as the index and s[0:1] (the inreg base pointer) as the base, with the |
| ; scale_offset modifier. Both llc invocations share the same expected output. |
| define amdgpu_ps float @global_load_b32_idxprom(ptr addrspace(1) align 4 inreg %p, i32 %idx) { |
| ; GCN-LABEL: global_load_b32_idxprom: |
| ; GCN: ; %bb.0: ; %entry |
| ; GCN-NEXT: global_load_b32 v0, v0, s[0:1] scale_offset |
| ; GCN-NEXT: s_wait_loadcnt 0x0 |
| ; GCN-NEXT: ; return to shader part epilog |
| entry: |
| %idxprom = sext i32 %idx to i64 |
| %arrayidx = getelementptr inbounds float, ptr addrspace(1) %p, i64 %idxprom |
| %ret = load float, ptr addrspace(1) %arrayidx, align 4 |
| ret float %ret |
| } |
| |
| ; Variant of the b32 load where the GEP takes the i32 index directly, with no |
| ; explicit sext in the IR. The expected output is the same single |
| ; global_load_b32 with scale_offset. |
| define amdgpu_ps float @global_load_b32_idx32(ptr addrspace(1) align 4 inreg %p, i32 %idx) { |
| ; GCN-LABEL: global_load_b32_idx32: |
| ; GCN: ; %bb.0: ; %entry |
| ; GCN-NEXT: global_load_b32 v0, v0, s[0:1] scale_offset |
| ; GCN-NEXT: s_wait_loadcnt 0x0 |
| ; GCN-NEXT: ; return to shader part epilog |
| entry: |
| %arrayidx = getelementptr inbounds float, ptr addrspace(1) %p, i32 %idx |
| %ret = load float, ptr addrspace(1) %arrayidx, align 4 |
| ret float %ret |
| } |
| |
| ; Negative case. The GEP strides by <2 x float> (8 bytes) but the load is a |
| ; 4-byte float, so the stride does not match the access size and scale_offset |
| ; is not expected. Both selectors instead materialize the full 64-bit address |
| ; (index sign-extended, shifted left by 3, added to the base) and load through |
| ; v[0:1] with "off". The two selectors emit different address arithmetic, hence |
| ; the separate SelectionDAG and GlobalISel check blocks. |
| define amdgpu_ps float @global_load_b32_idxprom_wrong_stride(ptr addrspace(1) align 4 inreg %p, i32 %idx) { |
| ; SDAG-LABEL: global_load_b32_idxprom_wrong_stride: |
| ; SDAG: ; %bb.0: ; %entry |
| ; SDAG-NEXT: v_ashrrev_i32_e32 v1, 31, v0 |
| ; SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; SDAG-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 3, s[0:1] |
| ; SDAG-NEXT: global_load_b32 v0, v[0:1], off |
| ; SDAG-NEXT: s_wait_loadcnt 0x0 |
| ; SDAG-NEXT: ; return to shader part epilog |
| ; |
| ; GISEL-LABEL: global_load_b32_idxprom_wrong_stride: |
| ; GISEL: ; %bb.0: ; %entry |
| ; GISEL-NEXT: v_ashrrev_i32_e32 v1, 31, v0 |
| ; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1] |
| ; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) |
| ; GISEL-NEXT: v_lshlrev_b64_e32 v[0:1], 3, v[0:1] |
| ; GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 |
| ; GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| ; GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, v3, v1, vcc_lo |
| ; GISEL-NEXT: global_load_b32 v0, v[0:1], off |
| ; GISEL-NEXT: s_wait_loadcnt 0x0 |
| ; GISEL-NEXT: ; return to shader part epilog |
| entry: |
| %idxprom = sext i32 %idx to i64 |
| %arrayidx = getelementptr inbounds <2 x float>, ptr addrspace(1) %p, i64 %idxprom |
| %ret = load float, ptr addrspace(1) %arrayidx, align 4 |
| ret float %ret |
| } |
| |
| ; i16 load at element index sext(idx) + 16. The expected output keeps |
| ; scale_offset for the variable index and folds the constant part into the |
| ; immediate as offset:32 (16 elements * 2 bytes). The loaded value is |
| ; zero-extended to i32 and returned as a float bit pattern. |
| define amdgpu_ps float @global_load_b16_idxprom_ioffset(ptr addrspace(1) align 4 inreg %p, i32 %idx) { |
| ; GCN-LABEL: global_load_b16_idxprom_ioffset: |
| ; GCN: ; %bb.0: ; %entry |
| ; GCN-NEXT: global_load_u16 v0, v0, s[0:1] offset:32 scale_offset |
| ; GCN-NEXT: s_wait_loadcnt 0x0 |
| ; GCN-NEXT: ; return to shader part epilog |
| entry: |
| %idxprom = sext i32 %idx to i64 |
| %idxadd = add i64 %idxprom, 16 |
| %arrayidx = getelementptr inbounds i16, ptr addrspace(1) %p, i64 %idxadd |
| %ld = load i16, ptr addrspace(1) %arrayidx, align 2 |
| %ret.i32 = zext i16 %ld to i32 |
| %ret = bitcast i32 %ret.i32 to float |
| ret float %ret |
| } |
| |
| ; 64-bit case. The GEP strides by <2 x float> and the load is also |
| ; <2 x float>, so the expected output is a single global_load_b64 with |
| ; scale_offset. |
| define amdgpu_ps <2 x float> @global_load_b64_idxprom(ptr addrspace(1) align 4 inreg %p, i32 %idx) { |
| ; GCN-LABEL: global_load_b64_idxprom: |
| ; GCN: ; %bb.0: ; %entry |
| ; GCN-NEXT: global_load_b64 v[0:1], v0, s[0:1] scale_offset |
| ; GCN-NEXT: s_wait_loadcnt 0x0 |
| ; GCN-NEXT: ; return to shader part epilog |
| entry: |
| %idxprom = sext i32 %idx to i64 |
| %arrayidx = getelementptr inbounds <2 x float>, ptr addrspace(1) %p, i64 %idxprom |
| %ret = load <2 x float>, ptr addrspace(1) %arrayidx, align 4 |
| ret <2 x float> %ret |
| } |
| |
| ; 96-bit case. The GEP strides by [3 x float] (12 bytes) and the load is |
| ; <3 x float>; the expected output is a single global_load_b96 with |
| ; scale_offset. |
| define amdgpu_ps <3 x float> @global_load_b96_idxprom(ptr addrspace(1) align 4 inreg %p, i32 %idx) { |
| ; GCN-LABEL: global_load_b96_idxprom: |
| ; GCN: ; %bb.0: ; %entry |
| ; GCN-NEXT: global_load_b96 v[0:2], v0, s[0:1] scale_offset |
| ; GCN-NEXT: s_wait_loadcnt 0x0 |
| ; GCN-NEXT: ; return to shader part epilog |
| entry: |
| %idxprom = sext i32 %idx to i64 |
| %arrayidx = getelementptr inbounds [3 x float], ptr addrspace(1) %p, i64 %idxprom |
| %ret = load <3 x float>, ptr addrspace(1) %arrayidx, align 4 |
| ret <3 x float> %ret |
| } |
| |
| ; 96-bit load at element index sext(idx) + 16 over a [3 x float] stride. The |
| ; expected output keeps scale_offset and folds the constant part into the |
| ; immediate as offset:192 (16 elements * 12 bytes). |
| define amdgpu_ps <3 x float> @global_load_b96_idxpromi_ioffset(ptr addrspace(1) align 4 inreg %p, i32 %idx) { |
| ; GCN-LABEL: global_load_b96_idxpromi_ioffset: |
| ; GCN: ; %bb.0: ; %entry |
| ; GCN-NEXT: global_load_b96 v[0:2], v0, s[0:1] offset:192 scale_offset |
| ; GCN-NEXT: s_wait_loadcnt 0x0 |
| ; GCN-NEXT: ; return to shader part epilog |
| entry: |
| %idxprom = sext i32 %idx to i64 |
| %idxadd = add i64 %idxprom, 16 |
| %arrayidx = getelementptr inbounds [3 x float], ptr addrspace(1) %p, i64 %idxadd |
| %ret = load <3 x float>, ptr addrspace(1) %arrayidx, align 4 |
| ret <3 x float> %ret |
| } |
| |
| ; 128-bit case. The GEP strides by <4 x float> and the load is <4 x float>; |
| ; the expected output is a single global_load_b128 with scale_offset. |
| define amdgpu_ps <4 x float> @global_load_b128_idxprom(ptr addrspace(1) align 4 inreg %p, i32 %idx) { |
| ; GCN-LABEL: global_load_b128_idxprom: |
| ; GCN: ; %bb.0: ; %entry |
| ; GCN-NEXT: global_load_b128 v[0:3], v0, s[0:1] scale_offset |
| ; GCN-NEXT: s_wait_loadcnt 0x0 |
| ; GCN-NEXT: ; return to shader part epilog |
| entry: |
| %idxprom = sext i32 %idx to i64 |
| %arrayidx = getelementptr inbounds <4 x float>, ptr addrspace(1) %p, i64 %idxprom |
| %ret = load <4 x float>, ptr addrspace(1) %arrayidx, align 4 |
| ret <4 x float> %ret |
| } |
| |
| ; Here the index is not an argument. It is loaded from %pp and annotated with |
| ; "!range !0". The expected output first loads the index through v[0:1] with |
| ; "off", waits for it, and then issues the float load with scale_offset using |
| ; that index and the s[0:1] base. |
| define amdgpu_ps float @global_load_b32_idxprom_range(ptr addrspace(1) align 4 inreg %p, ptr addrspace(1) align 4 %pp) { |
| ; GCN-LABEL: global_load_b32_idxprom_range: |
| ; GCN: ; %bb.0: ; %entry |
| ; GCN-NEXT: global_load_b32 v0, v[0:1], off |
| ; GCN-NEXT: s_wait_loadcnt 0x0 |
| ; GCN-NEXT: global_load_b32 v0, v0, s[0:1] scale_offset |
| ; GCN-NEXT: s_wait_loadcnt 0x0 |
| ; GCN-NEXT: ; return to shader part epilog |
| entry: |
| %idx = load i32, ptr addrspace(1) %pp, align 4, !range !0 |
| %idxprom = sext i32 %idx to i64 |
| %arrayidx = getelementptr inbounds float, ptr addrspace(1) %p, i64 %idxprom |
| %ret = load float, ptr addrspace(1) %arrayidx, align 4 |
| ret float %ret |
| } |
| |
| ; Range-annotated loaded index plus a constant 16 elements. The expected float |
| ; load keeps scale_offset and folds the constant part into the immediate as |
| ; offset:64 (16 elements * 4 bytes). |
| define amdgpu_ps float @global_load_b32_idxprom_range_ioffset(ptr addrspace(1) align 4 inreg %p, ptr addrspace(1) align 4 %pp) { |
| ; GCN-LABEL: global_load_b32_idxprom_range_ioffset: |
| ; GCN: ; %bb.0: ; %entry |
| ; GCN-NEXT: global_load_b32 v0, v[0:1], off |
| ; GCN-NEXT: s_wait_loadcnt 0x0 |
| ; GCN-NEXT: global_load_b32 v0, v0, s[0:1] offset:64 scale_offset |
| ; GCN-NEXT: s_wait_loadcnt 0x0 |
| ; GCN-NEXT: ; return to shader part epilog |
| entry: |
| %idx = load i32, ptr addrspace(1) %pp, align 4, !range !0 |
| %idxprom = sext i32 %idx to i64 |
| %idxadd = add i64 %idxprom, 16 |
| %arrayidx = getelementptr inbounds float, ptr addrspace(1) %p, i64 %idxadd |
| %ret = load float, ptr addrspace(1) %arrayidx, align 4 |
| ret float %ret |
| } |
| |
| ; Note: this is a byte load, so there is nothing to scale and no scale_offset is expected. |
| |
| ; i8 load at byte index (range-annotated loaded index) + 16. The expected |
| ; global_load_u8 still uses the v0 index with the s[0:1] base and offset:16, |
| ; but carries no scale_offset modifier. The byte is zero-extended to i32 and |
| ; returned as a float bit pattern. |
| define amdgpu_ps float @global_load_b8_idxprom_range_ioffset(ptr addrspace(1) align 4 inreg %p, ptr addrspace(1) align 4 %pp) { |
| ; GCN-LABEL: global_load_b8_idxprom_range_ioffset: |
| ; GCN: ; %bb.0: ; %entry |
| ; GCN-NEXT: global_load_b32 v0, v[0:1], off |
| ; GCN-NEXT: s_wait_loadcnt 0x0 |
| ; GCN-NEXT: global_load_u8 v0, v0, s[0:1] offset:16 |
| ; GCN-NEXT: s_wait_loadcnt 0x0 |
| ; GCN-NEXT: ; return to shader part epilog |
| entry: |
| %idx = load i32, ptr addrspace(1) %pp, align 4, !range !0 |
| %idxprom = sext i32 %idx to i64 |
| %idxadd = add i64 %idxprom, 16 |
| %arrayidx = getelementptr inbounds i8, ptr addrspace(1) %p, i64 %idxadd |
| %ld = load i8, ptr addrspace(1) %arrayidx |
| %ret.i32 = zext i8 %ld to i32 |
| %ret = bitcast i32 %ret.i32 to float |
| ret float %ret |
| } |
| |
| ; i16 load indexed by a range-annotated index loaded from %pp. The expected |
| ; output is the index load followed by a global_load_u16 with scale_offset. |
| ; The halfword is zero-extended to i32 and returned as a float bit pattern. |
| define amdgpu_ps float @global_load_b16_idxprom_range(ptr addrspace(1) align 4 inreg %p, ptr addrspace(1) align 4 %pp) { |
| ; GCN-LABEL: global_load_b16_idxprom_range: |
| ; GCN: ; %bb.0: ; %entry |
| ; GCN-NEXT: global_load_b32 v0, v[0:1], off |
| ; GCN-NEXT: s_wait_loadcnt 0x0 |
| ; GCN-NEXT: global_load_u16 v0, v0, s[0:1] scale_offset |
| ; GCN-NEXT: s_wait_loadcnt 0x0 |
| ; GCN-NEXT: ; return to shader part epilog |
| entry: |
| %idx = load i32, ptr addrspace(1) %pp, align 4, !range !0 |
| %idxprom = sext i32 %idx to i64 |
| %arrayidx = getelementptr inbounds i16, ptr addrspace(1) %p, i64 %idxprom |
| %ld = load i16, ptr addrspace(1) %arrayidx, align 2 |
| %ret.i32 = zext i16 %ld to i32 |
| %ret = bitcast i32 %ret.i32 to float |
| ret float %ret |
| } |
| |
| ; i16 load at (range-annotated loaded index) + 16 elements. The expected |
| ; global_load_u16 keeps scale_offset and folds the constant part into the |
| ; immediate as offset:32 (16 elements * 2 bytes). |
| define amdgpu_ps float @global_load_b16_idxprom_range_ioffset(ptr addrspace(1) align 4 inreg %p, ptr addrspace(1) align 4 %pp) { |
| ; GCN-LABEL: global_load_b16_idxprom_range_ioffset: |
| ; GCN: ; %bb.0: ; %entry |
| ; GCN-NEXT: global_load_b32 v0, v[0:1], off |
| ; GCN-NEXT: s_wait_loadcnt 0x0 |
| ; GCN-NEXT: global_load_u16 v0, v0, s[0:1] offset:32 scale_offset |
| ; GCN-NEXT: s_wait_loadcnt 0x0 |
| ; GCN-NEXT: ; return to shader part epilog |
| entry: |
| %idx = load i32, ptr addrspace(1) %pp, align 4, !range !0 |
| %idxprom = sext i32 %idx to i64 |
| %idxadd = add i64 %idxprom, 16 |
| %arrayidx = getelementptr inbounds i16, ptr addrspace(1) %p, i64 %idxadd |
| %ld = load i16, ptr addrspace(1) %arrayidx, align 2 |
| %ret.i32 = zext i16 %ld to i32 |
| %ret = bitcast i32 %ret.i32 to float |
| ret float %ret |
| } |
| |
| ; <2 x float> load indexed by a range-annotated index loaded from %pp. The |
| ; expected output is the index load followed by a global_load_b64 with |
| ; scale_offset. |
| define amdgpu_ps <2 x float> @global_load_b64_idxprom_range(ptr addrspace(1) align 4 inreg %p, ptr addrspace(1) align 4 %pp) { |
| ; GCN-LABEL: global_load_b64_idxprom_range: |
| ; GCN: ; %bb.0: ; %entry |
| ; GCN-NEXT: global_load_b32 v0, v[0:1], off |
| ; GCN-NEXT: s_wait_loadcnt 0x0 |
| ; GCN-NEXT: global_load_b64 v[0:1], v0, s[0:1] scale_offset |
| ; GCN-NEXT: s_wait_loadcnt 0x0 |
| ; GCN-NEXT: ; return to shader part epilog |
| entry: |
| %idx = load i32, ptr addrspace(1) %pp, align 4, !range !0 |
| %idxprom = sext i32 %idx to i64 |
| %arrayidx = getelementptr inbounds <2 x float>, ptr addrspace(1) %p, i64 %idxprom |
| %ret = load <2 x float>, ptr addrspace(1) %arrayidx, align 4 |
| ret <2 x float> %ret |
| } |
| |
| ; <3 x float> load over a [3 x float] stride, indexed by a range-annotated |
| ; index loaded from %pp. The expected output is the index load followed by a |
| ; global_load_b96 with scale_offset. |
| define amdgpu_ps <3 x float> @global_load_b96_idxprom_range(ptr addrspace(1) align 4 inreg %p, ptr addrspace(1) align 4 %pp) { |
| ; GCN-LABEL: global_load_b96_idxprom_range: |
| ; GCN: ; %bb.0: ; %entry |
| ; GCN-NEXT: global_load_b32 v0, v[0:1], off |
| ; GCN-NEXT: s_wait_loadcnt 0x0 |
| ; GCN-NEXT: global_load_b96 v[0:2], v0, s[0:1] scale_offset |
| ; GCN-NEXT: s_wait_loadcnt 0x0 |
| ; GCN-NEXT: ; return to shader part epilog |
| entry: |
| %idx = load i32, ptr addrspace(1) %pp, align 4, !range !0 |
| %idxprom = sext i32 %idx to i64 |
| %arrayidx = getelementptr inbounds [3 x float], ptr addrspace(1) %p, i64 %idxprom |
| %ret = load <3 x float>, ptr addrspace(1) %arrayidx, align 4 |
| ret <3 x float> %ret |
| } |
| |
| ; <3 x float> load at (range-annotated loaded index) + 16 elements over a |
| ; [3 x float] stride. The expected global_load_b96 keeps scale_offset and folds |
| ; the constant part into the immediate as offset:192 (16 elements * 12 bytes). |
| define amdgpu_ps <3 x float> @global_load_b96_idxprom_range_ioffset(ptr addrspace(1) align 4 inreg %p, ptr addrspace(1) align 4 %pp) { |
| ; GCN-LABEL: global_load_b96_idxprom_range_ioffset: |
| ; GCN: ; %bb.0: ; %entry |
| ; GCN-NEXT: global_load_b32 v0, v[0:1], off |
| ; GCN-NEXT: s_wait_loadcnt 0x0 |
| ; GCN-NEXT: global_load_b96 v[0:2], v0, s[0:1] offset:192 scale_offset |
| ; GCN-NEXT: s_wait_loadcnt 0x0 |
| ; GCN-NEXT: ; return to shader part epilog |
| entry: |
| %idx = load i32, ptr addrspace(1) %pp, align 4, !range !0 |
| %idxprom = sext i32 %idx to i64 |
| %idxadd = add i64 %idxprom, 16 |
| %arrayidx = getelementptr inbounds [3 x float], ptr addrspace(1) %p, i64 %idxadd |
| %ret = load <3 x float>, ptr addrspace(1) %arrayidx, align 4 |
| ret <3 x float> %ret |
| } |
| |
| ; <4 x float> load indexed by a range-annotated index loaded from %pp. The |
| ; expected output is the index load followed by a global_load_b128 with |
| ; scale_offset. |
| define amdgpu_ps <4 x float> @global_load_b128_idxprom_range(ptr addrspace(1) align 4 inreg %p, ptr addrspace(1) align 4 %pp) { |
| ; GCN-LABEL: global_load_b128_idxprom_range: |
| ; GCN: ; %bb.0: ; %entry |
| ; GCN-NEXT: global_load_b32 v0, v[0:1], off |
| ; GCN-NEXT: s_wait_loadcnt 0x0 |
| ; GCN-NEXT: global_load_b128 v[0:3], v0, s[0:1] scale_offset |
| ; GCN-NEXT: s_wait_loadcnt 0x0 |
| ; GCN-NEXT: ; return to shader part epilog |
| entry: |
| %idx = load i32, ptr addrspace(1) %pp, align 4, !range !0 |
| %idxprom = sext i32 %idx to i64 |
| %arrayidx = getelementptr inbounds <4 x float>, ptr addrspace(1) %p, i64 %idxprom |
| %ret = load <4 x float>, ptr addrspace(1) %arrayidx, align 4 |
| ret <4 x float> %ret |
| } |
| |
| ; Store counterpart of the b32 load case. The constant float 1.0 is stored to |
| ; a float element indexed by a sign-extended i32. The expected output |
| ; materializes 1.0 in v1 and issues a global_store_b32 with scale_offset. |
| define amdgpu_ps void @global_store_b32_idxprom(ptr addrspace(1) align 4 inreg %p, i32 %idx) { |
| ; GCN-LABEL: global_store_b32_idxprom: |
| ; GCN: ; %bb.0: ; %entry |
| ; GCN-NEXT: v_mov_b32_e32 v1, 1.0 |
| ; GCN-NEXT: global_store_b32 v0, v1, s[0:1] scale_offset |
| ; GCN-NEXT: s_endpgm |
| entry: |
| %idxprom = sext i32 %idx to i64 |
| %arrayidx = getelementptr inbounds float, ptr addrspace(1) %p, i64 %idxprom |
| store float 1.0, ptr addrspace(1) %arrayidx, align 4 |
| ret void |
| } |
| |
| ; 16-bit store. The constant i16 1 is stored to an i16 element indexed by a |
| ; sign-extended i32 (the base pointer is only 2-byte aligned here). The |
| ; expected output is a global_store_b16 with scale_offset. |
| define amdgpu_ps void @global_store_b16_idxprom(ptr addrspace(1) align 2 inreg %p, i32 %idx) { |
| ; GCN-LABEL: global_store_b16_idxprom: |
| ; GCN: ; %bb.0: ; %entry |
| ; GCN-NEXT: v_mov_b32_e32 v1, 1 |
| ; GCN-NEXT: global_store_b16 v0, v1, s[0:1] scale_offset |
| ; GCN-NEXT: s_endpgm |
| entry: |
| %idxprom = sext i32 %idx to i64 |
| %arrayidx = getelementptr inbounds i16, ptr addrspace(1) %p, i64 %idxprom |
| store i16 1, ptr addrspace(1) %arrayidx, align 2 |
| ret void |
| } |
| |
| ; 64-bit store. The constant double 1.0 is stored (with align 4) to a double |
| ; element indexed by a sign-extended i32. The expected output materializes the |
| ; constant in v[2:3] and issues a global_store_b64 with scale_offset. |
| define amdgpu_ps void @global_store_b64_idxprom(ptr addrspace(1) align 4 inreg %p, i32 %idx) { |
| ; GCN-LABEL: global_store_b64_idxprom: |
| ; GCN: ; %bb.0: ; %entry |
| ; GCN-NEXT: v_mov_b64_e32 v[2:3], 1.0 |
| ; GCN-NEXT: global_store_b64 v0, v[2:3], s[0:1] scale_offset |
| ; GCN-NEXT: s_endpgm |
| entry: |
| %idxprom = sext i32 %idx to i64 |
| %arrayidx = getelementptr inbounds double, ptr addrspace(1) %p, i64 %idxprom |
| store double 1.0, ptr addrspace(1) %arrayidx, align 4 |
| ret void |
| } |
| |
| ; Atomic case without a used result. A monotonic "atomicrmw add" of i32 1 on |
| ; an i32 element indexed by a sign-extended i32. The expected output is a |
| ; global_atomic_add_u32 with scale_offset and scope:SCOPE_SYS. |
| define amdgpu_ps void @global_atomicrmw_b32_idxprom(ptr addrspace(1) align 4 inreg %p, i32 %idx) { |
| ; GCN-LABEL: global_atomicrmw_b32_idxprom: |
| ; GCN: ; %bb.0: ; %entry |
| ; GCN-NEXT: v_mov_b32_e32 v1, 1 |
| ; GCN-NEXT: global_atomic_add_u32 v0, v1, s[0:1] scale_offset scope:SCOPE_SYS |
| ; GCN-NEXT: s_endpgm |
| entry: |
| %idxprom = sext i32 %idx to i64 |
| %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %p, i64 %idxprom |
| atomicrmw add ptr addrspace(1) %arrayidx, i32 1 monotonic |
| ret void |
| } |
| |
| ; Atomic case with a used result. A monotonic "atomicrmw add" of i64 1 on an |
| ; i64 element indexed by a sign-extended i32; the old value is returned, |
| ; bitcast to <2 x float>. The expected output is the returning form of |
| ; global_atomic_add_u64 (th:TH_ATOMIC_RETURN) with scale_offset, followed by a |
| ; wait on the load counter. |
| define amdgpu_ps <2 x float> @global_atomicrmw_b64_rtn_idxprom(ptr addrspace(1) align 8 inreg %p, i32 %idx) { |
| ; GCN-LABEL: global_atomicrmw_b64_rtn_idxprom: |
| ; GCN: ; %bb.0: ; %entry |
| ; GCN-NEXT: v_mov_b64_e32 v[2:3], 1 |
| ; GCN-NEXT: global_atomic_add_u64 v[0:1], v0, v[2:3], s[0:1] scale_offset th:TH_ATOMIC_RETURN scope:SCOPE_SYS |
| ; GCN-NEXT: s_wait_loadcnt 0x0 |
| ; GCN-NEXT: ; return to shader part epilog |
| entry: |
| %idxprom = sext i32 %idx to i64 |
| %arrayidx = getelementptr inbounds i64, ptr addrspace(1) %p, i64 %idxprom |
| %ret = atomicrmw add ptr addrspace(1) %arrayidx, i64 1 monotonic |
| %ret.cast = bitcast i64 %ret to <2 x float> |
| ret <2 x float> %ret.cast |
| } |
| |
| ; Range metadata referenced by the "!range !0" index loads in the *_range |
| ; tests. Under LLVM's range-metadata convention the pair denotes the half-open |
| ; interval [0, 1024). |
| !0 = !{i32 0, i32 1024} |