| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GPRIDX %s |
| ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=MOVREL %s |
| |
; Extracts element %sel from the constant vector <1.0 .. 8.0>. %sel is a plain
; (non-inreg) argument of a default-calling-convention function.
; Both runs expect the same code: the eight constants are materialized in
; s4..s11, exec is saved in s[12:13], and a v_readfirstlane_b32 / v_cmp_eq_u32 /
; s_and_saveexec_b64 / s_cbranch_execnz loop performs the read with
; s_movrels_b32 (index in m0) before exec is restored.
| define float @dyn_extract_v8f32_const_s_v(i32 %sel) { |
| ; GPRIDX-LABEL: dyn_extract_v8f32_const_s_v: |
| ; GPRIDX: ; %bb.0: ; %entry |
| ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GPRIDX-NEXT: s_mov_b32 s11, 0x41000000 |
| ; GPRIDX-NEXT: s_mov_b32 s10, 0x40e00000 |
| ; GPRIDX-NEXT: s_mov_b32 s9, 0x40c00000 |
| ; GPRIDX-NEXT: s_mov_b32 s8, 0x40a00000 |
| ; GPRIDX-NEXT: s_mov_b32 s7, 4.0 |
| ; GPRIDX-NEXT: s_mov_b32 s6, 0x40400000 |
| ; GPRIDX-NEXT: s_mov_b32 s5, 2.0 |
| ; GPRIDX-NEXT: s_mov_b32 s4, 1.0 |
| ; GPRIDX-NEXT: s_mov_b64 s[12:13], exec |
| ; GPRIDX-NEXT: BB0_1: ; =>This Inner Loop Header: Depth=1 |
| ; GPRIDX-NEXT: v_readfirstlane_b32 s14, v0 |
| ; GPRIDX-NEXT: s_mov_b32 m0, s14 |
| ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s14, v0 |
| ; GPRIDX-NEXT: s_movrels_b32 s14, s4 |
| ; GPRIDX-NEXT: v_mov_b32_e32 v1, s14 |
| ; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc |
| ; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc |
| ; GPRIDX-NEXT: s_cbranch_execnz BB0_1 |
| ; GPRIDX-NEXT: ; %bb.2: |
| ; GPRIDX-NEXT: s_mov_b64 exec, s[12:13] |
| ; GPRIDX-NEXT: v_mov_b32_e32 v0, v1 |
| ; GPRIDX-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; MOVREL-LABEL: dyn_extract_v8f32_const_s_v: |
| ; MOVREL: ; %bb.0: ; %entry |
| ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; MOVREL-NEXT: s_mov_b32 s11, 0x41000000 |
| ; MOVREL-NEXT: s_mov_b32 s10, 0x40e00000 |
| ; MOVREL-NEXT: s_mov_b32 s9, 0x40c00000 |
| ; MOVREL-NEXT: s_mov_b32 s8, 0x40a00000 |
| ; MOVREL-NEXT: s_mov_b32 s7, 4.0 |
| ; MOVREL-NEXT: s_mov_b32 s6, 0x40400000 |
| ; MOVREL-NEXT: s_mov_b32 s5, 2.0 |
| ; MOVREL-NEXT: s_mov_b32 s4, 1.0 |
| ; MOVREL-NEXT: s_mov_b64 s[12:13], exec |
| ; MOVREL-NEXT: BB0_1: ; =>This Inner Loop Header: Depth=1 |
| ; MOVREL-NEXT: v_readfirstlane_b32 s14, v0 |
| ; MOVREL-NEXT: s_mov_b32 m0, s14 |
| ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s14, v0 |
| ; MOVREL-NEXT: s_movrels_b32 s14, s4 |
| ; MOVREL-NEXT: v_mov_b32_e32 v1, s14 |
| ; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc |
| ; MOVREL-NEXT: s_xor_b64 exec, exec, vcc |
| ; MOVREL-NEXT: s_cbranch_execnz BB0_1 |
| ; MOVREL-NEXT: ; %bb.2: |
| ; MOVREL-NEXT: s_mov_b64 exec, s[12:13] |
| ; MOVREL-NEXT: v_mov_b32_e32 v0, v1 |
| ; MOVREL-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel |
| ret float %ext |
| } |
| |
; Extracts element %sel from the constant vector <1.0 .. 8.0> in an amdgpu_ps
; function where %sel is an inreg argument.
; Both runs expect straight-line code with no loop: the constants are placed in
; s4..s11, m0 is loaded from s2, and a single s_movrels_b32 reads the element,
; which is then copied to v0 for the return.
| define amdgpu_ps float @dyn_extract_v8f32_const_s_s(i32 inreg %sel) { |
| ; GPRIDX-LABEL: dyn_extract_v8f32_const_s_s: |
| ; GPRIDX: ; %bb.0: ; %entry |
| ; GPRIDX-NEXT: s_mov_b32 s4, 1.0 |
| ; GPRIDX-NEXT: s_mov_b32 m0, s2 |
| ; GPRIDX-NEXT: s_mov_b32 s11, 0x41000000 |
| ; GPRIDX-NEXT: s_mov_b32 s10, 0x40e00000 |
| ; GPRIDX-NEXT: s_mov_b32 s9, 0x40c00000 |
| ; GPRIDX-NEXT: s_mov_b32 s8, 0x40a00000 |
| ; GPRIDX-NEXT: s_mov_b32 s7, 4.0 |
| ; GPRIDX-NEXT: s_mov_b32 s6, 0x40400000 |
| ; GPRIDX-NEXT: s_mov_b32 s5, 2.0 |
| ; GPRIDX-NEXT: s_movrels_b32 s0, s4 |
| ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 |
| ; GPRIDX-NEXT: ; return to shader part epilog |
| ; |
| ; MOVREL-LABEL: dyn_extract_v8f32_const_s_s: |
| ; MOVREL: ; %bb.0: ; %entry |
| ; MOVREL-NEXT: s_mov_b32 s4, 1.0 |
| ; MOVREL-NEXT: s_mov_b32 m0, s2 |
| ; MOVREL-NEXT: s_mov_b32 s11, 0x41000000 |
| ; MOVREL-NEXT: s_mov_b32 s10, 0x40e00000 |
| ; MOVREL-NEXT: s_mov_b32 s9, 0x40c00000 |
| ; MOVREL-NEXT: s_mov_b32 s8, 0x40a00000 |
| ; MOVREL-NEXT: s_mov_b32 s7, 4.0 |
| ; MOVREL-NEXT: s_mov_b32 s6, 0x40400000 |
| ; MOVREL-NEXT: s_mov_b32 s5, 2.0 |
| ; MOVREL-NEXT: s_movrels_b32 s0, s4 |
| ; MOVREL-NEXT: v_mov_b32_e32 v0, s0 |
| ; MOVREL-NEXT: ; return to shader part epilog |
| entry: |
| %ext = extractelement <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, i32 %sel |
| ret float %ext |
| } |
| |
; Extracts element %sel from an inreg <8 x float> argument; %sel itself is not
; inreg.
; Both runs expect the vector to be copied from s2..s9 down to s0..s7, exec to
; be saved in s[8:9], and a v_readfirstlane_b32-driven loop to read the element
; with s_movrels_b32 (index in m0) before exec is restored.
| define amdgpu_ps float @dyn_extract_v8f32_s_v(<8 x float> inreg %vec, i32 %sel) { |
| ; GPRIDX-LABEL: dyn_extract_v8f32_s_v: |
| ; GPRIDX: ; %bb.0: ; %entry |
| ; GPRIDX-NEXT: s_mov_b32 s0, s2 |
| ; GPRIDX-NEXT: s_mov_b32 s1, s3 |
| ; GPRIDX-NEXT: s_mov_b32 s2, s4 |
| ; GPRIDX-NEXT: s_mov_b32 s3, s5 |
| ; GPRIDX-NEXT: s_mov_b32 s4, s6 |
| ; GPRIDX-NEXT: s_mov_b32 s5, s7 |
| ; GPRIDX-NEXT: s_mov_b32 s6, s8 |
| ; GPRIDX-NEXT: s_mov_b32 s7, s9 |
| ; GPRIDX-NEXT: s_mov_b64 s[8:9], exec |
| ; GPRIDX-NEXT: BB2_1: ; =>This Inner Loop Header: Depth=1 |
| ; GPRIDX-NEXT: v_readfirstlane_b32 s10, v0 |
| ; GPRIDX-NEXT: s_mov_b32 m0, s10 |
| ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s10, v0 |
| ; GPRIDX-NEXT: s_movrels_b32 s10, s0 |
| ; GPRIDX-NEXT: v_mov_b32_e32 v1, s10 |
| ; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc |
| ; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc |
| ; GPRIDX-NEXT: s_cbranch_execnz BB2_1 |
| ; GPRIDX-NEXT: ; %bb.2: |
| ; GPRIDX-NEXT: s_mov_b64 exec, s[8:9] |
| ; GPRIDX-NEXT: v_mov_b32_e32 v0, v1 |
| ; GPRIDX-NEXT: ; return to shader part epilog |
| ; |
| ; MOVREL-LABEL: dyn_extract_v8f32_s_v: |
| ; MOVREL: ; %bb.0: ; %entry |
| ; MOVREL-NEXT: s_mov_b32 s0, s2 |
| ; MOVREL-NEXT: s_mov_b32 s1, s3 |
| ; MOVREL-NEXT: s_mov_b32 s2, s4 |
| ; MOVREL-NEXT: s_mov_b32 s3, s5 |
| ; MOVREL-NEXT: s_mov_b32 s4, s6 |
| ; MOVREL-NEXT: s_mov_b32 s5, s7 |
| ; MOVREL-NEXT: s_mov_b32 s6, s8 |
| ; MOVREL-NEXT: s_mov_b32 s7, s9 |
| ; MOVREL-NEXT: s_mov_b64 s[8:9], exec |
| ; MOVREL-NEXT: BB2_1: ; =>This Inner Loop Header: Depth=1 |
| ; MOVREL-NEXT: v_readfirstlane_b32 s10, v0 |
| ; MOVREL-NEXT: s_mov_b32 m0, s10 |
| ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s10, v0 |
| ; MOVREL-NEXT: s_movrels_b32 s10, s0 |
| ; MOVREL-NEXT: v_mov_b32_e32 v1, s10 |
| ; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc |
| ; MOVREL-NEXT: s_xor_b64 exec, exec, vcc |
| ; MOVREL-NEXT: s_cbranch_execnz BB2_1 |
| ; MOVREL-NEXT: ; %bb.2: |
| ; MOVREL-NEXT: s_mov_b64 exec, s[8:9] |
| ; MOVREL-NEXT: v_mov_b32_e32 v0, v1 |
| ; MOVREL-NEXT: ; return to shader part epilog |
| entry: |
| %ext = extractelement <8 x float> %vec, i32 %sel |
| ret float %ext |
| } |
| |
; Extracts element %sel from an <8 x float> argument; neither argument is
; inreg and the function uses the default calling convention.
; The two runs differ in the indexed read inside the v_readfirstlane_b32 loop:
; the gfx900 run expects s_set_gpr_idx_on / v_mov_b32 / s_set_gpr_idx_off,
; while the fiji run expects m0 to be written followed by v_movrels_b32.
| define float @dyn_extract_v8f32_v_v(<8 x float> %vec, i32 %sel) { |
| ; GPRIDX-LABEL: dyn_extract_v8f32_v_v: |
| ; GPRIDX: ; %bb.0: ; %entry |
| ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GPRIDX-NEXT: s_mov_b64 s[4:5], exec |
| ; GPRIDX-NEXT: BB3_1: ; =>This Inner Loop Header: Depth=1 |
| ; GPRIDX-NEXT: v_readfirstlane_b32 s6, v8 |
| ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v8 |
| ; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(SRC0) |
| ; GPRIDX-NEXT: v_mov_b32_e32 v9, v0 |
| ; GPRIDX-NEXT: s_set_gpr_idx_off |
| ; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc |
| ; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc |
| ; GPRIDX-NEXT: s_cbranch_execnz BB3_1 |
| ; GPRIDX-NEXT: ; %bb.2: |
| ; GPRIDX-NEXT: s_mov_b64 exec, s[4:5] |
| ; GPRIDX-NEXT: v_mov_b32_e32 v0, v9 |
| ; GPRIDX-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; MOVREL-LABEL: dyn_extract_v8f32_v_v: |
| ; MOVREL: ; %bb.0: ; %entry |
| ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; MOVREL-NEXT: s_mov_b64 s[4:5], exec |
| ; MOVREL-NEXT: BB3_1: ; =>This Inner Loop Header: Depth=1 |
| ; MOVREL-NEXT: v_readfirstlane_b32 s6, v8 |
| ; MOVREL-NEXT: s_mov_b32 m0, s6 |
| ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v8 |
| ; MOVREL-NEXT: v_movrels_b32_e32 v9, v0 |
| ; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc |
| ; MOVREL-NEXT: s_xor_b64 exec, exec, vcc |
| ; MOVREL-NEXT: s_cbranch_execnz BB3_1 |
| ; MOVREL-NEXT: ; %bb.2: |
| ; MOVREL-NEXT: s_mov_b64 exec, s[4:5] |
| ; MOVREL-NEXT: v_mov_b32_e32 v0, v9 |
| ; MOVREL-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %ext = extractelement <8 x float> %vec, i32 %sel |
| ret float %ext |
| } |
| |
; Extracts element %sel from a non-inreg <8 x float> argument with an inreg
; index in an amdgpu_ps function.
; No loop is expected in either run: the gfx900 run brackets a v_mov_b32 with
; s_set_gpr_idx_on s2 / s_set_gpr_idx_off, and the fiji run copies s2 to m0 and
; uses v_movrels_b32.
| define amdgpu_ps float @dyn_extract_v8f32_v_s(<8 x float> %vec, i32 inreg %sel) { |
| ; GPRIDX-LABEL: dyn_extract_v8f32_v_s: |
| ; GPRIDX: ; %bb.0: ; %entry |
| ; GPRIDX-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0) |
| ; GPRIDX-NEXT: v_mov_b32_e32 v0, v0 |
| ; GPRIDX-NEXT: s_set_gpr_idx_off |
| ; GPRIDX-NEXT: ; return to shader part epilog |
| ; |
| ; MOVREL-LABEL: dyn_extract_v8f32_v_s: |
| ; MOVREL: ; %bb.0: ; %entry |
| ; MOVREL-NEXT: s_mov_b32 m0, s2 |
| ; MOVREL-NEXT: v_movrels_b32_e32 v0, v0 |
| ; MOVREL-NEXT: ; return to shader part epilog |
| entry: |
| %ext = extractelement <8 x float> %vec, i32 %sel |
| ret float %ext |
| } |
| |
; Extracts element %sel from an inreg <8 x float> argument with an inreg index
; in an amdgpu_ps function.
; Both runs expect identical straight-line code: the vector is copied from
; s2..s9 to s0..s7, m0 is loaded from s10, and s_movrels_b32 s0, s0 reads the
; element, which is then copied to v0.
| define amdgpu_ps float @dyn_extract_v8f32_s_s(<8 x float> inreg %vec, i32 inreg %sel) { |
| ; GPRIDX-LABEL: dyn_extract_v8f32_s_s: |
| ; GPRIDX: ; %bb.0: ; %entry |
| ; GPRIDX-NEXT: s_mov_b32 s0, s2 |
| ; GPRIDX-NEXT: s_mov_b32 m0, s10 |
| ; GPRIDX-NEXT: s_mov_b32 s1, s3 |
| ; GPRIDX-NEXT: s_mov_b32 s2, s4 |
| ; GPRIDX-NEXT: s_mov_b32 s3, s5 |
| ; GPRIDX-NEXT: s_mov_b32 s4, s6 |
| ; GPRIDX-NEXT: s_mov_b32 s5, s7 |
| ; GPRIDX-NEXT: s_mov_b32 s6, s8 |
| ; GPRIDX-NEXT: s_mov_b32 s7, s9 |
| ; GPRIDX-NEXT: s_movrels_b32 s0, s0 |
| ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 |
| ; GPRIDX-NEXT: ; return to shader part epilog |
| ; |
| ; MOVREL-LABEL: dyn_extract_v8f32_s_s: |
| ; MOVREL: ; %bb.0: ; %entry |
| ; MOVREL-NEXT: s_mov_b32 s0, s2 |
| ; MOVREL-NEXT: s_mov_b32 m0, s10 |
| ; MOVREL-NEXT: s_mov_b32 s1, s3 |
| ; MOVREL-NEXT: s_mov_b32 s2, s4 |
| ; MOVREL-NEXT: s_mov_b32 s3, s5 |
| ; MOVREL-NEXT: s_mov_b32 s4, s6 |
| ; MOVREL-NEXT: s_mov_b32 s5, s7 |
| ; MOVREL-NEXT: s_mov_b32 s6, s8 |
| ; MOVREL-NEXT: s_mov_b32 s7, s9 |
| ; MOVREL-NEXT: s_movrels_b32 s0, s0 |
| ; MOVREL-NEXT: v_mov_b32_e32 v0, s0 |
| ; MOVREL-NEXT: ; return to shader part epilog |
| entry: |
| %ext = extractelement <8 x float> %vec, i32 %sel |
| ret float %ext |
| } |
| |
; Extracts element %sel from the constant vector <i64 1 .. i64 8>; %sel is a
; plain (non-inreg) argument.
; Both runs expect the constants in s[4:5]..s[18:19] and a v_readfirstlane_b32
; loop in which the index is shifted left by 1 into m0 (s_lshl_b32) and the
; element is read as two 32-bit halves with s_movrels_b32 from s4 and s5.
; The halves are copied to v0 and v1 after exec is restored.
| define i64 @dyn_extract_v8i64_const_s_v(i32 %sel) { |
| ; GPRIDX-LABEL: dyn_extract_v8i64_const_s_v: |
| ; GPRIDX: ; %bb.0: ; %entry |
| ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GPRIDX-NEXT: s_mov_b64 s[18:19], 8 |
| ; GPRIDX-NEXT: s_mov_b64 s[16:17], 7 |
| ; GPRIDX-NEXT: s_mov_b64 s[14:15], 6 |
| ; GPRIDX-NEXT: s_mov_b64 s[12:13], 5 |
| ; GPRIDX-NEXT: s_mov_b64 s[10:11], 4 |
| ; GPRIDX-NEXT: s_mov_b64 s[8:9], 3 |
| ; GPRIDX-NEXT: s_mov_b64 s[6:7], 2 |
| ; GPRIDX-NEXT: s_mov_b64 s[4:5], 1 |
| ; GPRIDX-NEXT: s_mov_b64 s[20:21], exec |
| ; GPRIDX-NEXT: BB6_1: ; =>This Inner Loop Header: Depth=1 |
| ; GPRIDX-NEXT: v_readfirstlane_b32 s22, v0 |
| ; GPRIDX-NEXT: s_lshl_b32 m0, s22, 1 |
| ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s22, v0 |
| ; GPRIDX-NEXT: s_movrels_b32 s22, s4 |
| ; GPRIDX-NEXT: s_movrels_b32 s23, s5 |
| ; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc |
| ; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc |
| ; GPRIDX-NEXT: s_cbranch_execnz BB6_1 |
| ; GPRIDX-NEXT: ; %bb.2: |
| ; GPRIDX-NEXT: s_mov_b64 exec, s[20:21] |
| ; GPRIDX-NEXT: v_mov_b32_e32 v0, s22 |
| ; GPRIDX-NEXT: v_mov_b32_e32 v1, s23 |
| ; GPRIDX-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; MOVREL-LABEL: dyn_extract_v8i64_const_s_v: |
| ; MOVREL: ; %bb.0: ; %entry |
| ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; MOVREL-NEXT: s_mov_b64 s[18:19], 8 |
| ; MOVREL-NEXT: s_mov_b64 s[16:17], 7 |
| ; MOVREL-NEXT: s_mov_b64 s[14:15], 6 |
| ; MOVREL-NEXT: s_mov_b64 s[12:13], 5 |
| ; MOVREL-NEXT: s_mov_b64 s[10:11], 4 |
| ; MOVREL-NEXT: s_mov_b64 s[8:9], 3 |
| ; MOVREL-NEXT: s_mov_b64 s[6:7], 2 |
| ; MOVREL-NEXT: s_mov_b64 s[4:5], 1 |
| ; MOVREL-NEXT: s_mov_b64 s[20:21], exec |
| ; MOVREL-NEXT: BB6_1: ; =>This Inner Loop Header: Depth=1 |
| ; MOVREL-NEXT: v_readfirstlane_b32 s22, v0 |
| ; MOVREL-NEXT: s_lshl_b32 m0, s22, 1 |
| ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s22, v0 |
| ; MOVREL-NEXT: s_movrels_b32 s22, s4 |
| ; MOVREL-NEXT: s_movrels_b32 s23, s5 |
| ; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc |
| ; MOVREL-NEXT: s_xor_b64 exec, exec, vcc |
| ; MOVREL-NEXT: s_cbranch_execnz BB6_1 |
| ; MOVREL-NEXT: ; %bb.2: |
| ; MOVREL-NEXT: s_mov_b64 exec, s[20:21] |
| ; MOVREL-NEXT: v_mov_b32_e32 v0, s22 |
| ; MOVREL-NEXT: v_mov_b32_e32 v1, s23 |
| ; MOVREL-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %ext = extractelement <8 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8>, i32 %sel |
| ret i64 %ext |
| } |
| |
; Extracts element %sel (inreg) from the constant vector <i64 1 .. i64 8> and
; stores the result through an undef addrspace(1) pointer.
; Both runs expect straight-line code using a single s_movrels_b64 with m0
; loaded directly from s2. The store differs by run: global_store_dwordx2 for
; gfx900, flat_store_dwordx2 for fiji.
; NOTE(review): m0 receives the unshifted index here, whereas the loop-based
; i64 variants in this file shift the index left by 1 before their 32-bit
; reads - confirm the index units s_movrels_b64 expects.
| define amdgpu_ps void @dyn_extract_v8i64_const_s_s(i32 inreg %sel) { |
| ; GPRIDX-LABEL: dyn_extract_v8i64_const_s_s: |
| ; GPRIDX: ; %bb.0: ; %entry |
| ; GPRIDX-NEXT: s_mov_b64 s[4:5], 1 |
| ; GPRIDX-NEXT: s_mov_b32 m0, s2 |
| ; GPRIDX-NEXT: s_mov_b64 s[18:19], 8 |
| ; GPRIDX-NEXT: s_mov_b64 s[16:17], 7 |
| ; GPRIDX-NEXT: s_mov_b64 s[14:15], 6 |
| ; GPRIDX-NEXT: s_mov_b64 s[12:13], 5 |
| ; GPRIDX-NEXT: s_mov_b64 s[10:11], 4 |
| ; GPRIDX-NEXT: s_mov_b64 s[8:9], 3 |
| ; GPRIDX-NEXT: s_mov_b64 s[6:7], 2 |
| ; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[4:5] |
| ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 |
| ; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 |
| ; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off |
| ; GPRIDX-NEXT: s_endpgm |
| ; |
| ; MOVREL-LABEL: dyn_extract_v8i64_const_s_s: |
| ; MOVREL: ; %bb.0: ; %entry |
| ; MOVREL-NEXT: s_mov_b64 s[4:5], 1 |
| ; MOVREL-NEXT: s_mov_b32 m0, s2 |
| ; MOVREL-NEXT: s_mov_b64 s[18:19], 8 |
| ; MOVREL-NEXT: s_mov_b64 s[16:17], 7 |
| ; MOVREL-NEXT: s_mov_b64 s[14:15], 6 |
| ; MOVREL-NEXT: s_mov_b64 s[12:13], 5 |
| ; MOVREL-NEXT: s_mov_b64 s[10:11], 4 |
| ; MOVREL-NEXT: s_mov_b64 s[8:9], 3 |
| ; MOVREL-NEXT: s_mov_b64 s[6:7], 2 |
| ; MOVREL-NEXT: s_movrels_b64 s[0:1], s[4:5] |
| ; MOVREL-NEXT: v_mov_b32_e32 v0, s0 |
| ; MOVREL-NEXT: v_mov_b32_e32 v1, s1 |
| ; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] |
| ; MOVREL-NEXT: s_endpgm |
| entry: |
| %ext = extractelement <8 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8>, i32 %sel |
| store i64 %ext, i64 addrspace(1)* undef |
| ret void |
| } |
| |
; Extracts element %sel (not inreg) from an inreg <8 x i64> argument and stores
; the result through an undef addrspace(1) pointer.
; Both runs expect the vector to be copied from s2..s17 to s0..s15 and a
; v_readfirstlane_b32 loop that shifts the index left by 1 into m0 and reads the
; two 32-bit halves with s_movrels_b32 from s0 and s1. The store differs by
; run: global_store_dwordx2 for gfx900, flat_store_dwordx2 for fiji.
| define amdgpu_ps void @dyn_extract_v8i64_s_v(<8 x i64> inreg %vec, i32 %sel) { |
| ; GPRIDX-LABEL: dyn_extract_v8i64_s_v: |
| ; GPRIDX: ; %bb.0: ; %entry |
| ; GPRIDX-NEXT: s_mov_b32 s0, s2 |
| ; GPRIDX-NEXT: s_mov_b32 s1, s3 |
| ; GPRIDX-NEXT: s_mov_b32 s2, s4 |
| ; GPRIDX-NEXT: s_mov_b32 s3, s5 |
| ; GPRIDX-NEXT: s_mov_b32 s4, s6 |
| ; GPRIDX-NEXT: s_mov_b32 s5, s7 |
| ; GPRIDX-NEXT: s_mov_b32 s6, s8 |
| ; GPRIDX-NEXT: s_mov_b32 s7, s9 |
| ; GPRIDX-NEXT: s_mov_b32 s8, s10 |
| ; GPRIDX-NEXT: s_mov_b32 s9, s11 |
| ; GPRIDX-NEXT: s_mov_b32 s10, s12 |
| ; GPRIDX-NEXT: s_mov_b32 s11, s13 |
| ; GPRIDX-NEXT: s_mov_b32 s12, s14 |
| ; GPRIDX-NEXT: s_mov_b32 s13, s15 |
| ; GPRIDX-NEXT: s_mov_b32 s14, s16 |
| ; GPRIDX-NEXT: s_mov_b32 s15, s17 |
| ; GPRIDX-NEXT: s_mov_b64 s[16:17], exec |
| ; GPRIDX-NEXT: BB8_1: ; =>This Inner Loop Header: Depth=1 |
| ; GPRIDX-NEXT: v_readfirstlane_b32 s18, v0 |
| ; GPRIDX-NEXT: s_lshl_b32 m0, s18, 1 |
| ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s18, v0 |
| ; GPRIDX-NEXT: s_movrels_b32 s18, s0 |
| ; GPRIDX-NEXT: s_movrels_b32 s19, s1 |
| ; GPRIDX-NEXT: v_mov_b32_e32 v1, s18 |
| ; GPRIDX-NEXT: v_mov_b32_e32 v2, s19 |
| ; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc |
| ; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc |
| ; GPRIDX-NEXT: s_cbranch_execnz BB8_1 |
| ; GPRIDX-NEXT: ; %bb.2: |
| ; GPRIDX-NEXT: s_mov_b64 exec, s[16:17] |
| ; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[1:2], off |
| ; GPRIDX-NEXT: s_endpgm |
| ; |
| ; MOVREL-LABEL: dyn_extract_v8i64_s_v: |
| ; MOVREL: ; %bb.0: ; %entry |
| ; MOVREL-NEXT: s_mov_b32 s0, s2 |
| ; MOVREL-NEXT: s_mov_b32 s1, s3 |
| ; MOVREL-NEXT: s_mov_b32 s2, s4 |
| ; MOVREL-NEXT: s_mov_b32 s3, s5 |
| ; MOVREL-NEXT: s_mov_b32 s4, s6 |
| ; MOVREL-NEXT: s_mov_b32 s5, s7 |
| ; MOVREL-NEXT: s_mov_b32 s6, s8 |
| ; MOVREL-NEXT: s_mov_b32 s7, s9 |
| ; MOVREL-NEXT: s_mov_b32 s8, s10 |
| ; MOVREL-NEXT: s_mov_b32 s9, s11 |
| ; MOVREL-NEXT: s_mov_b32 s10, s12 |
| ; MOVREL-NEXT: s_mov_b32 s11, s13 |
| ; MOVREL-NEXT: s_mov_b32 s12, s14 |
| ; MOVREL-NEXT: s_mov_b32 s13, s15 |
| ; MOVREL-NEXT: s_mov_b32 s14, s16 |
| ; MOVREL-NEXT: s_mov_b32 s15, s17 |
| ; MOVREL-NEXT: s_mov_b64 s[16:17], exec |
| ; MOVREL-NEXT: BB8_1: ; =>This Inner Loop Header: Depth=1 |
| ; MOVREL-NEXT: v_readfirstlane_b32 s18, v0 |
| ; MOVREL-NEXT: s_lshl_b32 m0, s18, 1 |
| ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s18, v0 |
| ; MOVREL-NEXT: s_movrels_b32 s18, s0 |
| ; MOVREL-NEXT: s_movrels_b32 s19, s1 |
| ; MOVREL-NEXT: v_mov_b32_e32 v1, s18 |
| ; MOVREL-NEXT: v_mov_b32_e32 v2, s19 |
| ; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc |
| ; MOVREL-NEXT: s_xor_b64 exec, exec, vcc |
| ; MOVREL-NEXT: s_cbranch_execnz BB8_1 |
| ; MOVREL-NEXT: ; %bb.2: |
| ; MOVREL-NEXT: s_mov_b64 exec, s[16:17] |
| ; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[1:2] |
| ; MOVREL-NEXT: s_endpgm |
| entry: |
| %ext = extractelement <8 x i64> %vec, i32 %sel |
| store i64 %ext, i64 addrspace(1)* undef |
| ret void |
| } |
| |
; Extracts element %sel from an <8 x i64> argument; neither argument is inreg.
; Inside the v_readfirstlane_b32 loop the index is shifted left by 1 and the
; element is read as two 32-bit halves starting at v0 and v1.
; The gfx900 run expects two s_set_gpr_idx_on / v_mov_b32 / s_set_gpr_idx_off
; groups; the fiji run expects the shifted index in m0 followed by two
; v_movrels_b32 instructions.
| define i64 @dyn_extract_v8i64_v_v(<8 x i64> %vec, i32 %sel) { |
| ; GPRIDX-LABEL: dyn_extract_v8i64_v_v: |
| ; GPRIDX: ; %bb.0: ; %entry |
| ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GPRIDX-NEXT: s_mov_b64 s[4:5], exec |
| ; GPRIDX-NEXT: BB9_1: ; =>This Inner Loop Header: Depth=1 |
| ; GPRIDX-NEXT: v_readfirstlane_b32 s6, v16 |
| ; GPRIDX-NEXT: s_lshl_b32 s7, s6, 1 |
| ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16 |
| ; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0) |
| ; GPRIDX-NEXT: v_mov_b32_e32 v17, v0 |
| ; GPRIDX-NEXT: s_set_gpr_idx_off |
| ; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0) |
| ; GPRIDX-NEXT: v_mov_b32_e32 v18, v1 |
| ; GPRIDX-NEXT: s_set_gpr_idx_off |
| ; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc |
| ; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc |
| ; GPRIDX-NEXT: s_cbranch_execnz BB9_1 |
| ; GPRIDX-NEXT: ; %bb.2: |
| ; GPRIDX-NEXT: s_mov_b64 exec, s[4:5] |
| ; GPRIDX-NEXT: v_mov_b32_e32 v0, v17 |
| ; GPRIDX-NEXT: v_mov_b32_e32 v1, v18 |
| ; GPRIDX-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; MOVREL-LABEL: dyn_extract_v8i64_v_v: |
| ; MOVREL: ; %bb.0: ; %entry |
| ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; MOVREL-NEXT: s_mov_b64 s[4:5], exec |
| ; MOVREL-NEXT: BB9_1: ; =>This Inner Loop Header: Depth=1 |
| ; MOVREL-NEXT: v_readfirstlane_b32 s6, v16 |
| ; MOVREL-NEXT: s_lshl_b32 m0, s6, 1 |
| ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16 |
| ; MOVREL-NEXT: v_movrels_b32_e32 v17, v0 |
| ; MOVREL-NEXT: v_movrels_b32_e32 v18, v1 |
| ; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc |
| ; MOVREL-NEXT: s_xor_b64 exec, exec, vcc |
| ; MOVREL-NEXT: s_cbranch_execnz BB9_1 |
| ; MOVREL-NEXT: ; %bb.2: |
| ; MOVREL-NEXT: s_mov_b64 exec, s[4:5] |
| ; MOVREL-NEXT: v_mov_b32_e32 v0, v17 |
| ; MOVREL-NEXT: v_mov_b32_e32 v1, v18 |
| ; MOVREL-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %ext = extractelement <8 x i64> %vec, i32 %sel |
| ret i64 %ext |
| } |
| |
; Extracts element %sel (inreg) from a non-inreg <8 x i64> argument and stores
; the result through an undef addrspace(1) pointer.
; No loop is expected. The index in s2 is shifted left by 1 and the element is
; read as two 32-bit halves into v16 and v17: via two s_set_gpr_idx_on /
; v_mov_b32 / s_set_gpr_idx_off groups in the gfx900 run, and via m0 plus two
; v_movrels_b32 in the fiji run. The store is global_store_dwordx2 for gfx900
; and flat_store_dwordx2 for fiji.
| define amdgpu_ps void @dyn_extract_v8i64_v_s(<8 x i64> %vec, i32 inreg %sel) { |
| ; GPRIDX-LABEL: dyn_extract_v8i64_v_s: |
| ; GPRIDX: ; %bb.0: ; %entry |
| ; GPRIDX-NEXT: s_lshl_b32 s0, s2, 1 |
| ; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0) |
| ; GPRIDX-NEXT: v_mov_b32_e32 v16, v0 |
| ; GPRIDX-NEXT: s_set_gpr_idx_off |
| ; GPRIDX-NEXT: s_set_gpr_idx_on s0, gpr_idx(SRC0) |
| ; GPRIDX-NEXT: v_mov_b32_e32 v17, v1 |
| ; GPRIDX-NEXT: s_set_gpr_idx_off |
| ; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[16:17], off |
| ; GPRIDX-NEXT: s_endpgm |
| ; |
| ; MOVREL-LABEL: dyn_extract_v8i64_v_s: |
| ; MOVREL: ; %bb.0: ; %entry |
| ; MOVREL-NEXT: s_lshl_b32 m0, s2, 1 |
| ; MOVREL-NEXT: v_movrels_b32_e32 v16, v0 |
| ; MOVREL-NEXT: v_movrels_b32_e32 v17, v1 |
| ; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[16:17] |
| ; MOVREL-NEXT: s_endpgm |
| entry: |
| %ext = extractelement <8 x i64> %vec, i32 %sel |
| store i64 %ext, i64 addrspace(1)* undef |
| ret void |
| } |
| |
; Extracts element %sel from an inreg <8 x i64> argument with an inreg index
; and stores the result through an undef addrspace(1) pointer.
; Both runs expect the vector to be copied from s2..s17 to s0..s15, m0 to be
; loaded directly from s18, and one s_movrels_b64 s[0:1], s[0:1]. The store is
; global_store_dwordx2 for gfx900 and flat_store_dwordx2 for fiji.
; NOTE(review): m0 receives the unshifted index here, whereas the loop-based
; i64 variants in this file shift the index left by 1 before their 32-bit
; reads - confirm the index units s_movrels_b64 expects.
| define amdgpu_ps void @dyn_extract_v8i64_s_s(<8 x i64> inreg %vec, i32 inreg %sel) { |
| ; GPRIDX-LABEL: dyn_extract_v8i64_s_s: |
| ; GPRIDX: ; %bb.0: ; %entry |
| ; GPRIDX-NEXT: s_mov_b32 s0, s2 |
| ; GPRIDX-NEXT: s_mov_b32 s1, s3 |
| ; GPRIDX-NEXT: s_mov_b32 m0, s18 |
| ; GPRIDX-NEXT: s_mov_b32 s2, s4 |
| ; GPRIDX-NEXT: s_mov_b32 s3, s5 |
| ; GPRIDX-NEXT: s_mov_b32 s4, s6 |
| ; GPRIDX-NEXT: s_mov_b32 s5, s7 |
| ; GPRIDX-NEXT: s_mov_b32 s6, s8 |
| ; GPRIDX-NEXT: s_mov_b32 s7, s9 |
| ; GPRIDX-NEXT: s_mov_b32 s8, s10 |
| ; GPRIDX-NEXT: s_mov_b32 s9, s11 |
| ; GPRIDX-NEXT: s_mov_b32 s10, s12 |
| ; GPRIDX-NEXT: s_mov_b32 s11, s13 |
| ; GPRIDX-NEXT: s_mov_b32 s12, s14 |
| ; GPRIDX-NEXT: s_mov_b32 s13, s15 |
| ; GPRIDX-NEXT: s_mov_b32 s14, s16 |
| ; GPRIDX-NEXT: s_mov_b32 s15, s17 |
| ; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] |
| ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 |
| ; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 |
| ; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off |
| ; GPRIDX-NEXT: s_endpgm |
| ; |
| ; MOVREL-LABEL: dyn_extract_v8i64_s_s: |
| ; MOVREL: ; %bb.0: ; %entry |
| ; MOVREL-NEXT: s_mov_b32 s0, s2 |
| ; MOVREL-NEXT: s_mov_b32 s1, s3 |
| ; MOVREL-NEXT: s_mov_b32 m0, s18 |
| ; MOVREL-NEXT: s_mov_b32 s2, s4 |
| ; MOVREL-NEXT: s_mov_b32 s3, s5 |
| ; MOVREL-NEXT: s_mov_b32 s4, s6 |
| ; MOVREL-NEXT: s_mov_b32 s5, s7 |
| ; MOVREL-NEXT: s_mov_b32 s6, s8 |
| ; MOVREL-NEXT: s_mov_b32 s7, s9 |
| ; MOVREL-NEXT: s_mov_b32 s8, s10 |
| ; MOVREL-NEXT: s_mov_b32 s9, s11 |
| ; MOVREL-NEXT: s_mov_b32 s10, s12 |
| ; MOVREL-NEXT: s_mov_b32 s11, s13 |
| ; MOVREL-NEXT: s_mov_b32 s12, s14 |
| ; MOVREL-NEXT: s_mov_b32 s13, s15 |
| ; MOVREL-NEXT: s_mov_b32 s14, s16 |
| ; MOVREL-NEXT: s_mov_b32 s15, s17 |
| ; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] |
| ; MOVREL-NEXT: v_mov_b32_e32 v0, s0 |
| ; MOVREL-NEXT: v_mov_b32_e32 v1, s1 |
| ; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] |
| ; MOVREL-NEXT: s_endpgm |
| entry: |
| %ext = extractelement <8 x i64> %vec, i32 %sel |
| store i64 %ext, i64 addrspace(1)* undef |
| ret void |
| } |
| |
; Extracts element (%sel + 3) from an inreg <8 x float> argument with an inreg
; index.
; The expected output in both runs contains no add instruction: m0 is loaded
; from s10 unchanged and the constant 3 shows up as the base register of
; s_movrels_b32 (s3, after the vector has been copied to s0..s7).
| define amdgpu_ps float @dyn_extract_v8f32_s_s_offset3(<8 x float> inreg %vec, i32 inreg %sel) { |
| ; GPRIDX-LABEL: dyn_extract_v8f32_s_s_offset3: |
| ; GPRIDX: ; %bb.0: ; %entry |
| ; GPRIDX-NEXT: s_mov_b32 s0, s2 |
| ; GPRIDX-NEXT: s_mov_b32 s1, s3 |
| ; GPRIDX-NEXT: s_mov_b32 s3, s5 |
| ; GPRIDX-NEXT: s_mov_b32 m0, s10 |
| ; GPRIDX-NEXT: s_mov_b32 s2, s4 |
| ; GPRIDX-NEXT: s_mov_b32 s4, s6 |
| ; GPRIDX-NEXT: s_mov_b32 s5, s7 |
| ; GPRIDX-NEXT: s_mov_b32 s6, s8 |
| ; GPRIDX-NEXT: s_mov_b32 s7, s9 |
| ; GPRIDX-NEXT: s_movrels_b32 s0, s3 |
| ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 |
| ; GPRIDX-NEXT: ; return to shader part epilog |
| ; |
| ; MOVREL-LABEL: dyn_extract_v8f32_s_s_offset3: |
| ; MOVREL: ; %bb.0: ; %entry |
| ; MOVREL-NEXT: s_mov_b32 s0, s2 |
| ; MOVREL-NEXT: s_mov_b32 s1, s3 |
| ; MOVREL-NEXT: s_mov_b32 s3, s5 |
| ; MOVREL-NEXT: s_mov_b32 m0, s10 |
| ; MOVREL-NEXT: s_mov_b32 s2, s4 |
| ; MOVREL-NEXT: s_mov_b32 s4, s6 |
| ; MOVREL-NEXT: s_mov_b32 s5, s7 |
| ; MOVREL-NEXT: s_mov_b32 s6, s8 |
| ; MOVREL-NEXT: s_mov_b32 s7, s9 |
| ; MOVREL-NEXT: s_movrels_b32 s0, s3 |
| ; MOVREL-NEXT: v_mov_b32_e32 v0, s0 |
| ; MOVREL-NEXT: ; return to shader part epilog |
| entry: |
| %add = add i32 %sel, 3 |
| %ext = extractelement <8 x float> %vec, i32 %add |
| ret float %ext |
| } |
| |
; Extracts element (%sel + 3) from an <8 x float> argument; neither argument is
; inreg.
; The expected output in both runs contains no add instruction: the loop uses
; the first-lane value of v8 as the index and the constant 3 shows up as the
; source register of the indexed move (v3). The gfx900 run uses
; s_set_gpr_idx_on / v_mov_b32 / s_set_gpr_idx_off; the fiji run uses m0 and
; v_movrels_b32.
| define float @dyn_extract_v8f32_v_v_offset3(<8 x float> %vec, i32 %sel) { |
| ; GPRIDX-LABEL: dyn_extract_v8f32_v_v_offset3: |
| ; GPRIDX: ; %bb.0: ; %entry |
| ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GPRIDX-NEXT: s_mov_b64 s[4:5], exec |
| ; GPRIDX-NEXT: BB13_1: ; =>This Inner Loop Header: Depth=1 |
| ; GPRIDX-NEXT: v_readfirstlane_b32 s6, v8 |
| ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v8 |
| ; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(SRC0) |
| ; GPRIDX-NEXT: v_mov_b32_e32 v9, v3 |
| ; GPRIDX-NEXT: s_set_gpr_idx_off |
| ; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc |
| ; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc |
| ; GPRIDX-NEXT: s_cbranch_execnz BB13_1 |
| ; GPRIDX-NEXT: ; %bb.2: |
| ; GPRIDX-NEXT: s_mov_b64 exec, s[4:5] |
| ; GPRIDX-NEXT: v_mov_b32_e32 v0, v9 |
| ; GPRIDX-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; MOVREL-LABEL: dyn_extract_v8f32_v_v_offset3: |
| ; MOVREL: ; %bb.0: ; %entry |
| ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; MOVREL-NEXT: s_mov_b64 s[4:5], exec |
| ; MOVREL-NEXT: BB13_1: ; =>This Inner Loop Header: Depth=1 |
| ; MOVREL-NEXT: v_readfirstlane_b32 s6, v8 |
| ; MOVREL-NEXT: s_mov_b32 m0, s6 |
| ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v8 |
| ; MOVREL-NEXT: v_movrels_b32_e32 v9, v3 |
| ; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc |
| ; MOVREL-NEXT: s_xor_b64 exec, exec, vcc |
| ; MOVREL-NEXT: s_cbranch_execnz BB13_1 |
| ; MOVREL-NEXT: ; %bb.2: |
| ; MOVREL-NEXT: s_mov_b64 exec, s[4:5] |
| ; MOVREL-NEXT: v_mov_b32_e32 v0, v9 |
| ; MOVREL-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %add = add i32 %sel, 3 |
| %ext = extractelement <8 x float> %vec, i32 %add |
| ret float %ext |
| } |
| |
; Extracts element (%sel + 1) from an inreg <8 x double> argument with an inreg
; index.
; The expected output in both runs contains no add instruction: after the
; vector is copied to s0..s15, m0 is loaded from s18 unchanged and the constant
; offset shows up as the s_movrels_b64 source pair s[2:3].
| define amdgpu_ps double @dyn_extract_v8f64_s_s_offset1(<8 x double> inreg %vec, i32 inreg %sel) { |
| ; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset1: |
| ; GPRIDX: ; %bb.0: ; %entry |
| ; GPRIDX-NEXT: s_mov_b32 s0, s2 |
| ; GPRIDX-NEXT: s_mov_b32 s1, s3 |
| ; GPRIDX-NEXT: s_mov_b32 s2, s4 |
| ; GPRIDX-NEXT: s_mov_b32 s3, s5 |
| ; GPRIDX-NEXT: s_mov_b32 m0, s18 |
| ; GPRIDX-NEXT: s_mov_b32 s4, s6 |
| ; GPRIDX-NEXT: s_mov_b32 s5, s7 |
| ; GPRIDX-NEXT: s_mov_b32 s6, s8 |
| ; GPRIDX-NEXT: s_mov_b32 s7, s9 |
| ; GPRIDX-NEXT: s_mov_b32 s8, s10 |
| ; GPRIDX-NEXT: s_mov_b32 s9, s11 |
| ; GPRIDX-NEXT: s_mov_b32 s10, s12 |
| ; GPRIDX-NEXT: s_mov_b32 s11, s13 |
| ; GPRIDX-NEXT: s_mov_b32 s12, s14 |
| ; GPRIDX-NEXT: s_mov_b32 s13, s15 |
| ; GPRIDX-NEXT: s_mov_b32 s14, s16 |
| ; GPRIDX-NEXT: s_mov_b32 s15, s17 |
| ; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[2:3] |
| ; GPRIDX-NEXT: ; return to shader part epilog |
| ; |
| ; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset1: |
| ; MOVREL: ; %bb.0: ; %entry |
| ; MOVREL-NEXT: s_mov_b32 s0, s2 |
| ; MOVREL-NEXT: s_mov_b32 s1, s3 |
| ; MOVREL-NEXT: s_mov_b32 s2, s4 |
| ; MOVREL-NEXT: s_mov_b32 s3, s5 |
| ; MOVREL-NEXT: s_mov_b32 m0, s18 |
| ; MOVREL-NEXT: s_mov_b32 s4, s6 |
| ; MOVREL-NEXT: s_mov_b32 s5, s7 |
| ; MOVREL-NEXT: s_mov_b32 s6, s8 |
| ; MOVREL-NEXT: s_mov_b32 s7, s9 |
| ; MOVREL-NEXT: s_mov_b32 s8, s10 |
| ; MOVREL-NEXT: s_mov_b32 s9, s11 |
| ; MOVREL-NEXT: s_mov_b32 s10, s12 |
| ; MOVREL-NEXT: s_mov_b32 s11, s13 |
| ; MOVREL-NEXT: s_mov_b32 s12, s14 |
| ; MOVREL-NEXT: s_mov_b32 s13, s15 |
| ; MOVREL-NEXT: s_mov_b32 s14, s16 |
| ; MOVREL-NEXT: s_mov_b32 s15, s17 |
| ; MOVREL-NEXT: s_movrels_b64 s[0:1], s[2:3] |
| ; MOVREL-NEXT: ; return to shader part epilog |
| entry: |
| %add = add i32 %sel, 1 |
| %ext = extractelement <8 x double> %vec, i32 %add |
| ret double %ext |
| } |
| |
; Extracts element (%sel + 2) from an inreg <8 x double> argument with an inreg
; index.
; The expected output in both runs contains no add instruction: after the
; vector is copied to s0..s15, m0 is loaded from s18 unchanged and the constant
; offset shows up as the s_movrels_b64 source pair s[4:5].
| define amdgpu_ps double @dyn_extract_v8f64_s_s_offset2(<8 x double> inreg %vec, i32 inreg %sel) { |
| ; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset2: |
| ; GPRIDX: ; %bb.0: ; %entry |
| ; GPRIDX-NEXT: s_mov_b32 s0, s2 |
| ; GPRIDX-NEXT: s_mov_b32 s1, s3 |
| ; GPRIDX-NEXT: s_mov_b32 s2, s4 |
| ; GPRIDX-NEXT: s_mov_b32 s3, s5 |
| ; GPRIDX-NEXT: s_mov_b32 s4, s6 |
| ; GPRIDX-NEXT: s_mov_b32 s5, s7 |
| ; GPRIDX-NEXT: s_mov_b32 m0, s18 |
| ; GPRIDX-NEXT: s_mov_b32 s6, s8 |
| ; GPRIDX-NEXT: s_mov_b32 s7, s9 |
| ; GPRIDX-NEXT: s_mov_b32 s8, s10 |
| ; GPRIDX-NEXT: s_mov_b32 s9, s11 |
| ; GPRIDX-NEXT: s_mov_b32 s10, s12 |
| ; GPRIDX-NEXT: s_mov_b32 s11, s13 |
| ; GPRIDX-NEXT: s_mov_b32 s12, s14 |
| ; GPRIDX-NEXT: s_mov_b32 s13, s15 |
| ; GPRIDX-NEXT: s_mov_b32 s14, s16 |
| ; GPRIDX-NEXT: s_mov_b32 s15, s17 |
| ; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[4:5] |
| ; GPRIDX-NEXT: ; return to shader part epilog |
| ; |
| ; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset2: |
| ; MOVREL: ; %bb.0: ; %entry |
| ; MOVREL-NEXT: s_mov_b32 s0, s2 |
| ; MOVREL-NEXT: s_mov_b32 s1, s3 |
| ; MOVREL-NEXT: s_mov_b32 s2, s4 |
| ; MOVREL-NEXT: s_mov_b32 s3, s5 |
| ; MOVREL-NEXT: s_mov_b32 s4, s6 |
| ; MOVREL-NEXT: s_mov_b32 s5, s7 |
| ; MOVREL-NEXT: s_mov_b32 m0, s18 |
| ; MOVREL-NEXT: s_mov_b32 s6, s8 |
| ; MOVREL-NEXT: s_mov_b32 s7, s9 |
| ; MOVREL-NEXT: s_mov_b32 s8, s10 |
| ; MOVREL-NEXT: s_mov_b32 s9, s11 |
| ; MOVREL-NEXT: s_mov_b32 s10, s12 |
| ; MOVREL-NEXT: s_mov_b32 s11, s13 |
| ; MOVREL-NEXT: s_mov_b32 s12, s14 |
| ; MOVREL-NEXT: s_mov_b32 s13, s15 |
| ; MOVREL-NEXT: s_mov_b32 s14, s16 |
| ; MOVREL-NEXT: s_mov_b32 s15, s17 |
| ; MOVREL-NEXT: s_movrels_b64 s[0:1], s[4:5] |
| ; MOVREL-NEXT: ; return to shader part epilog |
| entry: |
| %add = add i32 %sel, 2 |
| %ext = extractelement <8 x double> %vec, i32 %add |
| ret double %ext |
| } |
| |
; Extracts element (%sel + 3) from an inreg <8 x double> argument with an inreg
; index.
; The expected output in both runs contains no add instruction: after the
; vector is copied to s0..s15, m0 is loaded from s18 unchanged and the constant
; offset shows up as the s_movrels_b64 source pair s[6:7].
| define amdgpu_ps double @dyn_extract_v8f64_s_s_offset3(<8 x double> inreg %vec, i32 inreg %sel) { |
| ; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset3: |
| ; GPRIDX: ; %bb.0: ; %entry |
| ; GPRIDX-NEXT: s_mov_b32 s0, s2 |
| ; GPRIDX-NEXT: s_mov_b32 s1, s3 |
| ; GPRIDX-NEXT: s_mov_b32 s2, s4 |
| ; GPRIDX-NEXT: s_mov_b32 s3, s5 |
| ; GPRIDX-NEXT: s_mov_b32 s4, s6 |
| ; GPRIDX-NEXT: s_mov_b32 s5, s7 |
| ; GPRIDX-NEXT: s_mov_b32 s6, s8 |
| ; GPRIDX-NEXT: s_mov_b32 s7, s9 |
| ; GPRIDX-NEXT: s_mov_b32 m0, s18 |
| ; GPRIDX-NEXT: s_mov_b32 s8, s10 |
| ; GPRIDX-NEXT: s_mov_b32 s9, s11 |
| ; GPRIDX-NEXT: s_mov_b32 s10, s12 |
| ; GPRIDX-NEXT: s_mov_b32 s11, s13 |
| ; GPRIDX-NEXT: s_mov_b32 s12, s14 |
| ; GPRIDX-NEXT: s_mov_b32 s13, s15 |
| ; GPRIDX-NEXT: s_mov_b32 s14, s16 |
| ; GPRIDX-NEXT: s_mov_b32 s15, s17 |
| ; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[6:7] |
| ; GPRIDX-NEXT: ; return to shader part epilog |
| ; |
| ; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset3: |
| ; MOVREL: ; %bb.0: ; %entry |
| ; MOVREL-NEXT: s_mov_b32 s0, s2 |
| ; MOVREL-NEXT: s_mov_b32 s1, s3 |
| ; MOVREL-NEXT: s_mov_b32 s2, s4 |
| ; MOVREL-NEXT: s_mov_b32 s3, s5 |
| ; MOVREL-NEXT: s_mov_b32 s4, s6 |
| ; MOVREL-NEXT: s_mov_b32 s5, s7 |
| ; MOVREL-NEXT: s_mov_b32 s6, s8 |
| ; MOVREL-NEXT: s_mov_b32 s7, s9 |
| ; MOVREL-NEXT: s_mov_b32 m0, s18 |
| ; MOVREL-NEXT: s_mov_b32 s8, s10 |
| ; MOVREL-NEXT: s_mov_b32 s9, s11 |
| ; MOVREL-NEXT: s_mov_b32 s10, s12 |
| ; MOVREL-NEXT: s_mov_b32 s11, s13 |
| ; MOVREL-NEXT: s_mov_b32 s12, s14 |
| ; MOVREL-NEXT: s_mov_b32 s13, s15 |
| ; MOVREL-NEXT: s_mov_b32 s14, s16 |
| ; MOVREL-NEXT: s_mov_b32 s15, s17 |
| ; MOVREL-NEXT: s_movrels_b64 s[0:1], s[6:7] |
| ; MOVREL-NEXT: ; return to shader part epilog |
| entry: |
| %add = add i32 %sel, 3 |
| %ext = extractelement <8 x double> %vec, i32 %add |
| ret double %ext |
| } |
| |
; Extracts element (%sel + 4) from an inreg <8 x double> argument with an inreg
; index.
; The expected output in both runs contains no add instruction: after the
; vector is copied to s0..s15, m0 is loaded from s18 unchanged and the constant
; offset shows up as the s_movrels_b64 source pair s[8:9].
| define amdgpu_ps double @dyn_extract_v8f64_s_s_offset4(<8 x double> inreg %vec, i32 inreg %sel) { |
| ; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset4: |
| ; GPRIDX: ; %bb.0: ; %entry |
| ; GPRIDX-NEXT: s_mov_b32 s0, s2 |
| ; GPRIDX-NEXT: s_mov_b32 s1, s3 |
| ; GPRIDX-NEXT: s_mov_b32 s2, s4 |
| ; GPRIDX-NEXT: s_mov_b32 s3, s5 |
| ; GPRIDX-NEXT: s_mov_b32 s4, s6 |
| ; GPRIDX-NEXT: s_mov_b32 s5, s7 |
| ; GPRIDX-NEXT: s_mov_b32 s6, s8 |
| ; GPRIDX-NEXT: s_mov_b32 s7, s9 |
| ; GPRIDX-NEXT: s_mov_b32 s8, s10 |
| ; GPRIDX-NEXT: s_mov_b32 s9, s11 |
| ; GPRIDX-NEXT: s_mov_b32 m0, s18 |
| ; GPRIDX-NEXT: s_mov_b32 s10, s12 |
| ; GPRIDX-NEXT: s_mov_b32 s11, s13 |
| ; GPRIDX-NEXT: s_mov_b32 s12, s14 |
| ; GPRIDX-NEXT: s_mov_b32 s13, s15 |
| ; GPRIDX-NEXT: s_mov_b32 s14, s16 |
| ; GPRIDX-NEXT: s_mov_b32 s15, s17 |
| ; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[8:9] |
| ; GPRIDX-NEXT: ; return to shader part epilog |
| ; |
| ; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset4: |
| ; MOVREL: ; %bb.0: ; %entry |
| ; MOVREL-NEXT: s_mov_b32 s0, s2 |
| ; MOVREL-NEXT: s_mov_b32 s1, s3 |
| ; MOVREL-NEXT: s_mov_b32 s2, s4 |
| ; MOVREL-NEXT: s_mov_b32 s3, s5 |
| ; MOVREL-NEXT: s_mov_b32 s4, s6 |
| ; MOVREL-NEXT: s_mov_b32 s5, s7 |
| ; MOVREL-NEXT: s_mov_b32 s6, s8 |
| ; MOVREL-NEXT: s_mov_b32 s7, s9 |
| ; MOVREL-NEXT: s_mov_b32 s8, s10 |
| ; MOVREL-NEXT: s_mov_b32 s9, s11 |
| ; MOVREL-NEXT: s_mov_b32 m0, s18 |
| ; MOVREL-NEXT: s_mov_b32 s10, s12 |
| ; MOVREL-NEXT: s_mov_b32 s11, s13 |
| ; MOVREL-NEXT: s_mov_b32 s12, s14 |
| ; MOVREL-NEXT: s_mov_b32 s13, s15 |
| ; MOVREL-NEXT: s_mov_b32 s14, s16 |
| ; MOVREL-NEXT: s_mov_b32 s15, s17 |
| ; MOVREL-NEXT: s_movrels_b64 s[0:1], s[8:9] |
| ; MOVREL-NEXT: ; return to shader part epilog |
| entry: |
| %add = add i32 %sel, 4 |
| %ext = extractelement <8 x double> %vec, i32 %add |
| ret double %ext |
| } |
| |
| ; Same shape as the other uniform constant-offset cases, with an index of |
| ; %sel + 5. Both runs expect m0 to be loaded with the plain %sel (s18) and the |
| ; constant 5 to show up only as the s_movrels_b64 source base s[10:11], which |
| ; is element 5 of the vector after it has been copied into s[0:15]. |
| define amdgpu_ps double @dyn_extract_v8f64_s_s_offset5(<8 x double> inreg %vec, i32 inreg %sel) { |
| ; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset5: |
| ; GPRIDX: ; %bb.0: ; %entry |
| ; GPRIDX-NEXT: s_mov_b32 s0, s2 |
| ; GPRIDX-NEXT: s_mov_b32 s1, s3 |
| ; GPRIDX-NEXT: s_mov_b32 s2, s4 |
| ; GPRIDX-NEXT: s_mov_b32 s3, s5 |
| ; GPRIDX-NEXT: s_mov_b32 s4, s6 |
| ; GPRIDX-NEXT: s_mov_b32 s5, s7 |
| ; GPRIDX-NEXT: s_mov_b32 s6, s8 |
| ; GPRIDX-NEXT: s_mov_b32 s7, s9 |
| ; GPRIDX-NEXT: s_mov_b32 s8, s10 |
| ; GPRIDX-NEXT: s_mov_b32 s9, s11 |
| ; GPRIDX-NEXT: s_mov_b32 s10, s12 |
| ; GPRIDX-NEXT: s_mov_b32 s11, s13 |
| ; GPRIDX-NEXT: s_mov_b32 m0, s18 |
| ; GPRIDX-NEXT: s_mov_b32 s12, s14 |
| ; GPRIDX-NEXT: s_mov_b32 s13, s15 |
| ; GPRIDX-NEXT: s_mov_b32 s14, s16 |
| ; GPRIDX-NEXT: s_mov_b32 s15, s17 |
| ; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[10:11] |
| ; GPRIDX-NEXT: ; return to shader part epilog |
| ; |
| ; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset5: |
| ; MOVREL: ; %bb.0: ; %entry |
| ; MOVREL-NEXT: s_mov_b32 s0, s2 |
| ; MOVREL-NEXT: s_mov_b32 s1, s3 |
| ; MOVREL-NEXT: s_mov_b32 s2, s4 |
| ; MOVREL-NEXT: s_mov_b32 s3, s5 |
| ; MOVREL-NEXT: s_mov_b32 s4, s6 |
| ; MOVREL-NEXT: s_mov_b32 s5, s7 |
| ; MOVREL-NEXT: s_mov_b32 s6, s8 |
| ; MOVREL-NEXT: s_mov_b32 s7, s9 |
| ; MOVREL-NEXT: s_mov_b32 s8, s10 |
| ; MOVREL-NEXT: s_mov_b32 s9, s11 |
| ; MOVREL-NEXT: s_mov_b32 s10, s12 |
| ; MOVREL-NEXT: s_mov_b32 s11, s13 |
| ; MOVREL-NEXT: s_mov_b32 m0, s18 |
| ; MOVREL-NEXT: s_mov_b32 s12, s14 |
| ; MOVREL-NEXT: s_mov_b32 s13, s15 |
| ; MOVREL-NEXT: s_mov_b32 s14, s16 |
| ; MOVREL-NEXT: s_mov_b32 s15, s17 |
| ; MOVREL-NEXT: s_movrels_b64 s[0:1], s[10:11] |
| ; MOVREL-NEXT: ; return to shader part epilog |
| entry: |
| %add = add i32 %sel, 5 |
| %ext = extractelement <8 x double> %vec, i32 %add |
| ret double %ext |
| } |
| |
| ; Uniform <8 x double> extract with an index of %sel + 6. Both runs expect m0 |
| ; to hold the plain %sel (s18) and the constant 6 to be expressed through the |
| ; s_movrels_b64 source base s[12:13], i.e. element 6 of the vector once it has |
| ; been copied into s[0:15]. |
| define amdgpu_ps double @dyn_extract_v8f64_s_s_offset6(<8 x double> inreg %vec, i32 inreg %sel) { |
| ; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset6: |
| ; GPRIDX: ; %bb.0: ; %entry |
| ; GPRIDX-NEXT: s_mov_b32 s0, s2 |
| ; GPRIDX-NEXT: s_mov_b32 s1, s3 |
| ; GPRIDX-NEXT: s_mov_b32 s2, s4 |
| ; GPRIDX-NEXT: s_mov_b32 s3, s5 |
| ; GPRIDX-NEXT: s_mov_b32 s4, s6 |
| ; GPRIDX-NEXT: s_mov_b32 s5, s7 |
| ; GPRIDX-NEXT: s_mov_b32 s6, s8 |
| ; GPRIDX-NEXT: s_mov_b32 s7, s9 |
| ; GPRIDX-NEXT: s_mov_b32 s8, s10 |
| ; GPRIDX-NEXT: s_mov_b32 s9, s11 |
| ; GPRIDX-NEXT: s_mov_b32 s10, s12 |
| ; GPRIDX-NEXT: s_mov_b32 s11, s13 |
| ; GPRIDX-NEXT: s_mov_b32 s12, s14 |
| ; GPRIDX-NEXT: s_mov_b32 s13, s15 |
| ; GPRIDX-NEXT: s_mov_b32 m0, s18 |
| ; GPRIDX-NEXT: s_mov_b32 s14, s16 |
| ; GPRIDX-NEXT: s_mov_b32 s15, s17 |
| ; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[12:13] |
| ; GPRIDX-NEXT: ; return to shader part epilog |
| ; |
| ; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset6: |
| ; MOVREL: ; %bb.0: ; %entry |
| ; MOVREL-NEXT: s_mov_b32 s0, s2 |
| ; MOVREL-NEXT: s_mov_b32 s1, s3 |
| ; MOVREL-NEXT: s_mov_b32 s2, s4 |
| ; MOVREL-NEXT: s_mov_b32 s3, s5 |
| ; MOVREL-NEXT: s_mov_b32 s4, s6 |
| ; MOVREL-NEXT: s_mov_b32 s5, s7 |
| ; MOVREL-NEXT: s_mov_b32 s6, s8 |
| ; MOVREL-NEXT: s_mov_b32 s7, s9 |
| ; MOVREL-NEXT: s_mov_b32 s8, s10 |
| ; MOVREL-NEXT: s_mov_b32 s9, s11 |
| ; MOVREL-NEXT: s_mov_b32 s10, s12 |
| ; MOVREL-NEXT: s_mov_b32 s11, s13 |
| ; MOVREL-NEXT: s_mov_b32 s12, s14 |
| ; MOVREL-NEXT: s_mov_b32 s13, s15 |
| ; MOVREL-NEXT: s_mov_b32 m0, s18 |
| ; MOVREL-NEXT: s_mov_b32 s14, s16 |
| ; MOVREL-NEXT: s_mov_b32 s15, s17 |
| ; MOVREL-NEXT: s_movrels_b64 s[0:1], s[12:13] |
| ; MOVREL-NEXT: ; return to shader part epilog |
| entry: |
| %add = add i32 %sel, 6 |
| %ext = extractelement <8 x double> %vec, i32 %add |
| ret double %ext |
| } |
| |
| ; Uniform <8 x double> extract with an index of %sel + 7, the largest constant |
| ; offset that still names an element of the vector. Both runs expect the |
| ; constant to be expressed as the s_movrels_b64 source base s[14:15]. |
| ; Here the m0 write is the last instruction before the s_movrels_b64, and only |
| ; the gfx900 run expects an s_nop 0 between the two. |
| ; NOTE(review) presumably a required wait state after writing m0 on that |
| ; target - confirm against the hazard recognizer. |
| define amdgpu_ps double @dyn_extract_v8f64_s_s_offset7(<8 x double> inreg %vec, i32 inreg %sel) { |
| ; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offset7: |
| ; GPRIDX: ; %bb.0: ; %entry |
| ; GPRIDX-NEXT: s_mov_b32 s0, s2 |
| ; GPRIDX-NEXT: s_mov_b32 s1, s3 |
| ; GPRIDX-NEXT: s_mov_b32 s2, s4 |
| ; GPRIDX-NEXT: s_mov_b32 s3, s5 |
| ; GPRIDX-NEXT: s_mov_b32 s4, s6 |
| ; GPRIDX-NEXT: s_mov_b32 s5, s7 |
| ; GPRIDX-NEXT: s_mov_b32 s6, s8 |
| ; GPRIDX-NEXT: s_mov_b32 s7, s9 |
| ; GPRIDX-NEXT: s_mov_b32 s8, s10 |
| ; GPRIDX-NEXT: s_mov_b32 s9, s11 |
| ; GPRIDX-NEXT: s_mov_b32 s10, s12 |
| ; GPRIDX-NEXT: s_mov_b32 s11, s13 |
| ; GPRIDX-NEXT: s_mov_b32 s12, s14 |
| ; GPRIDX-NEXT: s_mov_b32 s13, s15 |
| ; GPRIDX-NEXT: s_mov_b32 s14, s16 |
| ; GPRIDX-NEXT: s_mov_b32 s15, s17 |
| ; GPRIDX-NEXT: s_mov_b32 m0, s18 |
| ; GPRIDX-NEXT: s_nop 0 |
| ; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[14:15] |
| ; GPRIDX-NEXT: ; return to shader part epilog |
| ; |
| ; MOVREL-LABEL: dyn_extract_v8f64_s_s_offset7: |
| ; MOVREL: ; %bb.0: ; %entry |
| ; MOVREL-NEXT: s_mov_b32 s0, s2 |
| ; MOVREL-NEXT: s_mov_b32 s1, s3 |
| ; MOVREL-NEXT: s_mov_b32 s2, s4 |
| ; MOVREL-NEXT: s_mov_b32 s3, s5 |
| ; MOVREL-NEXT: s_mov_b32 s4, s6 |
| ; MOVREL-NEXT: s_mov_b32 s5, s7 |
| ; MOVREL-NEXT: s_mov_b32 s6, s8 |
| ; MOVREL-NEXT: s_mov_b32 s7, s9 |
| ; MOVREL-NEXT: s_mov_b32 s8, s10 |
| ; MOVREL-NEXT: s_mov_b32 s9, s11 |
| ; MOVREL-NEXT: s_mov_b32 s10, s12 |
| ; MOVREL-NEXT: s_mov_b32 s11, s13 |
| ; MOVREL-NEXT: s_mov_b32 s12, s14 |
| ; MOVREL-NEXT: s_mov_b32 s13, s15 |
| ; MOVREL-NEXT: s_mov_b32 s14, s16 |
| ; MOVREL-NEXT: s_mov_b32 s15, s17 |
| ; MOVREL-NEXT: s_mov_b32 m0, s18 |
| ; MOVREL-NEXT: s_movrels_b64 s[0:1], s[14:15] |
| ; MOVREL-NEXT: ; return to shader part epilog |
| entry: |
| %add = add i32 %sel, 7 |
| %ext = extractelement <8 x double> %vec, i32 %add |
| ret double %ext |
| } |
| |
| ; Uniform <8 x double> extract with an index of %sel - 1. Unlike the positive |
| ; constant-offset cases, the expected code does not move the constant into the |
| ; s_movrels_b64 source base. m0 is computed as s18 + -1 with s_add_i32 and the |
| ; base stays at s[0:1] (element 0). |
| define amdgpu_ps double @dyn_extract_v8f64_s_s_offsetm1(<8 x double> inreg %vec, i32 inreg %sel) { |
| ; GPRIDX-LABEL: dyn_extract_v8f64_s_s_offsetm1: |
| ; GPRIDX: ; %bb.0: ; %entry |
| ; GPRIDX-NEXT: s_mov_b32 s0, s2 |
| ; GPRIDX-NEXT: s_mov_b32 s1, s3 |
| ; GPRIDX-NEXT: s_add_i32 m0, s18, -1 |
| ; GPRIDX-NEXT: s_mov_b32 s2, s4 |
| ; GPRIDX-NEXT: s_mov_b32 s3, s5 |
| ; GPRIDX-NEXT: s_mov_b32 s4, s6 |
| ; GPRIDX-NEXT: s_mov_b32 s5, s7 |
| ; GPRIDX-NEXT: s_mov_b32 s6, s8 |
| ; GPRIDX-NEXT: s_mov_b32 s7, s9 |
| ; GPRIDX-NEXT: s_mov_b32 s8, s10 |
| ; GPRIDX-NEXT: s_mov_b32 s9, s11 |
| ; GPRIDX-NEXT: s_mov_b32 s10, s12 |
| ; GPRIDX-NEXT: s_mov_b32 s11, s13 |
| ; GPRIDX-NEXT: s_mov_b32 s12, s14 |
| ; GPRIDX-NEXT: s_mov_b32 s13, s15 |
| ; GPRIDX-NEXT: s_mov_b32 s14, s16 |
| ; GPRIDX-NEXT: s_mov_b32 s15, s17 |
| ; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] |
| ; GPRIDX-NEXT: ; return to shader part epilog |
| ; |
| ; MOVREL-LABEL: dyn_extract_v8f64_s_s_offsetm1: |
| ; MOVREL: ; %bb.0: ; %entry |
| ; MOVREL-NEXT: s_mov_b32 s0, s2 |
| ; MOVREL-NEXT: s_mov_b32 s1, s3 |
| ; MOVREL-NEXT: s_add_i32 m0, s18, -1 |
| ; MOVREL-NEXT: s_mov_b32 s2, s4 |
| ; MOVREL-NEXT: s_mov_b32 s3, s5 |
| ; MOVREL-NEXT: s_mov_b32 s4, s6 |
| ; MOVREL-NEXT: s_mov_b32 s5, s7 |
| ; MOVREL-NEXT: s_mov_b32 s6, s8 |
| ; MOVREL-NEXT: s_mov_b32 s7, s9 |
| ; MOVREL-NEXT: s_mov_b32 s8, s10 |
| ; MOVREL-NEXT: s_mov_b32 s9, s11 |
| ; MOVREL-NEXT: s_mov_b32 s10, s12 |
| ; MOVREL-NEXT: s_mov_b32 s11, s13 |
| ; MOVREL-NEXT: s_mov_b32 s12, s14 |
| ; MOVREL-NEXT: s_mov_b32 s13, s15 |
| ; MOVREL-NEXT: s_mov_b32 s14, s16 |
| ; MOVREL-NEXT: s_mov_b32 s15, s17 |
| ; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] |
| ; MOVREL-NEXT: ; return to shader part epilog |
| entry: |
| %add = add i32 %sel, -1 |
| %ext = extractelement <8 x double> %vec, i32 %add |
| ret double %ext |
| } |
| |
| ; <8 x double> extract where both the vector (v[0:15]) and %sel (v16) live in |
| ; VGPRs, with an index of %sel + 3. The expected code saves exec and loops. Each |
| ; iteration takes one index value with v_readfirstlane_b32, compares it against |
| ; v16 to find the lanes sharing it, extracts for that value, then removes those |
| ; lanes from exec until none remain. exec is restored after the loop. |
| ; Inside the loop the scalar index gets the constant 3 added and is shifted |
| ; left by 1 before use, and the two result halves are read separately from v0 |
| ; and v1. The gfx900 run indexes with s_set_gpr_idx_on plus v_mov_b32, the |
| ; fiji run with m0 plus v_movrels_b32. |
| define double @dyn_extract_v8f64_v_v_offset3(<8 x double> %vec, i32 %sel) { |
| ; GPRIDX-LABEL: dyn_extract_v8f64_v_v_offset3: |
| ; GPRIDX: ; %bb.0: ; %entry |
| ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GPRIDX-NEXT: s_mov_b64 s[4:5], exec |
| ; GPRIDX-NEXT: BB22_1: ; =>This Inner Loop Header: Depth=1 |
| ; GPRIDX-NEXT: v_readfirstlane_b32 s6, v16 |
| ; GPRIDX-NEXT: s_add_i32 s7, s6, 3 |
| ; GPRIDX-NEXT: s_lshl_b32 s7, s7, 1 |
| ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16 |
| ; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0) |
| ; GPRIDX-NEXT: v_mov_b32_e32 v17, v0 |
| ; GPRIDX-NEXT: s_set_gpr_idx_off |
| ; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0) |
| ; GPRIDX-NEXT: v_mov_b32_e32 v18, v1 |
| ; GPRIDX-NEXT: s_set_gpr_idx_off |
| ; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc |
| ; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc |
| ; GPRIDX-NEXT: s_cbranch_execnz BB22_1 |
| ; GPRIDX-NEXT: ; %bb.2: |
| ; GPRIDX-NEXT: s_mov_b64 exec, s[4:5] |
| ; GPRIDX-NEXT: v_mov_b32_e32 v0, v17 |
| ; GPRIDX-NEXT: v_mov_b32_e32 v1, v18 |
| ; GPRIDX-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; MOVREL-LABEL: dyn_extract_v8f64_v_v_offset3: |
| ; MOVREL: ; %bb.0: ; %entry |
| ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; MOVREL-NEXT: s_mov_b64 s[4:5], exec |
| ; MOVREL-NEXT: BB22_1: ; =>This Inner Loop Header: Depth=1 |
| ; MOVREL-NEXT: v_readfirstlane_b32 s6, v16 |
| ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16 |
| ; MOVREL-NEXT: s_add_i32 s6, s6, 3 |
| ; MOVREL-NEXT: s_lshl_b32 m0, s6, 1 |
| ; MOVREL-NEXT: v_movrels_b32_e32 v17, v0 |
| ; MOVREL-NEXT: v_movrels_b32_e32 v18, v1 |
| ; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc |
| ; MOVREL-NEXT: s_xor_b64 exec, exec, vcc |
| ; MOVREL-NEXT: s_cbranch_execnz BB22_1 |
| ; MOVREL-NEXT: ; %bb.2: |
| ; MOVREL-NEXT: s_mov_b64 exec, s[4:5] |
| ; MOVREL-NEXT: v_mov_b32_e32 v0, v17 |
| ; MOVREL-NEXT: v_mov_b32_e32 v1, v18 |
| ; MOVREL-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %add = add i32 %sel, 3 |
| %ext = extractelement <8 x double> %vec, i32 %add |
| ret double %ext |
| } |
| |
| ; Extract from a vector of eight addrspace(3) pointers where the vector |
| ; (v[0:7]) and %idx (v8) are both in VGPRs. The expected code is a loop over |
| ; the index values found with v_readfirstlane_b32, updating exec each pass and |
| ; restoring it afterwards. The scalar index is used as-is (no shift) and a |
| ; single register move relative to v0 produces the result, so each element is |
| ; handled as one 32-bit register. |
| define i8 addrspace(3)* @dyn_extract_v8p3_v_v(<8 x i8 addrspace(3)*> %vec, i32 %idx) { |
| ; GPRIDX-LABEL: dyn_extract_v8p3_v_v: |
| ; GPRIDX: ; %bb.0: ; %entry |
| ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GPRIDX-NEXT: s_mov_b64 s[4:5], exec |
| ; GPRIDX-NEXT: BB23_1: ; =>This Inner Loop Header: Depth=1 |
| ; GPRIDX-NEXT: v_readfirstlane_b32 s6, v8 |
| ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v8 |
| ; GPRIDX-NEXT: s_set_gpr_idx_on s6, gpr_idx(SRC0) |
| ; GPRIDX-NEXT: v_mov_b32_e32 v9, v0 |
| ; GPRIDX-NEXT: s_set_gpr_idx_off |
| ; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc |
| ; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc |
| ; GPRIDX-NEXT: s_cbranch_execnz BB23_1 |
| ; GPRIDX-NEXT: ; %bb.2: |
| ; GPRIDX-NEXT: s_mov_b64 exec, s[4:5] |
| ; GPRIDX-NEXT: v_mov_b32_e32 v0, v9 |
| ; GPRIDX-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; MOVREL-LABEL: dyn_extract_v8p3_v_v: |
| ; MOVREL: ; %bb.0: ; %entry |
| ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; MOVREL-NEXT: s_mov_b64 s[4:5], exec |
| ; MOVREL-NEXT: BB23_1: ; =>This Inner Loop Header: Depth=1 |
| ; MOVREL-NEXT: v_readfirstlane_b32 s6, v8 |
| ; MOVREL-NEXT: s_mov_b32 m0, s6 |
| ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v8 |
| ; MOVREL-NEXT: v_movrels_b32_e32 v9, v0 |
| ; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc |
| ; MOVREL-NEXT: s_xor_b64 exec, exec, vcc |
| ; MOVREL-NEXT: s_cbranch_execnz BB23_1 |
| ; MOVREL-NEXT: ; %bb.2: |
| ; MOVREL-NEXT: s_mov_b64 exec, s[4:5] |
| ; MOVREL-NEXT: v_mov_b32_e32 v0, v9 |
| ; MOVREL-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %ext = extractelement <8 x i8 addrspace(3)*> %vec, i32 %idx |
| ret i8 addrspace(3)* %ext |
| } |
| |
| ; Uniform extract from a vector of eight addrspace(3) pointers. The vector is |
| ; copied from s[2:9] to s[0:7], m0 is loaded with %idx (s10) and a single |
| ; s_movrels_b32 picks the element. The IR then stores the extracted pointer |
| ; through an undef addrspace(3) address, which is expected to become a |
| ; ds_write_b32. Only the fiji run expects m0 to be set to -1 before that write. |
| ; NOTE(review) presumably m0 must be initialised for DS instructions on that |
| ; target - confirm. |
| define amdgpu_ps void @dyn_extract_v8p3_s_s(<8 x i8 addrspace(3)*> inreg %vec, i32 inreg %idx) { |
| ; GPRIDX-LABEL: dyn_extract_v8p3_s_s: |
| ; GPRIDX: ; %bb.0: ; %entry |
| ; GPRIDX-NEXT: s_mov_b32 s0, s2 |
| ; GPRIDX-NEXT: s_mov_b32 m0, s10 |
| ; GPRIDX-NEXT: s_mov_b32 s1, s3 |
| ; GPRIDX-NEXT: s_mov_b32 s2, s4 |
| ; GPRIDX-NEXT: s_mov_b32 s3, s5 |
| ; GPRIDX-NEXT: s_mov_b32 s4, s6 |
| ; GPRIDX-NEXT: s_mov_b32 s5, s7 |
| ; GPRIDX-NEXT: s_mov_b32 s6, s8 |
| ; GPRIDX-NEXT: s_mov_b32 s7, s9 |
| ; GPRIDX-NEXT: s_movrels_b32 s0, s0 |
| ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 |
| ; GPRIDX-NEXT: ds_write_b32 v0, v0 |
| ; GPRIDX-NEXT: s_endpgm |
| ; |
| ; MOVREL-LABEL: dyn_extract_v8p3_s_s: |
| ; MOVREL: ; %bb.0: ; %entry |
| ; MOVREL-NEXT: s_mov_b32 s0, s2 |
| ; MOVREL-NEXT: s_mov_b32 m0, s10 |
| ; MOVREL-NEXT: s_mov_b32 s1, s3 |
| ; MOVREL-NEXT: s_mov_b32 s2, s4 |
| ; MOVREL-NEXT: s_mov_b32 s3, s5 |
| ; MOVREL-NEXT: s_mov_b32 s4, s6 |
| ; MOVREL-NEXT: s_mov_b32 s5, s7 |
| ; MOVREL-NEXT: s_mov_b32 s6, s8 |
| ; MOVREL-NEXT: s_mov_b32 s7, s9 |
| ; MOVREL-NEXT: s_movrels_b32 s0, s0 |
| ; MOVREL-NEXT: v_mov_b32_e32 v0, s0 |
| ; MOVREL-NEXT: s_mov_b32 m0, -1 |
| ; MOVREL-NEXT: ds_write_b32 v0, v0 |
| ; MOVREL-NEXT: s_endpgm |
| entry: |
| %ext = extractelement <8 x i8 addrspace(3)*> %vec, i32 %idx |
| store i8 addrspace(3)* %ext, i8 addrspace(3)* addrspace(3)* undef |
| ret void |
| } |
| |
| ; Extract from a vector of eight addrspace(1) pointers where the vector |
| ; (v[0:15]) and %idx (v16) are both in VGPRs. The expected code is a loop over |
| ; the index values found with v_readfirstlane_b32, updating exec each pass and |
| ; restoring it afterwards. The scalar index is shifted left by 1 and the |
| ; result is assembled from two separate 32-bit moves relative to v0 and v1, so |
| ; each element is handled as a pair of registers. |
| define i8 addrspace(1)* @dyn_extract_v8p1_v_v(<8 x i8 addrspace(1)*> %vec, i32 %idx) { |
| ; GPRIDX-LABEL: dyn_extract_v8p1_v_v: |
| ; GPRIDX: ; %bb.0: ; %entry |
| ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GPRIDX-NEXT: s_mov_b64 s[4:5], exec |
| ; GPRIDX-NEXT: BB25_1: ; =>This Inner Loop Header: Depth=1 |
| ; GPRIDX-NEXT: v_readfirstlane_b32 s6, v16 |
| ; GPRIDX-NEXT: s_lshl_b32 s7, s6, 1 |
| ; GPRIDX-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16 |
| ; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0) |
| ; GPRIDX-NEXT: v_mov_b32_e32 v17, v0 |
| ; GPRIDX-NEXT: s_set_gpr_idx_off |
| ; GPRIDX-NEXT: s_set_gpr_idx_on s7, gpr_idx(SRC0) |
| ; GPRIDX-NEXT: v_mov_b32_e32 v18, v1 |
| ; GPRIDX-NEXT: s_set_gpr_idx_off |
| ; GPRIDX-NEXT: s_and_saveexec_b64 vcc, vcc |
| ; GPRIDX-NEXT: s_xor_b64 exec, exec, vcc |
| ; GPRIDX-NEXT: s_cbranch_execnz BB25_1 |
| ; GPRIDX-NEXT: ; %bb.2: |
| ; GPRIDX-NEXT: s_mov_b64 exec, s[4:5] |
| ; GPRIDX-NEXT: v_mov_b32_e32 v0, v17 |
| ; GPRIDX-NEXT: v_mov_b32_e32 v1, v18 |
| ; GPRIDX-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; MOVREL-LABEL: dyn_extract_v8p1_v_v: |
| ; MOVREL: ; %bb.0: ; %entry |
| ; MOVREL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; MOVREL-NEXT: s_mov_b64 s[4:5], exec |
| ; MOVREL-NEXT: BB25_1: ; =>This Inner Loop Header: Depth=1 |
| ; MOVREL-NEXT: v_readfirstlane_b32 s6, v16 |
| ; MOVREL-NEXT: s_lshl_b32 m0, s6, 1 |
| ; MOVREL-NEXT: v_cmp_eq_u32_e32 vcc, s6, v16 |
| ; MOVREL-NEXT: v_movrels_b32_e32 v17, v0 |
| ; MOVREL-NEXT: v_movrels_b32_e32 v18, v1 |
| ; MOVREL-NEXT: s_and_saveexec_b64 vcc, vcc |
| ; MOVREL-NEXT: s_xor_b64 exec, exec, vcc |
| ; MOVREL-NEXT: s_cbranch_execnz BB25_1 |
| ; MOVREL-NEXT: ; %bb.2: |
| ; MOVREL-NEXT: s_mov_b64 exec, s[4:5] |
| ; MOVREL-NEXT: v_mov_b32_e32 v0, v17 |
| ; MOVREL-NEXT: v_mov_b32_e32 v1, v18 |
| ; MOVREL-NEXT: s_setpc_b64 s[30:31] |
| entry: |
| %ext = extractelement <8 x i8 addrspace(1)*> %vec, i32 %idx |
| ret i8 addrspace(1)* %ext |
| } |
| |
| ; Uniform extract from a vector of eight addrspace(1) pointers. The vector is |
| ; copied from s[2:17] to s[0:15], m0 is loaded with %idx (s18) and one |
| ; s_movrels_b64 with base s[0:1] selects the 64-bit element. The IR then stores |
| ; the extracted pointer through an undef addrspace(1) address. The gfx900 run |
| ; expects that store as global_store_dwordx2, the fiji run as |
| ; flat_store_dwordx2. |
| define amdgpu_ps void @dyn_extract_v8p1_s_s(<8 x i8 addrspace(1)*> inreg %vec, i32 inreg %idx) { |
| ; GPRIDX-LABEL: dyn_extract_v8p1_s_s: |
| ; GPRIDX: ; %bb.0: ; %entry |
| ; GPRIDX-NEXT: s_mov_b32 s0, s2 |
| ; GPRIDX-NEXT: s_mov_b32 s1, s3 |
| ; GPRIDX-NEXT: s_mov_b32 m0, s18 |
| ; GPRIDX-NEXT: s_mov_b32 s2, s4 |
| ; GPRIDX-NEXT: s_mov_b32 s3, s5 |
| ; GPRIDX-NEXT: s_mov_b32 s4, s6 |
| ; GPRIDX-NEXT: s_mov_b32 s5, s7 |
| ; GPRIDX-NEXT: s_mov_b32 s6, s8 |
| ; GPRIDX-NEXT: s_mov_b32 s7, s9 |
| ; GPRIDX-NEXT: s_mov_b32 s8, s10 |
| ; GPRIDX-NEXT: s_mov_b32 s9, s11 |
| ; GPRIDX-NEXT: s_mov_b32 s10, s12 |
| ; GPRIDX-NEXT: s_mov_b32 s11, s13 |
| ; GPRIDX-NEXT: s_mov_b32 s12, s14 |
| ; GPRIDX-NEXT: s_mov_b32 s13, s15 |
| ; GPRIDX-NEXT: s_mov_b32 s14, s16 |
| ; GPRIDX-NEXT: s_mov_b32 s15, s17 |
| ; GPRIDX-NEXT: s_movrels_b64 s[0:1], s[0:1] |
| ; GPRIDX-NEXT: v_mov_b32_e32 v0, s0 |
| ; GPRIDX-NEXT: v_mov_b32_e32 v1, s1 |
| ; GPRIDX-NEXT: global_store_dwordx2 v[0:1], v[0:1], off |
| ; GPRIDX-NEXT: s_endpgm |
| ; |
| ; MOVREL-LABEL: dyn_extract_v8p1_s_s: |
| ; MOVREL: ; %bb.0: ; %entry |
| ; MOVREL-NEXT: s_mov_b32 s0, s2 |
| ; MOVREL-NEXT: s_mov_b32 s1, s3 |
| ; MOVREL-NEXT: s_mov_b32 m0, s18 |
| ; MOVREL-NEXT: s_mov_b32 s2, s4 |
| ; MOVREL-NEXT: s_mov_b32 s3, s5 |
| ; MOVREL-NEXT: s_mov_b32 s4, s6 |
| ; MOVREL-NEXT: s_mov_b32 s5, s7 |
| ; MOVREL-NEXT: s_mov_b32 s6, s8 |
| ; MOVREL-NEXT: s_mov_b32 s7, s9 |
| ; MOVREL-NEXT: s_mov_b32 s8, s10 |
| ; MOVREL-NEXT: s_mov_b32 s9, s11 |
| ; MOVREL-NEXT: s_mov_b32 s10, s12 |
| ; MOVREL-NEXT: s_mov_b32 s11, s13 |
| ; MOVREL-NEXT: s_mov_b32 s12, s14 |
| ; MOVREL-NEXT: s_mov_b32 s13, s15 |
| ; MOVREL-NEXT: s_mov_b32 s14, s16 |
| ; MOVREL-NEXT: s_mov_b32 s15, s17 |
| ; MOVREL-NEXT: s_movrels_b64 s[0:1], s[0:1] |
| ; MOVREL-NEXT: v_mov_b32_e32 v0, s0 |
| ; MOVREL-NEXT: v_mov_b32_e32 v1, s1 |
| ; MOVREL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] |
| ; MOVREL-NEXT: s_endpgm |
| entry: |
| %ext = extractelement <8 x i8 addrspace(1)*> %vec, i32 %idx |
| store i8 addrspace(1)* %ext, i8 addrspace(1)* addrspace(1)* undef |
| ret void |
| } |