| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc -mtriple=amdgcn-- -mcpu=gfx950 < %s | FileCheck -check-prefix=GFX950 %s |
| ; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250 %s |
| |
| ; Positive test: a <4 x i16> vector built from the four arguments is bitcast |
| ; to i64 and logically shifted right by 48, i.e. (NumElts-1) * EltSize, so |
| ; only the top 16 bits of the vector survive. The checks expect just the low |
| ; 16 bits of v3 placed in v0 and v1 set to 0, with no instructions that pack |
| ; the other elements. |
| define i64 @srl_bv_v4i16_extract_last(i16 %a, i16 %b, i16 %c, i16 %d) { |
| ; GFX950-LABEL: srl_bv_v4i16_extract_last: |
| ; GFX950: ; %bb.0: |
| ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-NEXT: v_and_b32_e32 v0, 0xffff, v3 |
| ; GFX950-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX950-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-LABEL: srl_bv_v4i16_extract_last: |
| ; GFX1250: ; %bb.0: |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-NEXT: v_mov_b16_e32 v0.h, 0 |
| ; GFX1250-NEXT: v_mov_b16_e32 v0.l, v3.l |
| ; GFX1250-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX1250-NEXT: s_set_pc_i64 s[30:31] |
| %bv0 = insertelement <4 x i16> poison, i16 %a, i32 0 |
| %bv1 = insertelement <4 x i16> %bv0, i16 %b, i32 1 |
| %bv2 = insertelement <4 x i16> %bv1, i16 %c, i32 2 |
| %bv3 = insertelement <4 x i16> %bv2, i16 %d, i32 3 |
| %bc = bitcast <4 x i16> %bv3 to i64 |
| %srl = lshr i64 %bc, 48 |
| ret i64 %srl |
| } |
| |
| ; Positive test: a <2 x float> vector built from the two arguments is bitcast |
| ; to i64 and logically shifted right by 32, i.e. (NumElts-1) * EltSize, so |
| ; only the top 32 bits of the vector survive. The checks expect v1 copied |
| ; into v0 and v1 then set to 0. |
| define i64 @srl_bv_v2f32_extract_last(float %a, float %b) { |
| ; GFX950-LABEL: srl_bv_v2f32_extract_last: |
| ; GFX950: ; %bb.0: |
| ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-NEXT: v_mov_b32_e32 v0, v1 |
| ; GFX950-NEXT: v_mov_b32_e32 v1, 0 |
| ; GFX950-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-LABEL: srl_bv_v2f32_extract_last: |
| ; GFX1250: ; %bb.0: |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-NEXT: v_dual_mov_b32 v0, v1 :: v_dual_mov_b32 v1, 0 |
| ; GFX1250-NEXT: s_set_pc_i64 s[30:31] |
| %bv0 = insertelement <2 x float> poison, float %a, i32 0 |
| %bv1 = insertelement <2 x float> %bv0, float %b, i32 1 |
| %bc = bitcast <2 x float> %bv1 to i64 |
| %srl = lshr i64 %bc, 32 |
| ret i64 %srl |
| } |
| |
| ; Negative test: shift amount is not (NumElts-1) * EltSize. |
| ; Here the <4 x i16> vector is shifted right by 16 rather than 48, so more |
| ; than one element contributes to the result. The checks expect the elements |
| ; to still be packed (v_perm_b32 on GFX950, v_mov_b16 on GFX1250) and then |
| ; combined with v_alignbit_b32. |
| define i64 @srl_bv_not_last(i16 %a, i16 %b, i16 %c, i16 %d) { |
| ; GFX950-LABEL: srl_bv_not_last: |
| ; GFX950: ; %bb.0: |
| ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| ; GFX950-NEXT: s_mov_b32 s0, 0x5040100 |
| ; GFX950-NEXT: v_perm_b32 v2, v3, v2, s0 |
| ; GFX950-NEXT: v_perm_b32 v0, v1, v0, s0 |
| ; GFX950-NEXT: v_alignbit_b32 v0, v2, v0, 16 |
| ; GFX950-NEXT: v_and_b32_e32 v1, 0xffff, v3 |
| ; GFX950-NEXT: s_setpc_b64 s[30:31] |
| ; |
| ; GFX1250-LABEL: srl_bv_not_last: |
| ; GFX1250: ; %bb.0: |
| ; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 |
| ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
| ; GFX1250-NEXT: v_mov_b16_e32 v2.h, v3.l |
| ; GFX1250-NEXT: v_mov_b16_e32 v0.h, v1.l |
| ; GFX1250-NEXT: v_mov_b16_e32 v1.h, 0 |
| ; GFX1250-NEXT: v_mov_b16_e32 v1.l, v3.l |
| ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) |
| ; GFX1250-NEXT: v_alignbit_b32 v0, v2, v0, 16 |
| ; GFX1250-NEXT: s_set_pc_i64 s[30:31] |
| %bv0 = insertelement <4 x i16> poison, i16 %a, i32 0 |
| %bv1 = insertelement <4 x i16> %bv0, i16 %b, i32 1 |
| %bv2 = insertelement <4 x i16> %bv1, i16 %c, i32 2 |
| %bv3 = insertelement <4 x i16> %bv2, i16 %d, i32 3 |
| %bc = bitcast <4 x i16> %bv3 to i64 |
| %srl = lshr i64 %bc, 16 |
| ret i64 %srl |
| } |