; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32-ONLY
; RUN: llc -mtriple=riscv32 -mattr=+v,+zba,+zbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32VB
; RUN: llc -mtriple=riscv32 -mattr=+v,+zba,+zbb,+zbkb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32VB-PACK
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V,RV64V-ONLY
; RUN: llc -mtriple=riscv64 -mattr=+v,+rva22u64 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V,RVA22U64
; RUN: llc -mtriple=riscv64 -mattr=+v,+rva22u64,+zbkb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V,RVA22U64-PACK
; RUN: llc -mtriple=riscv64 -mattr=+zve32x,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVE32
define void @buildvec_vid_v16i8(ptr %x) {
; CHECK-LABEL: buildvec_vid_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
store <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, ptr %x
ret void
}
define void @buildvec_vid_undefelts_v16i8(ptr %x) {
; CHECK-LABEL: buildvec_vid_undefelts_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
store <16 x i8> <i8 0, i8 1, i8 2, i8 poison, i8 4, i8 poison, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, ptr %x
ret void
}
; TODO: Could do VID then insertelement on missing elements
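; For the constant below only lane 2 differs from vid (2 vs. 3), so a plausible
; lowering (a sketch, not what we currently generate) would be vid.v followed by
; a single-element overwrite of lane 2, avoiding the constant-pool load.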
define void @buildvec_notquite_vid_v16i8(ptr %x) {
; CHECK-LABEL: buildvec_notquite_vid_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI2_0)
; CHECK-NEXT: addi a1, a1, %lo(.LCPI2_0)
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a1)
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
store <16 x i8> <i8 0, i8 1, i8 3, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, ptr %x
ret void
}
define void @buildvec_vid_plus_imm_v16i8(ptr %x) {
; CHECK-LABEL: buildvec_vid_plus_imm_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vadd.vi v8, v8, 2
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
store <16 x i8> <i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17>, ptr %x
ret void
}
define void @buildvec_vid_plus_nonimm_v16i8(ptr %x) {
; CHECK-LABEL: buildvec_vid_plus_nonimm_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: li a1, 100
; CHECK-NEXT: vadd.vx v8, v8, a1
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
store <16 x i8> <i8 100, i8 101, i8 102, i8 103, i8 104, i8 105, i8 106, i8 107, i8 108, i8 109, i8 110, i8 111, i8 112, i8 113, i8 114, i8 115>, ptr %x
ret void
}
define void @buildvec_vid_mpy_imm_v16i8(ptr %x) {
; CHECK-LABEL: buildvec_vid_mpy_imm_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: li a1, 3
; CHECK-NEXT: vmul.vx v8, v8, a1
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
store <16 x i8> <i8 0, i8 3, i8 6, i8 9, i8 12, i8 15, i8 18, i8 21, i8 24, i8 27, i8 30, i8 33, i8 36, i8 39, i8 42, i8 45>, ptr %x
ret void
}
define <4 x i8> @buildvec_vid_step2_add0_v4i8() {
; CHECK-LABEL: buildvec_vid_step2_add0_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: ret
ret <4 x i8> <i8 0, i8 2, i8 4, i8 6>
}
define <4 x i8> @buildvec_vid_step2_add0_v4i8_undef0() {
; CHECK-LABEL: buildvec_vid_step2_add0_v4i8_undef0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: ret
ret <4 x i8> <i8 poison, i8 2, i8 4, i8 6>
}
define <4 x i8> @buildvec_vid_step2_add0_v4i8_undef1() {
; CHECK-LABEL: buildvec_vid_step2_add0_v4i8_undef1:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: ret
ret <4 x i8> <i8 poison, i8 poison, i8 4, i8 6>
}
define <4 x i8> @buildvec_vid_step2_add0_v4i8_undef2() {
; CHECK-LABEL: buildvec_vid_step2_add0_v4i8_undef2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: ret
ret <4 x i8> <i8 0, i8 poison, i8 poison, i8 6>
}
define <4 x i8> @buildvec_vid_step2_add1_v4i8() {
; CHECK-LABEL: buildvec_vid_step2_add1_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vadd.vi v8, v8, 1
; CHECK-NEXT: ret
ret <4 x i8> <i8 1, i8 3, i8 5, i8 7>
}
define <4 x i8> @buildvec_vid_step2_add1_v4i8_undef0() {
; CHECK-LABEL: buildvec_vid_step2_add1_v4i8_undef0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vadd.vi v8, v8, 1
; CHECK-NEXT: ret
ret <4 x i8> <i8 poison, i8 3, i8 5, i8 7>
}
define <4 x i8> @buildvec_vid_step2_add1_v4i8_undef1() {
; CHECK-LABEL: buildvec_vid_step2_add1_v4i8_undef1:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vadd.vi v8, v8, 1
; CHECK-NEXT: ret
ret <4 x i8> <i8 poison, i8 poison, i8 5, i8 7>
}
define <4 x i8> @buildvec_vid_step2_add1_v4i8_undef2() {
; CHECK-LABEL: buildvec_vid_step2_add1_v4i8_undef2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vadd.vi v8, v8, 1
; CHECK-NEXT: ret
ret <4 x i8> <i8 1, i8 poison, i8 poison, i8 7>
}
define <4 x i8> @buildvec_vid_stepn1_add0_v4i8() {
; CHECK-LABEL: buildvec_vid_stepn1_add0_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vrsub.vi v8, v8, 0
; CHECK-NEXT: ret
ret <4 x i8> <i8 0, i8 -1, i8 -2, i8 -3>
}
define <4 x i8> @buildvec_vid_stepn1_add0_v4i8_undef0() {
; CHECK-LABEL: buildvec_vid_stepn1_add0_v4i8_undef0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vrsub.vi v8, v8, 0
; CHECK-NEXT: ret
ret <4 x i8> <i8 poison, i8 -1, i8 -2, i8 -3>
}
define <4 x i8> @buildvec_vid_stepn1_add0_v4i8_undef1() {
; CHECK-LABEL: buildvec_vid_stepn1_add0_v4i8_undef1:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vrsub.vi v8, v8, 0
; CHECK-NEXT: ret
ret <4 x i8> <i8 poison, i8 poison, i8 -2, i8 -3>
}
define <4 x i8> @buildvec_vid_stepn1_add0_v4i8_undef2() {
; CHECK-LABEL: buildvec_vid_stepn1_add0_v4i8_undef2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vrsub.vi v8, v8, 0
; CHECK-NEXT: ret
ret <4 x i8> <i8 0, i8 poison, i8 poison, i8 -3>
}
define <4 x i8> @buildvec_vid_stepn2_add0_v4i8() {
; CHECK-LABEL: buildvec_vid_stepn2_add0_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vrsub.vi v8, v8, 0
; CHECK-NEXT: ret
ret <4 x i8> <i8 0, i8 -2, i8 -4, i8 -6>
}
define <4 x i8> @buildvec_vid_stepn2_add0_v4i8_undef0() {
; CHECK-LABEL: buildvec_vid_stepn2_add0_v4i8_undef0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vrsub.vi v8, v8, 0
; CHECK-NEXT: ret
ret <4 x i8> <i8 poison, i8 -2, i8 -4, i8 -6>
}
define <4 x i8> @buildvec_vid_stepn2_add0_v4i8_undef1() {
; CHECK-LABEL: buildvec_vid_stepn2_add0_v4i8_undef1:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vrsub.vi v8, v8, 0
; CHECK-NEXT: ret
ret <4 x i8> <i8 poison, i8 poison, i8 -4, i8 -6>
}
define <4 x i8> @buildvec_vid_stepn2_add0_v4i8_undef2() {
; CHECK-LABEL: buildvec_vid_stepn2_add0_v4i8_undef2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vmv.v.i v8, -6
; CHECK-NEXT: ret
ret <4 x i8> <i8 poison, i8 poison, i8 poison, i8 -6>
}
define <4 x i8> @buildvec_vid_stepn2_add3_v4i8() {
; CHECK-LABEL: buildvec_vid_stepn2_add3_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vrsub.vi v8, v8, 3
; CHECK-NEXT: ret
ret <4 x i8> <i8 3, i8 1, i8 -1, i8 -3>
}
define <4 x i8> @buildvec_vid_stepn3_add3_v4i8() {
; CHECK-LABEL: buildvec_vid_stepn3_add3_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vmv.v.i v9, 3
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: li a0, -3
; CHECK-NEXT: vmadd.vx v8, a0, v9
; CHECK-NEXT: ret
ret <4 x i8> <i8 3, i8 0, i8 -3, i8 -6>
}
define void @buildvec_vid_stepn3_addn3_v4i32(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
; CHECK-LABEL: buildvec_vid_stepn3_addn3_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, -3
; CHECK-NEXT: vid.v v9
; CHECK-NEXT: li a4, -3
; CHECK-NEXT: vmadd.vx v9, a4, v8
; CHECK-NEXT: vse32.v v9, (a0)
; CHECK-NEXT: vse32.v v9, (a1)
; CHECK-NEXT: vse32.v v9, (a2)
; CHECK-NEXT: vse32.v v9, (a3)
; CHECK-NEXT: ret
store <4 x i32> <i32 -3, i32 -6, i32 -9, i32 -12>, ptr %z0
store <4 x i32> <i32 poison, i32 -6, i32 -9, i32 -12>, ptr %z1
store <4 x i32> <i32 poison, i32 poison, i32 -9, i32 -12>, ptr %z2
store <4 x i32> <i32 -3, i32 poison, i32 poison, i32 -12>, ptr %z3
ret void
}
define <4 x i64> @buildvec_vid_step1_add0_v4i64() {
; RV32-LABEL: buildvec_vid_step1_add0_v4i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vid.v v8
; RV32-NEXT: ret
;
; RV64V-LABEL: buildvec_vid_step1_add0_v4i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64V-NEXT: vid.v v8
; RV64V-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_vid_step1_add0_v4i64:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: li a1, 3
; RV64ZVE32-NEXT: li a2, 2
; RV64ZVE32-NEXT: li a3, 1
; RV64ZVE32-NEXT: sd zero, 0(a0)
; RV64ZVE32-NEXT: sd a3, 8(a0)
; RV64ZVE32-NEXT: sd a2, 16(a0)
; RV64ZVE32-NEXT: sd a1, 24(a0)
; RV64ZVE32-NEXT: ret
ret <4 x i64> <i64 0, i64 1, i64 2, i64 3>
}
define <4 x i64> @buildvec_vid_step2_add0_v4i64() {
; RV32-LABEL: buildvec_vid_step2_add0_v4i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vid.v v8
; RV32-NEXT: vadd.vv v8, v8, v8
; RV32-NEXT: ret
;
; RV64V-LABEL: buildvec_vid_step2_add0_v4i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64V-NEXT: vid.v v8
; RV64V-NEXT: vadd.vv v8, v8, v8
; RV64V-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_vid_step2_add0_v4i64:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: li a1, 6
; RV64ZVE32-NEXT: li a2, 4
; RV64ZVE32-NEXT: li a3, 2
; RV64ZVE32-NEXT: sd zero, 0(a0)
; RV64ZVE32-NEXT: sd a3, 8(a0)
; RV64ZVE32-NEXT: sd a2, 16(a0)
; RV64ZVE32-NEXT: sd a1, 24(a0)
; RV64ZVE32-NEXT: ret
ret <4 x i64> <i64 0, i64 2, i64 4, i64 6>
}
define <4 x i8> @buildvec_no_vid_v4i8_0() {
; CHECK-LABEL: buildvec_no_vid_v4i8_0:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 28768
; CHECK-NEXT: addi a0, a0, 769
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: ret
ret <4 x i8> <i8 1, i8 3, i8 6, i8 7>
}
define <4 x i8> @buildvec_no_vid_v4i8_1() {
; CHECK-LABEL: buildvec_no_vid_v4i8_1:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 28752
; CHECK-NEXT: addi a0, a0, 512
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: ret
ret <4 x i8> <i8 poison, i8 2, i8 5, i8 7>
}
define <4 x i8> @buildvec_no_vid_v4i8_2() {
; CHECK-LABEL: buildvec_no_vid_v4i8_2:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 32768
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: ret
ret <4 x i8> <i8 0, i8 poison, i8 poison, i8 8>
}
define <4 x i8> @buildvec_no_vid_v4i8_3() {
; CHECK-LABEL: buildvec_no_vid_v4i8_3:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 28672
; CHECK-NEXT: addi a0, a0, 255
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: ret
ret <4 x i8> <i8 -1, i8 poison, i8 poison, i8 7>
}
define <4 x i8> @buildvec_no_vid_v4i8_4() {
; CHECK-LABEL: buildvec_no_vid_v4i8_4:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vmv.v.i v8, -2
; CHECK-NEXT: ret
ret <4 x i8> <i8 -2, i8 poison, i8 poison, i8 poison>
}
define <4 x i8> @buildvec_no_vid_v4i8_5() {
; CHECK-LABEL: buildvec_no_vid_v4i8_5:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 1032144
; CHECK-NEXT: addi a0, a0, -257
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: ret
ret <4 x i8> <i8 -1, i8 -2, i8 -4, i8 -5>
}
define void @buildvec_dominant0_v8i16(ptr %x) {
; CHECK-LABEL: buildvec_dominant0_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v8, zero
; CHECK-NEXT: vmv.v.i v9, 8
; CHECK-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; CHECK-NEXT: vslideup.vi v9, v8, 3
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vse16.v v9, (a0)
; CHECK-NEXT: ret
store <8 x i16> <i16 8, i16 8, i16 poison, i16 0, i16 8, i16 poison, i16 8, i16 8>, ptr %x
ret void
}
define void @buildvec_dominant0_v8i16_with_end_element(ptr %x) {
; CHECK-LABEL: buildvec_dominant0_v8i16_with_end_element:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, 8
; CHECK-NEXT: li a1, 3
; CHECK-NEXT: vslide1down.vx v8, v8, a1
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
store <8 x i16> <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 3>, ptr %x
ret void
}
define void @buildvec_dominant0_v8i16_with_tail(ptr %x) {
; CHECK-LABEL: buildvec_dominant0_v8i16_with_tail:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a1, %hi(.LCPI35_0)
; CHECK-NEXT: addi a1, a1, %lo(.LCPI35_0)
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a1)
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
store <8 x i16> <i16 8, i16 8, i16 8, i16 8, i16 8, i16 poison, i16 2, i16 3>, ptr %x
ret void
}
define void @buildvec_dominant1_v8i16(ptr %x) {
; CHECK-LABEL: buildvec_dominant1_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, 8
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
store <8 x i16> <i16 poison, i16 8, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison>, ptr %x
ret void
}
define <2 x i8> @buildvec_dominant0_v2i8() {
; CHECK-LABEL: buildvec_dominant0_v2i8:
; CHECK: # %bb.0:
; CHECK-NEXT: ret
ret <2 x i8> <i8 poison, i8 poison>
}
define <2 x i8> @buildvec_dominant1_v2i8() {
; RV32-LABEL: buildvec_dominant1_v2i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; RV32-NEXT: vmv.v.i v8, -1
; RV32-NEXT: ret
;
; RV64V-LABEL: buildvec_dominant1_v2i8:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; RV64V-NEXT: vmv.v.i v8, -1
; RV64V-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_dominant1_v2i8:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32-NEXT: vmv.v.i v8, -1
; RV64ZVE32-NEXT: ret
ret <2 x i8> <i8 poison, i8 -1>
}
define <2 x i8> @buildvec_dominant2_v2i8() {
; RV32-LABEL: buildvec_dominant2_v2i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; RV32-NEXT: vid.v v8
; RV32-NEXT: vrsub.vi v8, v8, 0
; RV32-NEXT: ret
;
; RV64V-LABEL: buildvec_dominant2_v2i8:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; RV64V-NEXT: vid.v v8
; RV64V-NEXT: vrsub.vi v8, v8, 0
; RV64V-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_dominant2_v2i8:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32-NEXT: vid.v v8
; RV64ZVE32-NEXT: vrsub.vi v8, v8, 0
; RV64ZVE32-NEXT: ret
ret <2 x i8> <i8 0, i8 -1>
}
define void @buildvec_dominant0_v2i32(ptr %x) {
; RV32-LABEL: buildvec_dominant0_v2i32:
; RV32: # %bb.0:
; RV32-NEXT: lui a1, %hi(.LCPI40_0)
; RV32-NEXT: addi a1, a1, %lo(.LCPI40_0)
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vle32.v v8, (a1)
; RV32-NEXT: vse32.v v8, (a0)
; RV32-NEXT: ret
;
; RV64V-LABEL: buildvec_dominant0_v2i32:
; RV64V: # %bb.0:
; RV64V-NEXT: lui a1, %hi(.LCPI40_0)
; RV64V-NEXT: ld a1, %lo(.LCPI40_0)(a1)
; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64V-NEXT: vmv.v.i v8, -1
; RV64V-NEXT: vsetvli zero, zero, e64, m1, tu, ma
; RV64V-NEXT: vmv.s.x v8, a1
; RV64V-NEXT: vse64.v v8, (a0)
; RV64V-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_dominant0_v2i32:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: lui a1, %hi(.LCPI40_0)
; RV64ZVE32-NEXT: ld a1, %lo(.LCPI40_0)(a1)
; RV64ZVE32-NEXT: li a2, -1
; RV64ZVE32-NEXT: sd a1, 0(a0)
; RV64ZVE32-NEXT: sd a2, 8(a0)
; RV64ZVE32-NEXT: ret
store <2 x i64> <i64 2049638230412172402, i64 -1>, ptr %x
ret void
}
define void @buildvec_dominant1_optsize_v2i32(ptr %x) optsize {
; RV32-LABEL: buildvec_dominant1_optsize_v2i32:
; RV32: # %bb.0:
; RV32-NEXT: lui a1, %hi(.LCPI41_0)
; RV32-NEXT: addi a1, a1, %lo(.LCPI41_0)
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vle32.v v8, (a1)
; RV32-NEXT: vse32.v v8, (a0)
; RV32-NEXT: ret
;
; RV64V-LABEL: buildvec_dominant1_optsize_v2i32:
; RV64V: # %bb.0:
; RV64V-NEXT: lui a1, %hi(.LCPI41_0)
; RV64V-NEXT: addi a1, a1, %lo(.LCPI41_0)
; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64V-NEXT: vle64.v v8, (a1)
; RV64V-NEXT: vse64.v v8, (a0)
; RV64V-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_dominant1_optsize_v2i32:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: lui a1, %hi(.LCPI41_0)
; RV64ZVE32-NEXT: ld a1, %lo(.LCPI41_0)(a1)
; RV64ZVE32-NEXT: li a2, -1
; RV64ZVE32-NEXT: sd a1, 0(a0)
; RV64ZVE32-NEXT: sd a2, 8(a0)
; RV64ZVE32-NEXT: ret
store <2 x i64> <i64 2049638230412172402, i64 -1>, ptr %x
ret void
}
define void @buildvec_seq_v8i8_v4i16(ptr %x) {
; CHECK-LABEL: buildvec_seq_v8i8_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 513
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.x v8, a1
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
store <8 x i8> <i8 1, i8 2, i8 1, i8 2, i8 1, i8 2, i8 poison, i8 2>, ptr %x
ret void
}
define void @buildvec_seq_v8i8_v2i32(ptr %x) {
; RV32-LABEL: buildvec_seq_v8i8_v2i32:
; RV32: # %bb.0:
; RV32-NEXT: lui a1, 48
; RV32-NEXT: addi a1, a1, 513
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT: vmv.v.x v8, a1
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT: vse8.v v8, (a0)
; RV32-NEXT: ret
;
; RV64V-LABEL: buildvec_seq_v8i8_v2i32:
; RV64V: # %bb.0:
; RV64V-NEXT: lui a1, 48
; RV64V-NEXT: addi a1, a1, 513
; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV64V-NEXT: vmv.v.x v8, a1
; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64V-NEXT: vse8.v v8, (a0)
; RV64V-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_seq_v8i8_v2i32:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: lui a1, 48
; RV64ZVE32-NEXT: addi a1, a1, 513
; RV64ZVE32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32-NEXT: vmv.v.x v8, a1
; RV64ZVE32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64ZVE32-NEXT: vse8.v v8, (a0)
; RV64ZVE32-NEXT: ret
store <8 x i8> <i8 1, i8 2, i8 3, i8 poison, i8 1, i8 2, i8 3, i8 poison>, ptr %x
ret void
}
define void @buildvec_seq_v16i8_v2i64(ptr %x) {
; RV32-LABEL: buildvec_seq_v16i8_v2i64:
; RV32: # %bb.0:
; RV32-NEXT: lui a1, %hi(.LCPI44_0)
; RV32-NEXT: addi a1, a1, %lo(.LCPI44_0)
; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV32-NEXT: vle8.v v8, (a1)
; RV32-NEXT: vse8.v v8, (a0)
; RV32-NEXT: ret
;
; RV64V-LABEL: buildvec_seq_v16i8_v2i64:
; RV64V: # %bb.0:
; RV64V-NEXT: lui a1, %hi(.LCPI44_0)
; RV64V-NEXT: ld a1, %lo(.LCPI44_0)(a1)
; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64V-NEXT: vmv.v.x v8, a1
; RV64V-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV64V-NEXT: vse8.v v8, (a0)
; RV64V-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_seq_v16i8_v2i64:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: lui a1, %hi(.LCPI44_0)
; RV64ZVE32-NEXT: addi a1, a1, %lo(.LCPI44_0)
; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV64ZVE32-NEXT: vle8.v v8, (a1)
; RV64ZVE32-NEXT: vse8.v v8, (a0)
; RV64ZVE32-NEXT: ret
store <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, ptr %x
ret void
}
define void @buildvec_seq2_v16i8_v2i64(ptr %x) {
; RV32-LABEL: buildvec_seq2_v16i8_v2i64:
; RV32: # %bb.0:
; RV32-NEXT: lui a1, 528432
; RV32-NEXT: addi a1, a1, 513
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vmv.v.x v8, a1
; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV32-NEXT: vse8.v v8, (a0)
; RV32-NEXT: ret
;
; RV64V-LABEL: buildvec_seq2_v16i8_v2i64:
; RV64V: # %bb.0:
; RV64V-NEXT: lui a1, 528432
; RV64V-NEXT: addi a1, a1, 513
; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64V-NEXT: vmv.v.x v8, a1
; RV64V-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV64V-NEXT: vse8.v v8, (a0)
; RV64V-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_seq2_v16i8_v2i64:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: lui a1, %hi(.LCPI45_0)
; RV64ZVE32-NEXT: addi a1, a1, %lo(.LCPI45_0)
; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV64ZVE32-NEXT: vle8.v v8, (a1)
; RV64ZVE32-NEXT: vse8.v v8, (a0)
; RV64ZVE32-NEXT: ret
store <16 x i8> <i8 1, i8 2, i8 3, i8 129, i8 -1, i8 -1, i8 -1, i8 -1, i8 1, i8 2, i8 3, i8 129, i8 -1, i8 -1, i8 -1, i8 -1>, ptr %x
ret void
}
define void @buildvec_seq_v9i8(ptr %x) {
; CHECK-LABEL: buildvec_seq_v9i8:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 73
; CHECK-NEXT: vsetivli zero, 9, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, 3
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a1
; CHECK-NEXT: li a1, 146
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a1
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
store <9 x i8> <i8 1, i8 2, i8 3, i8 1, i8 2, i8 3, i8 1, i8 2, i8 3>, ptr %x
ret void
}
define void @buildvec_seq_v4i16_v2i32(ptr %x) {
; CHECK-LABEL: buildvec_seq_v4i16_v2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, -127
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.x v8, a1
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
store <4 x i16> <i16 -127, i16 -1, i16 -127, i16 -1>, ptr %x
ret void
}
define void @buildvec_vid_step1o2_v4i32(ptr %z0, ptr %z1, ptr %z2, ptr %z3, ptr %z4, ptr %z5, ptr %z6) {
; CHECK-LABEL: buildvec_vid_step1o2_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, 1
; CHECK-NEXT: vmv.s.x v9, zero
; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; CHECK-NEXT: vslideup.vi v8, v9, 1
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vid.v v9
; CHECK-NEXT: vsrl.vi v9, v9, 1
; CHECK-NEXT: vse32.v v9, (a0)
; CHECK-NEXT: vse32.v v9, (a1)
; CHECK-NEXT: vse32.v v9, (a2)
; CHECK-NEXT: vse32.v v9, (a3)
; CHECK-NEXT: vse32.v v9, (a4)
; CHECK-NEXT: vmv.v.i v9, 0
; CHECK-NEXT: li a0, 1
; CHECK-NEXT: vslide1down.vx v9, v9, a0
; CHECK-NEXT: vse32.v v8, (a5)
; CHECK-NEXT: vse32.v v9, (a6)
; CHECK-NEXT: ret
store <4 x i32> <i32 0, i32 0, i32 1, i32 1>, ptr %z0
store <4 x i32> <i32 0, i32 0, i32 1, i32 poison>, ptr %z1
store <4 x i32> <i32 0, i32 poison, i32 1, i32 1>, ptr %z2
store <4 x i32> <i32 poison, i32 0, i32 poison, i32 1>, ptr %z3
store <4 x i32> <i32 0, i32 poison, i32 1, i32 poison>, ptr %z4
; We don't catch this one
store <4 x i32> <i32 poison, i32 0, i32 1, i32 1>, ptr %z5
; We catch this one but as VID/3 rather than VID/2
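; (The defined lanes of <0, 0, poison, 1> are also consistent with i/3, which is
; presumably the divisor the matcher settles on here.)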
store <4 x i32> <i32 0, i32 0, i32 poison, i32 1>, ptr %z6
ret void
}
define void @buildvec_vid_step1o2_add3_v4i16(ptr %z0, ptr %z1, ptr %z2, ptr %z3, ptr %z4, ptr %z5, ptr %z6) {
; CHECK-LABEL: buildvec_vid_step1o2_add3_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vmv.v.i v9, 3
; CHECK-NEXT: vsrl.vi v8, v8, 1
; CHECK-NEXT: vadd.vi v8, v8, 3
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: vse16.v v8, (a1)
; CHECK-NEXT: vse16.v v8, (a2)
; CHECK-NEXT: vse16.v v8, (a3)
; CHECK-NEXT: vse16.v v8, (a4)
; CHECK-NEXT: vmv.v.i v8, 4
; CHECK-NEXT: li a0, 4
; CHECK-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
; CHECK-NEXT: vslideup.vi v8, v9, 1
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vslide1down.vx v9, v9, a0
; CHECK-NEXT: vse16.v v8, (a5)
; CHECK-NEXT: vse16.v v9, (a6)
; CHECK-NEXT: ret
store <4 x i16> <i16 3, i16 3, i16 4, i16 4>, ptr %z0
store <4 x i16> <i16 3, i16 3, i16 4, i16 poison>, ptr %z1
store <4 x i16> <i16 3, i16 poison, i16 4, i16 4>, ptr %z2
store <4 x i16> <i16 poison, i16 3, i16 poison, i16 4>, ptr %z3
store <4 x i16> <i16 3, i16 poison, i16 4, i16 poison>, ptr %z4
; We don't catch this one
store <4 x i16> <i16 poison, i16 3, i16 4, i16 4>, ptr %z5
; We catch this one but as VID/3 rather than VID/2
store <4 x i16> <i16 3, i16 3, i16 poison, i16 4>, ptr %z6
ret void
}
define void @buildvec_vid_stepn1o4_addn5_v8i8(ptr %z0) {
; CHECK-LABEL: buildvec_vid_stepn1o4_addn5_v8i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vsrl.vi v8, v8, 2
; CHECK-NEXT: vrsub.vi v8, v8, -5
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
store <8 x i8> <i8 -5, i8 -5, i8 -5, i8 -5, i8 -6, i8 -6, i8 -6, i8 -6>, ptr %z0
ret void
}
define void @buildvec_vid_mpy_imm_v8i16(ptr %x) {
; CHECK-LABEL: buildvec_vid_mpy_imm_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: li a1, 17
; CHECK-NEXT: vmul.vx v8, v8, a1
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
store <8 x i16> <i16 0, i16 17, i16 34, i16 51, i16 68, i16 85, i16 102, i16 119>, ptr %x
ret void
}
define void @buildvec_vid_shl_imm_v8i16(ptr %x) {
; CHECK-LABEL: buildvec_vid_shl_imm_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vsll.vi v8, v8, 9
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
store <8 x i16> <i16 0, i16 512, i16 1024, i16 1536, i16 2048, i16 2560, i16 3072, i16 3584>, ptr %x
ret void
}
define <4 x i32> @splat_c3_v4i32(<4 x i32> %v) {
; CHECK-LABEL: splat_c3_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vrgather.vi v9, v8, 3
; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret
%x = extractelement <4 x i32> %v, i32 3
%ins = insertelement <4 x i32> poison, i32 %x, i32 0
%splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
ret <4 x i32> %splat
}
define <4 x i32> @splat_idx_v4i32(<4 x i32> %v, i64 %idx) {
; CHECK-LABEL: splat_idx_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vrgather.vx v9, v8, a0
; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret
%x = extractelement <4 x i32> %v, i64 %idx
%ins = insertelement <4 x i32> poison, i32 %x, i32 0
%splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
ret <4 x i32> %splat
}
define <8 x i16> @splat_c4_v8i16(<8 x i16> %v) {
; CHECK-LABEL: splat_c4_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vrgather.vi v9, v8, 4
; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret
%x = extractelement <8 x i16> %v, i32 4
%ins = insertelement <8 x i16> poison, i16 %x, i32 0
%splat = shufflevector <8 x i16> %ins, <8 x i16> poison, <8 x i32> zeroinitializer
ret <8 x i16> %splat
}
define <8 x i16> @splat_idx_v8i16(<8 x i16> %v, i64 %idx) {
; CHECK-LABEL: splat_idx_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vrgather.vx v9, v8, a0
; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret
%x = extractelement <8 x i16> %v, i64 %idx
%ins = insertelement <8 x i16> poison, i16 %x, i32 0
%splat = shufflevector <8 x i16> %ins, <8 x i16> poison, <8 x i32> zeroinitializer
ret <8 x i16> %splat
}
define <4 x i8> @buildvec_not_vid_v4i8_1() {
; CHECK-LABEL: buildvec_not_vid_v4i8_1:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 12320
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: ret
ret <4 x i8> <i8 0, i8 0, i8 2, i8 3>
}
define <4 x i8> @buildvec_not_vid_v4i8_2() {
; CHECK-LABEL: buildvec_not_vid_v4i8_2:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 16
; CHECK-NEXT: addi a0, a0, 771
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: ret
ret <4 x i8> <i8 3, i8 3, i8 1, i8 0>
}
; We match this as a VID sequence (-3 / 8) + 5 but choose not to introduce
; division to compute it.
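; Checking the defined lanes with truncating division: lane 6 is
; 5 + (6 * -3) / 8 = 5 - 2 = 3, and lanes 14 and 15 are 5 + (-42) / 8 = 0 and
; 5 + (-45) / 8 = 0, matching the constant below.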
define <16 x i8> @buildvec_not_vid_v16i8() {
; CHECK-LABEL: buildvec_not_vid_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 7, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, 3
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vsetivli zero, 7, e8, m1, tu, ma
; CHECK-NEXT: vslideup.vi v8, v9, 6
; CHECK-NEXT: ret
ret <16 x i8> <i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 3, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 0, i8 0>
}
define <512 x i8> @buildvec_vid_v512i8_indices_overflow() vscale_range(16, 1024) {
; CHECK-LABEL: buildvec_vid_v512i8_indices_overflow:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 512
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: ret
ret <512 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 39, i8 40, i8 41, i8 42, i8 43, i8 44, i8 45, i8 46, i8 47, i8 48, i8 49, i8 50, i8 51, i8 52, i8 53, i8 54, i8 55, i8 56, i8 57, i8 58, i8 59, i8 60, i8 61, i8 62, i8 63, i8 64, i8 65, i8 66, i8 67, i8 68, i8 69, i8 70, i8 71, i8 72, i8 73, i8 74, i8 75, i8 76, i8 77, i8 78, i8 79, i8 80, i8 81, i8 82, i8 83, i8 84, i8 85, i8 86, i8 87, i8 88, i8 89, i8 90, i8 91, i8 92, i8 93, i8 94, i8 95, i8 96, i8 97, i8 98, i8 99, i8 100, i8 101, i8 102, i8 103, i8 104, i8 105, i8 106, i8 107, i8 108, i8 109, i8 110, i8 111, i8 112, i8 113, i8 114, i8 115, i8 116, i8 117, i8 118, i8 119, i8 120, i8 121, i8 122, i8 123, i8 124, i8 125, i8 126, i8 127, i8 128, i8 129, i8 130, i8 131, i8 132, i8 133, i8 134, i8 135, i8 136, i8 137, i8 138, i8 139, i8 140, i8 141, i8 142, i8 143, i8 144, i8 145, i8 146, i8 147, i8 148, i8 149, i8 150, i8 151, i8 152, i8 153, i8 154, i8 155, i8 156, i8 157, i8 158, i8 159, i8 160, i8 161, i8 162, i8 163, i8 164, i8 165, i8 166, i8 167, i8 168, i8 169, i8 170, i8 171, i8 172, i8 173, i8 174, i8 175, i8 176, i8 177, i8 178, i8 179, i8 180, i8 181, i8 182, i8 183, i8 184, i8 185, i8 186, i8 187, i8 188, i8 189, i8 190, i8 191, i8 192, i8 193, i8 194, i8 195, i8 196, i8 197, i8 198, i8 199, i8 200, i8 201, i8 202, i8 203, i8 204, i8 205, i8 206, i8 207, i8 208, i8 209, i8 210, i8 211, i8 212, i8 213, i8 214, i8 215, i8 216, i8 217, i8 218, i8 219, i8 220, i8 221, i8 222, i8 223, i8 224, i8 225, i8 226, i8 227, i8 228, i8 229, i8 230, i8 231, i8 232, i8 233, i8 234, i8 235, i8 236, i8 237, i8 238, i8 239, i8 240, i8 241, i8 242, i8 243, i8 244, i8 245, i8 246, i8 247, i8 248, i8 249, i8 250, i8 251, i8 252, i8 253, i8 254, i8 255, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 39, i8 40, i8 41, i8 42, i8 43, i8 44, i8 45, i8 46, i8 47, i8 48, i8 49, i8 50, i8 51, i8 52, i8 53, i8 54, i8 55, i8 56, i8 57, i8 58, i8 59, i8 60, i8 61, i8 62, i8 63, i8 64, i8 65, i8 66, i8 67, i8 68, i8 69, i8 70, i8 71, i8 72, i8 73, i8 74, i8 75, i8 76, i8 77, i8 78, i8 79, i8 80, i8 81, i8 82, i8 83, i8 84, i8 85, i8 86, i8 87, i8 88, i8 89, i8 90, i8 91, i8 92, i8 93, i8 94, i8 95, i8 96, i8 97, i8 98, i8 99, i8 100, i8 101, i8 102, i8 103, i8 104, i8 105, i8 106, i8 107, i8 108, i8 109, i8 110, i8 111, i8 112, i8 113, i8 114, i8 115, i8 116, i8 117, i8 118, i8 119, i8 120, i8 121, i8 122, i8 123, i8 124, i8 125, i8 126, i8 127, i8 128, i8 129, i8 130, i8 131, i8 132, i8 133, i8 134, i8 135, i8 136, i8 137, i8 138, i8 139, i8 140, i8 141, i8 142, i8 143, i8 144, i8 145, i8 146, i8 147, i8 148, i8 149, i8 150, i8 151, i8 152, i8 153, i8 154, i8 155, i8 156, i8 157, i8 158, i8 159, i8 160, i8 161, i8 162, i8 163, i8 164, i8 165, i8 166, i8 167, i8 168, i8 169, i8 170, i8 171, i8 172, i8 173, i8 174, i8 175, i8 176, i8 177, i8 178, i8 179, i8 180, i8 181, i8 182, i8 183, i8 184, i8 185, i8 186, i8 187, i8 188, i8 189, i8 190, i8 191, i8 192, i8 193, i8 194, i8 195, i8 196, i8 197, i8 198, i8 199, i8 200, i8 201, i8 202, i8 203, i8 204, i8 205, i8 206, i8 207, i8 208, i8 209, i8 210, i8 211, i8 212, i8 213, i8 214, i8 215, i8 216, i8 217, i8 218, i8 219, i8 220, i8 221, i8 222, i8 223, i8 224, i8 225, i8 226, i8 227, i8 228, i8 229, i8 230, i8 231, i8 232, i8 233, i8 234, i8 235, i8 236, i8 237, i8 238, i8 239, i8 240, i8 241, i8 242, i8 243, i8 244, i8 245, i8 246, i8 247, i8 248, i8 249, i8 250, i8 251, i8 252, i8 253, i8 254, i8 255>
}
define <512 x i8> @buildvec_not_vid_v512i8_indices_overflow_1() vscale_range(16, 1024) {
; RV32-LABEL: buildvec_not_vid_v512i8_indices_overflow_1:
; RV32: # %bb.0:
; RV32-NEXT: li a0, 512
; RV32-NEXT: vsetivli zero, 16, e32, mf2, ta, ma
; RV32-NEXT: vid.v v8
; RV32-NEXT: vsrl.vi v8, v8, 3
; RV32-NEXT: vadd.vi v0, v8, -1
; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV32-NEXT: vmv.v.i v8, 1
; RV32-NEXT: vmerge.vim v8, v8, 0, v0
; RV32-NEXT: ret
;
; RV64V-LABEL: buildvec_not_vid_v512i8_indices_overflow_1:
; RV64V: # %bb.0:
; RV64V-NEXT: li a0, 512
; RV64V-NEXT: vsetivli zero, 8, e64, m1, ta, ma
; RV64V-NEXT: vid.v v8
; RV64V-NEXT: vsrl.vi v8, v8, 2
; RV64V-NEXT: vadd.vi v0, v8, -1
; RV64V-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV64V-NEXT: vmv.v.i v8, 1
; RV64V-NEXT: vmerge.vim v8, v8, 0, v0
; RV64V-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_not_vid_v512i8_indices_overflow_1:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: li a0, 512
; RV64ZVE32-NEXT: vsetivli zero, 16, e32, m1, ta, ma
; RV64ZVE32-NEXT: vid.v v8
; RV64ZVE32-NEXT: vsrl.vi v8, v8, 3
; RV64ZVE32-NEXT: vadd.vi v0, v8, -1
; RV64ZVE32-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV64ZVE32-NEXT: vmv.v.i v8, 1
; RV64ZVE32-NEXT: vmerge.vim v8, v8, 0, v0
; RV64ZVE32-NEXT: ret
ret <512 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
}
define <512 x i8> @buildvec_not_vid_v512i8_indices_overflow_2() vscale_range(16, 1024) {
; RV32-LABEL: buildvec_not_vid_v512i8_indices_overflow_2:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 16, e32, mf2, ta, ma
; RV32-NEXT: vmv.v.i v0, 15
; RV32-NEXT: vmv.v.i v12, 0
; RV32-NEXT: li a0, 512
; RV32-NEXT: li a1, 240
; RV32-NEXT: vmerge.vim v13, v12, -1, v0
; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV32-NEXT: vmv.v.i v8, 3
; RV32-NEXT: vmv1r.v v0, v13
; RV32-NEXT: vmerge.vim v8, v8, 0, v0
; RV32-NEXT: vsetvli zero, zero, e16, m8, ta, ma
; RV32-NEXT: vmv.s.x v0, a1
; RV32-NEXT: li a1, 15
; RV32-NEXT: vsetivli zero, 16, e32, mf2, ta, ma
; RV32-NEXT: vmerge.vim v13, v12, -1, v0
; RV32-NEXT: slli a1, a1, 8
; RV32-NEXT: vmv1r.v v0, v13
; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV32-NEXT: vmerge.vim v8, v8, 1, v0
; RV32-NEXT: vsetvli zero, zero, e16, m8, ta, ma
; RV32-NEXT: vmv.s.x v0, a1
; RV32-NEXT: vsetivli zero, 16, e32, mf2, ta, ma
; RV32-NEXT: vmerge.vim v12, v12, -1, v0
; RV32-NEXT: vmv1r.v v0, v12
; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV32-NEXT: vmerge.vim v8, v8, 2, v0
; RV32-NEXT: ret
;
; RV64V-LABEL: buildvec_not_vid_v512i8_indices_overflow_2:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m1, ta, ma
; RV64V-NEXT: vmv.v.i v0, 3
; RV64V-NEXT: vmv.v.i v12, 0
; RV64V-NEXT: li a0, 512
; RV64V-NEXT: vmerge.vim v13, v12, -1, v0
; RV64V-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV64V-NEXT: vmv.v.i v8, 3
; RV64V-NEXT: vmv1r.v v0, v13
; RV64V-NEXT: vmerge.vim v8, v8, 0, v0
; RV64V-NEXT: vsetivli zero, 8, e64, m1, ta, ma
; RV64V-NEXT: vmv.v.i v0, 12
; RV64V-NEXT: vmerge.vim v13, v12, -1, v0
; RV64V-NEXT: li a1, 48
; RV64V-NEXT: vmv.v.v v0, v13
; RV64V-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV64V-NEXT: vmerge.vim v8, v8, 1, v0
; RV64V-NEXT: vmv.s.x v0, a1
; RV64V-NEXT: vsetivli zero, 8, e64, m1, ta, ma
; RV64V-NEXT: vmerge.vim v12, v12, -1, v0
; RV64V-NEXT: vmv.v.v v0, v12
; RV64V-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV64V-NEXT: vmerge.vim v8, v8, 2, v0
; RV64V-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_not_vid_v512i8_indices_overflow_2:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: vsetivli zero, 16, e32, m1, ta, ma
; RV64ZVE32-NEXT: vmv.v.i v0, 15
; RV64ZVE32-NEXT: vmv.v.i v12, 0
; RV64ZVE32-NEXT: li a0, 512
; RV64ZVE32-NEXT: li a1, 240
; RV64ZVE32-NEXT: vmerge.vim v13, v12, -1, v0
; RV64ZVE32-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV64ZVE32-NEXT: vmv.v.i v8, 3
; RV64ZVE32-NEXT: vmv1r.v v0, v13
; RV64ZVE32-NEXT: vmerge.vim v8, v8, 0, v0
; RV64ZVE32-NEXT: vsetvli zero, zero, e16, m8, ta, ma
; RV64ZVE32-NEXT: vmv.s.x v0, a1
; RV64ZVE32-NEXT: li a1, 15
; RV64ZVE32-NEXT: vsetivli zero, 16, e32, m1, ta, ma
; RV64ZVE32-NEXT: vmerge.vim v13, v12, -1, v0
; RV64ZVE32-NEXT: slli a1, a1, 8
; RV64ZVE32-NEXT: vmv.v.v v0, v13
; RV64ZVE32-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV64ZVE32-NEXT: vmerge.vim v8, v8, 1, v0
; RV64ZVE32-NEXT: vsetvli zero, zero, e16, m8, ta, ma
; RV64ZVE32-NEXT: vmv.s.x v0, a1
; RV64ZVE32-NEXT: vsetivli zero, 16, e32, m1, ta, ma
; RV64ZVE32-NEXT: vmerge.vim v12, v12, -1, v0
; RV64ZVE32-NEXT: vmv.v.v v0, v12
; RV64ZVE32-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; RV64ZVE32-NEXT: vmerge.vim v8, v8, 2, v0
; RV64ZVE32-NEXT: ret
ret <512 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
}
define <8 x i32> @prefix_overwrite(<8 x i32> %vin, i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: prefix_overwrite:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m1, tu, ma
; CHECK-NEXT: vmv.s.x v10, a1
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; CHECK-NEXT: vslideup.vi v8, v10, 1
; CHECK-NEXT: vmv.s.x v10, a2
; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; CHECK-NEXT: vslideup.vi v8, v10, 2
; CHECK-NEXT: vmv.s.x v10, a3
; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; CHECK-NEXT: vslideup.vi v8, v10, 3
; CHECK-NEXT: ret
%v0 = insertelement <8 x i32> %vin, i32 %a, i32 0
%v1 = insertelement <8 x i32> %v0, i32 %b, i32 1
%v2 = insertelement <8 x i32> %v1, i32 %c, i32 2
%v3 = insertelement <8 x i32> %v2, i32 %d, i32 3
ret <8 x i32> %v3
}
define <8 x i32> @suffix_overwrite(<8 x i32> %vin, i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: suffix_overwrite:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vslideup.vi v8, v10, 4
; CHECK-NEXT: vmv.s.x v10, a1
; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; CHECK-NEXT: vslideup.vi v8, v10, 5
; CHECK-NEXT: vmv.s.x v10, a2
; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; CHECK-NEXT: vslideup.vi v8, v10, 6
; CHECK-NEXT: vmv.s.x v10, a3
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vslideup.vi v8, v10, 7
; CHECK-NEXT: ret
%v0 = insertelement <8 x i32> %vin, i32 %a, i32 4
%v1 = insertelement <8 x i32> %v0, i32 %b, i32 5
%v2 = insertelement <8 x i32> %v1, i32 %c, i32 6
%v3 = insertelement <8 x i32> %v2, i32 %d, i32 7
ret <8 x i32> %v3
}
define <4 x i64> @v4xi64_exact(i64 %a, i64 %b, i64 %c, i64 %d) vscale_range(2,2) {
; RV32-LABEL: v4xi64_exact:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vmv.v.x v8, a4
; RV32-NEXT: vmv.v.x v9, a0
; RV32-NEXT: vslide1down.vx v8, v8, a5
; RV32-NEXT: vslide1down.vx v10, v9, a1
; RV32-NEXT: vslide1down.vx v8, v8, a6
; RV32-NEXT: vslide1down.vx v9, v8, a7
; RV32-NEXT: vslide1down.vx v8, v10, a2
; RV32-NEXT: vslide1down.vx v8, v8, a3
; RV32-NEXT: ret
;
; RV64V-LABEL: v4xi64_exact:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64V-NEXT: vmv.v.x v8, a2
; RV64V-NEXT: vslide1down.vx v9, v8, a3
; RV64V-NEXT: vmv.v.x v8, a0
; RV64V-NEXT: vslide1down.vx v8, v8, a1
; RV64V-NEXT: ret
;
; RV64ZVE32-LABEL: v4xi64_exact:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: sd a1, 0(a0)
; RV64ZVE32-NEXT: sd a2, 8(a0)
; RV64ZVE32-NEXT: sd a3, 16(a0)
; RV64ZVE32-NEXT: sd a4, 24(a0)
; RV64ZVE32-NEXT: ret
%v1 = insertelement <4 x i64> poison, i64 %a, i32 0
%v2 = insertelement <4 x i64> %v1, i64 %b, i32 1
%v3 = insertelement <4 x i64> %v2, i64 %c, i32 2
%v4 = insertelement <4 x i64> %v3, i64 %d, i32 3
ret <4 x i64> %v4
}
define <8 x i64> @v8xi64_exact(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g, i64 %h) vscale_range(2,2) {
; RV32-LABEL: v8xi64_exact:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset s0, -4
; RV32-NEXT: lw t0, 44(sp)
; RV32-NEXT: lw t1, 40(sp)
; RV32-NEXT: lw t2, 36(sp)
; RV32-NEXT: lw t3, 32(sp)
; RV32-NEXT: lw t4, 28(sp)
; RV32-NEXT: lw t5, 24(sp)
; RV32-NEXT: lw t6, 20(sp)
; RV32-NEXT: lw s0, 16(sp)
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vmv.v.x v8, a4
; RV32-NEXT: vmv.v.x v9, a0
; RV32-NEXT: vslide1down.vx v8, v8, a5
; RV32-NEXT: vslide1down.vx v9, v9, a1
; RV32-NEXT: vslide1down.vx v8, v8, a6
; RV32-NEXT: vslide1down.vx v10, v9, a2
; RV32-NEXT: vslide1down.vx v9, v8, a7
; RV32-NEXT: vslide1down.vx v8, v10, a3
; RV32-NEXT: vmv.v.x v10, s0
; RV32-NEXT: vslide1down.vx v10, v10, t6
; RV32-NEXT: vslide1down.vx v10, v10, t5
; RV32-NEXT: vslide1down.vx v10, v10, t4
; RV32-NEXT: vmv.v.x v11, t3
; RV32-NEXT: vslide1down.vx v11, v11, t2
; RV32-NEXT: vslide1down.vx v11, v11, t1
; RV32-NEXT: vslide1down.vx v11, v11, t0
; RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: .cfi_restore s0
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64V-LABEL: v8xi64_exact:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64V-NEXT: vmv.v.x v8, a2
; RV64V-NEXT: vmv.v.x v10, a0
; RV64V-NEXT: vslide1down.vx v9, v8, a3
; RV64V-NEXT: vslide1down.vx v8, v10, a1
; RV64V-NEXT: vmv.v.x v10, a4
; RV64V-NEXT: vslide1down.vx v10, v10, a5
; RV64V-NEXT: vmv.v.x v11, a6
; RV64V-NEXT: vslide1down.vx v11, v11, a7
; RV64V-NEXT: ret
;
; RV64ZVE32-LABEL: v8xi64_exact:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: ld t0, 0(sp)
; RV64ZVE32-NEXT: sd a5, 32(a0)
; RV64ZVE32-NEXT: sd a6, 40(a0)
; RV64ZVE32-NEXT: sd a7, 48(a0)
; RV64ZVE32-NEXT: sd t0, 56(a0)
; RV64ZVE32-NEXT: sd a1, 0(a0)
; RV64ZVE32-NEXT: sd a2, 8(a0)
; RV64ZVE32-NEXT: sd a3, 16(a0)
; RV64ZVE32-NEXT: sd a4, 24(a0)
; RV64ZVE32-NEXT: ret
%v1 = insertelement <8 x i64> poison, i64 %a, i32 0
%v2 = insertelement <8 x i64> %v1, i64 %b, i32 1
%v3 = insertelement <8 x i64> %v2, i64 %c, i32 2
%v4 = insertelement <8 x i64> %v3, i64 %d, i32 3
%v5 = insertelement <8 x i64> %v4, i64 %e, i32 4
%v6 = insertelement <8 x i64> %v5, i64 %f, i32 5
%v7 = insertelement <8 x i64> %v6, i64 %g, i32 6
%v8 = insertelement <8 x i64> %v7, i64 %h, i32 7
ret <8 x i64> %v8
}
define <8 x i64> @v8xi64_exact_equal_halves(i64 %a, i64 %b, i64 %c, i64 %d) vscale_range(2,2) {
; RV32-LABEL: v8xi64_exact_equal_halves:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vmv.v.x v8, a4
; RV32-NEXT: vmv.v.x v9, a0
; RV32-NEXT: vslide1down.vx v8, v8, a5
; RV32-NEXT: vslide1down.vx v10, v9, a1
; RV32-NEXT: vslide1down.vx v8, v8, a6
; RV32-NEXT: vslide1down.vx v9, v8, a7
; RV32-NEXT: vslide1down.vx v8, v10, a2
; RV32-NEXT: vslide1down.vx v8, v8, a3
; RV32-NEXT: vmv.v.v v10, v8
; RV32-NEXT: vmv.v.v v11, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: v8xi64_exact_equal_halves:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64V-NEXT: vmv.v.x v8, a2
; RV64V-NEXT: vslide1down.vx v9, v8, a3
; RV64V-NEXT: vmv.v.x v8, a0
; RV64V-NEXT: vslide1down.vx v8, v8, a1
; RV64V-NEXT: vmv.v.v v10, v8
; RV64V-NEXT: vmv.v.v v11, v9
; RV64V-NEXT: ret
;
; RV64ZVE32-LABEL: v8xi64_exact_equal_halves:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: sd a1, 32(a0)
; RV64ZVE32-NEXT: sd a2, 40(a0)
; RV64ZVE32-NEXT: sd a3, 48(a0)
; RV64ZVE32-NEXT: sd a4, 56(a0)
; RV64ZVE32-NEXT: sd a1, 0(a0)
; RV64ZVE32-NEXT: sd a2, 8(a0)
; RV64ZVE32-NEXT: sd a3, 16(a0)
; RV64ZVE32-NEXT: sd a4, 24(a0)
; RV64ZVE32-NEXT: ret
%v1 = insertelement <8 x i64> poison, i64 %a, i32 0
%v2 = insertelement <8 x i64> %v1, i64 %b, i32 1
%v3 = insertelement <8 x i64> %v2, i64 %c, i32 2
%v4 = insertelement <8 x i64> %v3, i64 %d, i32 3
%v5 = insertelement <8 x i64> %v4, i64 %a, i32 4
%v6 = insertelement <8 x i64> %v5, i64 %b, i32 5
%v7 = insertelement <8 x i64> %v6, i64 %c, i32 6
%v8 = insertelement <8 x i64> %v7, i64 %d, i32 7
ret <8 x i64> %v8
}
define <8 x i64> @v8xi64_exact_undef_suffix(i64 %a, i64 %b, i64 %c, i64 %d) vscale_range(2,2) {
; RV32-LABEL: v8xi64_exact_undef_suffix:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vmv.v.x v8, a4
; RV32-NEXT: vmv.v.x v9, a0
; RV32-NEXT: vslide1down.vx v8, v8, a5
; RV32-NEXT: vslide1down.vx v10, v9, a1
; RV32-NEXT: vslide1down.vx v8, v8, a6
; RV32-NEXT: vslide1down.vx v9, v8, a7
; RV32-NEXT: vslide1down.vx v8, v10, a2
; RV32-NEXT: vslide1down.vx v8, v8, a3
; RV32-NEXT: ret
;
; RV64V-LABEL: v8xi64_exact_undef_suffix:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64V-NEXT: vmv.v.x v8, a2
; RV64V-NEXT: vslide1down.vx v9, v8, a3
; RV64V-NEXT: vmv.v.x v8, a0
; RV64V-NEXT: vslide1down.vx v8, v8, a1
; RV64V-NEXT: ret
;
; RV64ZVE32-LABEL: v8xi64_exact_undef_suffix:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: sd a1, 0(a0)
; RV64ZVE32-NEXT: sd a2, 8(a0)
; RV64ZVE32-NEXT: sd a3, 16(a0)
; RV64ZVE32-NEXT: sd a4, 24(a0)
; RV64ZVE32-NEXT: ret
%v1 = insertelement <8 x i64> poison, i64 %a, i32 0
%v2 = insertelement <8 x i64> %v1, i64 %b, i32 1
%v3 = insertelement <8 x i64> %v2, i64 %c, i32 2
%v4 = insertelement <8 x i64> %v3, i64 %d, i32 3
ret <8 x i64> %v4
}
define <8 x i64> @v8xi64_exact_undef_prefix(i64 %a, i64 %b, i64 %c, i64 %d) vscale_range(2,2) {
; RV32-LABEL: v8xi64_exact_undef_prefix:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vmv.v.x v8, a4
; RV32-NEXT: vmv.v.x v9, a0
; RV32-NEXT: vslide1down.vx v8, v8, a5
; RV32-NEXT: vslide1down.vx v9, v9, a1
; RV32-NEXT: vslide1down.vx v8, v8, a6
; RV32-NEXT: vslide1down.vx v11, v8, a7
; RV32-NEXT: vslide1down.vx v8, v9, a2
; RV32-NEXT: vslide1down.vx v10, v8, a3
; RV32-NEXT: ret
;
; RV64V-LABEL: v8xi64_exact_undef_prefix:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64V-NEXT: vmv.v.x v8, a2
; RV64V-NEXT: vslide1down.vx v11, v8, a3
; RV64V-NEXT: vmv.v.x v8, a0
; RV64V-NEXT: vslide1down.vx v10, v8, a1
; RV64V-NEXT: ret
;
; RV64ZVE32-LABEL: v8xi64_exact_undef_prefix:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: sd a1, 32(a0)
; RV64ZVE32-NEXT: sd a2, 40(a0)
; RV64ZVE32-NEXT: sd a3, 48(a0)
; RV64ZVE32-NEXT: sd a4, 56(a0)
; RV64ZVE32-NEXT: ret
%v1 = insertelement <8 x i64> poison, i64 %a, i32 4
%v2 = insertelement <8 x i64> %v1, i64 %b, i32 5
%v3 = insertelement <8 x i64> %v2, i64 %c, i32 6
%v4 = insertelement <8 x i64> %v3, i64 %d, i32 7
ret <8 x i64> %v4
}
define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) {
; RV32-ONLY-LABEL: buildvec_v16i8_loads_contigous:
; RV32-ONLY: # %bb.0:
; RV32-ONLY-NEXT: addi sp, sp, -16
; RV32-ONLY-NEXT: .cfi_def_cfa_offset 16
; RV32-ONLY-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32-ONLY-NEXT: .cfi_offset s0, -4
; RV32-ONLY-NEXT: lbu a1, 0(a0)
; RV32-ONLY-NEXT: lbu a2, 1(a0)
; RV32-ONLY-NEXT: lbu a3, 2(a0)
; RV32-ONLY-NEXT: lbu a4, 3(a0)
; RV32-ONLY-NEXT: lbu a5, 4(a0)
; RV32-ONLY-NEXT: lbu a6, 5(a0)
; RV32-ONLY-NEXT: lbu a7, 6(a0)
; RV32-ONLY-NEXT: lbu t0, 7(a0)
; RV32-ONLY-NEXT: lbu t1, 8(a0)
; RV32-ONLY-NEXT: lbu t2, 9(a0)
; RV32-ONLY-NEXT: lbu t3, 10(a0)
; RV32-ONLY-NEXT: lbu t4, 11(a0)
; RV32-ONLY-NEXT: li t5, 255
; RV32-ONLY-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV32-ONLY-NEXT: vmv.s.x v0, t5
; RV32-ONLY-NEXT: lbu t5, 12(a0)
; RV32-ONLY-NEXT: lbu t6, 13(a0)
; RV32-ONLY-NEXT: lbu s0, 14(a0)
; RV32-ONLY-NEXT: lbu a0, 15(a0)
; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; RV32-ONLY-NEXT: vmv.v.x v8, t1
; RV32-ONLY-NEXT: vmv.v.x v9, a1
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t2
; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a2
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t3
; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a3
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t4
; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a4
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t5
; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a5
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t6
; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a6
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, s0
; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a7
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a0
; RV32-ONLY-NEXT: vslide1down.vx v9, v9, t0
; RV32-ONLY-NEXT: vslidedown.vi v8, v9, 8, v0.t
; RV32-ONLY-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32-ONLY-NEXT: .cfi_restore s0
; RV32-ONLY-NEXT: addi sp, sp, 16
; RV32-ONLY-NEXT: .cfi_def_cfa_offset 0
; RV32-ONLY-NEXT: ret
;
; RV32VB-LABEL: buildvec_v16i8_loads_contigous:
; RV32VB: # %bb.0:
; RV32VB-NEXT: lbu a1, 0(a0)
; RV32VB-NEXT: lbu a2, 1(a0)
; RV32VB-NEXT: lbu a3, 2(a0)
; RV32VB-NEXT: lbu a4, 3(a0)
; RV32VB-NEXT: lbu a5, 4(a0)
; RV32VB-NEXT: lbu a6, 5(a0)
; RV32VB-NEXT: lbu a7, 6(a0)
; RV32VB-NEXT: lbu t0, 7(a0)
; RV32VB-NEXT: slli a2, a2, 8
; RV32VB-NEXT: slli a3, a3, 16
; RV32VB-NEXT: slli a4, a4, 24
; RV32VB-NEXT: slli a6, a6, 8
; RV32VB-NEXT: or a1, a1, a2
; RV32VB-NEXT: or a3, a4, a3
; RV32VB-NEXT: or a2, a5, a6
; RV32VB-NEXT: lbu a4, 8(a0)
; RV32VB-NEXT: lbu a5, 9(a0)
; RV32VB-NEXT: lbu a6, 10(a0)
; RV32VB-NEXT: lbu t1, 11(a0)
; RV32VB-NEXT: slli a7, a7, 16
; RV32VB-NEXT: slli t0, t0, 24
; RV32VB-NEXT: slli a5, a5, 8
; RV32VB-NEXT: slli a6, a6, 16
; RV32VB-NEXT: slli t1, t1, 24
; RV32VB-NEXT: or a7, t0, a7
; RV32VB-NEXT: or a4, a4, a5
; RV32VB-NEXT: or a5, t1, a6
; RV32VB-NEXT: lbu a6, 13(a0)
; RV32VB-NEXT: lbu t0, 12(a0)
; RV32VB-NEXT: lbu t1, 14(a0)
; RV32VB-NEXT: lbu a0, 15(a0)
; RV32VB-NEXT: slli a6, a6, 8
; RV32VB-NEXT: or a6, t0, a6
; RV32VB-NEXT: slli t1, t1, 16
; RV32VB-NEXT: slli a0, a0, 24
; RV32VB-NEXT: or a0, a0, t1
; RV32VB-NEXT: or a1, a1, a3
; RV32VB-NEXT: or a2, a2, a7
; RV32VB-NEXT: or a4, a4, a5
; RV32VB-NEXT: or a0, a6, a0
; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32VB-NEXT: vmv.v.x v8, a1
; RV32VB-NEXT: vslide1down.vx v8, v8, a2
; RV32VB-NEXT: vslide1down.vx v8, v8, a4
; RV32VB-NEXT: vslide1down.vx v8, v8, a0
; RV32VB-NEXT: ret
;
; RV32VB-PACK-LABEL: buildvec_v16i8_loads_contigous:
; RV32VB-PACK: # %bb.0:
; RV32VB-PACK-NEXT: lbu a1, 0(a0)
; RV32VB-PACK-NEXT: lbu a2, 1(a0)
; RV32VB-PACK-NEXT: lbu a3, 2(a0)
; RV32VB-PACK-NEXT: lbu a4, 3(a0)
; RV32VB-PACK-NEXT: lbu a5, 4(a0)
; RV32VB-PACK-NEXT: lbu a6, 5(a0)
; RV32VB-PACK-NEXT: lbu a7, 6(a0)
; RV32VB-PACK-NEXT: lbu t0, 7(a0)
; RV32VB-PACK-NEXT: packh a1, a1, a2
; RV32VB-PACK-NEXT: lbu a2, 8(a0)
; RV32VB-PACK-NEXT: lbu t1, 9(a0)
; RV32VB-PACK-NEXT: lbu t2, 10(a0)
; RV32VB-PACK-NEXT: lbu t3, 11(a0)
; RV32VB-PACK-NEXT: packh a3, a3, a4
; RV32VB-PACK-NEXT: packh a4, a5, a6
; RV32VB-PACK-NEXT: packh a5, a7, t0
; RV32VB-PACK-NEXT: lbu a6, 12(a0)
; RV32VB-PACK-NEXT: lbu a7, 13(a0)
; RV32VB-PACK-NEXT: lbu t0, 14(a0)
; RV32VB-PACK-NEXT: lbu a0, 15(a0)
; RV32VB-PACK-NEXT: packh a2, a2, t1
; RV32VB-PACK-NEXT: packh t1, t2, t3
; RV32VB-PACK-NEXT: packh a6, a6, a7
; RV32VB-PACK-NEXT: packh a0, t0, a0
; RV32VB-PACK-NEXT: pack a1, a1, a3
; RV32VB-PACK-NEXT: pack a3, a4, a5
; RV32VB-PACK-NEXT: pack a2, a2, t1
; RV32VB-PACK-NEXT: pack a0, a6, a0
; RV32VB-PACK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32VB-PACK-NEXT: vmv.v.x v8, a1
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a3
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a2
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0
; RV32VB-PACK-NEXT: ret
;
; RV64V-ONLY-LABEL: buildvec_v16i8_loads_contigous:
; RV64V-ONLY: # %bb.0:
; RV64V-ONLY-NEXT: addi sp, sp, -16
; RV64V-ONLY-NEXT: .cfi_def_cfa_offset 16
; RV64V-ONLY-NEXT: sd s0, 8(sp) # 8-byte Folded Spill
; RV64V-ONLY-NEXT: .cfi_offset s0, -8
; RV64V-ONLY-NEXT: lbu a1, 0(a0)
; RV64V-ONLY-NEXT: lbu a2, 1(a0)
; RV64V-ONLY-NEXT: lbu a3, 2(a0)
; RV64V-ONLY-NEXT: lbu a4, 3(a0)
; RV64V-ONLY-NEXT: lbu a5, 4(a0)
; RV64V-ONLY-NEXT: lbu a6, 5(a0)
; RV64V-ONLY-NEXT: lbu a7, 6(a0)
; RV64V-ONLY-NEXT: lbu t0, 7(a0)
; RV64V-ONLY-NEXT: lbu t1, 8(a0)
; RV64V-ONLY-NEXT: lbu t2, 9(a0)
; RV64V-ONLY-NEXT: lbu t3, 10(a0)
; RV64V-ONLY-NEXT: lbu t4, 11(a0)
; RV64V-ONLY-NEXT: li t5, 255
; RV64V-ONLY-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64V-ONLY-NEXT: vmv.s.x v0, t5
; RV64V-ONLY-NEXT: lbu t5, 12(a0)
; RV64V-ONLY-NEXT: lbu t6, 13(a0)
; RV64V-ONLY-NEXT: lbu s0, 14(a0)
; RV64V-ONLY-NEXT: lbu a0, 15(a0)
; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; RV64V-ONLY-NEXT: vmv.v.x v8, t1
; RV64V-ONLY-NEXT: vmv.v.x v9, a1
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t2
; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a2
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t3
; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a3
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t4
; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a4
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t5
; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a5
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t6
; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a6
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, s0
; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a7
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0
; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, t0
; RV64V-ONLY-NEXT: vslidedown.vi v8, v9, 8, v0.t
; RV64V-ONLY-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
; RV64V-ONLY-NEXT: .cfi_restore s0
; RV64V-ONLY-NEXT: addi sp, sp, 16
; RV64V-ONLY-NEXT: .cfi_def_cfa_offset 0
; RV64V-ONLY-NEXT: ret
;
; RVA22U64-LABEL: buildvec_v16i8_loads_contigous:
; RVA22U64: # %bb.0:
; RVA22U64-NEXT: lbu a6, 0(a0)
; RVA22U64-NEXT: lbu a2, 1(a0)
; RVA22U64-NEXT: lbu a3, 2(a0)
; RVA22U64-NEXT: lbu a4, 3(a0)
; RVA22U64-NEXT: lbu a5, 4(a0)
; RVA22U64-NEXT: lbu a1, 5(a0)
; RVA22U64-NEXT: lbu a7, 6(a0)
; RVA22U64-NEXT: lbu t0, 7(a0)
; RVA22U64-NEXT: slli a2, a2, 8
; RVA22U64-NEXT: slli a3, a3, 16
; RVA22U64-NEXT: slli a4, a4, 24
; RVA22U64-NEXT: slli a5, a5, 32
; RVA22U64-NEXT: slli a1, a1, 40
; RVA22U64-NEXT: or a6, a6, a2
; RVA22U64-NEXT: or t2, a4, a3
; RVA22U64-NEXT: or t1, a1, a5
; RVA22U64-NEXT: lbu a4, 8(a0)
; RVA22U64-NEXT: lbu a5, 9(a0)
; RVA22U64-NEXT: lbu a2, 10(a0)
; RVA22U64-NEXT: lbu a1, 11(a0)
; RVA22U64-NEXT: slli a7, a7, 48
; RVA22U64-NEXT: slli t0, t0, 56
; RVA22U64-NEXT: slli a5, a5, 8
; RVA22U64-NEXT: slli a2, a2, 16
; RVA22U64-NEXT: slli a1, a1, 24
; RVA22U64-NEXT: or a7, t0, a7
; RVA22U64-NEXT: or a4, a4, a5
; RVA22U64-NEXT: or a1, a1, a2
; RVA22U64-NEXT: lbu a2, 12(a0)
; RVA22U64-NEXT: lbu a5, 13(a0)
; RVA22U64-NEXT: lbu a3, 14(a0)
; RVA22U64-NEXT: lbu a0, 15(a0)
; RVA22U64-NEXT: slli a2, a2, 32
; RVA22U64-NEXT: slli a5, a5, 40
; RVA22U64-NEXT: or a2, a2, a5
; RVA22U64-NEXT: slli a3, a3, 48
; RVA22U64-NEXT: slli a0, a0, 56
; RVA22U64-NEXT: or a0, a0, a3
; RVA22U64-NEXT: or a3, a6, t2
; RVA22U64-NEXT: or a5, a7, t1
; RVA22U64-NEXT: or a1, a1, a4
; RVA22U64-NEXT: or a0, a0, a2
; RVA22U64-NEXT: or a3, a3, a5
; RVA22U64-NEXT: or a0, a0, a1
; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RVA22U64-NEXT: vmv.v.x v8, a3
; RVA22U64-NEXT: vslide1down.vx v8, v8, a0
; RVA22U64-NEXT: ret
;
; RVA22U64-PACK-LABEL: buildvec_v16i8_loads_contigous:
; RVA22U64-PACK: # %bb.0:
; RVA22U64-PACK-NEXT: lbu a1, 0(a0)
; RVA22U64-PACK-NEXT: lbu a2, 1(a0)
; RVA22U64-PACK-NEXT: lbu a6, 2(a0)
; RVA22U64-PACK-NEXT: lbu a7, 3(a0)
; RVA22U64-PACK-NEXT: lbu t0, 4(a0)
; RVA22U64-PACK-NEXT: lbu a3, 5(a0)
; RVA22U64-PACK-NEXT: lbu a4, 6(a0)
; RVA22U64-PACK-NEXT: lbu a5, 7(a0)
; RVA22U64-PACK-NEXT: packh t1, a1, a2
; RVA22U64-PACK-NEXT: lbu t2, 8(a0)
; RVA22U64-PACK-NEXT: lbu t3, 9(a0)
; RVA22U64-PACK-NEXT: lbu t4, 10(a0)
; RVA22U64-PACK-NEXT: lbu a1, 11(a0)
; RVA22U64-PACK-NEXT: packh a6, a6, a7
; RVA22U64-PACK-NEXT: packh a7, t0, a3
; RVA22U64-PACK-NEXT: packh t0, a4, a5
; RVA22U64-PACK-NEXT: lbu a5, 12(a0)
; RVA22U64-PACK-NEXT: lbu a3, 13(a0)
; RVA22U64-PACK-NEXT: lbu a2, 14(a0)
; RVA22U64-PACK-NEXT: lbu a0, 15(a0)
; RVA22U64-PACK-NEXT: packh a4, t2, t3
; RVA22U64-PACK-NEXT: packh a1, t4, a1
; RVA22U64-PACK-NEXT: packh a3, a5, a3
; RVA22U64-PACK-NEXT: packh a0, a2, a0
; RVA22U64-PACK-NEXT: packw a2, t1, a6
; RVA22U64-PACK-NEXT: packw a5, a7, t0
; RVA22U64-PACK-NEXT: packw a1, a4, a1
; RVA22U64-PACK-NEXT: packw a0, a3, a0
; RVA22U64-PACK-NEXT: pack a2, a2, a5
; RVA22U64-PACK-NEXT: pack a0, a1, a0
; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RVA22U64-PACK-NEXT: vmv.v.x v8, a2
; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a0
; RVA22U64-PACK-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_v16i8_loads_contigous:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: addi sp, sp, -16
; RV64ZVE32-NEXT: .cfi_def_cfa_offset 16
; RV64ZVE32-NEXT: sd s0, 8(sp) # 8-byte Folded Spill
; RV64ZVE32-NEXT: .cfi_offset s0, -8
; RV64ZVE32-NEXT: lbu a1, 0(a0)
; RV64ZVE32-NEXT: lbu a2, 1(a0)
; RV64ZVE32-NEXT: lbu a3, 2(a0)
; RV64ZVE32-NEXT: lbu a4, 3(a0)
; RV64ZVE32-NEXT: lbu a5, 4(a0)
; RV64ZVE32-NEXT: lbu a6, 5(a0)
; RV64ZVE32-NEXT: lbu a7, 6(a0)
; RV64ZVE32-NEXT: lbu t0, 7(a0)
; RV64ZVE32-NEXT: lbu t1, 8(a0)
; RV64ZVE32-NEXT: lbu t2, 9(a0)
; RV64ZVE32-NEXT: lbu t3, 10(a0)
; RV64ZVE32-NEXT: lbu t4, 11(a0)
; RV64ZVE32-NEXT: li t5, 255
; RV64ZVE32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32-NEXT: vmv.s.x v0, t5
; RV64ZVE32-NEXT: lbu t5, 12(a0)
; RV64ZVE32-NEXT: lbu t6, 13(a0)
; RV64ZVE32-NEXT: lbu s0, 14(a0)
; RV64ZVE32-NEXT: lbu a0, 15(a0)
; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; RV64ZVE32-NEXT: vmv.v.x v8, t1
; RV64ZVE32-NEXT: vmv.v.x v9, a1
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t2
; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a2
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t3
; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a3
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t4
; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a4
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t5
; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a5
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t6
; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a6
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, s0
; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a7
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0
; RV64ZVE32-NEXT: vslide1down.vx v9, v9, t0
; RV64ZVE32-NEXT: vslidedown.vi v8, v9, 8, v0.t
; RV64ZVE32-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
; RV64ZVE32-NEXT: .cfi_restore s0
; RV64ZVE32-NEXT: addi sp, sp, 16
; RV64ZVE32-NEXT: .cfi_def_cfa_offset 0
; RV64ZVE32-NEXT: ret
%p2 = getelementptr i8, ptr %p, i32 1
%p3 = getelementptr i8, ptr %p, i32 2
%p4 = getelementptr i8, ptr %p, i32 3
%p5 = getelementptr i8, ptr %p, i32 4
%p6 = getelementptr i8, ptr %p, i32 5
%p7 = getelementptr i8, ptr %p, i32 6
%p8 = getelementptr i8, ptr %p, i32 7
%p9 = getelementptr i8, ptr %p, i32 8
%p10 = getelementptr i8, ptr %p, i32 9
%p11 = getelementptr i8, ptr %p, i32 10
%p12 = getelementptr i8, ptr %p, i32 11
%p13 = getelementptr i8, ptr %p, i32 12
%p14 = getelementptr i8, ptr %p, i32 13
%p15 = getelementptr i8, ptr %p, i32 14
%p16 = getelementptr i8, ptr %p, i32 15
%ld1 = load i8, ptr %p
%ld2 = load i8, ptr %p2
%ld3 = load i8, ptr %p3
%ld4 = load i8, ptr %p4
%ld5 = load i8, ptr %p5
%ld6 = load i8, ptr %p6
%ld7 = load i8, ptr %p7
%ld8 = load i8, ptr %p8
%ld9 = load i8, ptr %p9
%ld10 = load i8, ptr %p10
%ld11 = load i8, ptr %p11
%ld12 = load i8, ptr %p12
%ld13 = load i8, ptr %p13
%ld14 = load i8, ptr %p14
%ld15 = load i8, ptr %p15
%ld16 = load i8, ptr %p16
%v1 = insertelement <16 x i8> poison, i8 %ld1, i32 0
%v2 = insertelement <16 x i8> %v1, i8 %ld2, i32 1
%v3 = insertelement <16 x i8> %v2, i8 %ld3, i32 2
%v4 = insertelement <16 x i8> %v3, i8 %ld4, i32 3
%v5 = insertelement <16 x i8> %v4, i8 %ld5, i32 4
%v6 = insertelement <16 x i8> %v5, i8 %ld6, i32 5
%v7 = insertelement <16 x i8> %v6, i8 %ld7, i32 6
%v8 = insertelement <16 x i8> %v7, i8 %ld8, i32 7
%v9 = insertelement <16 x i8> %v8, i8 %ld9, i32 8
%v10 = insertelement <16 x i8> %v9, i8 %ld10, i32 9
%v11 = insertelement <16 x i8> %v10, i8 %ld11, i32 10
%v12 = insertelement <16 x i8> %v11, i8 %ld12, i32 11
%v13 = insertelement <16 x i8> %v12, i8 %ld13, i32 12
%v14 = insertelement <16 x i8> %v13, i8 %ld14, i32 13
%v15 = insertelement <16 x i8> %v14, i8 %ld15, i32 14
%v16 = insertelement <16 x i8> %v15, i8 %ld16, i32 15
ret <16 x i8> %v16
}
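; Same build as above, but the 16 bytes are loaded from scattered offsets, so every element still requires its own scalar load before being packed or slid into the vector.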
define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) {
; RV32-ONLY-LABEL: buildvec_v16i8_loads_gather:
; RV32-ONLY: # %bb.0:
; RV32-ONLY-NEXT: addi sp, sp, -16
; RV32-ONLY-NEXT: .cfi_def_cfa_offset 16
; RV32-ONLY-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32-ONLY-NEXT: .cfi_offset s0, -4
; RV32-ONLY-NEXT: lbu a1, 0(a0)
; RV32-ONLY-NEXT: lbu a2, 1(a0)
; RV32-ONLY-NEXT: lbu a3, 22(a0)
; RV32-ONLY-NEXT: lbu a4, 31(a0)
; RV32-ONLY-NEXT: lbu a5, 623(a0)
; RV32-ONLY-NEXT: lbu a6, 44(a0)
; RV32-ONLY-NEXT: lbu a7, 55(a0)
; RV32-ONLY-NEXT: lbu t0, 75(a0)
; RV32-ONLY-NEXT: lbu t1, 82(a0)
; RV32-ONLY-NEXT: lbu t2, 154(a0)
; RV32-ONLY-NEXT: lbu t3, 161(a0)
; RV32-ONLY-NEXT: lbu t4, 163(a0)
; RV32-ONLY-NEXT: li t5, 255
; RV32-ONLY-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV32-ONLY-NEXT: vmv.s.x v0, t5
; RV32-ONLY-NEXT: lbu t5, 93(a0)
; RV32-ONLY-NEXT: lbu t6, 105(a0)
; RV32-ONLY-NEXT: lbu s0, 124(a0)
; RV32-ONLY-NEXT: lbu a0, 144(a0)
; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; RV32-ONLY-NEXT: vmv.v.x v8, t1
; RV32-ONLY-NEXT: vmv.v.x v9, a1
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t5
; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a2
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t6
; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a3
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t3
; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a4
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, s0
; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a6
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t4
; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a7
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a0
; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a5
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t2
; RV32-ONLY-NEXT: vslide1down.vx v9, v9, t0
; RV32-ONLY-NEXT: vslidedown.vi v8, v9, 8, v0.t
; RV32-ONLY-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32-ONLY-NEXT: .cfi_restore s0
; RV32-ONLY-NEXT: addi sp, sp, 16
; RV32-ONLY-NEXT: .cfi_def_cfa_offset 0
; RV32-ONLY-NEXT: ret
;
; RV32VB-LABEL: buildvec_v16i8_loads_gather:
; RV32VB: # %bb.0:
; RV32VB-NEXT: lbu a1, 0(a0)
; RV32VB-NEXT: lbu a2, 1(a0)
; RV32VB-NEXT: lbu a3, 22(a0)
; RV32VB-NEXT: lbu a4, 31(a0)
; RV32VB-NEXT: lbu a5, 623(a0)
; RV32VB-NEXT: lbu a6, 44(a0)
; RV32VB-NEXT: lbu a7, 55(a0)
; RV32VB-NEXT: lbu t0, 75(a0)
; RV32VB-NEXT: lbu t1, 82(a0)
; RV32VB-NEXT: slli a2, a2, 8
; RV32VB-NEXT: slli a3, a3, 16
; RV32VB-NEXT: slli a4, a4, 24
; RV32VB-NEXT: slli a7, a7, 8
; RV32VB-NEXT: or a1, a1, a2
; RV32VB-NEXT: or a3, a4, a3
; RV32VB-NEXT: or a2, a6, a7
; RV32VB-NEXT: lbu a4, 93(a0)
; RV32VB-NEXT: lbu a6, 105(a0)
; RV32VB-NEXT: lbu a7, 124(a0)
; RV32VB-NEXT: lbu t2, 144(a0)
; RV32VB-NEXT: slli a5, a5, 16
; RV32VB-NEXT: slli t0, t0, 24
; RV32VB-NEXT: slli a4, a4, 8
; RV32VB-NEXT: or a5, t0, a5
; RV32VB-NEXT: or a4, t1, a4
; RV32VB-NEXT: lbu t0, 161(a0)
; RV32VB-NEXT: lbu t1, 154(a0)
; RV32VB-NEXT: lbu a0, 163(a0)
; RV32VB-NEXT: slli a6, a6, 16
; RV32VB-NEXT: slli t0, t0, 24
; RV32VB-NEXT: or a6, t0, a6
; RV32VB-NEXT: slli a0, a0, 8
; RV32VB-NEXT: or a0, a7, a0
; RV32VB-NEXT: slli t2, t2, 16
; RV32VB-NEXT: slli t1, t1, 24
; RV32VB-NEXT: or a7, t1, t2
; RV32VB-NEXT: or a1, a1, a3
; RV32VB-NEXT: or a2, a2, a5
; RV32VB-NEXT: or a3, a4, a6
; RV32VB-NEXT: or a0, a0, a7
; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32VB-NEXT: vmv.v.x v8, a1
; RV32VB-NEXT: vslide1down.vx v8, v8, a2
; RV32VB-NEXT: vslide1down.vx v8, v8, a3
; RV32VB-NEXT: vslide1down.vx v8, v8, a0
; RV32VB-NEXT: ret
;
; RV32VB-PACK-LABEL: buildvec_v16i8_loads_gather:
; RV32VB-PACK: # %bb.0:
; RV32VB-PACK-NEXT: lbu a1, 0(a0)
; RV32VB-PACK-NEXT: lbu a2, 1(a0)
; RV32VB-PACK-NEXT: lbu a3, 22(a0)
; RV32VB-PACK-NEXT: lbu a4, 31(a0)
; RV32VB-PACK-NEXT: lbu a5, 623(a0)
; RV32VB-PACK-NEXT: lbu a6, 44(a0)
; RV32VB-PACK-NEXT: lbu a7, 55(a0)
; RV32VB-PACK-NEXT: lbu t0, 75(a0)
; RV32VB-PACK-NEXT: lbu t1, 82(a0)
; RV32VB-PACK-NEXT: packh a1, a1, a2
; RV32VB-PACK-NEXT: lbu a2, 154(a0)
; RV32VB-PACK-NEXT: lbu t2, 161(a0)
; RV32VB-PACK-NEXT: lbu t3, 163(a0)
; RV32VB-PACK-NEXT: packh a3, a3, a4
; RV32VB-PACK-NEXT: packh a4, a6, a7
; RV32VB-PACK-NEXT: packh a5, a5, t0
; RV32VB-PACK-NEXT: lbu a6, 93(a0)
; RV32VB-PACK-NEXT: lbu a7, 105(a0)
; RV32VB-PACK-NEXT: lbu t0, 124(a0)
; RV32VB-PACK-NEXT: lbu a0, 144(a0)
; RV32VB-PACK-NEXT: packh a6, t1, a6
; RV32VB-PACK-NEXT: packh a7, a7, t2
; RV32VB-PACK-NEXT: packh t0, t0, t3
; RV32VB-PACK-NEXT: packh a0, a0, a2
; RV32VB-PACK-NEXT: pack a1, a1, a3
; RV32VB-PACK-NEXT: pack a2, a4, a5
; RV32VB-PACK-NEXT: pack a3, a6, a7
; RV32VB-PACK-NEXT: pack a0, t0, a0
; RV32VB-PACK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32VB-PACK-NEXT: vmv.v.x v8, a1
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a2
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a3
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0
; RV32VB-PACK-NEXT: ret
;
; RV64V-ONLY-LABEL: buildvec_v16i8_loads_gather:
; RV64V-ONLY: # %bb.0:
; RV64V-ONLY-NEXT: addi sp, sp, -16
; RV64V-ONLY-NEXT: .cfi_def_cfa_offset 16
; RV64V-ONLY-NEXT: sd s0, 8(sp) # 8-byte Folded Spill
; RV64V-ONLY-NEXT: .cfi_offset s0, -8
; RV64V-ONLY-NEXT: lbu a1, 0(a0)
; RV64V-ONLY-NEXT: lbu a2, 1(a0)
; RV64V-ONLY-NEXT: lbu a3, 22(a0)
; RV64V-ONLY-NEXT: lbu a4, 31(a0)
; RV64V-ONLY-NEXT: lbu a5, 623(a0)
; RV64V-ONLY-NEXT: lbu a6, 44(a0)
; RV64V-ONLY-NEXT: lbu a7, 55(a0)
; RV64V-ONLY-NEXT: lbu t0, 75(a0)
; RV64V-ONLY-NEXT: lbu t1, 82(a0)
; RV64V-ONLY-NEXT: lbu t2, 154(a0)
; RV64V-ONLY-NEXT: lbu t3, 161(a0)
; RV64V-ONLY-NEXT: lbu t4, 163(a0)
; RV64V-ONLY-NEXT: li t5, 255
; RV64V-ONLY-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64V-ONLY-NEXT: vmv.s.x v0, t5
; RV64V-ONLY-NEXT: lbu t5, 93(a0)
; RV64V-ONLY-NEXT: lbu t6, 105(a0)
; RV64V-ONLY-NEXT: lbu s0, 124(a0)
; RV64V-ONLY-NEXT: lbu a0, 144(a0)
; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; RV64V-ONLY-NEXT: vmv.v.x v8, t1
; RV64V-ONLY-NEXT: vmv.v.x v9, a1
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t5
; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a2
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t6
; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a3
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t3
; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a4
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, s0
; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a6
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t4
; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a7
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0
; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a5
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t2
; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, t0
; RV64V-ONLY-NEXT: vslidedown.vi v8, v9, 8, v0.t
; RV64V-ONLY-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
; RV64V-ONLY-NEXT: .cfi_restore s0
; RV64V-ONLY-NEXT: addi sp, sp, 16
; RV64V-ONLY-NEXT: .cfi_def_cfa_offset 0
; RV64V-ONLY-NEXT: ret
;
; RVA22U64-LABEL: buildvec_v16i8_loads_gather:
; RVA22U64: # %bb.0:
; RVA22U64-NEXT: lbu a7, 0(a0)
; RVA22U64-NEXT: lbu a2, 1(a0)
; RVA22U64-NEXT: lbu a3, 22(a0)
; RVA22U64-NEXT: lbu a4, 31(a0)
; RVA22U64-NEXT: lbu a6, 623(a0)
; RVA22U64-NEXT: lbu a5, 44(a0)
; RVA22U64-NEXT: lbu a1, 55(a0)
; RVA22U64-NEXT: lbu t0, 75(a0)
; RVA22U64-NEXT: lbu t1, 82(a0)
; RVA22U64-NEXT: slli a2, a2, 8
; RVA22U64-NEXT: slli a3, a3, 16
; RVA22U64-NEXT: slli a4, a4, 24
; RVA22U64-NEXT: slli a5, a5, 32
; RVA22U64-NEXT: slli a1, a1, 40
; RVA22U64-NEXT: or a7, a7, a2
; RVA22U64-NEXT: or t3, a4, a3
; RVA22U64-NEXT: or t2, a1, a5
; RVA22U64-NEXT: lbu a4, 93(a0)
; RVA22U64-NEXT: lbu t4, 105(a0)
; RVA22U64-NEXT: lbu a2, 124(a0)
; RVA22U64-NEXT: lbu t5, 144(a0)
; RVA22U64-NEXT: slli a6, a6, 48
; RVA22U64-NEXT: slli t0, t0, 56
; RVA22U64-NEXT: slli a4, a4, 8
; RVA22U64-NEXT: or a3, t0, a6
; RVA22U64-NEXT: or a4, t1, a4
; RVA22U64-NEXT: lbu a5, 161(a0)
; RVA22U64-NEXT: lbu a1, 154(a0)
; RVA22U64-NEXT: lbu a0, 163(a0)
; RVA22U64-NEXT: slli t4, t4, 16
; RVA22U64-NEXT: slli a5, a5, 24
; RVA22U64-NEXT: or a5, a5, t4
; RVA22U64-NEXT: slli a2, a2, 32
; RVA22U64-NEXT: slli a0, a0, 40
; RVA22U64-NEXT: or a0, a0, a2
; RVA22U64-NEXT: slli t5, t5, 48
; RVA22U64-NEXT: slli a1, a1, 56
; RVA22U64-NEXT: or a1, a1, t5
; RVA22U64-NEXT: or a2, a7, t3
; RVA22U64-NEXT: or a3, a3, t2
; RVA22U64-NEXT: or a4, a4, a5
; RVA22U64-NEXT: or a0, a0, a1
; RVA22U64-NEXT: or a2, a2, a3
; RVA22U64-NEXT: or a0, a0, a4
; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RVA22U64-NEXT: vmv.v.x v8, a2
; RVA22U64-NEXT: vslide1down.vx v8, v8, a0
; RVA22U64-NEXT: ret
;
; RVA22U64-PACK-LABEL: buildvec_v16i8_loads_gather:
; RVA22U64-PACK: # %bb.0:
; RVA22U64-PACK-NEXT: addi sp, sp, -16
; RVA22U64-PACK-NEXT: .cfi_def_cfa_offset 16
; RVA22U64-PACK-NEXT: sd s0, 8(sp) # 8-byte Folded Spill
; RVA22U64-PACK-NEXT: .cfi_offset s0, -8
; RVA22U64-PACK-NEXT: lbu a1, 0(a0)
; RVA22U64-PACK-NEXT: lbu a2, 1(a0)
; RVA22U64-PACK-NEXT: lbu a6, 22(a0)
; RVA22U64-PACK-NEXT: lbu a7, 31(a0)
; RVA22U64-PACK-NEXT: lbu t0, 623(a0)
; RVA22U64-PACK-NEXT: lbu t3, 44(a0)
; RVA22U64-PACK-NEXT: lbu t4, 55(a0)
; RVA22U64-PACK-NEXT: lbu t5, 75(a0)
; RVA22U64-PACK-NEXT: lbu t1, 82(a0)
; RVA22U64-PACK-NEXT: packh t2, a1, a2
; RVA22U64-PACK-NEXT: lbu t6, 154(a0)
; RVA22U64-PACK-NEXT: lbu s0, 161(a0)
; RVA22U64-PACK-NEXT: lbu a3, 163(a0)
; RVA22U64-PACK-NEXT: packh a6, a6, a7
; RVA22U64-PACK-NEXT: packh a7, t3, t4
; RVA22U64-PACK-NEXT: packh a2, t0, t5
; RVA22U64-PACK-NEXT: lbu a4, 93(a0)
; RVA22U64-PACK-NEXT: lbu a5, 105(a0)
; RVA22U64-PACK-NEXT: lbu a1, 124(a0)
; RVA22U64-PACK-NEXT: lbu a0, 144(a0)
; RVA22U64-PACK-NEXT: packh a4, t1, a4
; RVA22U64-PACK-NEXT: packh a5, a5, s0
; RVA22U64-PACK-NEXT: packh a1, a1, a3
; RVA22U64-PACK-NEXT: packh a0, a0, t6
; RVA22U64-PACK-NEXT: packw a3, t2, a6
; RVA22U64-PACK-NEXT: packw a2, a7, a2
; RVA22U64-PACK-NEXT: packw a4, a4, a5
; RVA22U64-PACK-NEXT: packw a0, a1, a0
; RVA22U64-PACK-NEXT: pack a1, a3, a2
; RVA22U64-PACK-NEXT: pack a0, a4, a0
; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RVA22U64-PACK-NEXT: vmv.v.x v8, a1
; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a0
; RVA22U64-PACK-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
; RVA22U64-PACK-NEXT: .cfi_restore s0
; RVA22U64-PACK-NEXT: addi sp, sp, 16
; RVA22U64-PACK-NEXT: .cfi_def_cfa_offset 0
; RVA22U64-PACK-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_v16i8_loads_gather:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: addi sp, sp, -16
; RV64ZVE32-NEXT: .cfi_def_cfa_offset 16
; RV64ZVE32-NEXT: sd s0, 8(sp) # 8-byte Folded Spill
; RV64ZVE32-NEXT: .cfi_offset s0, -8
; RV64ZVE32-NEXT: lbu a1, 0(a0)
; RV64ZVE32-NEXT: lbu a2, 1(a0)
; RV64ZVE32-NEXT: lbu a3, 22(a0)
; RV64ZVE32-NEXT: lbu a4, 31(a0)
; RV64ZVE32-NEXT: lbu a5, 623(a0)
; RV64ZVE32-NEXT: lbu a6, 44(a0)
; RV64ZVE32-NEXT: lbu a7, 55(a0)
; RV64ZVE32-NEXT: lbu t0, 75(a0)
; RV64ZVE32-NEXT: lbu t1, 82(a0)
; RV64ZVE32-NEXT: lbu t2, 154(a0)
; RV64ZVE32-NEXT: lbu t3, 161(a0)
; RV64ZVE32-NEXT: lbu t4, 163(a0)
; RV64ZVE32-NEXT: li t5, 255
; RV64ZVE32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32-NEXT: vmv.s.x v0, t5
; RV64ZVE32-NEXT: lbu t5, 93(a0)
; RV64ZVE32-NEXT: lbu t6, 105(a0)
; RV64ZVE32-NEXT: lbu s0, 124(a0)
; RV64ZVE32-NEXT: lbu a0, 144(a0)
; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; RV64ZVE32-NEXT: vmv.v.x v8, t1
; RV64ZVE32-NEXT: vmv.v.x v9, a1
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t5
; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a2
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t6
; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a3
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t3
; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a4
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, s0
; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a6
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t4
; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a7
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0
; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a5
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t2
; RV64ZVE32-NEXT: vslide1down.vx v9, v9, t0
; RV64ZVE32-NEXT: vslidedown.vi v8, v9, 8, v0.t
; RV64ZVE32-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
; RV64ZVE32-NEXT: .cfi_restore s0
; RV64ZVE32-NEXT: addi sp, sp, 16
; RV64ZVE32-NEXT: .cfi_def_cfa_offset 0
; RV64ZVE32-NEXT: ret
%p2 = getelementptr i8, ptr %p, i32 1
%p3 = getelementptr i8, ptr %p, i32 22
%p4 = getelementptr i8, ptr %p, i32 31
%p5 = getelementptr i8, ptr %p, i32 44
%p6 = getelementptr i8, ptr %p, i32 55
%p7 = getelementptr i8, ptr %p, i32 623
%p8 = getelementptr i8, ptr %p, i32 75
%p9 = getelementptr i8, ptr %p, i32 82
%p10 = getelementptr i8, ptr %p, i32 93
%p11 = getelementptr i8, ptr %p, i32 105
%p12 = getelementptr i8, ptr %p, i32 161
%p13 = getelementptr i8, ptr %p, i32 124
%p14 = getelementptr i8, ptr %p, i32 163
%p15 = getelementptr i8, ptr %p, i32 144
%p16 = getelementptr i8, ptr %p, i32 154
%ld1 = load i8, ptr %p
%ld2 = load i8, ptr %p2
%ld3 = load i8, ptr %p3
%ld4 = load i8, ptr %p4
%ld5 = load i8, ptr %p5
%ld6 = load i8, ptr %p6
%ld7 = load i8, ptr %p7
%ld8 = load i8, ptr %p8
%ld9 = load i8, ptr %p9
%ld10 = load i8, ptr %p10
%ld11 = load i8, ptr %p11
%ld12 = load i8, ptr %p12
%ld13 = load i8, ptr %p13
%ld14 = load i8, ptr %p14
%ld15 = load i8, ptr %p15
%ld16 = load i8, ptr %p16
%v1 = insertelement <16 x i8> poison, i8 %ld1, i32 0
%v2 = insertelement <16 x i8> %v1, i8 %ld2, i32 1
%v3 = insertelement <16 x i8> %v2, i8 %ld3, i32 2
%v4 = insertelement <16 x i8> %v3, i8 %ld4, i32 3
%v5 = insertelement <16 x i8> %v4, i8 %ld5, i32 4
%v6 = insertelement <16 x i8> %v5, i8 %ld6, i32 5
%v7 = insertelement <16 x i8> %v6, i8 %ld7, i32 6
%v8 = insertelement <16 x i8> %v7, i8 %ld8, i32 7
%v9 = insertelement <16 x i8> %v8, i8 %ld9, i32 8
%v10 = insertelement <16 x i8> %v9, i8 %ld10, i32 9
%v11 = insertelement <16 x i8> %v10, i8 %ld11, i32 10
%v12 = insertelement <16 x i8> %v11, i8 %ld12, i32 11
%v13 = insertelement <16 x i8> %v12, i8 %ld13, i32 12
%v14 = insertelement <16 x i8> %v13, i8 %ld14, i32 13
%v15 = insertelement <16 x i8> %v14, i8 %ld15, i32 14
%v16 = insertelement <16 x i8> %v15, i8 %ld16, i32 15
ret <16 x i8> %v16
}
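; Only elements 8..15 are defined; the low half of the vector is undef, so only the upper 64 bits need to be materialized.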
define <16 x i8> @buildvec_v16i8_undef_low_half(ptr %p) {
; RV32-ONLY-LABEL: buildvec_v16i8_undef_low_half:
; RV32-ONLY: # %bb.0:
; RV32-ONLY-NEXT: lbu a1, 82(a0)
; RV32-ONLY-NEXT: lbu a2, 93(a0)
; RV32-ONLY-NEXT: lbu a3, 105(a0)
; RV32-ONLY-NEXT: lbu a4, 124(a0)
; RV32-ONLY-NEXT: lbu a5, 144(a0)
; RV32-ONLY-NEXT: lbu a6, 154(a0)
; RV32-ONLY-NEXT: lbu a7, 161(a0)
; RV32-ONLY-NEXT: lbu a0, 163(a0)
; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV32-ONLY-NEXT: vmv.v.x v8, a1
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a0
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a5
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a6
; RV32-ONLY-NEXT: ret
;
; RV32VB-LABEL: buildvec_v16i8_undef_low_half:
; RV32VB: # %bb.0:
; RV32VB-NEXT: lbu a1, 93(a0)
; RV32VB-NEXT: lbu a2, 82(a0)
; RV32VB-NEXT: lbu a3, 105(a0)
; RV32VB-NEXT: lbu a4, 124(a0)
; RV32VB-NEXT: slli a1, a1, 8
; RV32VB-NEXT: or a1, a2, a1
; RV32VB-NEXT: lbu a2, 161(a0)
; RV32VB-NEXT: lbu a5, 144(a0)
; RV32VB-NEXT: lbu a6, 154(a0)
; RV32VB-NEXT: lbu a0, 163(a0)
; RV32VB-NEXT: slli a3, a3, 16
; RV32VB-NEXT: slli a2, a2, 24
; RV32VB-NEXT: or a2, a2, a3
; RV32VB-NEXT: slli a0, a0, 8
; RV32VB-NEXT: or a0, a4, a0
; RV32VB-NEXT: slli a5, a5, 16
; RV32VB-NEXT: slli a6, a6, 24
; RV32VB-NEXT: or a3, a6, a5
; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32VB-NEXT: vmv.v.i v8, 0
; RV32VB-NEXT: or a1, a1, a2
; RV32VB-NEXT: or a0, a0, a3
; RV32VB-NEXT: vslide1down.vx v8, v8, zero
; RV32VB-NEXT: vslide1down.vx v8, v8, a1
; RV32VB-NEXT: vslide1down.vx v8, v8, a0
; RV32VB-NEXT: ret
;
; RV32VB-PACK-LABEL: buildvec_v16i8_undef_low_half:
; RV32VB-PACK: # %bb.0:
; RV32VB-PACK-NEXT: lbu a1, 82(a0)
; RV32VB-PACK-NEXT: lbu a2, 93(a0)
; RV32VB-PACK-NEXT: lbu a3, 105(a0)
; RV32VB-PACK-NEXT: lbu a4, 124(a0)
; RV32VB-PACK-NEXT: lbu a5, 161(a0)
; RV32VB-PACK-NEXT: lbu a6, 163(a0)
; RV32VB-PACK-NEXT: lbu a7, 144(a0)
; RV32VB-PACK-NEXT: lbu a0, 154(a0)
; RV32VB-PACK-NEXT: packh a1, a1, a2
; RV32VB-PACK-NEXT: packh a2, a3, a5
; RV32VB-PACK-NEXT: packh a3, a4, a6
; RV32VB-PACK-NEXT: packh a0, a7, a0
; RV32VB-PACK-NEXT: pack a1, a1, a2
; RV32VB-PACK-NEXT: packh a2, a0, a0
; RV32VB-PACK-NEXT: pack a2, a2, a2
; RV32VB-PACK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32VB-PACK-NEXT: vmv.v.x v8, a2
; RV32VB-PACK-NEXT: pack a0, a3, a0
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a2
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a1
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0
; RV32VB-PACK-NEXT: ret
;
; RV64V-ONLY-LABEL: buildvec_v16i8_undef_low_half:
; RV64V-ONLY: # %bb.0:
; RV64V-ONLY-NEXT: lbu a1, 82(a0)
; RV64V-ONLY-NEXT: lbu a2, 93(a0)
; RV64V-ONLY-NEXT: lbu a3, 105(a0)
; RV64V-ONLY-NEXT: lbu a4, 124(a0)
; RV64V-ONLY-NEXT: lbu a5, 144(a0)
; RV64V-ONLY-NEXT: lbu a6, 154(a0)
; RV64V-ONLY-NEXT: lbu a7, 161(a0)
; RV64V-ONLY-NEXT: lbu a0, 163(a0)
; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV64V-ONLY-NEXT: vmv.v.x v8, a1
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a5
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a6
; RV64V-ONLY-NEXT: ret
;
; RVA22U64-LABEL: buildvec_v16i8_undef_low_half:
; RVA22U64: # %bb.0:
; RVA22U64-NEXT: lbu a1, 93(a0)
; RVA22U64-NEXT: lbu a2, 82(a0)
; RVA22U64-NEXT: lbu a3, 105(a0)
; RVA22U64-NEXT: lbu a4, 124(a0)
; RVA22U64-NEXT: slli a1, a1, 8
; RVA22U64-NEXT: or a6, a2, a1
; RVA22U64-NEXT: lbu a2, 161(a0)
; RVA22U64-NEXT: lbu a5, 144(a0)
; RVA22U64-NEXT: lbu a1, 154(a0)
; RVA22U64-NEXT: lbu a0, 163(a0)
; RVA22U64-NEXT: slli a3, a3, 16
; RVA22U64-NEXT: slli a2, a2, 24
; RVA22U64-NEXT: or a2, a2, a3
; RVA22U64-NEXT: slli a4, a4, 32
; RVA22U64-NEXT: slli a0, a0, 40
; RVA22U64-NEXT: or a0, a0, a4
; RVA22U64-NEXT: slli a5, a5, 48
; RVA22U64-NEXT: slli a1, a1, 56
; RVA22U64-NEXT: or a1, a1, a5
; RVA22U64-NEXT: or a2, a6, a2
; RVA22U64-NEXT: or a0, a0, a1
; RVA22U64-NEXT: or a0, a0, a2
; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RVA22U64-NEXT: vmv.v.i v8, 0
; RVA22U64-NEXT: vslide1down.vx v8, v8, a0
; RVA22U64-NEXT: ret
;
; RVA22U64-PACK-LABEL: buildvec_v16i8_undef_low_half:
; RVA22U64-PACK: # %bb.0:
; RVA22U64-PACK-NEXT: lbu a6, 82(a0)
; RVA22U64-PACK-NEXT: lbu a7, 93(a0)
; RVA22U64-PACK-NEXT: lbu t0, 105(a0)
; RVA22U64-PACK-NEXT: lbu a4, 124(a0)
; RVA22U64-PACK-NEXT: lbu a5, 161(a0)
; RVA22U64-PACK-NEXT: lbu a1, 163(a0)
; RVA22U64-PACK-NEXT: lbu a2, 144(a0)
; RVA22U64-PACK-NEXT: lbu a0, 154(a0)
; RVA22U64-PACK-NEXT: packh a3, a6, a7
; RVA22U64-PACK-NEXT: packh a5, t0, a5
; RVA22U64-PACK-NEXT: packh a1, a4, a1
; RVA22U64-PACK-NEXT: packh a0, a2, a0
; RVA22U64-PACK-NEXT: packw a2, a3, a5
; RVA22U64-PACK-NEXT: packh a3, a0, a0
; RVA22U64-PACK-NEXT: packw a3, a3, a3
; RVA22U64-PACK-NEXT: pack a3, a3, a3
; RVA22U64-PACK-NEXT: packw a0, a1, a0
; RVA22U64-PACK-NEXT: pack a0, a2, a0
; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RVA22U64-PACK-NEXT: vmv.v.x v8, a3
; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a0
; RVA22U64-PACK-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_v16i8_undef_low_half:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: lbu a1, 82(a0)
; RV64ZVE32-NEXT: lbu a2, 93(a0)
; RV64ZVE32-NEXT: lbu a3, 105(a0)
; RV64ZVE32-NEXT: lbu a4, 124(a0)
; RV64ZVE32-NEXT: lbu a5, 144(a0)
; RV64ZVE32-NEXT: lbu a6, 154(a0)
; RV64ZVE32-NEXT: lbu a7, 161(a0)
; RV64ZVE32-NEXT: lbu a0, 163(a0)
; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV64ZVE32-NEXT: vmv.v.x v8, a1
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a5
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32-NEXT: ret
%p9 = getelementptr i8, ptr %p, i32 82
%p10 = getelementptr i8, ptr %p, i32 93
%p11 = getelementptr i8, ptr %p, i32 105
%p12 = getelementptr i8, ptr %p, i32 161
%p13 = getelementptr i8, ptr %p, i32 124
%p14 = getelementptr i8, ptr %p, i32 163
%p15 = getelementptr i8, ptr %p, i32 144
%p16 = getelementptr i8, ptr %p, i32 154
%ld9 = load i8, ptr %p9
%ld10 = load i8, ptr %p10
%ld11 = load i8, ptr %p11
%ld12 = load i8, ptr %p12
%ld13 = load i8, ptr %p13
%ld14 = load i8, ptr %p14
%ld15 = load i8, ptr %p15
%ld16 = load i8, ptr %p16
%v9 = insertelement <16 x i8> poison, i8 %ld9, i32 8
%v10 = insertelement <16 x i8> %v9, i8 %ld10, i32 9
%v11 = insertelement <16 x i8> %v10, i8 %ld11, i32 10
%v12 = insertelement <16 x i8> %v11, i8 %ld12, i32 11
%v13 = insertelement <16 x i8> %v12, i8 %ld13, i32 12
%v14 = insertelement <16 x i8> %v13, i8 %ld14, i32 13
%v15 = insertelement <16 x i8> %v14, i8 %ld15, i32 14
%v16 = insertelement <16 x i8> %v15, i8 %ld16, i32 15
ret <16 x i8> %v16
}
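; Only elements 0..7 are defined; the high half of the vector is undef and is filled with don't-care values.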
define <16 x i8> @buildvec_v16i8_undef_high_half(ptr %p) {
; RV32-ONLY-LABEL: buildvec_v16i8_undef_high_half:
; RV32-ONLY: # %bb.0:
; RV32-ONLY-NEXT: lbu a1, 0(a0)
; RV32-ONLY-NEXT: lbu a2, 1(a0)
; RV32-ONLY-NEXT: lbu a3, 22(a0)
; RV32-ONLY-NEXT: lbu a4, 31(a0)
; RV32-ONLY-NEXT: lbu a5, 623(a0)
; RV32-ONLY-NEXT: lbu a6, 44(a0)
; RV32-ONLY-NEXT: lbu a7, 55(a0)
; RV32-ONLY-NEXT: lbu a0, 75(a0)
; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV32-ONLY-NEXT: vmv.v.x v8, a1
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a6
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a5
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a0
; RV32-ONLY-NEXT: vslidedown.vi v8, v8, 8
; RV32-ONLY-NEXT: ret
;
; RV32VB-LABEL: buildvec_v16i8_undef_high_half:
; RV32VB: # %bb.0:
; RV32VB-NEXT: lbu a1, 1(a0)
; RV32VB-NEXT: lbu a2, 0(a0)
; RV32VB-NEXT: lbu a3, 22(a0)
; RV32VB-NEXT: lbu a4, 31(a0)
; RV32VB-NEXT: slli a1, a1, 8
; RV32VB-NEXT: or a1, a2, a1
; RV32VB-NEXT: lbu a2, 44(a0)
; RV32VB-NEXT: lbu a5, 55(a0)
; RV32VB-NEXT: slli a3, a3, 16
; RV32VB-NEXT: slli a4, a4, 24
; RV32VB-NEXT: or a3, a4, a3
; RV32VB-NEXT: lbu a4, 623(a0)
; RV32VB-NEXT: lbu a0, 75(a0)
; RV32VB-NEXT: slli a5, a5, 8
; RV32VB-NEXT: or a2, a2, a5
; RV32VB-NEXT: slli a4, a4, 16
; RV32VB-NEXT: slli a0, a0, 24
; RV32VB-NEXT: or a0, a0, a4
; RV32VB-NEXT: or a1, a1, a3
; RV32VB-NEXT: or a0, a2, a0
; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32VB-NEXT: vmv.v.x v8, a1
; RV32VB-NEXT: vslide1down.vx v8, v8, a0
; RV32VB-NEXT: vslide1down.vx v8, v8, zero
; RV32VB-NEXT: vslide1down.vx v8, v8, zero
; RV32VB-NEXT: ret
;
; RV32VB-PACK-LABEL: buildvec_v16i8_undef_high_half:
; RV32VB-PACK: # %bb.0:
; RV32VB-PACK-NEXT: lbu a1, 0(a0)
; RV32VB-PACK-NEXT: lbu a2, 1(a0)
; RV32VB-PACK-NEXT: lbu a3, 22(a0)
; RV32VB-PACK-NEXT: lbu a4, 31(a0)
; RV32VB-PACK-NEXT: lbu a5, 623(a0)
; RV32VB-PACK-NEXT: lbu a6, 44(a0)
; RV32VB-PACK-NEXT: lbu a7, 55(a0)
; RV32VB-PACK-NEXT: lbu a0, 75(a0)
; RV32VB-PACK-NEXT: packh a1, a1, a2
; RV32VB-PACK-NEXT: packh a2, a3, a4
; RV32VB-PACK-NEXT: packh a3, a6, a7
; RV32VB-PACK-NEXT: packh a0, a5, a0
; RV32VB-PACK-NEXT: pack a1, a1, a2
; RV32VB-PACK-NEXT: packh a2, a0, a0
; RV32VB-PACK-NEXT: pack a0, a3, a0
; RV32VB-PACK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32VB-PACK-NEXT: vmv.v.x v8, a1
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0
; RV32VB-PACK-NEXT: pack a0, a2, a2
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0
; RV32VB-PACK-NEXT: ret
;
; RV64V-ONLY-LABEL: buildvec_v16i8_undef_high_half:
; RV64V-ONLY: # %bb.0:
; RV64V-ONLY-NEXT: lbu a1, 0(a0)
; RV64V-ONLY-NEXT: lbu a2, 1(a0)
; RV64V-ONLY-NEXT: lbu a3, 22(a0)
; RV64V-ONLY-NEXT: lbu a4, 31(a0)
; RV64V-ONLY-NEXT: lbu a5, 623(a0)
; RV64V-ONLY-NEXT: lbu a6, 44(a0)
; RV64V-ONLY-NEXT: lbu a7, 55(a0)
; RV64V-ONLY-NEXT: lbu a0, 75(a0)
; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV64V-ONLY-NEXT: vmv.v.x v8, a1
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a6
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a5
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0
; RV64V-ONLY-NEXT: vslidedown.vi v8, v8, 8
; RV64V-ONLY-NEXT: ret
;
; RVA22U64-LABEL: buildvec_v16i8_undef_high_half:
; RVA22U64: # %bb.0:
; RVA22U64-NEXT: lbu a1, 1(a0)
; RVA22U64-NEXT: lbu a2, 0(a0)
; RVA22U64-NEXT: lbu a3, 22(a0)
; RVA22U64-NEXT: lbu a4, 31(a0)
; RVA22U64-NEXT: slli a1, a1, 8
; RVA22U64-NEXT: or a1, a1, a2
; RVA22U64-NEXT: lbu a2, 44(a0)
; RVA22U64-NEXT: lbu a5, 55(a0)
; RVA22U64-NEXT: slli a3, a3, 16
; RVA22U64-NEXT: slli a4, a4, 24
; RVA22U64-NEXT: or a3, a3, a4
; RVA22U64-NEXT: lbu a4, 623(a0)
; RVA22U64-NEXT: lbu a0, 75(a0)
; RVA22U64-NEXT: slli a2, a2, 32
; RVA22U64-NEXT: slli a5, a5, 40
; RVA22U64-NEXT: or a2, a2, a5
; RVA22U64-NEXT: slli a4, a4, 48
; RVA22U64-NEXT: slli a0, a0, 56
; RVA22U64-NEXT: or a0, a0, a4
; RVA22U64-NEXT: or a1, a1, a3
; RVA22U64-NEXT: or a0, a0, a2
; RVA22U64-NEXT: or a0, a0, a1
; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RVA22U64-NEXT: vmv.v.x v8, a0
; RVA22U64-NEXT: vslide1down.vx v8, v8, zero
; RVA22U64-NEXT: ret
;
; RVA22U64-PACK-LABEL: buildvec_v16i8_undef_high_half:
; RVA22U64-PACK: # %bb.0:
; RVA22U64-PACK-NEXT: lbu a6, 0(a0)
; RVA22U64-PACK-NEXT: lbu a7, 1(a0)
; RVA22U64-PACK-NEXT: lbu t0, 22(a0)
; RVA22U64-PACK-NEXT: lbu a4, 31(a0)
; RVA22U64-PACK-NEXT: lbu a5, 623(a0)
; RVA22U64-PACK-NEXT: lbu a1, 44(a0)
; RVA22U64-PACK-NEXT: lbu a2, 55(a0)
; RVA22U64-PACK-NEXT: lbu a0, 75(a0)
; RVA22U64-PACK-NEXT: packh a3, a6, a7
; RVA22U64-PACK-NEXT: packh a4, t0, a4
; RVA22U64-PACK-NEXT: packh a1, a1, a2
; RVA22U64-PACK-NEXT: packh a0, a5, a0
; RVA22U64-PACK-NEXT: packw a2, a3, a4
; RVA22U64-PACK-NEXT: packh a3, a0, a0
; RVA22U64-PACK-NEXT: packw a3, a3, a3
; RVA22U64-PACK-NEXT: packw a0, a1, a0
; RVA22U64-PACK-NEXT: pack a0, a2, a0
; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RVA22U64-PACK-NEXT: vmv.v.x v8, a0
; RVA22U64-PACK-NEXT: pack a0, a3, a3
; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a0
; RVA22U64-PACK-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_v16i8_undef_high_half:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: lbu a1, 0(a0)
; RV64ZVE32-NEXT: lbu a2, 1(a0)
; RV64ZVE32-NEXT: lbu a3, 22(a0)
; RV64ZVE32-NEXT: lbu a4, 31(a0)
; RV64ZVE32-NEXT: lbu a5, 623(a0)
; RV64ZVE32-NEXT: lbu a6, 44(a0)
; RV64ZVE32-NEXT: lbu a7, 55(a0)
; RV64ZVE32-NEXT: lbu a0, 75(a0)
; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV64ZVE32-NEXT: vmv.v.x v8, a1
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a5
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0
; RV64ZVE32-NEXT: vslidedown.vi v8, v8, 8
; RV64ZVE32-NEXT: ret
%p2 = getelementptr i8, ptr %p, i32 1
%p3 = getelementptr i8, ptr %p, i32 22
%p4 = getelementptr i8, ptr %p, i32 31
%p5 = getelementptr i8, ptr %p, i32 44
%p6 = getelementptr i8, ptr %p, i32 55
%p7 = getelementptr i8, ptr %p, i32 623
%p8 = getelementptr i8, ptr %p, i32 75
%ld1 = load i8, ptr %p
%ld2 = load i8, ptr %p2
%ld3 = load i8, ptr %p3
%ld4 = load i8, ptr %p4
%ld5 = load i8, ptr %p5
%ld6 = load i8, ptr %p6
%ld7 = load i8, ptr %p7
%ld8 = load i8, ptr %p8
%v1 = insertelement <16 x i8> poison, i8 %ld1, i32 0
%v2 = insertelement <16 x i8> %v1, i8 %ld2, i32 1
%v3 = insertelement <16 x i8> %v2, i8 %ld3, i32 2
%v4 = insertelement <16 x i8> %v3, i8 %ld4, i32 3
%v5 = insertelement <16 x i8> %v4, i8 %ld5, i32 4
%v6 = insertelement <16 x i8> %v5, i8 %ld6, i32 5
%v7 = insertelement <16 x i8> %v6, i8 %ld7, i32 6
%v8 = insertelement <16 x i8> %v7, i8 %ld8, i32 7
ret <16 x i8> %v8
}
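; Elements 3 through 11 are defined; the leading three and trailing four elements are undef.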
define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) {
; RV32-ONLY-LABEL: buildvec_v16i8_undef_edges:
; RV32-ONLY: # %bb.0:
; RV32-ONLY-NEXT: lbu a1, 623(a0)
; RV32-ONLY-NEXT: lbu a2, 31(a0)
; RV32-ONLY-NEXT: lbu a3, 44(a0)
; RV32-ONLY-NEXT: lbu a4, 55(a0)
; RV32-ONLY-NEXT: lbu a5, 75(a0)
; RV32-ONLY-NEXT: li a6, 255
; RV32-ONLY-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV32-ONLY-NEXT: vmv.s.x v0, a6
; RV32-ONLY-NEXT: lbu a6, 82(a0)
; RV32-ONLY-NEXT: lbu a7, 93(a0)
; RV32-ONLY-NEXT: lbu t0, 105(a0)
; RV32-ONLY-NEXT: lbu a0, 161(a0)
; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; RV32-ONLY-NEXT: vmv.v.x v8, a6
; RV32-ONLY-NEXT: vmv.v.x v9, a2
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7
; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a3
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t0
; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a4
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a0
; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a1
; RV32-ONLY-NEXT: vslidedown.vi v8, v8, 4
; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a5
; RV32-ONLY-NEXT: vslidedown.vi v8, v9, 8, v0.t
; RV32-ONLY-NEXT: ret
;
; RV32VB-LABEL: buildvec_v16i8_undef_edges:
; RV32VB: # %bb.0:
; RV32VB-NEXT: lbu a1, 623(a0)
; RV32VB-NEXT: lbu a2, 55(a0)
; RV32VB-NEXT: lbu a3, 31(a0)
; RV32VB-NEXT: lbu a4, 75(a0)
; RV32VB-NEXT: lbu a5, 44(a0)
; RV32VB-NEXT: slli a2, a2, 8
; RV32VB-NEXT: slli a1, a1, 16
; RV32VB-NEXT: slli a4, a4, 24
; RV32VB-NEXT: or a2, a5, a2
; RV32VB-NEXT: or a1, a4, a1
; RV32VB-NEXT: lbu a4, 93(a0)
; RV32VB-NEXT: lbu a5, 82(a0)
; RV32VB-NEXT: lbu a6, 105(a0)
; RV32VB-NEXT: lbu a0, 161(a0)
; RV32VB-NEXT: slli a4, a4, 8
; RV32VB-NEXT: or a4, a5, a4
; RV32VB-NEXT: slli a6, a6, 16
; RV32VB-NEXT: slli a0, a0, 24
; RV32VB-NEXT: or a0, a0, a6
; RV32VB-NEXT: slli a3, a3, 24
; RV32VB-NEXT: or a1, a2, a1
; RV32VB-NEXT: or a0, a4, a0
; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32VB-NEXT: vmv.v.x v8, a3
; RV32VB-NEXT: vslide1down.vx v8, v8, a1
; RV32VB-NEXT: vslide1down.vx v8, v8, a0
; RV32VB-NEXT: vslide1down.vx v8, v8, zero
; RV32VB-NEXT: ret
;
; RV32VB-PACK-LABEL: buildvec_v16i8_undef_edges:
; RV32VB-PACK: # %bb.0:
; RV32VB-PACK-NEXT: lbu a1, 623(a0)
; RV32VB-PACK-NEXT: lbu a2, 31(a0)
; RV32VB-PACK-NEXT: lbu a3, 44(a0)
; RV32VB-PACK-NEXT: lbu a4, 55(a0)
; RV32VB-PACK-NEXT: lbu a5, 75(a0)
; RV32VB-PACK-NEXT: lbu a6, 82(a0)
; RV32VB-PACK-NEXT: lbu a7, 93(a0)
; RV32VB-PACK-NEXT: lbu t0, 105(a0)
; RV32VB-PACK-NEXT: lbu a0, 161(a0)
; RV32VB-PACK-NEXT: packh a3, a3, a4
; RV32VB-PACK-NEXT: packh a1, a1, a5
; RV32VB-PACK-NEXT: packh a4, a6, a7
; RV32VB-PACK-NEXT: packh a0, t0, a0
; RV32VB-PACK-NEXT: packh a5, a0, a0
; RV32VB-PACK-NEXT: packh a2, a0, a2
; RV32VB-PACK-NEXT: pack a2, a5, a2
; RV32VB-PACK-NEXT: pack a1, a3, a1
; RV32VB-PACK-NEXT: pack a0, a4, a0
; RV32VB-PACK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32VB-PACK-NEXT: vmv.v.x v8, a2
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a1
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0
; RV32VB-PACK-NEXT: pack a0, a5, a5
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0
; RV32VB-PACK-NEXT: ret
;
; RV64V-ONLY-LABEL: buildvec_v16i8_undef_edges:
; RV64V-ONLY: # %bb.0:
; RV64V-ONLY-NEXT: lbu a1, 623(a0)
; RV64V-ONLY-NEXT: lbu a2, 31(a0)
; RV64V-ONLY-NEXT: lbu a3, 44(a0)
; RV64V-ONLY-NEXT: lbu a4, 55(a0)
; RV64V-ONLY-NEXT: lbu a5, 75(a0)
; RV64V-ONLY-NEXT: li a6, 255
; RV64V-ONLY-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64V-ONLY-NEXT: vmv.s.x v0, a6
; RV64V-ONLY-NEXT: lbu a6, 82(a0)
; RV64V-ONLY-NEXT: lbu a7, 93(a0)
; RV64V-ONLY-NEXT: lbu t0, 105(a0)
; RV64V-ONLY-NEXT: lbu a0, 161(a0)
; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; RV64V-ONLY-NEXT: vmv.v.x v8, a6
; RV64V-ONLY-NEXT: vmv.v.x v9, a2
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7
; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a3
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t0
; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a4
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0
; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a1
; RV64V-ONLY-NEXT: vslidedown.vi v8, v8, 4
; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a5
; RV64V-ONLY-NEXT: vslidedown.vi v8, v9, 8, v0.t
; RV64V-ONLY-NEXT: ret
;
; RVA22U64-LABEL: buildvec_v16i8_undef_edges:
; RVA22U64: # %bb.0:
; RVA22U64-NEXT: lbu a1, 623(a0)
; RVA22U64-NEXT: lbu a2, 44(a0)
; RVA22U64-NEXT: lbu a3, 55(a0)
; RVA22U64-NEXT: lbu a6, 31(a0)
; RVA22U64-NEXT: lbu a5, 75(a0)
; RVA22U64-NEXT: slli a2, a2, 32
; RVA22U64-NEXT: slli a3, a3, 40
; RVA22U64-NEXT: slli a1, a1, 48
; RVA22U64-NEXT: slli a5, a5, 56
; RVA22U64-NEXT: or a2, a2, a3
; RVA22U64-NEXT: or a1, a1, a5
; RVA22U64-NEXT: lbu a3, 93(a0)
; RVA22U64-NEXT: lbu a5, 82(a0)
; RVA22U64-NEXT: lbu a4, 105(a0)
; RVA22U64-NEXT: lbu a0, 161(a0)
; RVA22U64-NEXT: slli a3, a3, 8
; RVA22U64-NEXT: or a3, a3, a5
; RVA22U64-NEXT: slli a4, a4, 16
; RVA22U64-NEXT: slli a0, a0, 24
; RVA22U64-NEXT: or a0, a0, a4
; RVA22U64-NEXT: slli a6, a6, 24
; RVA22U64-NEXT: or a1, a1, a2
; RVA22U64-NEXT: add.uw a1, a6, a1
; RVA22U64-NEXT: or a0, a0, a3
; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RVA22U64-NEXT: vmv.v.x v8, a1
; RVA22U64-NEXT: vslide1down.vx v8, v8, a0
; RVA22U64-NEXT: ret
;
; RVA22U64-PACK-LABEL: buildvec_v16i8_undef_edges:
; RVA22U64-PACK: # %bb.0:
; RVA22U64-PACK-NEXT: lbu a7, 623(a0)
; RVA22U64-PACK-NEXT: lbu a6, 31(a0)
; RVA22U64-PACK-NEXT: lbu t0, 44(a0)
; RVA22U64-PACK-NEXT: lbu a4, 55(a0)
; RVA22U64-PACK-NEXT: lbu a5, 75(a0)
; RVA22U64-PACK-NEXT: lbu a2, 82(a0)
; RVA22U64-PACK-NEXT: lbu a1, 93(a0)
; RVA22U64-PACK-NEXT: lbu a3, 105(a0)
; RVA22U64-PACK-NEXT: lbu a0, 161(a0)
; RVA22U64-PACK-NEXT: packh a4, t0, a4
; RVA22U64-PACK-NEXT: packh a5, a7, a5
; RVA22U64-PACK-NEXT: packh a1, a2, a1
; RVA22U64-PACK-NEXT: packh a0, a3, a0
; RVA22U64-PACK-NEXT: packh a2, a0, a0
; RVA22U64-PACK-NEXT: packh a3, a0, a6
; RVA22U64-PACK-NEXT: packw a3, a2, a3
; RVA22U64-PACK-NEXT: packw a2, a2, a2
; RVA22U64-PACK-NEXT: packw a4, a4, a5
; RVA22U64-PACK-NEXT: packw a0, a1, a0
; RVA22U64-PACK-NEXT: pack a1, a3, a4
; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RVA22U64-PACK-NEXT: vmv.v.x v8, a1
; RVA22U64-PACK-NEXT: pack a0, a0, a2
; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a0
; RVA22U64-PACK-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_v16i8_undef_edges:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: lbu a1, 623(a0)
; RV64ZVE32-NEXT: lbu a2, 31(a0)
; RV64ZVE32-NEXT: lbu a3, 44(a0)
; RV64ZVE32-NEXT: lbu a4, 55(a0)
; RV64ZVE32-NEXT: lbu a5, 75(a0)
; RV64ZVE32-NEXT: li a6, 255
; RV64ZVE32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32-NEXT: vmv.s.x v0, a6
; RV64ZVE32-NEXT: lbu a6, 82(a0)
; RV64ZVE32-NEXT: lbu a7, 93(a0)
; RV64ZVE32-NEXT: lbu t0, 105(a0)
; RV64ZVE32-NEXT: lbu a0, 161(a0)
; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; RV64ZVE32-NEXT: vmv.v.x v8, a6
; RV64ZVE32-NEXT: vmv.v.x v9, a2
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7
; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a3
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t0
; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a4
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0
; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a1
; RV64ZVE32-NEXT: vslidedown.vi v8, v8, 4
; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a5
; RV64ZVE32-NEXT: vslidedown.vi v8, v9, 8, v0.t
; RV64ZVE32-NEXT: ret
%p4 = getelementptr i8, ptr %p, i32 31
%p5 = getelementptr i8, ptr %p, i32 44
%p6 = getelementptr i8, ptr %p, i32 55
%p7 = getelementptr i8, ptr %p, i32 623
%p8 = getelementptr i8, ptr %p, i32 75
%p9 = getelementptr i8, ptr %p, i32 82
%p10 = getelementptr i8, ptr %p, i32 93
%p11 = getelementptr i8, ptr %p, i32 105
%p12 = getelementptr i8, ptr %p, i32 161
%ld4 = load i8, ptr %p4
%ld5 = load i8, ptr %p5
%ld6 = load i8, ptr %p6
%ld7 = load i8, ptr %p7
%ld8 = load i8, ptr %p8
%ld9 = load i8, ptr %p9
%ld10 = load i8, ptr %p10
%ld11 = load i8, ptr %p11
%ld12 = load i8, ptr %p12
%v4 = insertelement <16 x i8> poison, i8 %ld4, i32 3
%v5 = insertelement <16 x i8> %v4, i8 %ld5, i32 4
%v6 = insertelement <16 x i8> %v5, i8 %ld6, i32 5
%v7 = insertelement <16 x i8> %v6, i8 %ld7, i32 6
%v8 = insertelement <16 x i8> %v7, i8 %ld8, i32 7
%v9 = insertelement <16 x i8> %v8, i8 %ld9, i32 8
%v10 = insertelement <16 x i8> %v9, i8 %ld10, i32 9
%v11 = insertelement <16 x i8> %v10, i8 %ld11, i32 10
%v12 = insertelement <16 x i8> %v11, i8 %ld12, i32 11
ret <16 x i8> %v12
}
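; A mix of defined and poison elements: the loads feeding poison lanes are dead and are not emitted, so only the live bytes are loaded and inserted.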
define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) {
; RV32-ONLY-LABEL: buildvec_v16i8_loads_undef_scattered:
; RV32-ONLY: # %bb.0:
; RV32-ONLY-NEXT: lbu a1, 0(a0)
; RV32-ONLY-NEXT: lbu a2, 1(a0)
; RV32-ONLY-NEXT: lbu a3, 44(a0)
; RV32-ONLY-NEXT: lbu a4, 55(a0)
; RV32-ONLY-NEXT: lbu a5, 75(a0)
; RV32-ONLY-NEXT: lbu a6, 82(a0)
; RV32-ONLY-NEXT: lbu a7, 93(a0)
; RV32-ONLY-NEXT: lbu t0, 124(a0)
; RV32-ONLY-NEXT: li t1, 255
; RV32-ONLY-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV32-ONLY-NEXT: vmv.s.x v0, t1
; RV32-ONLY-NEXT: lbu t1, 144(a0)
; RV32-ONLY-NEXT: lbu a0, 154(a0)
; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; RV32-ONLY-NEXT: vmv.v.x v8, a6
; RV32-ONLY-NEXT: vmv.v.x v9, a1
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7
; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a2
; RV32-ONLY-NEXT: vslidedown.vi v8, v8, 2
; RV32-ONLY-NEXT: vslidedown.vi v9, v9, 2
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t0
; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a3
; RV32-ONLY-NEXT: vslidedown.vi v8, v8, 1
; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a4
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, t1
; RV32-ONLY-NEXT: vslidedown.vi v9, v9, 1
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a0
; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a5
; RV32-ONLY-NEXT: vslidedown.vi v8, v9, 8, v0.t
; RV32-ONLY-NEXT: ret
;
; RV32VB-LABEL: buildvec_v16i8_loads_undef_scattered:
; RV32VB: # %bb.0:
; RV32VB-NEXT: lbu a1, 1(a0)
; RV32VB-NEXT: lbu a2, 0(a0)
; RV32VB-NEXT: lbu a3, 44(a0)
; RV32VB-NEXT: lbu a4, 55(a0)
; RV32VB-NEXT: slli a1, a1, 8
; RV32VB-NEXT: or a1, a2, a1
; RV32VB-NEXT: lbu a2, 75(a0)
; RV32VB-NEXT: lbu a5, 82(a0)
; RV32VB-NEXT: lbu a6, 93(a0)
; RV32VB-NEXT: lbu a7, 124(a0)
; RV32VB-NEXT: slli a4, a4, 8
; RV32VB-NEXT: or a3, a3, a4
; RV32VB-NEXT: lbu a4, 144(a0)
; RV32VB-NEXT: lbu a0, 154(a0)
; RV32VB-NEXT: slli a6, a6, 8
; RV32VB-NEXT: or a5, a5, a6
; RV32VB-NEXT: slli a4, a4, 16
; RV32VB-NEXT: slli a0, a0, 24
; RV32VB-NEXT: or a0, a0, a4
; RV32VB-NEXT: slli a2, a2, 24
; RV32VB-NEXT: or a2, a3, a2
; RV32VB-NEXT: or a0, a7, a0
; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32VB-NEXT: vmv.v.x v8, a1
; RV32VB-NEXT: vslide1down.vx v8, v8, a2
; RV32VB-NEXT: vslide1down.vx v8, v8, a5
; RV32VB-NEXT: vslide1down.vx v8, v8, a0
; RV32VB-NEXT: ret
;
; RV32VB-PACK-LABEL: buildvec_v16i8_loads_undef_scattered:
; RV32VB-PACK: # %bb.0:
; RV32VB-PACK-NEXT: lbu a1, 0(a0)
; RV32VB-PACK-NEXT: lbu a2, 1(a0)
; RV32VB-PACK-NEXT: lbu a3, 44(a0)
; RV32VB-PACK-NEXT: lbu a4, 55(a0)
; RV32VB-PACK-NEXT: lbu a5, 82(a0)
; RV32VB-PACK-NEXT: lbu a6, 93(a0)
; RV32VB-PACK-NEXT: packh a1, a1, a2
; RV32VB-PACK-NEXT: lbu a2, 144(a0)
; RV32VB-PACK-NEXT: lbu a7, 154(a0)
; RV32VB-PACK-NEXT: packh a3, a3, a4
; RV32VB-PACK-NEXT: lbu a4, 75(a0)
; RV32VB-PACK-NEXT: lbu a0, 124(a0)
; RV32VB-PACK-NEXT: packh a5, a5, a6
; RV32VB-PACK-NEXT: packh a2, a2, a7
; RV32VB-PACK-NEXT: packh a4, a0, a4
; RV32VB-PACK-NEXT: pack a3, a3, a4
; RV32VB-PACK-NEXT: packh a4, a0, a0
; RV32VB-PACK-NEXT: packh a0, a0, a0
; RV32VB-PACK-NEXT: pack a0, a0, a2
; RV32VB-PACK-NEXT: pack a1, a1, a4
; RV32VB-PACK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32VB-PACK-NEXT: vmv.v.x v8, a1
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a3
; RV32VB-PACK-NEXT: pack a1, a5, a4
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a1
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0
; RV32VB-PACK-NEXT: ret
;
; RV64V-ONLY-LABEL: buildvec_v16i8_loads_undef_scattered:
; RV64V-ONLY: # %bb.0:
; RV64V-ONLY-NEXT: lbu a1, 0(a0)
; RV64V-ONLY-NEXT: lbu a2, 1(a0)
; RV64V-ONLY-NEXT: lbu a3, 44(a0)
; RV64V-ONLY-NEXT: lbu a4, 55(a0)
; RV64V-ONLY-NEXT: lbu a5, 75(a0)
; RV64V-ONLY-NEXT: lbu a6, 82(a0)
; RV64V-ONLY-NEXT: lbu a7, 93(a0)
; RV64V-ONLY-NEXT: lbu t0, 124(a0)
; RV64V-ONLY-NEXT: li t1, 255
; RV64V-ONLY-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64V-ONLY-NEXT: vmv.s.x v0, t1
; RV64V-ONLY-NEXT: lbu t1, 144(a0)
; RV64V-ONLY-NEXT: lbu a0, 154(a0)
; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; RV64V-ONLY-NEXT: vmv.v.x v8, a6
; RV64V-ONLY-NEXT: vmv.v.x v9, a1
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7
; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a2
; RV64V-ONLY-NEXT: vslidedown.vi v8, v8, 2
; RV64V-ONLY-NEXT: vslidedown.vi v9, v9, 2
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t0
; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a3
; RV64V-ONLY-NEXT: vslidedown.vi v8, v8, 1
; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a4
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t1
; RV64V-ONLY-NEXT: vslidedown.vi v9, v9, 1
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0
; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a5
; RV64V-ONLY-NEXT: vslidedown.vi v8, v9, 8, v0.t
; RV64V-ONLY-NEXT: ret
;
; RVA22U64-LABEL: buildvec_v16i8_loads_undef_scattered:
; RVA22U64: # %bb.0:
; RVA22U64-NEXT: lbu a1, 1(a0)
; RVA22U64-NEXT: lbu a2, 0(a0)
; RVA22U64-NEXT: lbu a3, 44(a0)
; RVA22U64-NEXT: lbu a4, 55(a0)
; RVA22U64-NEXT: slli a1, a1, 8
; RVA22U64-NEXT: or a6, a2, a1
; RVA22U64-NEXT: lbu a7, 75(a0)
; RVA22U64-NEXT: lbu a5, 82(a0)
; RVA22U64-NEXT: lbu a1, 93(a0)
; RVA22U64-NEXT: lbu a2, 124(a0)
; RVA22U64-NEXT: slli a3, a3, 32
; RVA22U64-NEXT: slli a4, a4, 40
; RVA22U64-NEXT: or a3, a3, a4
; RVA22U64-NEXT: lbu a4, 144(a0)
; RVA22U64-NEXT: lbu a0, 154(a0)
; RVA22U64-NEXT: slli a1, a1, 8
; RVA22U64-NEXT: or a1, a1, a5
; RVA22U64-NEXT: slli a4, a4, 48
; RVA22U64-NEXT: slli a0, a0, 56
; RVA22U64-NEXT: or a0, a0, a4
; RVA22U64-NEXT: slli a7, a7, 56
; RVA22U64-NEXT: or a3, a7, a3
; RVA22U64-NEXT: slli a2, a2, 32
; RVA22U64-NEXT: or a0, a0, a2
; RVA22U64-NEXT: or a2, a6, a3
; RVA22U64-NEXT: or a0, a0, a1
; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RVA22U64-NEXT: vmv.v.x v8, a2
; RVA22U64-NEXT: vslide1down.vx v8, v8, a0
; RVA22U64-NEXT: ret
;
; RVA22U64-PACK-LABEL: buildvec_v16i8_loads_undef_scattered:
; RVA22U64-PACK: # %bb.0:
; RVA22U64-PACK-NEXT: lbu a1, 0(a0)
; RVA22U64-PACK-NEXT: lbu a2, 1(a0)
; RVA22U64-PACK-NEXT: lbu a6, 44(a0)
; RVA22U64-PACK-NEXT: lbu a7, 55(a0)
; RVA22U64-PACK-NEXT: lbu t1, 82(a0)
; RVA22U64-PACK-NEXT: lbu a3, 93(a0)
; RVA22U64-PACK-NEXT: packh t0, a1, a2
; RVA22U64-PACK-NEXT: lbu a2, 144(a0)
; RVA22U64-PACK-NEXT: lbu a4, 154(a0)
; RVA22U64-PACK-NEXT: packh a1, a6, a7
; RVA22U64-PACK-NEXT: lbu a5, 75(a0)
; RVA22U64-PACK-NEXT: lbu a0, 124(a0)
; RVA22U64-PACK-NEXT: packh a3, t1, a3
; RVA22U64-PACK-NEXT: packh a2, a2, a4
; RVA22U64-PACK-NEXT: packh a4, a0, a5
; RVA22U64-PACK-NEXT: packw a1, a1, a4
; RVA22U64-PACK-NEXT: packh a4, a0, a0
; RVA22U64-PACK-NEXT: packh a0, a0, a0
; RVA22U64-PACK-NEXT: packw a5, t0, a4
; RVA22U64-PACK-NEXT: packw a0, a0, a2
; RVA22U64-PACK-NEXT: packw a2, a3, a4
; RVA22U64-PACK-NEXT: pack a1, a5, a1
; RVA22U64-PACK-NEXT: pack a0, a2, a0
; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RVA22U64-PACK-NEXT: vmv.v.x v8, a1
; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a0
; RVA22U64-PACK-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_v16i8_loads_undef_scattered:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: lbu a1, 0(a0)
; RV64ZVE32-NEXT: lbu a2, 1(a0)
; RV64ZVE32-NEXT: lbu a3, 44(a0)
; RV64ZVE32-NEXT: lbu a4, 55(a0)
; RV64ZVE32-NEXT: lbu a5, 75(a0)
; RV64ZVE32-NEXT: lbu a6, 82(a0)
; RV64ZVE32-NEXT: lbu a7, 93(a0)
; RV64ZVE32-NEXT: lbu t0, 124(a0)
; RV64ZVE32-NEXT: li t1, 255
; RV64ZVE32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32-NEXT: vmv.s.x v0, t1
; RV64ZVE32-NEXT: lbu t1, 144(a0)
; RV64ZVE32-NEXT: lbu a0, 154(a0)
; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; RV64ZVE32-NEXT: vmv.v.x v8, a6
; RV64ZVE32-NEXT: vmv.v.x v9, a1
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7
; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a2
; RV64ZVE32-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t0
; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a3
; RV64ZVE32-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a4
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t1
; RV64ZVE32-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0
; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a5
; RV64ZVE32-NEXT: vslidedown.vi v8, v9, 8, v0.t
; RV64ZVE32-NEXT: ret
%p2 = getelementptr i8, ptr %p, i32 1
%p3 = getelementptr i8, ptr %p, i32 22
%p4 = getelementptr i8, ptr %p, i32 31
%p5 = getelementptr i8, ptr %p, i32 44
%p6 = getelementptr i8, ptr %p, i32 55
%p7 = getelementptr i8, ptr %p, i32 623
%p8 = getelementptr i8, ptr %p, i32 75
%p9 = getelementptr i8, ptr %p, i32 82
%p10 = getelementptr i8, ptr %p, i32 93
%p11 = getelementptr i8, ptr %p, i32 105
%p12 = getelementptr i8, ptr %p, i32 161
%p13 = getelementptr i8, ptr %p, i32 124
%p14 = getelementptr i8, ptr %p, i32 163
%p15 = getelementptr i8, ptr %p, i32 144
%p16 = getelementptr i8, ptr %p, i32 154
%ld1 = load i8, ptr %p
%ld2 = load i8, ptr %p2
%ld3 = load i8, ptr %p3
%ld4 = load i8, ptr %p4
%ld5 = load i8, ptr %p5
%ld6 = load i8, ptr %p6
%ld7 = load i8, ptr %p7
%ld8 = load i8, ptr %p8
%ld9 = load i8, ptr %p9
%ld10 = load i8, ptr %p10
%ld11 = load i8, ptr %p11
%ld12 = load i8, ptr %p12
%ld13 = load i8, ptr %p13
%ld14 = load i8, ptr %p14
%ld15 = load i8, ptr %p15
%ld16 = load i8, ptr %p16
%v1 = insertelement <16 x i8> poison, i8 %ld1, i32 0
%v2 = insertelement <16 x i8> %v1, i8 %ld2, i32 1
%v3 = insertelement <16 x i8> %v2, i8 poison, i32 2
%v4 = insertelement <16 x i8> %v3, i8 poison, i32 3
%v5 = insertelement <16 x i8> %v4, i8 %ld5, i32 4
%v6 = insertelement <16 x i8> %v5, i8 %ld6, i32 5
%v7 = insertelement <16 x i8> %v6, i8 poison, i32 6
%v8 = insertelement <16 x i8> %v7, i8 %ld8, i32 7
%v9 = insertelement <16 x i8> %v8, i8 %ld9, i32 8
%v10 = insertelement <16 x i8> %v9, i8 %ld10, i32 9
%v11 = insertelement <16 x i8> %v10, i8 poison, i32 10
%v12 = insertelement <16 x i8> %v11, i8 poison, i32 11
%v13 = insertelement <16 x i8> %v12, i8 %ld13, i32 12
%v14 = insertelement <16 x i8> %v13, i8 poison, i32 13
%v15 = insertelement <16 x i8> %v14, i8 %ld15, i32 14
%v16 = insertelement <16 x i8> %v15, i8 %ld16, i32 15
ret <16 x i8> %v16
}
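; Build an <8 x i8> from scalar arguments instead of loads. With Zbkb the packh/packw/pack chain collapses all eight bytes into a single 64-bit value (or two 32-bit values on RV32) before the vector insert.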
define <8 x i8> @buildvec_v8i8_pack(i8 %e1, i8 %e2, i8 %e3, i8 %e4, i8 %e5, i8 %e6, i8 %e7, i8 %e8) {
; RV32-ONLY-LABEL: buildvec_v8i8_pack:
; RV32-ONLY: # %bb.0:
; RV32-ONLY-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; RV32-ONLY-NEXT: vmv.v.x v8, a4
; RV32-ONLY-NEXT: vmv.v.x v9, a0
; RV32-ONLY-NEXT: vmv.v.i v0, 15
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a5
; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a1
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a6
; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a2
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a7
; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a3
; RV32-ONLY-NEXT: vslidedown.vi v8, v9, 4, v0.t
; RV32-ONLY-NEXT: ret
;
; RV32VB-LABEL: buildvec_v8i8_pack:
; RV32VB: # %bb.0:
; RV32VB-NEXT: slli a7, a7, 24
; RV32VB-NEXT: zext.b a6, a6
; RV32VB-NEXT: zext.b a4, a4
; RV32VB-NEXT: zext.b a5, a5
; RV32VB-NEXT: slli a3, a3, 24
; RV32VB-NEXT: zext.b a2, a2
; RV32VB-NEXT: zext.b a0, a0
; RV32VB-NEXT: zext.b a1, a1
; RV32VB-NEXT: slli a6, a6, 16
; RV32VB-NEXT: slli a5, a5, 8
; RV32VB-NEXT: slli a2, a2, 16
; RV32VB-NEXT: slli a1, a1, 8
; RV32VB-NEXT: or a6, a7, a6
; RV32VB-NEXT: or a4, a4, a5
; RV32VB-NEXT: or a2, a3, a2
; RV32VB-NEXT: or a0, a0, a1
; RV32VB-NEXT: or a1, a4, a6
; RV32VB-NEXT: or a0, a0, a2
; RV32VB-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32VB-NEXT: vmv.v.x v8, a0
; RV32VB-NEXT: vslide1down.vx v8, v8, a1
; RV32VB-NEXT: ret
;
; RV32VB-PACK-LABEL: buildvec_v8i8_pack:
; RV32VB-PACK: # %bb.0:
; RV32VB-PACK-NEXT: packh a6, a6, a7
; RV32VB-PACK-NEXT: packh a4, a4, a5
; RV32VB-PACK-NEXT: packh a2, a2, a3
; RV32VB-PACK-NEXT: packh a0, a0, a1
; RV32VB-PACK-NEXT: pack a1, a4, a6
; RV32VB-PACK-NEXT: pack a0, a0, a2
; RV32VB-PACK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32VB-PACK-NEXT: vmv.v.x v8, a0
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a1
; RV32VB-PACK-NEXT: ret
;
; RV64V-ONLY-LABEL: buildvec_v8i8_pack:
; RV64V-ONLY: # %bb.0:
; RV64V-ONLY-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; RV64V-ONLY-NEXT: vmv.v.x v8, a4
; RV64V-ONLY-NEXT: vmv.v.x v9, a0
; RV64V-ONLY-NEXT: vmv.v.i v0, 15
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a5
; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a1
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a6
; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a2
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7
; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a3
; RV64V-ONLY-NEXT: vslidedown.vi v8, v9, 4, v0.t
; RV64V-ONLY-NEXT: ret
;
; RVA22U64-LABEL: buildvec_v8i8_pack:
; RVA22U64: # %bb.0:
; RVA22U64-NEXT: zext.b t0, a4
; RVA22U64-NEXT: zext.b a5, a5
; RVA22U64-NEXT: slli a7, a7, 56
; RVA22U64-NEXT: zext.b a4, a6
; RVA22U64-NEXT: zext.b a2, a2
; RVA22U64-NEXT: zext.b a3, a3
; RVA22U64-NEXT: zext.b a0, a0
; RVA22U64-NEXT: zext.b a1, a1
; RVA22U64-NEXT: slli t0, t0, 32
; RVA22U64-NEXT: slli a5, a5, 40
; RVA22U64-NEXT: slli a4, a4, 48
; RVA22U64-NEXT: slli a2, a2, 16
; RVA22U64-NEXT: slli a3, a3, 24
; RVA22U64-NEXT: slli a1, a1, 8
; RVA22U64-NEXT: or a5, a5, t0
; RVA22U64-NEXT: or a4, a7, a4
; RVA22U64-NEXT: or a2, a2, a3
; RVA22U64-NEXT: or a0, a0, a1
; RVA22U64-NEXT: or a4, a4, a5
; RVA22U64-NEXT: or a0, a0, a2
; RVA22U64-NEXT: or a0, a0, a4
; RVA22U64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RVA22U64-NEXT: vmv.s.x v8, a0
; RVA22U64-NEXT: ret
;
; RVA22U64-PACK-LABEL: buildvec_v8i8_pack:
; RVA22U64-PACK: # %bb.0:
; RVA22U64-PACK-NEXT: packh a6, a6, a7
; RVA22U64-PACK-NEXT: packh a4, a4, a5
; RVA22U64-PACK-NEXT: packh a2, a2, a3
; RVA22U64-PACK-NEXT: packh a0, a0, a1
; RVA22U64-PACK-NEXT: packw a1, a4, a6
; RVA22U64-PACK-NEXT: packw a0, a0, a2
; RVA22U64-PACK-NEXT: pack a0, a0, a1
; RVA22U64-PACK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RVA22U64-PACK-NEXT: vmv.s.x v8, a0
; RVA22U64-PACK-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_v8i8_pack:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; RV64ZVE32-NEXT: vmv.v.x v8, a4
; RV64ZVE32-NEXT: vmv.v.x v9, a0
; RV64ZVE32-NEXT: vmv.v.i v0, 15
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a5
; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a1
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a2
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7
; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a3
; RV64ZVE32-NEXT: vslidedown.vi v8, v9, 4, v0.t
; RV64ZVE32-NEXT: ret
%v1 = insertelement <8 x i8> poison, i8 %e1, i32 0
%v2 = insertelement <8 x i8> %v1, i8 %e2, i32 1
%v3 = insertelement <8 x i8> %v2, i8 %e3, i32 2
%v4 = insertelement <8 x i8> %v3, i8 %e4, i32 3
%v5 = insertelement <8 x i8> %v4, i8 %e5, i32 4
%v6 = insertelement <8 x i8> %v5, i8 %e6, i32 5
%v7 = insertelement <8 x i8> %v6, i8 %e7, i32 6
%v8 = insertelement <8 x i8> %v7, i8 %e8, i32 7
ret <8 x i8> %v8
}
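; As above, but for a non-power-of-two <6 x i8>.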
define <6 x i8> @buildvec_v6i8_pack(i8 %e1, i8 %e2, i8 %e3, i8 %e4, i8 %e5, i8 %e6) {
; RV32-ONLY-LABEL: buildvec_v6i8_pack:
; RV32-ONLY: # %bb.0:
; RV32-ONLY-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV32-ONLY-NEXT: vmv.v.x v8, a0
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a1
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a4
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a5
; RV32-ONLY-NEXT: vslidedown.vi v8, v8, 2
; RV32-ONLY-NEXT: ret
;
; RV32VB-LABEL: buildvec_v6i8_pack:
; RV32VB: # %bb.0:
; RV32VB-NEXT: slli a3, a3, 24
; RV32VB-NEXT: zext.b a2, a2
; RV32VB-NEXT: zext.b a0, a0
; RV32VB-NEXT: zext.b a1, a1
; RV32VB-NEXT: zext.b a4, a4
; RV32VB-NEXT: zext.b a5, a5
; RV32VB-NEXT: slli a2, a2, 16
; RV32VB-NEXT: slli a1, a1, 8
; RV32VB-NEXT: slli a5, a5, 8
; RV32VB-NEXT: or a2, a3, a2
; RV32VB-NEXT: or a0, a0, a1
; RV32VB-NEXT: or a0, a0, a2
; RV32VB-NEXT: or a4, a4, a5
; RV32VB-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32VB-NEXT: vmv.v.x v8, a0
; RV32VB-NEXT: vslide1down.vx v8, v8, a4
; RV32VB-NEXT: ret
;
; RV32VB-PACK-LABEL: buildvec_v6i8_pack:
; RV32VB-PACK: # %bb.0:
; RV32VB-PACK-NEXT: packh a2, a2, a3
; RV32VB-PACK-NEXT: packh a0, a0, a1
; RV32VB-PACK-NEXT: packh a1, a4, a5
; RV32VB-PACK-NEXT: packh a3, a0, a0
; RV32VB-PACK-NEXT: pack a0, a0, a2
; RV32VB-PACK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32VB-PACK-NEXT: vmv.v.x v8, a0
; RV32VB-PACK-NEXT: pack a0, a1, a3
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0
; RV32VB-PACK-NEXT: ret
;
; RV64V-ONLY-LABEL: buildvec_v6i8_pack:
; RV64V-ONLY: # %bb.0:
; RV64V-ONLY-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64V-ONLY-NEXT: vmv.v.x v8, a0
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a5
; RV64V-ONLY-NEXT: vslidedown.vi v8, v8, 2
; RV64V-ONLY-NEXT: ret
;
; RVA22U64-LABEL: buildvec_v6i8_pack:
; RVA22U64: # %bb.0:
; RVA22U64-NEXT: zext.b a2, a2
; RVA22U64-NEXT: zext.b a3, a3
; RVA22U64-NEXT: zext.b a0, a0
; RVA22U64-NEXT: zext.b a1, a1
; RVA22U64-NEXT: zext.b a4, a4
; RVA22U64-NEXT: zext.b a5, a5
; RVA22U64-NEXT: slli a2, a2, 16
; RVA22U64-NEXT: slli a3, a3, 24
; RVA22U64-NEXT: slli a1, a1, 8
; RVA22U64-NEXT: slli a4, a4, 32
; RVA22U64-NEXT: slli a5, a5, 40
; RVA22U64-NEXT: or a2, a2, a3
; RVA22U64-NEXT: or a0, a0, a1
; RVA22U64-NEXT: or a0, a0, a2
; RVA22U64-NEXT: or a4, a4, a5
; RVA22U64-NEXT: or a0, a0, a4
; RVA22U64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RVA22U64-NEXT: vmv.s.x v8, a0
; RVA22U64-NEXT: ret
;
; RVA22U64-PACK-LABEL: buildvec_v6i8_pack:
; RVA22U64-PACK: # %bb.0:
; RVA22U64-PACK-NEXT: packh a2, a2, a3
; RVA22U64-PACK-NEXT: packh a0, a0, a1
; RVA22U64-PACK-NEXT: packh a1, a4, a5
; RVA22U64-PACK-NEXT: packh a3, a0, a0
; RVA22U64-PACK-NEXT: packw a0, a0, a2
; RVA22U64-PACK-NEXT: packw a1, a1, a3
; RVA22U64-PACK-NEXT: pack a0, a0, a1
; RVA22U64-PACK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RVA22U64-PACK-NEXT: vmv.s.x v8, a0
; RVA22U64-PACK-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_v6i8_pack:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64ZVE32-NEXT: vmv.v.x v8, a0
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a5
; RV64ZVE32-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32-NEXT: ret
%v1 = insertelement <6 x i8> poison, i8 %e1, i32 0
%v2 = insertelement <6 x i8> %v1, i8 %e2, i32 1
%v3 = insertelement <6 x i8> %v2, i8 %e3, i32 2
%v4 = insertelement <6 x i8> %v3, i8 %e4, i32 3
%v5 = insertelement <6 x i8> %v4, i8 %e5, i32 4
%v6 = insertelement <6 x i8> %v5, i8 %e6, i32 5
ret <6 x i8> %v6
}
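; Build a <4 x i16> from scalar arguments; with Zbkb the halfwords can be merged
; with packw/pack before the vector insert.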
define <4 x i16> @buildvec_v4i16_pack(i16 %e1, i16 %e2, i16 %e3, i16 %e4) {
; RV32-ONLY-LABEL: buildvec_v4i16_pack:
; RV32-ONLY: # %bb.0:
; RV32-ONLY-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV32-ONLY-NEXT: vmv.v.x v8, a0
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a1
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3
; RV32-ONLY-NEXT: ret
;
; RV32VB-LABEL: buildvec_v4i16_pack:
; RV32VB: # %bb.0:
; RV32VB-NEXT: slli a3, a3, 16
; RV32VB-NEXT: zext.h a2, a2
; RV32VB-NEXT: slli a1, a1, 16
; RV32VB-NEXT: zext.h a0, a0
; RV32VB-NEXT: or a2, a2, a3
; RV32VB-NEXT: or a0, a0, a1
; RV32VB-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32VB-NEXT: vmv.v.x v8, a0
; RV32VB-NEXT: vslide1down.vx v8, v8, a2
; RV32VB-NEXT: ret
;
; RV32VB-PACK-LABEL: buildvec_v4i16_pack:
; RV32VB-PACK: # %bb.0:
; RV32VB-PACK-NEXT: pack a2, a2, a3
; RV32VB-PACK-NEXT: pack a0, a0, a1
; RV32VB-PACK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32VB-PACK-NEXT: vmv.v.x v8, a0
; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a2
; RV32VB-PACK-NEXT: ret
;
; RV64V-ONLY-LABEL: buildvec_v4i16_pack:
; RV64V-ONLY: # %bb.0:
; RV64V-ONLY-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV64V-ONLY-NEXT: vmv.v.x v8, a0
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
; RV64V-ONLY-NEXT: ret
;
; RVA22U64-LABEL: buildvec_v4i16_pack:
; RVA22U64: # %bb.0:
; RVA22U64-NEXT: slli a3, a3, 48
; RVA22U64-NEXT: slli a2, a2, 48
; RVA22U64-NEXT: zext.h a0, a0
; RVA22U64-NEXT: slli a1, a1, 48
; RVA22U64-NEXT: srli a2, a2, 16
; RVA22U64-NEXT: srli a1, a1, 32
; RVA22U64-NEXT: or a2, a2, a3
; RVA22U64-NEXT: or a0, a0, a1
; RVA22U64-NEXT: or a0, a0, a2
; RVA22U64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RVA22U64-NEXT: vmv.s.x v8, a0
; RVA22U64-NEXT: ret
;
; RVA22U64-PACK-LABEL: buildvec_v4i16_pack:
; RVA22U64-PACK: # %bb.0:
; RVA22U64-PACK-NEXT: packw a2, a2, a3
; RVA22U64-PACK-NEXT: packw a0, a0, a1
; RVA22U64-PACK-NEXT: pack a0, a0, a2
; RVA22U64-PACK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RVA22U64-PACK-NEXT: vmv.s.x v8, a0
; RVA22U64-PACK-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_v4i16_pack:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV64ZVE32-NEXT: vmv.v.x v8, a0
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
; RV64ZVE32-NEXT: ret
%v1 = insertelement <4 x i16> poison, i16 %e1, i32 0
%v2 = insertelement <4 x i16> %v1, i16 %e2, i32 1
%v3 = insertelement <4 x i16> %v2, i16 %e3, i32 2
%v4 = insertelement <4 x i16> %v3, i16 %e4, i32 3
ret <4 x i16> %v4
}
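; Build a <2 x i32> from two scalars. On RV64 the pair fits in one GPR
; (slli+add.uw, or a single pack with Zbkb).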
define <2 x i32> @buildvec_v2i32_pack(i32 %e1, i32 %e2) {
; RV32-LABEL: buildvec_v2i32_pack:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT: vmv.v.x v8, a0
; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: ret
;
; RV64V-ONLY-LABEL: buildvec_v2i32_pack:
; RV64V-ONLY: # %bb.0:
; RV64V-ONLY-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV64V-ONLY-NEXT: vmv.v.x v8, a0
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1
; RV64V-ONLY-NEXT: ret
;
; RVA22U64-LABEL: buildvec_v2i32_pack:
; RVA22U64: # %bb.0:
; RVA22U64-NEXT: slli a1, a1, 32
; RVA22U64-NEXT: add.uw a0, a0, a1
; RVA22U64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RVA22U64-NEXT: vmv.s.x v8, a0
; RVA22U64-NEXT: ret
;
; RVA22U64-PACK-LABEL: buildvec_v2i32_pack:
; RVA22U64-PACK: # %bb.0:
; RVA22U64-PACK-NEXT: pack a0, a0, a1
; RVA22U64-PACK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RVA22U64-PACK-NEXT: vmv.s.x v8, a0
; RVA22U64-PACK-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_v2i32_pack:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32-NEXT: vmv.v.x v8, a0
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1
; RV64ZVE32-NEXT: ret
%v1 = insertelement <2 x i32> poison, i32 %e1, i32 0
%v2 = insertelement <2 x i32> %v1, i32 %e2, i32 1
ret <2 x i32> %v2
}
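; Single-element build vectors lower to a plain vmv.s.x.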
define <1 x i16> @buildvec_v1i16_pack(i16 %e1) {
; CHECK-LABEL: buildvec_v1i16_pack:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: ret
%v1 = insertelement <1 x i16> poison, i16 %e1, i32 0
ret <1 x i16> %v1
}
define <1 x i32> @buildvec_v1i32_pack(i32 %e1) {
; CHECK-LABEL: buildvec_v1i32_pack:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: ret
%v1 = insertelement <1 x i32> poison, i32 %e1, i32 0
ret <1 x i32> %v1
}
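; All lanes except lane 0 take the same value, so this can be a splat of %e1
; followed by a vmv.s.x of %e2 into element 0.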
define <4 x i32> @buildvec_vslide1up(i32 %e1, i32 %e2) {
; CHECK-LABEL: buildvec_vslide1up:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma
; CHECK-NEXT: vmv.s.x v8, a1
; CHECK-NEXT: ret
%v1 = insertelement <4 x i32> poison, i32 %e2, i32 0
%v2 = insertelement <4 x i32> %v1, i32 %e1, i32 1
%v3 = insertelement <4 x i32> %v2, i32 %e1, i32 2
%v4 = insertelement <4 x i32> %v3, i32 %e1, i32 3
ret <4 x i32> %v4
}
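; Splat of a scalar i1 mask, materialized as an i8 splat compared against zero.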
define <4 x i1> @buildvec_i1_splat(i1 %e1) {
; CHECK-LABEL: buildvec_i1_splat:
; CHECK: # %bb.0:
; CHECK-NEXT: andi a0, a0, 1
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: ret
%v1 = insertelement <4 x i1> poison, i1 %e1, i32 0
%v2 = insertelement <4 x i1> %v1, i1 %e1, i32 1
%v3 = insertelement <4 x i1> %v2, i1 %e1, i32 2
%v4 = insertelement <4 x i1> %v3, i1 %e1, i32 3
ret <4 x i1> %v4
}
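; Four v8i32 add reductions inserted into consecutive lanes; the vredsum.vs results
; are chained with vslideup.vi, or packed through GPRs on RVA22U64 configs.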
define <4 x i32> @buildvec_vredsum_slideup(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %arg2, <8 x i32> %arg3) nounwind {
; RV32-LABEL: buildvec_vredsum_slideup:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vmv.s.x v16, zero
; RV32-NEXT: vredsum.vs v8, v8, v16
; RV32-NEXT: vredsum.vs v9, v10, v16
; RV32-NEXT: vredsum.vs v10, v12, v16
; RV32-NEXT: vredsum.vs v11, v14, v16
; RV32-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV32-NEXT: vslideup.vi v10, v11, 1
; RV32-NEXT: vslideup.vi v9, v10, 1
; RV32-NEXT: vslideup.vi v8, v9, 1
; RV32-NEXT: ret
;
; RV64V-ONLY-LABEL: buildvec_vredsum_slideup:
; RV64V-ONLY: # %bb.0:
; RV64V-ONLY-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64V-ONLY-NEXT: vmv.s.x v16, zero
; RV64V-ONLY-NEXT: vredsum.vs v8, v8, v16
; RV64V-ONLY-NEXT: vredsum.vs v9, v10, v16
; RV64V-ONLY-NEXT: vredsum.vs v10, v12, v16
; RV64V-ONLY-NEXT: vredsum.vs v11, v14, v16
; RV64V-ONLY-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64V-ONLY-NEXT: vslideup.vi v10, v11, 1
; RV64V-ONLY-NEXT: vslideup.vi v9, v10, 1
; RV64V-ONLY-NEXT: vslideup.vi v8, v9, 1
; RV64V-ONLY-NEXT: ret
;
; RVA22U64-LABEL: buildvec_vredsum_slideup:
; RVA22U64: # %bb.0:
; RVA22U64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RVA22U64-NEXT: vmv.s.x v16, zero
; RVA22U64-NEXT: vredsum.vs v8, v8, v16
; RVA22U64-NEXT: vredsum.vs v9, v10, v16
; RVA22U64-NEXT: vredsum.vs v10, v12, v16
; RVA22U64-NEXT: vredsum.vs v11, v14, v16
; RVA22U64-NEXT: vmv.x.s a0, v8
; RVA22U64-NEXT: vmv.x.s a1, v9
; RVA22U64-NEXT: vmv.x.s a2, v10
; RVA22U64-NEXT: slli a1, a1, 32
; RVA22U64-NEXT: add.uw a0, a0, a1
; RVA22U64-NEXT: vmv.x.s a1, v11
; RVA22U64-NEXT: slli a1, a1, 32
; RVA22U64-NEXT: add.uw a1, a2, a1
; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RVA22U64-NEXT: vmv.v.x v8, a0
; RVA22U64-NEXT: vslide1down.vx v8, v8, a1
; RVA22U64-NEXT: ret
;
; RVA22U64-PACK-LABEL: buildvec_vredsum_slideup:
; RVA22U64-PACK: # %bb.0:
; RVA22U64-PACK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RVA22U64-PACK-NEXT: vmv.s.x v16, zero
; RVA22U64-PACK-NEXT: vredsum.vs v8, v8, v16
; RVA22U64-PACK-NEXT: vredsum.vs v9, v10, v16
; RVA22U64-PACK-NEXT: vredsum.vs v10, v12, v16
; RVA22U64-PACK-NEXT: vredsum.vs v11, v14, v16
; RVA22U64-PACK-NEXT: vmv.x.s a0, v8
; RVA22U64-PACK-NEXT: vmv.x.s a1, v9
; RVA22U64-PACK-NEXT: vmv.x.s a2, v10
; RVA22U64-PACK-NEXT: pack a0, a0, a1
; RVA22U64-PACK-NEXT: vmv.x.s a1, v11
; RVA22U64-PACK-NEXT: pack a1, a2, a1
; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RVA22U64-PACK-NEXT: vmv.v.x v8, a0
; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a1
; RVA22U64-PACK-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_vredsum_slideup:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64ZVE32-NEXT: vmv.s.x v16, zero
; RV64ZVE32-NEXT: vredsum.vs v8, v8, v16
; RV64ZVE32-NEXT: vredsum.vs v9, v10, v16
; RV64ZVE32-NEXT: vredsum.vs v10, v12, v16
; RV64ZVE32-NEXT: vredsum.vs v11, v14, v16
; RV64ZVE32-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32-NEXT: vslideup.vi v10, v11, 1
; RV64ZVE32-NEXT: vslideup.vi v9, v10, 1
; RV64ZVE32-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32-NEXT: ret
%247 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %arg0)
%248 = insertelement <4 x i32> poison, i32 %247, i64 0
%250 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %arg1)
%251 = insertelement <4 x i32> %248, i32 %250, i64 1
%252 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %arg2)
%253 = insertelement <4 x i32> %251, i32 %252, i64 2
%254 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %arg3)
%255 = insertelement <4 x i32> %253, i32 %254, i64 3
ret <4 x i32> %255
}
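; Same as above with unsigned-max reductions (vredmaxu.vs).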
define <4 x i32> @buildvec_vredmax_slideup(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %arg2, <8 x i32> %arg3) nounwind {
; RV32-LABEL: buildvec_vredmax_slideup:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vredmaxu.vs v8, v8, v8
; RV32-NEXT: vredmaxu.vs v9, v10, v10
; RV32-NEXT: vredmaxu.vs v10, v12, v12
; RV32-NEXT: vredmaxu.vs v11, v14, v14
; RV32-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV32-NEXT: vslideup.vi v10, v11, 1
; RV32-NEXT: vslideup.vi v9, v10, 1
; RV32-NEXT: vslideup.vi v8, v9, 1
; RV32-NEXT: ret
;
; RV64V-ONLY-LABEL: buildvec_vredmax_slideup:
; RV64V-ONLY: # %bb.0:
; RV64V-ONLY-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64V-ONLY-NEXT: vredmaxu.vs v8, v8, v8
; RV64V-ONLY-NEXT: vredmaxu.vs v9, v10, v10
; RV64V-ONLY-NEXT: vredmaxu.vs v10, v12, v12
; RV64V-ONLY-NEXT: vredmaxu.vs v11, v14, v14
; RV64V-ONLY-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64V-ONLY-NEXT: vslideup.vi v10, v11, 1
; RV64V-ONLY-NEXT: vslideup.vi v9, v10, 1
; RV64V-ONLY-NEXT: vslideup.vi v8, v9, 1
; RV64V-ONLY-NEXT: ret
;
; RVA22U64-LABEL: buildvec_vredmax_slideup:
; RVA22U64: # %bb.0:
; RVA22U64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RVA22U64-NEXT: vredmaxu.vs v8, v8, v8
; RVA22U64-NEXT: vredmaxu.vs v9, v10, v10
; RVA22U64-NEXT: vredmaxu.vs v10, v12, v12
; RVA22U64-NEXT: vredmaxu.vs v11, v14, v14
; RVA22U64-NEXT: vmv.x.s a0, v8
; RVA22U64-NEXT: vmv.x.s a1, v9
; RVA22U64-NEXT: vmv.x.s a2, v10
; RVA22U64-NEXT: slli a1, a1, 32
; RVA22U64-NEXT: add.uw a0, a0, a1
; RVA22U64-NEXT: vmv.x.s a1, v11
; RVA22U64-NEXT: slli a1, a1, 32
; RVA22U64-NEXT: add.uw a1, a2, a1
; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RVA22U64-NEXT: vmv.v.x v8, a0
; RVA22U64-NEXT: vslide1down.vx v8, v8, a1
; RVA22U64-NEXT: ret
;
; RVA22U64-PACK-LABEL: buildvec_vredmax_slideup:
; RVA22U64-PACK: # %bb.0:
; RVA22U64-PACK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RVA22U64-PACK-NEXT: vredmaxu.vs v8, v8, v8
; RVA22U64-PACK-NEXT: vredmaxu.vs v9, v10, v10
; RVA22U64-PACK-NEXT: vredmaxu.vs v10, v12, v12
; RVA22U64-PACK-NEXT: vredmaxu.vs v11, v14, v14
; RVA22U64-PACK-NEXT: vmv.x.s a0, v8
; RVA22U64-PACK-NEXT: vmv.x.s a1, v9
; RVA22U64-PACK-NEXT: vmv.x.s a2, v10
; RVA22U64-PACK-NEXT: pack a0, a0, a1
; RVA22U64-PACK-NEXT: vmv.x.s a1, v11
; RVA22U64-PACK-NEXT: pack a1, a2, a1
; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RVA22U64-PACK-NEXT: vmv.v.x v8, a0
; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a1
; RVA22U64-PACK-NEXT: ret
;
; RV64ZVE32-LABEL: buildvec_vredmax_slideup:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64ZVE32-NEXT: vredmaxu.vs v8, v8, v8
; RV64ZVE32-NEXT: vredmaxu.vs v9, v10, v10
; RV64ZVE32-NEXT: vredmaxu.vs v10, v12, v12
; RV64ZVE32-NEXT: vredmaxu.vs v11, v14, v14
; RV64ZVE32-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32-NEXT: vslideup.vi v10, v11, 1
; RV64ZVE32-NEXT: vslideup.vi v9, v10, 1
; RV64ZVE32-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32-NEXT: ret
%247 = tail call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %arg0)
%248 = insertelement <4 x i32> poison, i32 %247, i64 0
%250 = tail call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %arg1)
%251 = insertelement <4 x i32> %248, i32 %250, i64 1
%252 = tail call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %arg2)
%253 = insertelement <4 x i32> %251, i32 %252, i64 2
%254 = tail call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %arg3)
%255 = insertelement <4 x i32> %253, i32 %254, i64 3
ret <4 x i32> %255
}
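; PR159294: build a <16 x i16> from truncated first elements of three <2 x i32> sources.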
define <16 x i16> @PR159294(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) {
; RV32-ONLY-LABEL: PR159294:
; RV32-ONLY: # %bb.0: # %entry
; RV32-ONLY-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32-ONLY-NEXT: vmv.x.s a0, v8
; RV32-ONLY-NEXT: vmv.x.s a1, v9
; RV32-ONLY-NEXT: vmv.x.s a2, v10
; RV32-ONLY-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RV32-ONLY-NEXT: vmv.v.x v8, a2
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a0
; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a1
; RV32-ONLY-NEXT: vslidedown.vi v8, v8, 13
; RV32-ONLY-NEXT: ret
;
; RV32VB-LABEL: PR159294:
; RV32VB: # %bb.0: # %entry
; RV32VB-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32VB-NEXT: vmv.x.s a0, v8
; RV32VB-NEXT: vmv.x.s a1, v10
; RV32VB-NEXT: slli a0, a0, 16
; RV32VB-NEXT: zext.h a1, a1
; RV32VB-NEXT: or a0, a1, a0
; RV32VB-NEXT: vmv.x.s a1, v9
; RV32VB-NEXT: vmv.v.i v8, 0
; RV32VB-NEXT: zext.h a1, a1
; RV32VB-NEXT: vsetvli zero, zero, e32, m2, tu, ma
; RV32VB-NEXT: vmv.s.x v8, a0
; RV32VB-NEXT: vmv.s.x v10, a1
; RV32VB-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV32VB-NEXT: vslideup.vi v8, v10, 1
; RV32VB-NEXT: ret
;
; RV32VB-PACK-LABEL: PR159294:
; RV32VB-PACK: # %bb.0: # %entry
; RV32VB-PACK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32VB-PACK-NEXT: vmv.x.s a0, v8
; RV32VB-PACK-NEXT: vmv.x.s a1, v10
; RV32VB-PACK-NEXT: vmv.x.s a2, v9
; RV32VB-PACK-NEXT: pack a0, a1, a0
; RV32VB-PACK-NEXT: pack a1, a0, a0
; RV32VB-PACK-NEXT: vmv.v.x v8, a1
; RV32VB-PACK-NEXT: pack a1, a2, a0
; RV32VB-PACK-NEXT: vsetvli zero, zero, e32, m2, tu, ma
; RV32VB-PACK-NEXT: vmv.s.x v8, a0
; RV32VB-PACK-NEXT: vmv.s.x v10, a1
; RV32VB-PACK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV32VB-PACK-NEXT: vslideup.vi v8, v10, 1
; RV32VB-PACK-NEXT: ret
;
; RV64V-ONLY-LABEL: PR159294:
; RV64V-ONLY: # %bb.0: # %entry
; RV64V-ONLY-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64V-ONLY-NEXT: vmv.x.s a0, v8
; RV64V-ONLY-NEXT: vmv.x.s a1, v9
; RV64V-ONLY-NEXT: vmv.x.s a2, v10
; RV64V-ONLY-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RV64V-ONLY-NEXT: vmv.v.x v8, a2
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1
; RV64V-ONLY-NEXT: vslidedown.vi v8, v8, 13
; RV64V-ONLY-NEXT: ret
;
; RVA22U64-LABEL: PR159294:
; RVA22U64: # %bb.0: # %entry
; RVA22U64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RVA22U64-NEXT: vmv.x.s a0, v8
; RVA22U64-NEXT: vmv.x.s a1, v10
; RVA22U64-NEXT: slli a0, a0, 16
; RVA22U64-NEXT: zext.h a1, a1
; RVA22U64-NEXT: or a0, a0, a1
; RVA22U64-NEXT: vmv.x.s a1, v9
; RVA22U64-NEXT: vmv.v.i v8, 0
; RVA22U64-NEXT: zext.h a1, a1
; RVA22U64-NEXT: vsetvli zero, zero, e32, m2, tu, ma
; RVA22U64-NEXT: vmv.s.x v8, a0
; RVA22U64-NEXT: vmv.s.x v10, a1
; RVA22U64-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RVA22U64-NEXT: vslideup.vi v8, v10, 1
; RVA22U64-NEXT: ret
;
; RVA22U64-PACK-LABEL: PR159294:
; RVA22U64-PACK: # %bb.0: # %entry
; RVA22U64-PACK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RVA22U64-PACK-NEXT: vmv.x.s a0, v8
; RVA22U64-PACK-NEXT: vmv.x.s a1, v10
; RVA22U64-PACK-NEXT: vmv.x.s a2, v9
; RVA22U64-PACK-NEXT: packw a0, a1, a0
; RVA22U64-PACK-NEXT: packw a1, a0, a0
; RVA22U64-PACK-NEXT: vmv.v.x v8, a1
; RVA22U64-PACK-NEXT: packw a1, a2, a0
; RVA22U64-PACK-NEXT: vsetvli zero, zero, e32, m2, tu, ma
; RVA22U64-PACK-NEXT: vmv.s.x v8, a0
; RVA22U64-PACK-NEXT: vmv.s.x v10, a1
; RVA22U64-PACK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RVA22U64-PACK-NEXT: vslideup.vi v8, v10, 1
; RVA22U64-PACK-NEXT: ret
;
; RV64ZVE32-LABEL: PR159294:
; RV64ZVE32: # %bb.0: # %entry
; RV64ZVE32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32-NEXT: vmv.x.s a0, v8
; RV64ZVE32-NEXT: vmv.x.s a1, v9
; RV64ZVE32-NEXT: vmv.x.s a2, v10
; RV64ZVE32-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; RV64ZVE32-NEXT: vmv.v.x v8, a2
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1
; RV64ZVE32-NEXT: vslidedown.vi v8, v8, 13
; RV64ZVE32-NEXT: ret
entry:
%vecext3 = extractelement <2 x i32> %a, i32 0
%conv4 = trunc i32 %vecext3 to i16
%vecinit5 = insertelement <16 x i16> <i16 0, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison>, i16 %conv4, i32 1
%vecext7 = extractelement <2 x i32> %b, i32 0
%conv8 = trunc i32 %vecext7 to i16
%vecinit9 = insertelement <16 x i16> %vecinit5, i16 %conv8, i32 2
%vecext59 = extractelement <2 x i32> %c, i32 0
%conv60 = trunc i32 %vecext59 to i16
%vecinit61 = insertelement <16 x i16> %vecinit9, i16 %conv60, i32 0
ret <16 x i16> %vecinit61
}
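; As PR159294, but the i16 source elements are zero-extended to i32.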
define <16 x i32> @PR159294_zext(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c) {
; RV32-LABEL: PR159294_zext:
; RV32: # %bb.0: # %entry
; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: vmv.x.s a1, v9
; RV32-NEXT: vmv.x.s a2, v10
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vmv.v.x v8, a2
; RV32-NEXT: lui a2, 16
; RV32-NEXT: vslide1down.vx v8, v8, a0
; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: vslidedown.vi v8, v8, 13
; RV32-NEXT: addi a2, a2, -1
; RV32-NEXT: vand.vx v8, v8, a2
; RV32-NEXT: ret
;
; RV64V-ONLY-LABEL: PR159294_zext:
; RV64V-ONLY: # %bb.0: # %entry
; RV64V-ONLY-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64V-ONLY-NEXT: vmv.x.s a0, v8
; RV64V-ONLY-NEXT: lui a1, 16
; RV64V-ONLY-NEXT: vmv.x.s a2, v9
; RV64V-ONLY-NEXT: vmv.x.s a3, v10
; RV64V-ONLY-NEXT: addi a1, a1, -1
; RV64V-ONLY-NEXT: and a0, a0, a1
; RV64V-ONLY-NEXT: and a2, a2, a1
; RV64V-ONLY-NEXT: and a1, a3, a1
; RV64V-ONLY-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV64V-ONLY-NEXT: vmv.v.x v8, a1
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
; RV64V-ONLY-NEXT: vslidedown.vi v8, v8, 13
; RV64V-ONLY-NEXT: ret
;
; RVA22U64-LABEL: PR159294_zext:
; RVA22U64: # %bb.0: # %entry
; RVA22U64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RVA22U64-NEXT: vmv.x.s a0, v8
; RVA22U64-NEXT: vmv.x.s a1, v10
; RVA22U64-NEXT: slli a0, a0, 48
; RVA22U64-NEXT: zext.h a1, a1
; RVA22U64-NEXT: srli a0, a0, 16
; RVA22U64-NEXT: or a0, a0, a1
; RVA22U64-NEXT: vmv.x.s a1, v9
; RVA22U64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RVA22U64-NEXT: vmv.v.i v8, 0
; RVA22U64-NEXT: zext.h a1, a1
; RVA22U64-NEXT: vsetvli zero, zero, e64, m4, tu, ma
; RVA22U64-NEXT: vmv.s.x v8, a0
; RVA22U64-NEXT: vmv.s.x v12, a1
; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, tu, ma
; RVA22U64-NEXT: vslideup.vi v8, v12, 1
; RVA22U64-NEXT: ret
;
; RVA22U64-PACK-LABEL: PR159294_zext:
; RVA22U64-PACK: # %bb.0: # %entry
; RVA22U64-PACK-NEXT: vsetivli zero, 1, e16, m2, ta, ma
; RVA22U64-PACK-NEXT: vmv1r.v v12, v9
; RVA22U64-PACK-NEXT: vmv.x.s a0, v8
; RVA22U64-PACK-NEXT: vmv.x.s a1, v10
; RVA22U64-PACK-NEXT: pack a2, a0, a0
; RVA22U64-PACK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RVA22U64-PACK-NEXT: vmv.v.x v8, a2
; RVA22U64-PACK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RVA22U64-PACK-NEXT: vmv.x.s a2, v12
; RVA22U64-PACK-NEXT: zext.h a0, a0
; RVA22U64-PACK-NEXT: zext.h a1, a1
; RVA22U64-PACK-NEXT: zext.h a2, a2
; RVA22U64-PACK-NEXT: pack a0, a1, a0
; RVA22U64-PACK-NEXT: pack a1, a2, a0
; RVA22U64-PACK-NEXT: vsetvli zero, zero, e64, m4, tu, ma
; RVA22U64-PACK-NEXT: vmv.s.x v8, a0
; RVA22U64-PACK-NEXT: vmv.s.x v12, a1
; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, tu, ma
; RVA22U64-PACK-NEXT: vslideup.vi v8, v12, 1
; RVA22U64-PACK-NEXT: ret
;
; RV64ZVE32-LABEL: PR159294_zext:
; RV64ZVE32: # %bb.0: # %entry
; RV64ZVE32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32-NEXT: vmv.x.s a0, v8
; RV64ZVE32-NEXT: lui a1, 16
; RV64ZVE32-NEXT: vmv.x.s a2, v9
; RV64ZVE32-NEXT: vmv.x.s a3, v10
; RV64ZVE32-NEXT: addi a1, a1, -1
; RV64ZVE32-NEXT: and a0, a0, a1
; RV64ZVE32-NEXT: and a2, a2, a1
; RV64ZVE32-NEXT: and a1, a3, a1
; RV64ZVE32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV64ZVE32-NEXT: vmv.v.x v8, a1
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
; RV64ZVE32-NEXT: vslidedown.vi v8, v8, 13
; RV64ZVE32-NEXT: ret
entry:
%vecext3 = extractelement <2 x i16> %a, i32 0
%conv4 = zext i16 %vecext3 to i32
%vecinit5 = insertelement <16 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>, i32 %conv4, i32 1
%vecext7 = extractelement <2 x i16> %b, i32 0
%conv8 = zext i16 %vecext7 to i32
%vecinit9 = insertelement <16 x i32> %vecinit5, i32 %conv8, i32 2
%vecext59 = extractelement <2 x i16> %c, i32 0
%conv60 = zext i16 %vecext59 to i32
%vecinit61 = insertelement <16 x i32> %vecinit9, i32 %conv60, i32 0
ret <16 x i32> %vecinit61
}
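; As PR159294, but the i16 source elements are sign-extended to i32.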
define <16 x i32> @PR159294_sext(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c) {
; RV32-LABEL: PR159294_sext:
; RV32: # %bb.0: # %entry
; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: vmv.x.s a1, v9
; RV32-NEXT: vmv.x.s a2, v10
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vmv.v.x v8, a2
; RV32-NEXT: vslide1down.vx v8, v8, a0
; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: vslidedown.vi v8, v8, 13
; RV32-NEXT: ret
;
; RV64V-ONLY-LABEL: PR159294_sext:
; RV64V-ONLY: # %bb.0: # %entry
; RV64V-ONLY-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64V-ONLY-NEXT: vmv.x.s a0, v8
; RV64V-ONLY-NEXT: vmv.x.s a1, v9
; RV64V-ONLY-NEXT: vmv.x.s a2, v10
; RV64V-ONLY-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV64V-ONLY-NEXT: vmv.v.x v8, a2
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0
; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1
; RV64V-ONLY-NEXT: vslidedown.vi v8, v8, 13
; RV64V-ONLY-NEXT: ret
;
; RVA22U64-LABEL: PR159294_sext:
; RVA22U64: # %bb.0: # %entry
; RVA22U64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RVA22U64-NEXT: vmv.x.s a0, v8
; RVA22U64-NEXT: vmv.x.s a1, v10
; RVA22U64-NEXT: slli a0, a0, 32
; RVA22U64-NEXT: add.uw a0, a1, a0
; RVA22U64-NEXT: vmv.x.s a1, v9
; RVA22U64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RVA22U64-NEXT: vmv.v.i v8, 0
; RVA22U64-NEXT: zext.w a1, a1
; RVA22U64-NEXT: vsetvli zero, zero, e64, m4, tu, ma
; RVA22U64-NEXT: vmv.s.x v8, a0
; RVA22U64-NEXT: vmv.s.x v12, a1
; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, tu, ma
; RVA22U64-NEXT: vslideup.vi v8, v12, 1
; RVA22U64-NEXT: ret
;
; RVA22U64-PACK-LABEL: PR159294_sext:
; RVA22U64-PACK: # %bb.0: # %entry
; RVA22U64-PACK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RVA22U64-PACK-NEXT: vmv.x.s a0, v8
; RVA22U64-PACK-NEXT: vmv.x.s a1, v10
; RVA22U64-PACK-NEXT: vmv.x.s a2, v9
; RVA22U64-PACK-NEXT: pack a0, a1, a0
; RVA22U64-PACK-NEXT: pack a1, a0, a0
; RVA22U64-PACK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RVA22U64-PACK-NEXT: vmv.v.x v8, a1
; RVA22U64-PACK-NEXT: pack a1, a2, a0
; RVA22U64-PACK-NEXT: vsetvli zero, zero, e64, m4, tu, ma
; RVA22U64-PACK-NEXT: vmv.s.x v8, a0
; RVA22U64-PACK-NEXT: vmv.s.x v12, a1
; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, tu, ma
; RVA22U64-PACK-NEXT: vslideup.vi v8, v12, 1
; RVA22U64-PACK-NEXT: ret
;
; RV64ZVE32-LABEL: PR159294_sext:
; RV64ZVE32: # %bb.0: # %entry
; RV64ZVE32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32-NEXT: vmv.x.s a0, v8
; RV64ZVE32-NEXT: vmv.x.s a1, v9
; RV64ZVE32-NEXT: vmv.x.s a2, v10
; RV64ZVE32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV64ZVE32-NEXT: vmv.v.x v8, a2
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0
; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1
; RV64ZVE32-NEXT: vslidedown.vi v8, v8, 13
; RV64ZVE32-NEXT: ret
entry:
%vecext3 = extractelement <2 x i16> %a, i32 0
%conv4 = sext i16 %vecext3 to i32
%vecinit5 = insertelement <16 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>, i32 %conv4, i32 1
%vecext7 = extractelement <2 x i16> %b, i32 0
%conv8 = sext i16 %vecext7 to i32
%vecinit9 = insertelement <16 x i32> %vecinit5, i32 %conv8, i32 2
%vecext59 = extractelement <2 x i16> %c, i32 0
%conv60 = sext i16 %vecext59 to i32
%vecinit61 = insertelement <16 x i32> %vecinit9, i32 %conv60, i32 0
ret <16 x i32> %vecinit61
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; RV64: {{.*}}