; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32V
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+zve32f,+zvl128b -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVE32F
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+zve32f,+zvl128b -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVE32F
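; Note on the configurations exercised below: the +v runs lower these masked
; gathers to vluxei32/vluxei64 indexed loads. The RV64ZVE32F run cannot hold
; 64-bit pointers or indices in vector registers, so it scalarizes each gather
; into conditional scalar loads followed by vslideup insertions, while
; RV32ZVE32F can still use vluxei32 but returns i64-extended results through
; scalar stores.
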
declare <1 x i8> @llvm.masked.gather.v1i8.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i8>)
define <1 x i8> @mgather_v1i8(<1 x ptr> %ptrs, <1 x i1> %m, <1 x i8> %passthru) {
; RV32V-LABEL: mgather_v1i8:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vmv1r.v v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v1i8:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v1i8:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vmv1r.v v8, v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v1i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vfirst.m a1, v0
; RV64ZVE32F-NEXT: bnez a1, .LBB0_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vle8.v v8, (a0)
; RV64ZVE32F-NEXT: .LBB0_2: # %else
; RV64ZVE32F-NEXT: ret
%v = call <1 x i8> @llvm.masked.gather.v1i8.v1p0(<1 x ptr> %ptrs, i32 1, <1 x i1> %m, <1 x i8> %passthru)
ret <1 x i8> %v
}
declare <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i8>)
define <2 x i8> @mgather_v2i8(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) {
; RV32V-LABEL: mgather_v2i8:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vmv1r.v v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i8:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i8:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vmv1r.v v8, v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB1_3
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB1_4
; RV64ZVE32F-NEXT: .LBB1_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB1_3: # %cond.load
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB1_2
; RV64ZVE32F-NEXT: .LBB1_4: # %cond.load1
; RV64ZVE32F-NEXT: lbu a0, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: ret
%v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru)
ret <2 x i8> %v
}
define <2 x i16> @mgather_v2i8_sextload_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) {
; RV32V-LABEL: mgather_v2i8_sextload_v2i16:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; RV32V-NEXT: vsext.vf2 v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i8_sextload_v2i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; RV64V-NEXT: vsext.vf2 v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i8_sextload_v2i16:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV32ZVE32F-NEXT: vsext.vf2 v8, v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i8_sextload_v2i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB2_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB2_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB2_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lbu a0, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB2_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vsext.vf2 v9, v8
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
%v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru)
%ev = sext <2 x i8> %v to <2 x i16>
ret <2 x i16> %ev
}
define <2 x i16> @mgather_v2i8_zextload_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) {
; RV32V-LABEL: mgather_v2i8_zextload_v2i16:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; RV32V-NEXT: vzext.vf2 v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i8_zextload_v2i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; RV64V-NEXT: vzext.vf2 v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i8_zextload_v2i16:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV32ZVE32F-NEXT: vzext.vf2 v8, v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i8_zextload_v2i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB3_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB3_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB3_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lbu a0, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB3_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vzext.vf2 v9, v8
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
%v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru)
%ev = zext <2 x i8> %v to <2 x i16>
ret <2 x i16> %ev
}
define <2 x i32> @mgather_v2i8_sextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) {
; RV32V-LABEL: mgather_v2i8_sextload_v2i32:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV32V-NEXT: vsext.vf4 v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i8_sextload_v2i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV64V-NEXT: vsext.vf4 v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i8_sextload_v2i32:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vsext.vf4 v8, v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i8_sextload_v2i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB4_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB4_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB4_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lbu a0, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB4_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vsext.vf4 v9, v8
; RV64ZVE32F-NEXT: vmv.v.v v8, v9
; RV64ZVE32F-NEXT: ret
%v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru)
%ev = sext <2 x i8> %v to <2 x i32>
ret <2 x i32> %ev
}
define <2 x i32> @mgather_v2i8_zextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) {
; RV32V-LABEL: mgather_v2i8_zextload_v2i32:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV32V-NEXT: vzext.vf4 v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i8_zextload_v2i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV64V-NEXT: vzext.vf4 v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i8_zextload_v2i32:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vzext.vf4 v8, v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i8_zextload_v2i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB5_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB5_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB5_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lbu a0, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB5_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vzext.vf4 v9, v8
; RV64ZVE32F-NEXT: vmv.v.v v8, v9
; RV64ZVE32F-NEXT: ret
%v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru)
%ev = zext <2 x i8> %v to <2 x i32>
ret <2 x i32> %ev
}
define <2 x i64> @mgather_v2i8_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) {
; RV32V-LABEL: mgather_v2i8_sextload_v2i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV32V-NEXT: vsext.vf8 v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i8_sextload_v2i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV64V-NEXT: vsext.vf8 v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i8_sextload_v2i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: srai a2, a1, 31
; RV32ZVE32F-NEXT: vmv.x.s a3, v9
; RV32ZVE32F-NEXT: srai a4, a3, 31
; RV32ZVE32F-NEXT: sw a3, 0(a0)
; RV32ZVE32F-NEXT: sw a1, 8(a0)
; RV32ZVE32F-NEXT: sw a4, 4(a0)
; RV32ZVE32F-NEXT: sw a2, 12(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i8_sextload_v2i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB6_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB6_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB6_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lbu a0, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB6_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: ret
%v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru)
%ev = sext <2 x i8> %v to <2 x i64>
ret <2 x i64> %ev
}
define <2 x i64> @mgather_v2i8_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) {
; RV32V-LABEL: mgather_v2i8_zextload_v2i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV32V-NEXT: vzext.vf8 v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i8_zextload_v2i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV64V-NEXT: vzext.vf8 v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i8_zextload_v2i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: andi a1, a1, 255
; RV32ZVE32F-NEXT: vmv.x.s a2, v9
; RV32ZVE32F-NEXT: andi a2, a2, 255
; RV32ZVE32F-NEXT: sw zero, 12(a0)
; RV32ZVE32F-NEXT: sw zero, 4(a0)
; RV32ZVE32F-NEXT: sw a2, 0(a0)
; RV32ZVE32F-NEXT: sw a1, 8(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i8_zextload_v2i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB7_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB7_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB7_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lbu a0, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB7_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: andi a0, a0, 255
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: andi a1, a1, 255
; RV64ZVE32F-NEXT: ret
%v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru)
%ev = zext <2 x i8> %v to <2 x i64>
ret <2 x i64> %ev
}
declare <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i8>)
define <4 x i8> @mgather_v4i8(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i8> %passthru) {
; RV32-LABEL: mgather_v4i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_v4i8:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v4i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB8_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB8_6
; RV64ZVE32F-NEXT: .LBB8_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB8_7
; RV64ZVE32F-NEXT: .LBB8_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB8_8
; RV64ZVE32F-NEXT: .LBB8_4: # %else8
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB8_5: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB8_2
; RV64ZVE32F-NEXT: .LBB8_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB8_3
; RV64ZVE32F-NEXT: .LBB8_7: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: beqz a1, .LBB8_4
; RV64ZVE32F-NEXT: .LBB8_8: # %cond.load7
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: ret
%v = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %m, <4 x i8> %passthru)
ret <4 x i8> %v
}
define <4 x i8> @mgather_truemask_v4i8(<4 x ptr> %ptrs, <4 x i8> %passthru) {
; RV32-LABEL: mgather_truemask_v4i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; RV32-NEXT: vluxei32.v v9, (zero), v8
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_truemask_v4i8:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; RV64V-NEXT: vluxei64.v v10, (zero), v8
; RV64V-NEXT: vmv1r.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_truemask_v4i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a1, 8(a0)
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: ld a3, 24(a0)
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: lbu a1, 0(a1)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: lbu a3, 0(a3)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vlse8.v v8, (a0), zero
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
; RV64ZVE32F-NEXT: ret
%v = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 1), <4 x i8> %passthru)
ret <4 x i8> %v
}
define <4 x i8> @mgather_falsemask_v4i8(<4 x ptr> %ptrs, <4 x i8> %passthru) {
; RV32-LABEL: mgather_falsemask_v4i8:
; RV32: # %bb.0:
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_falsemask_v4i8:
; RV64V: # %bb.0:
; RV64V-NEXT: vmv1r.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_falsemask_v4i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ret
%v = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> zeroinitializer, <4 x i8> %passthru)
ret <4 x i8> %v
}
declare <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i8>)
define <8 x i8> @mgather_v8i8(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i8> %passthru) {
; RV32-LABEL: mgather_v8i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT: vmv1r.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_v8i8:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v12
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v8i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB11_9
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB11_10
; RV64ZVE32F-NEXT: .LBB11_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB11_11
; RV64ZVE32F-NEXT: .LBB11_3: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB11_12
; RV64ZVE32F-NEXT: .LBB11_4: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB11_13
; RV64ZVE32F-NEXT: .LBB11_5: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB11_14
; RV64ZVE32F-NEXT: .LBB11_6: # %else14
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: bnez a2, .LBB11_15
; RV64ZVE32F-NEXT: .LBB11_7: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB11_16
; RV64ZVE32F-NEXT: .LBB11_8: # %else20
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB11_9: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB11_2
; RV64ZVE32F-NEXT: .LBB11_10: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB11_3
; RV64ZVE32F-NEXT: .LBB11_11: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB11_4
; RV64ZVE32F-NEXT: .LBB11_12: # %cond.load7
; RV64ZVE32F-NEXT: ld a2, 24(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB11_5
; RV64ZVE32F-NEXT: .LBB11_13: # %cond.load10
; RV64ZVE32F-NEXT: ld a2, 32(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB11_6
; RV64ZVE32F-NEXT: .LBB11_14: # %cond.load13
; RV64ZVE32F-NEXT: ld a2, 40(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: beqz a2, .LBB11_7
; RV64ZVE32F-NEXT: .LBB11_15: # %cond.load16
; RV64ZVE32F-NEXT: ld a2, 48(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB11_8
; RV64ZVE32F-NEXT: .LBB11_16: # %cond.load19
; RV64ZVE32F-NEXT: ld a0, 56(a0)
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7
; RV64ZVE32F-NEXT: ret
%v = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> %ptrs, i32 1, <8 x i1> %m, <8 x i8> %passthru)
ret <8 x i8> %v
}
define <8 x i8> @mgather_baseidx_v8i8(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i8> %passthru) {
; RV32-LABEL: mgather_baseidx_v8i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v10, v8
; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v8i8:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v8
; RV64V-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
; RV64V-NEXT: vmv1r.v v8, v9
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB12_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: .LBB12_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB12_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
; RV64ZVE32F-NEXT: .LBB12_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB12_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB12_13
; RV64ZVE32F-NEXT: .LBB12_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB12_14
; RV64ZVE32F-NEXT: .LBB12_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB12_9
; RV64ZVE32F-NEXT: .LBB12_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
; RV64ZVE32F-NEXT: .LBB12_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB12_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB12_16
; RV64ZVE32F-NEXT: .LBB12_11: # %else20
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB12_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB12_6
; RV64ZVE32F-NEXT: .LBB12_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB12_7
; RV64ZVE32F-NEXT: .LBB12_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB12_8
; RV64ZVE32F-NEXT: j .LBB12_9
; RV64ZVE32F-NEXT: .LBB12_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB12_11
; RV64ZVE32F-NEXT: .LBB12_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i8, ptr %base, <8 x i8> %idxs
%v = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> %ptrs, i32 1, <8 x i1> %m, <8 x i8> %passthru)
ret <8 x i8> %v
}
declare <1 x i16> @llvm.masked.gather.v1i16.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i16>)
define <1 x i16> @mgather_v1i16(<1 x ptr> %ptrs, <1 x i1> %m, <1 x i16> %passthru) {
; RV32V-LABEL: mgather_v1i16:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vmv1r.v v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v1i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v1i16:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vmv1r.v v8, v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v1i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vfirst.m a1, v0
; RV64ZVE32F-NEXT: bnez a1, .LBB13_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vle16.v v8, (a0)
; RV64ZVE32F-NEXT: .LBB13_2: # %else
; RV64ZVE32F-NEXT: ret
%v = call <1 x i16> @llvm.masked.gather.v1i16.v1p0(<1 x ptr> %ptrs, i32 2, <1 x i1> %m, <1 x i16> %passthru)
ret <1 x i16> %v
}
declare <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i16>)
define <2 x i16> @mgather_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %passthru) {
; RV32V-LABEL: mgather_v2i16:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vmv1r.v v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i16:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vmv1r.v v8, v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB14_3
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB14_4
; RV64ZVE32F-NEXT: .LBB14_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB14_3: # %cond.load
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB14_2
; RV64ZVE32F-NEXT: .LBB14_4: # %cond.load1
; RV64ZVE32F-NEXT: lh a0, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: ret
%v = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x i16> %passthru)
ret <2 x i16> %v
}
define <2 x i32> @mgather_v2i16_sextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %passthru) {
; RV32V-LABEL: mgather_v2i16_sextload_v2i32:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV32V-NEXT: vsext.vf2 v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i16_sextload_v2i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV64V-NEXT: vsext.vf2 v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i16_sextload_v2i32:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vsext.vf2 v8, v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i16_sextload_v2i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB15_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB15_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB15_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lh a0, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB15_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vsext.vf2 v9, v8
; RV64ZVE32F-NEXT: vmv.v.v v8, v9
; RV64ZVE32F-NEXT: ret
%v = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x i16> %passthru)
%ev = sext <2 x i16> %v to <2 x i32>
ret <2 x i32> %ev
}
define <2 x i32> @mgather_v2i16_zextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %passthru) {
; RV32V-LABEL: mgather_v2i16_zextload_v2i32:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV32V-NEXT: vzext.vf2 v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i16_zextload_v2i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV64V-NEXT: vzext.vf2 v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i16_zextload_v2i32:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vzext.vf2 v8, v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i16_zextload_v2i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB16_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB16_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB16_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lh a0, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB16_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vzext.vf2 v9, v8
; RV64ZVE32F-NEXT: vmv.v.v v8, v9
; RV64ZVE32F-NEXT: ret
%v = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x i16> %passthru)
%ev = zext <2 x i16> %v to <2 x i32>
ret <2 x i32> %ev
}
define <2 x i64> @mgather_v2i16_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %passthru) {
; RV32V-LABEL: mgather_v2i16_sextload_v2i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV32V-NEXT: vsext.vf4 v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i16_sextload_v2i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV64V-NEXT: vsext.vf4 v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i16_sextload_v2i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: srai a2, a1, 31
; RV32ZVE32F-NEXT: vmv.x.s a3, v9
; RV32ZVE32F-NEXT: srai a4, a3, 31
; RV32ZVE32F-NEXT: sw a3, 0(a0)
; RV32ZVE32F-NEXT: sw a1, 8(a0)
; RV32ZVE32F-NEXT: sw a4, 4(a0)
; RV32ZVE32F-NEXT: sw a2, 12(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i16_sextload_v2i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB17_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB17_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB17_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lh a0, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB17_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: ret
%v = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x i16> %passthru)
%ev = sext <2 x i16> %v to <2 x i64>
ret <2 x i64> %ev
}
define <2 x i64> @mgather_v2i16_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %passthru) {
; RV32V-LABEL: mgather_v2i16_zextload_v2i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV32V-NEXT: vzext.vf4 v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i16_zextload_v2i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV64V-NEXT: vzext.vf4 v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i16_zextload_v2i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: lui a2, 16
; RV32ZVE32F-NEXT: addi a2, a2, -1
; RV32ZVE32F-NEXT: and a1, a1, a2
; RV32ZVE32F-NEXT: vmv.x.s a3, v9
; RV32ZVE32F-NEXT: and a2, a3, a2
; RV32ZVE32F-NEXT: sw zero, 12(a0)
; RV32ZVE32F-NEXT: sw zero, 4(a0)
; RV32ZVE32F-NEXT: sw a2, 0(a0)
; RV32ZVE32F-NEXT: sw a1, 8(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i16_zextload_v2i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB18_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB18_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB18_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lh a0, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB18_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: lui a1, 16
; RV64ZVE32F-NEXT: addiw a1, a1, -1
; RV64ZVE32F-NEXT: and a0, a0, a1
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: and a1, a2, a1
; RV64ZVE32F-NEXT: ret
%v = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x i16> %passthru)
%ev = zext <2 x i16> %v to <2 x i64>
ret <2 x i64> %ev
}
declare <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i16>)
define <4 x i16> @mgather_v4i16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i16> %passthru) {
; RV32-LABEL: mgather_v4i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_v4i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v4i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB19_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB19_6
; RV64ZVE32F-NEXT: .LBB19_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB19_7
; RV64ZVE32F-NEXT: .LBB19_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB19_8
; RV64ZVE32F-NEXT: .LBB19_4: # %else8
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB19_5: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB19_2
; RV64ZVE32F-NEXT: .LBB19_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB19_3
; RV64ZVE32F-NEXT: .LBB19_7: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: beqz a1, .LBB19_4
; RV64ZVE32F-NEXT: .LBB19_8: # %cond.load7
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: ret
%v = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> %m, <4 x i16> %passthru)
ret <4 x i16> %v
}
define <4 x i16> @mgather_truemask_v4i16(<4 x ptr> %ptrs, <4 x i16> %passthru) {
; RV32-LABEL: mgather_truemask_v4i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV32-NEXT: vluxei32.v v9, (zero), v8
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_truemask_v4i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV64V-NEXT: vluxei64.v v10, (zero), v8
; RV64V-NEXT: vmv1r.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_truemask_v4i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a1, 8(a0)
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: ld a3, 24(a0)
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: lh a1, 0(a1)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: lh a3, 0(a3)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
; RV64ZVE32F-NEXT: ret
%v = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1), <4 x i16> %passthru)
ret <4 x i16> %v
}
define <4 x i16> @mgather_falsemask_v4i16(<4 x ptr> %ptrs, <4 x i16> %passthru) {
; RV32-LABEL: mgather_falsemask_v4i16:
; RV32: # %bb.0:
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_falsemask_v4i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vmv1r.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_falsemask_v4i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ret
%v = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> zeroinitializer, <4 x i16> %passthru)
ret <4 x i16> %v
}
declare <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i16>)
define <8 x i16> @mgather_v8i16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i16> %passthru) {
; RV32-LABEL: mgather_v8i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_v8i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v8i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB22_9
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB22_10
; RV64ZVE32F-NEXT: .LBB22_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB22_11
; RV64ZVE32F-NEXT: .LBB22_3: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB22_12
; RV64ZVE32F-NEXT: .LBB22_4: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB22_13
; RV64ZVE32F-NEXT: .LBB22_5: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB22_14
; RV64ZVE32F-NEXT: .LBB22_6: # %else14
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: bnez a2, .LBB22_15
; RV64ZVE32F-NEXT: .LBB22_7: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB22_16
; RV64ZVE32F-NEXT: .LBB22_8: # %else20
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB22_9: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB22_2
; RV64ZVE32F-NEXT: .LBB22_10: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB22_3
; RV64ZVE32F-NEXT: .LBB22_11: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB22_4
; RV64ZVE32F-NEXT: .LBB22_12: # %cond.load7
; RV64ZVE32F-NEXT: ld a2, 24(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB22_5
; RV64ZVE32F-NEXT: .LBB22_13: # %cond.load10
; RV64ZVE32F-NEXT: ld a2, 32(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB22_6
; RV64ZVE32F-NEXT: .LBB22_14: # %cond.load13
; RV64ZVE32F-NEXT: ld a2, 40(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: beqz a2, .LBB22_7
; RV64ZVE32F-NEXT: .LBB22_15: # %cond.load16
; RV64ZVE32F-NEXT: ld a2, 48(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB22_8
; RV64ZVE32F-NEXT: .LBB22_16: # %cond.load19
; RV64ZVE32F-NEXT: ld a0, 56(a0)
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7
; RV64ZVE32F-NEXT: ret
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru)
ret <8 x i16> %v
}
define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i16> %passthru) {
; RV32-LABEL: mgather_baseidx_v8i8_v8i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v10, v8
; RV32-NEXT: vadd.vv v10, v10, v10
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
; RV32-NEXT: vmv.v.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v8i8_v8i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v8
; RV64V-NEXT: vadd.vv v12, v12, v12
; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
; RV64V-NEXT: vmv.v.v v8, v9
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB23_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: .LBB23_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB23_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
; RV64ZVE32F-NEXT: .LBB23_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB23_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB23_13
; RV64ZVE32F-NEXT: .LBB23_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB23_14
; RV64ZVE32F-NEXT: .LBB23_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB23_9
; RV64ZVE32F-NEXT: .LBB23_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
; RV64ZVE32F-NEXT: .LBB23_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB23_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB23_16
; RV64ZVE32F-NEXT: .LBB23_11: # %else20
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB23_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB23_6
; RV64ZVE32F-NEXT: .LBB23_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB23_7
; RV64ZVE32F-NEXT: .LBB23_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB23_8
; RV64ZVE32F-NEXT: j .LBB23_9
; RV64ZVE32F-NEXT: .LBB23_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB23_11
; RV64ZVE32F-NEXT: .LBB23_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 1
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i8> %idxs
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru)
ret <8 x i16> %v
}
define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i16> %passthru) {
; RV32-LABEL: mgather_baseidx_sext_v8i8_v8i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v10, v8
; RV32-NEXT: vadd.vv v10, v10, v10
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
; RV32-NEXT: vmv.v.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v8
; RV64V-NEXT: vadd.vv v12, v12, v12
; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
; RV64V-NEXT: vmv.v.v v8, v9
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB24_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: .LBB24_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB24_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
; RV64ZVE32F-NEXT: .LBB24_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB24_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB24_13
; RV64ZVE32F-NEXT: .LBB24_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB24_14
; RV64ZVE32F-NEXT: .LBB24_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB24_9
; RV64ZVE32F-NEXT: .LBB24_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
; RV64ZVE32F-NEXT: .LBB24_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB24_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB24_16
; RV64ZVE32F-NEXT: .LBB24_11: # %else20
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB24_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB24_6
; RV64ZVE32F-NEXT: .LBB24_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB24_7
; RV64ZVE32F-NEXT: .LBB24_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB24_8
; RV64ZVE32F-NEXT: j .LBB24_9
; RV64ZVE32F-NEXT: .LBB24_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB24_11
; RV64ZVE32F-NEXT: .LBB24_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 1
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
%eidxs = sext <8 x i8> %idxs to <8 x i16>
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru)
ret <8 x i16> %v
}
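; Same as above, but the <8 x i8> indices are zero-extended to i16 first.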
define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i16> %passthru) {
; RV32-LABEL: mgather_baseidx_zext_v8i8_v8i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT: vwaddu.vv v10, v8, v8
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; RV32-NEXT: vluxei16.v v9, (a0), v10, v0.t
; RV32-NEXT: vmv.v.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64V-NEXT: vwaddu.vv v10, v8, v8
; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; RV64V-NEXT: vluxei16.v v9, (a0), v10, v0.t
; RV64V-NEXT: vmv.v.v v8, v9
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB25_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: .LBB25_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB25_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
; RV64ZVE32F-NEXT: .LBB25_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB25_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB25_13
; RV64ZVE32F-NEXT: .LBB25_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB25_14
; RV64ZVE32F-NEXT: .LBB25_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB25_9
; RV64ZVE32F-NEXT: .LBB25_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
; RV64ZVE32F-NEXT: .LBB25_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB25_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB25_16
; RV64ZVE32F-NEXT: .LBB25_11: # %else20
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB25_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB25_6
; RV64ZVE32F-NEXT: .LBB25_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB25_7
; RV64ZVE32F-NEXT: .LBB25_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB25_8
; RV64ZVE32F-NEXT: j .LBB25_9
; RV64ZVE32F-NEXT: .LBB25_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB25_11
; RV64ZVE32F-NEXT: .LBB25_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: andi a1, a1, 255
; RV64ZVE32F-NEXT: slli a1, a1, 1
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
%eidxs = zext <8 x i8> %idxs to <8 x i16>
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru)
ret <8 x i16> %v
}
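; Masked gather of <8 x i16> indexed directly by <8 x i16> offsets from a
; common base pointer.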
define <8 x i16> @mgather_baseidx_v8i16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i16> %passthru) {
; RV32-LABEL: mgather_baseidx_v8i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV32-NEXT: vwadd.vv v10, v8, v8
; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
; RV32-NEXT: vmv.v.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v8i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf4 v12, v8
; RV64V-NEXT: vadd.vv v12, v12, v12
; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
; RV64V-NEXT: vmv.v.v v8, v9
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB26_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: .LBB26_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB26_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
; RV64ZVE32F-NEXT: .LBB26_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB26_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB26_13
; RV64ZVE32F-NEXT: .LBB26_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB26_14
; RV64ZVE32F-NEXT: .LBB26_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB26_9
; RV64ZVE32F-NEXT: .LBB26_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
; RV64ZVE32F-NEXT: .LBB26_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB26_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB26_16
; RV64ZVE32F-NEXT: .LBB26_11: # %else20
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB26_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB26_6
; RV64ZVE32F-NEXT: .LBB26_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB26_7
; RV64ZVE32F-NEXT: .LBB26_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB26_8
; RV64ZVE32F-NEXT: j .LBB26_9
; RV64ZVE32F-NEXT: .LBB26_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB26_11
; RV64ZVE32F-NEXT: .LBB26_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 1
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %idxs
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru)
ret <8 x i16> %v
}
declare <1 x i32> @llvm.masked.gather.v1i32.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i32>)
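; Single-element i32 gather.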
define <1 x i32> @mgather_v1i32(<1 x ptr> %ptrs, <1 x i1> %m, <1 x i32> %passthru) {
; RV32V-LABEL: mgather_v1i32:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 1, e32, mf2, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vmv1r.v v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v1i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 1, e32, mf2, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v1i32:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vmv.v.v v8, v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v1i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vfirst.m a1, v0
; RV64ZVE32F-NEXT: bnez a1, .LBB27_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vle32.v v8, (a0)
; RV64ZVE32F-NEXT: .LBB27_2: # %else
; RV64ZVE32F-NEXT: ret
%v = call <1 x i32> @llvm.masked.gather.v1i32.v1p0(<1 x ptr> %ptrs, i32 4, <1 x i1> %m, <1 x i32> %passthru)
ret <1 x i32> %v
}
declare <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i32>)
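; Two-element i32 gather.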
define <2 x i32> @mgather_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i32> %passthru) {
; RV32V-LABEL: mgather_v2i32:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vmv1r.v v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i32:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vmv.v.v v8, v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB28_3
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB28_4
; RV64ZVE32F-NEXT: .LBB28_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB28_3: # %cond.load
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB28_2
; RV64ZVE32F-NEXT: .LBB28_4: # %cond.load1
; RV64ZVE32F-NEXT: lw a0, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: ret
%v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %m, <2 x i32> %passthru)
ret <2 x i32> %v
}
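; Two-element i32 gather whose result is sign-extended to <2 x i64>.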
define <2 x i64> @mgather_v2i32_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i32> %passthru) {
; RV32V-LABEL: mgather_v2i32_sextload_v2i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV32V-NEXT: vsext.vf2 v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i32_sextload_v2i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV64V-NEXT: vsext.vf2 v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i32_sextload_v2i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: srai a1, a1, 31
; RV32ZVE32F-NEXT: vmv.x.s a2, v9
; RV32ZVE32F-NEXT: srai a2, a2, 31
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vse32.v v9, (a0)
; RV32ZVE32F-NEXT: addi a3, a0, 8
; RV32ZVE32F-NEXT: vse32.v v8, (a3)
; RV32ZVE32F-NEXT: sw a2, 4(a0)
; RV32ZVE32F-NEXT: sw a1, 12(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i32_sextload_v2i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB29_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB29_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB29_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lw a0, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB29_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: ret
%v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %m, <2 x i32> %passthru)
%ev = sext <2 x i32> %v to <2 x i64>
ret <2 x i64> %ev
}
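; Two-element i32 gather whose result is zero-extended to <2 x i64>.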
define <2 x i64> @mgather_v2i32_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i32> %passthru) {
; RV32V-LABEL: mgather_v2i32_zextload_v2i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV32V-NEXT: vzext.vf2 v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i32_zextload_v2i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV64V-NEXT: vzext.vf2 v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i32_zextload_v2i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV32ZVE32F-NEXT: sw zero, 12(a0)
; RV32ZVE32F-NEXT: sw zero, 4(a0)
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vse32.v v9, (a0)
; RV32ZVE32F-NEXT: addi a0, a0, 8
; RV32ZVE32F-NEXT: vse32.v v8, (a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i32_zextload_v2i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB30_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB30_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB30_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lw a0, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB30_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 32
; RV64ZVE32F-NEXT: srli a0, a0, 32
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 32
; RV64ZVE32F-NEXT: srli a1, a1, 32
; RV64ZVE32F-NEXT: ret
%v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %m, <2 x i32> %passthru)
%ev = zext <2 x i32> %v to <2 x i64>
ret <2 x i64> %ev
}
declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i32>)
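; Four-element i32 gather with a variable mask.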
define <4 x i32> @mgather_v4i32(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i32> %passthru) {
; RV32-LABEL: mgather_v4i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_v4i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v4i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB31_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB31_6
; RV64ZVE32F-NEXT: .LBB31_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB31_7
; RV64ZVE32F-NEXT: .LBB31_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB31_8
; RV64ZVE32F-NEXT: .LBB31_4: # %else8
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB31_5: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB31_2
; RV64ZVE32F-NEXT: .LBB31_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB31_3
; RV64ZVE32F-NEXT: .LBB31_7: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: beqz a1, .LBB31_4
; RV64ZVE32F-NEXT: .LBB31_8: # %cond.load7
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: ret
%v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %m, <4 x i32> %passthru)
ret <4 x i32> %v
}
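; Four-element i32 gather with an all-ones mask; the passthru is unused.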
define <4 x i32> @mgather_truemask_v4i32(<4 x ptr> %ptrs, <4 x i32> %passthru) {
; RV32-LABEL: mgather_truemask_v4i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vluxei32.v v8, (zero), v8
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_truemask_v4i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64V-NEXT: vluxei64.v v10, (zero), v8
; RV64V-NEXT: vmv.v.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_truemask_v4i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a1, 8(a0)
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: ld a3, 24(a0)
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: lw a1, 0(a1)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: lw a3, 0(a3)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vlse32.v v8, (a0), zero
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
; RV64ZVE32F-NEXT: ret
%v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 1), <4 x i32> %passthru)
ret <4 x i32> %v
}
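; Four-element i32 gather with an all-zeros mask; the result is just the
; passthru.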
define <4 x i32> @mgather_falsemask_v4i32(<4 x ptr> %ptrs, <4 x i32> %passthru) {
; RV32-LABEL: mgather_falsemask_v4i32:
; RV32: # %bb.0:
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_falsemask_v4i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vmv1r.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_falsemask_v4i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ret
%v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> zeroinitializer, <4 x i32> %passthru)
ret <4 x i32> %v
}
declare <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i32>)
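; Eight-element i32 gather with a variable mask.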
define <8 x i32> @mgather_v8i32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i32> %passthru) {
; RV32-LABEL: mgather_v8i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_v8i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v8i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB34_9
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB34_10
; RV64ZVE32F-NEXT: .LBB34_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB34_11
; RV64ZVE32F-NEXT: .LBB34_3: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB34_12
; RV64ZVE32F-NEXT: .LBB34_4: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB34_13
; RV64ZVE32F-NEXT: .LBB34_5: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB34_14
; RV64ZVE32F-NEXT: .LBB34_6: # %else14
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: bnez a2, .LBB34_15
; RV64ZVE32F-NEXT: .LBB34_7: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB34_16
; RV64ZVE32F-NEXT: .LBB34_8: # %else20
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB34_9: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB34_2
; RV64ZVE32F-NEXT: .LBB34_10: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB34_3
; RV64ZVE32F-NEXT: .LBB34_11: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB34_4
; RV64ZVE32F-NEXT: .LBB34_12: # %cond.load7
; RV64ZVE32F-NEXT: ld a2, 24(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB34_5
; RV64ZVE32F-NEXT: .LBB34_13: # %cond.load10
; RV64ZVE32F-NEXT: ld a2, 32(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB34_6
; RV64ZVE32F-NEXT: .LBB34_14: # %cond.load13
; RV64ZVE32F-NEXT: ld a2, 40(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 5
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: beqz a2, .LBB34_7
; RV64ZVE32F-NEXT: .LBB34_15: # %cond.load16
; RV64ZVE32F-NEXT: ld a2, 48(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB34_8
; RV64ZVE32F-NEXT: .LBB34_16: # %cond.load19
; RV64ZVE32F-NEXT: ld a0, 56(a0)
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 7
; RV64ZVE32F-NEXT: ret
%v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
ret <8 x i32> %v
}
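; Masked gather of <8 x i32> indexed by <8 x i8> offsets from a common base
; pointer.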
define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i32> %passthru) {
; RV32-LABEL: mgather_baseidx_v8i8_v8i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV32-NEXT: vsext.vf4 v12, v8
; RV32-NEXT: vsll.vi v8, v12, 2
; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v8i8_v8i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v8
; RV64V-NEXT: vsll.vi v12, v12, 2
; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
; RV64V-NEXT: vmv.v.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB35_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: .LBB35_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB35_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
; RV64ZVE32F-NEXT: .LBB35_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB35_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB35_13
; RV64ZVE32F-NEXT: .LBB35_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB35_14
; RV64ZVE32F-NEXT: .LBB35_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB35_9
; RV64ZVE32F-NEXT: .LBB35_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB35_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB35_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB35_16
; RV64ZVE32F-NEXT: .LBB35_11: # %else20
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB35_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB35_6
; RV64ZVE32F-NEXT: .LBB35_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB35_7
; RV64ZVE32F-NEXT: .LBB35_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB35_8
; RV64ZVE32F-NEXT: j .LBB35_9
; RV64ZVE32F-NEXT: .LBB35_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB35_11
; RV64ZVE32F-NEXT: .LBB35_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i32, ptr %base, <8 x i8> %idxs
%v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
ret <8 x i32> %v
}
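; Masked gather of <8 x i32> where the <8 x i8> indices are sign-extended to
; i32 before indexing.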
define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i32> %passthru) {
; RV32-LABEL: mgather_baseidx_sext_v8i8_v8i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV32-NEXT: vsext.vf4 v12, v8
; RV32-NEXT: vsll.vi v8, v12, 2
; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v8
; RV64V-NEXT: vsll.vi v12, v12, 2
; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
; RV64V-NEXT: vmv.v.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB36_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: .LBB36_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB36_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
; RV64ZVE32F-NEXT: .LBB36_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB36_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB36_13
; RV64ZVE32F-NEXT: .LBB36_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB36_14
; RV64ZVE32F-NEXT: .LBB36_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB36_9
; RV64ZVE32F-NEXT: .LBB36_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB36_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB36_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB36_16
; RV64ZVE32F-NEXT: .LBB36_11: # %else20
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB36_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB36_6
; RV64ZVE32F-NEXT: .LBB36_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB36_7
; RV64ZVE32F-NEXT: .LBB36_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB36_8
; RV64ZVE32F-NEXT: j .LBB36_9
; RV64ZVE32F-NEXT: .LBB36_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB36_11
; RV64ZVE32F-NEXT: .LBB36_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
%eidxs = sext <8 x i8> %idxs to <8 x i32>
%ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
%v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
ret <8 x i32> %v
}
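; Masked gather of <8 x i32> where the <8 x i8> indices are zero-extended to
; i32 before indexing.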
define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i32> %passthru) {
; RV32-LABEL: mgather_baseidx_zext_v8i8_v8i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vzext.vf2 v9, v8
; RV32-NEXT: vsll.vi v8, v9, 2
; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; RV32-NEXT: vluxei16.v v10, (a0), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64V-NEXT: vzext.vf2 v9, v8
; RV64V-NEXT: vsll.vi v8, v9, 2
; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; RV64V-NEXT: vluxei16.v v10, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB37_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: .LBB37_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB37_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
; RV64ZVE32F-NEXT: .LBB37_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB37_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB37_13
; RV64ZVE32F-NEXT: .LBB37_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB37_14
; RV64ZVE32F-NEXT: .LBB37_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB37_9
; RV64ZVE32F-NEXT: .LBB37_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB37_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB37_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB37_16
; RV64ZVE32F-NEXT: .LBB37_11: # %else20
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB37_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB37_6
; RV64ZVE32F-NEXT: .LBB37_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB37_7
; RV64ZVE32F-NEXT: .LBB37_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB37_8
; RV64ZVE32F-NEXT: j .LBB37_9
; RV64ZVE32F-NEXT: .LBB37_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB37_11
; RV64ZVE32F-NEXT: .LBB37_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: andi a1, a1, 255
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
%eidxs = zext <8 x i8> %idxs to <8 x i32>
%ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
%v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
ret <8 x i32> %v
}
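; Masked gather of <8 x i32> indexed by <8 x i16> offsets from a common base
; pointer.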
define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i32> %passthru) {
; RV32-LABEL: mgather_baseidx_v8i16_v8i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV32-NEXT: vsext.vf2 v12, v8
; RV32-NEXT: vsll.vi v8, v12, 2
; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v8i16_v8i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf4 v12, v8
; RV64V-NEXT: vsll.vi v12, v12, 2
; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
; RV64V-NEXT: vmv.v.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i16_v8i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB38_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: .LBB38_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB38_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
; RV64ZVE32F-NEXT: .LBB38_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB38_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB38_13
; RV64ZVE32F-NEXT: .LBB38_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB38_14
; RV64ZVE32F-NEXT: .LBB38_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB38_9
; RV64ZVE32F-NEXT: .LBB38_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB38_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB38_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB38_16
; RV64ZVE32F-NEXT: .LBB38_11: # %else20
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB38_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB38_6
; RV64ZVE32F-NEXT: .LBB38_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB38_7
; RV64ZVE32F-NEXT: .LBB38_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB38_8
; RV64ZVE32F-NEXT: j .LBB38_9
; RV64ZVE32F-NEXT: .LBB38_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB38_11
; RV64ZVE32F-NEXT: .LBB38_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i32, ptr %base, <8 x i16> %idxs
%v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
ret <8 x i32> %v
}
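; Masked gather of <8 x i32> where the <8 x i16> indices are sign-extended to
; i32 before indexing.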
define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i32> %passthru) {
; RV32-LABEL: mgather_baseidx_sext_v8i16_v8i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV32-NEXT: vsext.vf2 v12, v8
; RV32-NEXT: vsll.vi v8, v12, 2
; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_sext_v8i16_v8i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf4 v12, v8
; RV64V-NEXT: vsll.vi v12, v12, 2
; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
; RV64V-NEXT: vmv.v.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB39_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: .LBB39_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB39_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
; RV64ZVE32F-NEXT: .LBB39_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB39_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB39_13
; RV64ZVE32F-NEXT: .LBB39_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB39_14
; RV64ZVE32F-NEXT: .LBB39_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB39_9
; RV64ZVE32F-NEXT: .LBB39_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB39_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB39_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB39_16
; RV64ZVE32F-NEXT: .LBB39_11: # %else20
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB39_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB39_6
; RV64ZVE32F-NEXT: .LBB39_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB39_7
; RV64ZVE32F-NEXT: .LBB39_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB39_8
; RV64ZVE32F-NEXT: j .LBB39_9
; RV64ZVE32F-NEXT: .LBB39_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB39_11
; RV64ZVE32F-NEXT: .LBB39_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
%eidxs = sext <8 x i16> %idxs to <8 x i32>
%ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
%v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
ret <8 x i32> %v
}
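; With zero-extended i16 indices the RVV targets stay at SEW=32 (vzext.vf2 plus vluxei32),
; and the scalarized RV64ZVE32F path masks each extracted index with 0xffff before scaling by 4.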
define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i32> %passthru) {
; RV32-LABEL: mgather_baseidx_zext_v8i16_v8i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV32-NEXT: vzext.vf2 v12, v8
; RV32-NEXT: vsll.vi v8, v12, 2
; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_zext_v8i16_v8i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV64V-NEXT: vzext.vf2 v12, v8
; RV64V-NEXT: vsll.vi v8, v12, 2
; RV64V-NEXT: vluxei32.v v10, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: lui a1, 16
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: addiw a1, a1, -1
; RV64ZVE32F-NEXT: beqz a3, .LBB40_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: lw a3, 0(a3)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a3
; RV64ZVE32F-NEXT: .LBB40_2: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB40_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: lw a3, 0(a3)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a3
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
; RV64ZVE32F-NEXT: .LBB40_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a3, .LBB40_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB40_13
; RV64ZVE32F-NEXT: .LBB40_6: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: bnez a3, .LBB40_14
; RV64ZVE32F-NEXT: .LBB40_7: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB40_9
; RV64ZVE32F-NEXT: .LBB40_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: lw a3, 0(a3)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a3
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB40_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a3, a2, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a3, .LBB40_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a2, a2, -128
; RV64ZVE32F-NEXT: bnez a2, .LBB40_16
; RV64ZVE32F-NEXT: .LBB40_11: # %else20
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB40_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: lw a3, 0(a3)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a3
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: beqz a3, .LBB40_6
; RV64ZVE32F-NEXT: .LBB40_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: lw a3, 0(a3)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a3
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: beqz a3, .LBB40_7
; RV64ZVE32F-NEXT: .LBB40_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: lw a3, 0(a3)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a3
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: bnez a3, .LBB40_8
; RV64ZVE32F-NEXT: j .LBB40_9
; RV64ZVE32F-NEXT: .LBB40_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: lw a3, 0(a3)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a3
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a2, a2, -128
; RV64ZVE32F-NEXT: beqz a2, .LBB40_11
; RV64ZVE32F-NEXT: .LBB40_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: and a1, a2, a1
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
%eidxs = zext <8 x i16> %idxs to <8 x i32>
%ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
%v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
ret <8 x i32> %v
}
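; Native i32 indices: RV32 shifts them in place, RV64V still sign-extends them to i64 to feed
; vluxei64, and RV64ZVE32F falls back to the per-element branch sequence.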
define <8 x i32> @mgather_baseidx_v8i32(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x i32> %passthru) {
; RV32-LABEL: mgather_baseidx_v8i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV32-NEXT: vsll.vi v8, v8, 2
; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v8i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf2 v12, v8
; RV64V-NEXT: vsll.vi v12, v12, 2
; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
; RV64V-NEXT: vmv.v.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB41_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: .LBB41_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB41_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB41_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB41_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB41_13
; RV64ZVE32F-NEXT: .LBB41_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB41_14
; RV64ZVE32F-NEXT: .LBB41_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB41_9
; RV64ZVE32F-NEXT: .LBB41_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB41_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v12, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB41_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB41_16
; RV64ZVE32F-NEXT: .LBB41_11: # %else20
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB41_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB41_6
; RV64ZVE32F-NEXT: .LBB41_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB41_7
; RV64ZVE32F-NEXT: .LBB41_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB41_8
; RV64ZVE32F-NEXT: j .LBB41_9
; RV64ZVE32F-NEXT: .LBB41_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB41_11
; RV64ZVE32F-NEXT: .LBB41_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %idxs
%v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
ret <8 x i32> %v
}
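; i64 element tests: the Zve32f configurations cannot hold 64-bit elements in vector registers,
; so both ZVE32F paths scalarize the gather into conditional lw/ld sequences and return the
; result in GPRs or through memory.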
declare <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i64>)
define <1 x i64> @mgather_v1i64(<1 x ptr> %ptrs, <1 x i1> %m, <1 x i64> %passthru) {
; RV32V-LABEL: mgather_v1i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 1, e64, m1, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v1i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v1i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vfirst.m a2, v0
; RV32ZVE32F-NEXT: bnez a2, .LBB42_2
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: lw a1, 4(a0)
; RV32ZVE32F-NEXT: lw a0, 0(a0)
; RV32ZVE32F-NEXT: .LBB42_2: # %else
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v1i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vfirst.m a2, v0
; RV64ZVE32F-NEXT: bnez a2, .LBB42_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: ld a1, 0(a0)
; RV64ZVE32F-NEXT: .LBB42_2: # %else
; RV64ZVE32F-NEXT: mv a0, a1
; RV64ZVE32F-NEXT: ret
%v = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> %ptrs, i32 8, <1 x i1> %m, <1 x i64> %passthru)
ret <1 x i64> %v
}
declare <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i64>)
define <2 x i64> @mgather_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i64> %passthru) {
; RV32V-LABEL: mgather_v2i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e64, m1, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a4, v0
; RV32ZVE32F-NEXT: andi a2, a4, 1
; RV32ZVE32F-NEXT: beqz a2, .LBB43_3
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a2, 4(a3)
; RV32ZVE32F-NEXT: lw a3, 0(a3)
; RV32ZVE32F-NEXT: andi a4, a4, 2
; RV32ZVE32F-NEXT: bnez a4, .LBB43_4
; RV32ZVE32F-NEXT: .LBB43_2:
; RV32ZVE32F-NEXT: lw a4, 12(a1)
; RV32ZVE32F-NEXT: lw a1, 8(a1)
; RV32ZVE32F-NEXT: j .LBB43_5
; RV32ZVE32F-NEXT: .LBB43_3:
; RV32ZVE32F-NEXT: lw a2, 4(a1)
; RV32ZVE32F-NEXT: lw a3, 0(a1)
; RV32ZVE32F-NEXT: andi a4, a4, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB43_2
; RV32ZVE32F-NEXT: .LBB43_4: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: lw a4, 4(a1)
; RV32ZVE32F-NEXT: lw a1, 0(a1)
; RV32ZVE32F-NEXT: .LBB43_5: # %else2
; RV32ZVE32F-NEXT: sw a3, 0(a0)
; RV32ZVE32F-NEXT: sw a2, 4(a0)
; RV32ZVE32F-NEXT: sw a1, 8(a0)
; RV32ZVE32F-NEXT: sw a4, 12(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a4, v0
; RV64ZVE32F-NEXT: andi a5, a4, 1
; RV64ZVE32F-NEXT: beqz a5, .LBB43_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: .LBB43_2: # %else
; RV64ZVE32F-NEXT: andi a4, a4, 2
; RV64ZVE32F-NEXT: beqz a4, .LBB43_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: ld a3, 0(a1)
; RV64ZVE32F-NEXT: .LBB43_4: # %else2
; RV64ZVE32F-NEXT: mv a0, a2
; RV64ZVE32F-NEXT: mv a1, a3
; RV64ZVE32F-NEXT: ret
%v = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> %ptrs, i32 8, <2 x i1> %m, <2 x i64> %passthru)
ret <2 x i64> %v
}
declare <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i64>)
define <4 x i64> @mgather_v4i64(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i64> %passthru) {
; RV32V-LABEL: mgather_v4i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; RV32V-NEXT: vluxei32.v v10, (zero), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v10
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v4i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v10
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v4i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a6, v0
; RV32ZVE32F-NEXT: andi a2, a6, 1
; RV32ZVE32F-NEXT: beqz a2, .LBB44_5
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a2, 4(a3)
; RV32ZVE32F-NEXT: lw a3, 0(a3)
; RV32ZVE32F-NEXT: andi a4, a6, 2
; RV32ZVE32F-NEXT: bnez a4, .LBB44_6
; RV32ZVE32F-NEXT: .LBB44_2:
; RV32ZVE32F-NEXT: lw a4, 12(a1)
; RV32ZVE32F-NEXT: lw a5, 8(a1)
; RV32ZVE32F-NEXT: andi a7, a6, 4
; RV32ZVE32F-NEXT: bnez a7, .LBB44_7
; RV32ZVE32F-NEXT: .LBB44_3:
; RV32ZVE32F-NEXT: lw a7, 20(a1)
; RV32ZVE32F-NEXT: lw t0, 16(a1)
; RV32ZVE32F-NEXT: andi a6, a6, 8
; RV32ZVE32F-NEXT: bnez a6, .LBB44_8
; RV32ZVE32F-NEXT: .LBB44_4:
; RV32ZVE32F-NEXT: lw a6, 28(a1)
; RV32ZVE32F-NEXT: lw a1, 24(a1)
; RV32ZVE32F-NEXT: j .LBB44_9
; RV32ZVE32F-NEXT: .LBB44_5:
; RV32ZVE32F-NEXT: lw a2, 4(a1)
; RV32ZVE32F-NEXT: lw a3, 0(a1)
; RV32ZVE32F-NEXT: andi a4, a6, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB44_2
; RV32ZVE32F-NEXT: .LBB44_6: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v9
; RV32ZVE32F-NEXT: lw a4, 4(a5)
; RV32ZVE32F-NEXT: lw a5, 0(a5)
; RV32ZVE32F-NEXT: andi a7, a6, 4
; RV32ZVE32F-NEXT: beqz a7, .LBB44_3
; RV32ZVE32F-NEXT: .LBB44_7: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s t0, v9
; RV32ZVE32F-NEXT: lw a7, 4(t0)
; RV32ZVE32F-NEXT: lw t0, 0(t0)
; RV32ZVE32F-NEXT: andi a6, a6, 8
; RV32ZVE32F-NEXT: beqz a6, .LBB44_4
; RV32ZVE32F-NEXT: .LBB44_8: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: lw a6, 4(a1)
; RV32ZVE32F-NEXT: lw a1, 0(a1)
; RV32ZVE32F-NEXT: .LBB44_9: # %else8
; RV32ZVE32F-NEXT: sw a3, 0(a0)
; RV32ZVE32F-NEXT: sw a2, 4(a0)
; RV32ZVE32F-NEXT: sw a5, 8(a0)
; RV32ZVE32F-NEXT: sw a4, 12(a0)
; RV32ZVE32F-NEXT: sw t0, 16(a0)
; RV32ZVE32F-NEXT: sw a7, 20(a0)
; RV32ZVE32F-NEXT: sw a1, 24(a0)
; RV32ZVE32F-NEXT: sw a6, 28(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v4i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi a3, a5, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB44_5
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: ld a3, 0(a1)
; RV64ZVE32F-NEXT: ld a3, 0(a3)
; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: bnez a4, .LBB44_6
; RV64ZVE32F-NEXT: .LBB44_2:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: bnez a6, .LBB44_7
; RV64ZVE32F-NEXT: .LBB44_3:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
; RV64ZVE32F-NEXT: andi a5, a5, 8
; RV64ZVE32F-NEXT: bnez a5, .LBB44_8
; RV64ZVE32F-NEXT: .LBB44_4:
; RV64ZVE32F-NEXT: ld a1, 24(a2)
; RV64ZVE32F-NEXT: j .LBB44_9
; RV64ZVE32F-NEXT: .LBB44_5:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: beqz a4, .LBB44_2
; RV64ZVE32F-NEXT: .LBB44_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a4, 8(a1)
; RV64ZVE32F-NEXT: ld a4, 0(a4)
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: beqz a6, .LBB44_3
; RV64ZVE32F-NEXT: .LBB44_7: # %cond.load4
; RV64ZVE32F-NEXT: ld a6, 16(a1)
; RV64ZVE32F-NEXT: ld a6, 0(a6)
; RV64ZVE32F-NEXT: andi a5, a5, 8
; RV64ZVE32F-NEXT: beqz a5, .LBB44_4
; RV64ZVE32F-NEXT: .LBB44_8: # %cond.load7
; RV64ZVE32F-NEXT: ld a1, 24(a1)
; RV64ZVE32F-NEXT: ld a1, 0(a1)
; RV64ZVE32F-NEXT: .LBB44_9: # %else8
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a6, 16(a0)
; RV64ZVE32F-NEXT: sd a1, 24(a0)
; RV64ZVE32F-NEXT: ret
%v = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> %m, <4 x i64> %passthru)
ret <4 x i64> %v
}
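; An all-ones mask removes the need for the masked form: the V targets emit an unmasked vluxei,
; and the ZVE32F targets load every element unconditionally.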
define <4 x i64> @mgather_truemask_v4i64(<4 x ptr> %ptrs, <4 x i64> %passthru) {
; RV32V-LABEL: mgather_truemask_v4i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32V-NEXT: vluxei32.v v10, (zero), v8
; RV32V-NEXT: vmv.v.v v8, v10
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_truemask_v4i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64V-NEXT: vluxei64.v v8, (zero), v8
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_truemask_v4i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: lw a2, 0(a1)
; RV32ZVE32F-NEXT: lw a1, 4(a1)
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a3, v9
; RV32ZVE32F-NEXT: lw a4, 0(a3)
; RV32ZVE32F-NEXT: lw a3, 4(a3)
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a5, v9
; RV32ZVE32F-NEXT: lw a6, 0(a5)
; RV32ZVE32F-NEXT: lw a5, 4(a5)
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a7, v8
; RV32ZVE32F-NEXT: lw t0, 4(a7)
; RV32ZVE32F-NEXT: lw a7, 0(a7)
; RV32ZVE32F-NEXT: sw a1, 4(a0)
; RV32ZVE32F-NEXT: sw a2, 0(a0)
; RV32ZVE32F-NEXT: sw t0, 28(a0)
; RV32ZVE32F-NEXT: sw a7, 24(a0)
; RV32ZVE32F-NEXT: sw a5, 20(a0)
; RV32ZVE32F-NEXT: sw a6, 16(a0)
; RV32ZVE32F-NEXT: sw a3, 12(a0)
; RV32ZVE32F-NEXT: sw a4, 8(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_truemask_v4i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a2, 24(a1)
; RV64ZVE32F-NEXT: ld a3, 16(a1)
; RV64ZVE32F-NEXT: ld a4, 8(a1)
; RV64ZVE32F-NEXT: ld a1, 0(a1)
; RV64ZVE32F-NEXT: ld a2, 0(a2)
; RV64ZVE32F-NEXT: ld a3, 0(a3)
; RV64ZVE32F-NEXT: ld a4, 0(a4)
; RV64ZVE32F-NEXT: ld a1, 0(a1)
; RV64ZVE32F-NEXT: sd a2, 24(a0)
; RV64ZVE32F-NEXT: sd a3, 16(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a1, 0(a0)
; RV64ZVE32F-NEXT: ret
%v = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> splat (i1 1), <4 x i64> %passthru)
ret <4 x i64> %v
}
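; An all-zeroes mask folds the gather to the passthru operand, which the V targets return with a
; register move and the ZVE32F targets copy through memory.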
define <4 x i64> @mgather_falsemask_v4i64(<4 x ptr> %ptrs, <4 x i64> %passthru) {
; RV32V-LABEL: mgather_falsemask_v4i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vmv2r.v v8, v10
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_falsemask_v4i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vmv2r.v v8, v10
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_falsemask_v4i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: lw a2, 0(a1)
; RV32ZVE32F-NEXT: lw a3, 4(a1)
; RV32ZVE32F-NEXT: lw a4, 8(a1)
; RV32ZVE32F-NEXT: lw a5, 12(a1)
; RV32ZVE32F-NEXT: lw a6, 28(a1)
; RV32ZVE32F-NEXT: lw a7, 24(a1)
; RV32ZVE32F-NEXT: lw t0, 20(a1)
; RV32ZVE32F-NEXT: lw a1, 16(a1)
; RV32ZVE32F-NEXT: sw a6, 28(a0)
; RV32ZVE32F-NEXT: sw a7, 24(a0)
; RV32ZVE32F-NEXT: sw t0, 20(a0)
; RV32ZVE32F-NEXT: sw a1, 16(a0)
; RV32ZVE32F-NEXT: sw a5, 12(a0)
; RV32ZVE32F-NEXT: sw a4, 8(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: sw a2, 0(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_falsemask_v4i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a1, 24(a2)
; RV64ZVE32F-NEXT: ld a3, 16(a2)
; RV64ZVE32F-NEXT: ld a4, 8(a2)
; RV64ZVE32F-NEXT: ld a2, 0(a2)
; RV64ZVE32F-NEXT: sd a1, 24(a0)
; RV64ZVE32F-NEXT: sd a3, 16(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a2, 0(a0)
; RV64ZVE32F-NEXT: ret
%v = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> zeroinitializer, <4 x i64> %passthru)
ret <4 x i64> %v
}
declare <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i64>)
define <8 x i64> @mgather_v8i64(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i64> %passthru) {
; RV32V-LABEL: mgather_v8i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV32V-NEXT: vluxei32.v v12, (zero), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v8i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v8i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v0
; RV32ZVE32F-NEXT: andi a2, t0, 1
; RV32ZVE32F-NEXT: beqz a2, .LBB47_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a2, 4(a3)
; RV32ZVE32F-NEXT: lw a3, 0(a3)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: bnez a4, .LBB47_8
; RV32ZVE32F-NEXT: .LBB47_2:
; RV32ZVE32F-NEXT: lw a4, 12(a1)
; RV32ZVE32F-NEXT: lw a5, 8(a1)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: bnez a6, .LBB47_9
; RV32ZVE32F-NEXT: .LBB47_3:
; RV32ZVE32F-NEXT: lw a6, 20(a1)
; RV32ZVE32F-NEXT: lw a7, 16(a1)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: bnez t1, .LBB47_10
; RV32ZVE32F-NEXT: .LBB47_4:
; RV32ZVE32F-NEXT: lw t1, 28(a1)
; RV32ZVE32F-NEXT: lw t2, 24(a1)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: bnez t3, .LBB47_11
; RV32ZVE32F-NEXT: .LBB47_5:
; RV32ZVE32F-NEXT: lw t3, 36(a1)
; RV32ZVE32F-NEXT: lw t4, 32(a1)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: bnez t5, .LBB47_12
; RV32ZVE32F-NEXT: .LBB47_6:
; RV32ZVE32F-NEXT: lw t5, 44(a1)
; RV32ZVE32F-NEXT: lw t6, 40(a1)
; RV32ZVE32F-NEXT: j .LBB47_13
; RV32ZVE32F-NEXT: .LBB47_7:
; RV32ZVE32F-NEXT: lw a2, 4(a1)
; RV32ZVE32F-NEXT: lw a3, 0(a1)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB47_2
; RV32ZVE32F-NEXT: .LBB47_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
; RV32ZVE32F-NEXT: lw a4, 4(a5)
; RV32ZVE32F-NEXT: lw a5, 0(a5)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: beqz a6, .LBB47_3
; RV32ZVE32F-NEXT: .LBB47_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
; RV32ZVE32F-NEXT: lw a6, 4(a7)
; RV32ZVE32F-NEXT: lw a7, 0(a7)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: beqz t1, .LBB47_4
; RV32ZVE32F-NEXT: .LBB47_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
; RV32ZVE32F-NEXT: lw t1, 4(t2)
; RV32ZVE32F-NEXT: lw t2, 0(t2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: beqz t3, .LBB47_5
; RV32ZVE32F-NEXT: .LBB47_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
; RV32ZVE32F-NEXT: lw t3, 4(t4)
; RV32ZVE32F-NEXT: lw t4, 0(t4)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: beqz t5, .LBB47_6
; RV32ZVE32F-NEXT: .LBB47_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
; RV32ZVE32F-NEXT: lw t5, 4(t6)
; RV32ZVE32F-NEXT: lw t6, 0(t6)
; RV32ZVE32F-NEXT: .LBB47_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
; RV32ZVE32F-NEXT: beqz s0, .LBB47_16
; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
; RV32ZVE32F-NEXT: lw s0, 4(s1)
; RV32ZVE32F-NEXT: lw s1, 0(s1)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: bnez t0, .LBB47_17
; RV32ZVE32F-NEXT: .LBB47_15:
; RV32ZVE32F-NEXT: lw t0, 60(a1)
; RV32ZVE32F-NEXT: lw a1, 56(a1)
; RV32ZVE32F-NEXT: j .LBB47_18
; RV32ZVE32F-NEXT: .LBB47_16:
; RV32ZVE32F-NEXT: lw s0, 52(a1)
; RV32ZVE32F-NEXT: lw s1, 48(a1)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: beqz t0, .LBB47_15
; RV32ZVE32F-NEXT: .LBB47_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: lw t0, 4(a1)
; RV32ZVE32F-NEXT: lw a1, 0(a1)
; RV32ZVE32F-NEXT: .LBB47_18: # %else20
; RV32ZVE32F-NEXT: sw a3, 0(a0)
; RV32ZVE32F-NEXT: sw a2, 4(a0)
; RV32ZVE32F-NEXT: sw a5, 8(a0)
; RV32ZVE32F-NEXT: sw a4, 12(a0)
; RV32ZVE32F-NEXT: sw a7, 16(a0)
; RV32ZVE32F-NEXT: sw a6, 20(a0)
; RV32ZVE32F-NEXT: sw t2, 24(a0)
; RV32ZVE32F-NEXT: sw t1, 28(a0)
; RV32ZVE32F-NEXT: sw t4, 32(a0)
; RV32ZVE32F-NEXT: sw t3, 36(a0)
; RV32ZVE32F-NEXT: sw t6, 40(a0)
; RV32ZVE32F-NEXT: sw t5, 44(a0)
; RV32ZVE32F-NEXT: sw s1, 48(a0)
; RV32ZVE32F-NEXT: sw s0, 52(a0)
; RV32ZVE32F-NEXT: sw a1, 56(a0)
; RV32ZVE32F-NEXT: sw t0, 60(a0)
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v8i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a6, v0
; RV64ZVE32F-NEXT: andi a3, a6, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB47_9
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: ld a3, 0(a1)
; RV64ZVE32F-NEXT: ld a3, 0(a3)
; RV64ZVE32F-NEXT: andi a4, a6, 2
; RV64ZVE32F-NEXT: bnez a4, .LBB47_10
; RV64ZVE32F-NEXT: .LBB47_2:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
; RV64ZVE32F-NEXT: andi a5, a6, 4
; RV64ZVE32F-NEXT: bnez a5, .LBB47_11
; RV64ZVE32F-NEXT: .LBB47_3:
; RV64ZVE32F-NEXT: ld a5, 16(a2)
; RV64ZVE32F-NEXT: andi a7, a6, 8
; RV64ZVE32F-NEXT: bnez a7, .LBB47_12
; RV64ZVE32F-NEXT: .LBB47_4:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
; RV64ZVE32F-NEXT: andi t0, a6, 16
; RV64ZVE32F-NEXT: bnez t0, .LBB47_13
; RV64ZVE32F-NEXT: .LBB47_5:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
; RV64ZVE32F-NEXT: andi t1, a6, 32
; RV64ZVE32F-NEXT: bnez t1, .LBB47_14
; RV64ZVE32F-NEXT: .LBB47_6:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
; RV64ZVE32F-NEXT: andi t2, a6, 64
; RV64ZVE32F-NEXT: bnez t2, .LBB47_15
; RV64ZVE32F-NEXT: .LBB47_7:
; RV64ZVE32F-NEXT: ld t2, 48(a2)
; RV64ZVE32F-NEXT: andi a6, a6, -128
; RV64ZVE32F-NEXT: bnez a6, .LBB47_16
; RV64ZVE32F-NEXT: .LBB47_8:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
; RV64ZVE32F-NEXT: j .LBB47_17
; RV64ZVE32F-NEXT: .LBB47_9:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
; RV64ZVE32F-NEXT: andi a4, a6, 2
; RV64ZVE32F-NEXT: beqz a4, .LBB47_2
; RV64ZVE32F-NEXT: .LBB47_10: # %cond.load1
; RV64ZVE32F-NEXT: ld a4, 8(a1)
; RV64ZVE32F-NEXT: ld a4, 0(a4)
; RV64ZVE32F-NEXT: andi a5, a6, 4
; RV64ZVE32F-NEXT: beqz a5, .LBB47_3
; RV64ZVE32F-NEXT: .LBB47_11: # %cond.load4
; RV64ZVE32F-NEXT: ld a5, 16(a1)
; RV64ZVE32F-NEXT: ld a5, 0(a5)
; RV64ZVE32F-NEXT: andi a7, a6, 8
; RV64ZVE32F-NEXT: beqz a7, .LBB47_4
; RV64ZVE32F-NEXT: .LBB47_12: # %cond.load7
; RV64ZVE32F-NEXT: ld a7, 24(a1)
; RV64ZVE32F-NEXT: ld a7, 0(a7)
; RV64ZVE32F-NEXT: andi t0, a6, 16
; RV64ZVE32F-NEXT: beqz t0, .LBB47_5
; RV64ZVE32F-NEXT: .LBB47_13: # %cond.load10
; RV64ZVE32F-NEXT: ld t0, 32(a1)
; RV64ZVE32F-NEXT: ld t0, 0(t0)
; RV64ZVE32F-NEXT: andi t1, a6, 32
; RV64ZVE32F-NEXT: beqz t1, .LBB47_6
; RV64ZVE32F-NEXT: .LBB47_14: # %cond.load13
; RV64ZVE32F-NEXT: ld t1, 40(a1)
; RV64ZVE32F-NEXT: ld t1, 0(t1)
; RV64ZVE32F-NEXT: andi t2, a6, 64
; RV64ZVE32F-NEXT: beqz t2, .LBB47_7
; RV64ZVE32F-NEXT: .LBB47_15: # %cond.load16
; RV64ZVE32F-NEXT: ld t2, 48(a1)
; RV64ZVE32F-NEXT: ld t2, 0(t2)
; RV64ZVE32F-NEXT: andi a6, a6, -128
; RV64ZVE32F-NEXT: beqz a6, .LBB47_8
; RV64ZVE32F-NEXT: .LBB47_16: # %cond.load19
; RV64ZVE32F-NEXT: ld a1, 56(a1)
; RV64ZVE32F-NEXT: ld a1, 0(a1)
; RV64ZVE32F-NEXT: .LBB47_17: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a5, 16(a0)
; RV64ZVE32F-NEXT: sd a7, 24(a0)
; RV64ZVE32F-NEXT: sd t0, 32(a0)
; RV64ZVE32F-NEXT: sd t1, 40(a0)
; RV64ZVE32F-NEXT: sd t2, 48(a0)
; RV64ZVE32F-NEXT: sd a1, 56(a0)
; RV64ZVE32F-NEXT: ret
%v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
ret <8 x i64> %v
}
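; i8-indexed i64 gathers: the V targets widen the indices (vsext.vf4/vf8) and shift by 3,
; RV32ZVE32F computes the addresses in an i32 vector and then scalarizes the 64-bit loads, and
; RV64ZVE32F is scalarized end to end.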
define <8 x i64> @mgather_baseidx_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
; RV32V-LABEL: mgather_baseidx_v8i8_v8i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsext.vf4 v10, v8
; RV32V-NEXT: vsll.vi v8, v10, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v8i8_v8i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64V-NEXT: vsext.vf8 v16, v8
; RV64V-NEXT: vsll.vi v8, v16, 3
; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_baseidx_v8i8_v8i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsext.vf4 v10, v8
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v0
; RV32ZVE32F-NEXT: andi a1, t0, 1
; RV32ZVE32F-NEXT: beqz a1, .LBB48_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a1, 4(a3)
; RV32ZVE32F-NEXT: lw a3, 0(a3)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: bnez a4, .LBB48_8
; RV32ZVE32F-NEXT: .LBB48_2:
; RV32ZVE32F-NEXT: lw a4, 12(a2)
; RV32ZVE32F-NEXT: lw a5, 8(a2)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: bnez a6, .LBB48_9
; RV32ZVE32F-NEXT: .LBB48_3:
; RV32ZVE32F-NEXT: lw a6, 20(a2)
; RV32ZVE32F-NEXT: lw a7, 16(a2)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: bnez t1, .LBB48_10
; RV32ZVE32F-NEXT: .LBB48_4:
; RV32ZVE32F-NEXT: lw t1, 28(a2)
; RV32ZVE32F-NEXT: lw t2, 24(a2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: bnez t3, .LBB48_11
; RV32ZVE32F-NEXT: .LBB48_5:
; RV32ZVE32F-NEXT: lw t3, 36(a2)
; RV32ZVE32F-NEXT: lw t4, 32(a2)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: bnez t5, .LBB48_12
; RV32ZVE32F-NEXT: .LBB48_6:
; RV32ZVE32F-NEXT: lw t5, 44(a2)
; RV32ZVE32F-NEXT: lw t6, 40(a2)
; RV32ZVE32F-NEXT: j .LBB48_13
; RV32ZVE32F-NEXT: .LBB48_7:
; RV32ZVE32F-NEXT: lw a1, 4(a2)
; RV32ZVE32F-NEXT: lw a3, 0(a2)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB48_2
; RV32ZVE32F-NEXT: .LBB48_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
; RV32ZVE32F-NEXT: lw a4, 4(a5)
; RV32ZVE32F-NEXT: lw a5, 0(a5)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: beqz a6, .LBB48_3
; RV32ZVE32F-NEXT: .LBB48_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
; RV32ZVE32F-NEXT: lw a6, 4(a7)
; RV32ZVE32F-NEXT: lw a7, 0(a7)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: beqz t1, .LBB48_4
; RV32ZVE32F-NEXT: .LBB48_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
; RV32ZVE32F-NEXT: lw t1, 4(t2)
; RV32ZVE32F-NEXT: lw t2, 0(t2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: beqz t3, .LBB48_5
; RV32ZVE32F-NEXT: .LBB48_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
; RV32ZVE32F-NEXT: lw t3, 4(t4)
; RV32ZVE32F-NEXT: lw t4, 0(t4)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: beqz t5, .LBB48_6
; RV32ZVE32F-NEXT: .LBB48_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
; RV32ZVE32F-NEXT: lw t5, 4(t6)
; RV32ZVE32F-NEXT: lw t6, 0(t6)
; RV32ZVE32F-NEXT: .LBB48_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
; RV32ZVE32F-NEXT: beqz s0, .LBB48_16
; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
; RV32ZVE32F-NEXT: lw s0, 4(s1)
; RV32ZVE32F-NEXT: lw s1, 0(s1)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: bnez t0, .LBB48_17
; RV32ZVE32F-NEXT: .LBB48_15:
; RV32ZVE32F-NEXT: lw t0, 60(a2)
; RV32ZVE32F-NEXT: lw a2, 56(a2)
; RV32ZVE32F-NEXT: j .LBB48_18
; RV32ZVE32F-NEXT: .LBB48_16:
; RV32ZVE32F-NEXT: lw s0, 52(a2)
; RV32ZVE32F-NEXT: lw s1, 48(a2)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: beqz t0, .LBB48_15
; RV32ZVE32F-NEXT: .LBB48_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw t0, 4(a2)
; RV32ZVE32F-NEXT: lw a2, 0(a2)
; RV32ZVE32F-NEXT: .LBB48_18: # %else20
; RV32ZVE32F-NEXT: sw a3, 0(a0)
; RV32ZVE32F-NEXT: sw a1, 4(a0)
; RV32ZVE32F-NEXT: sw a5, 8(a0)
; RV32ZVE32F-NEXT: sw a4, 12(a0)
; RV32ZVE32F-NEXT: sw a7, 16(a0)
; RV32ZVE32F-NEXT: sw a6, 20(a0)
; RV32ZVE32F-NEXT: sw t2, 24(a0)
; RV32ZVE32F-NEXT: sw t1, 28(a0)
; RV32ZVE32F-NEXT: sw t4, 32(a0)
; RV32ZVE32F-NEXT: sw t3, 36(a0)
; RV32ZVE32F-NEXT: sw t6, 40(a0)
; RV32ZVE32F-NEXT: sw t5, 44(a0)
; RV32ZVE32F-NEXT: sw s1, 48(a0)
; RV32ZVE32F-NEXT: sw s0, 52(a0)
; RV32ZVE32F-NEXT: sw a2, 56(a0)
; RV32ZVE32F-NEXT: sw t0, 60(a0)
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi a3, a5, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB48_3
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: ld a3, 0(a3)
; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: bnez a4, .LBB48_4
; RV64ZVE32F-NEXT: .LBB48_2:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
; RV64ZVE32F-NEXT: j .LBB48_5
; RV64ZVE32F-NEXT: .LBB48_3:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: beqz a4, .LBB48_2
; RV64ZVE32F-NEXT: .LBB48_4: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v9
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
; RV64ZVE32F-NEXT: .LBB48_5: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: beqz a6, .LBB48_10
; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a6, v8
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
; RV64ZVE32F-NEXT: andi a7, a5, 8
; RV64ZVE32F-NEXT: bnez a7, .LBB48_11
; RV64ZVE32F-NEXT: .LBB48_7:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: bnez t0, .LBB48_12
; RV64ZVE32F-NEXT: .LBB48_8:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: bnez t1, .LBB48_13
; RV64ZVE32F-NEXT: .LBB48_9:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
; RV64ZVE32F-NEXT: j .LBB48_14
; RV64ZVE32F-NEXT: .LBB48_10:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
; RV64ZVE32F-NEXT: andi a7, a5, 8
; RV64ZVE32F-NEXT: beqz a7, .LBB48_7
; RV64ZVE32F-NEXT: .LBB48_11: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a7, v8
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: beqz t0, .LBB48_8
; RV64ZVE32F-NEXT: .LBB48_12: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s t0, v9
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: beqz t1, .LBB48_9
; RV64ZVE32F-NEXT: .LBB48_13: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s t1, v8
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
; RV64ZVE32F-NEXT: .LBB48_14: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: beqz t2, .LBB48_17
; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
; RV64ZVE32F-NEXT: andi a5, a5, -128
; RV64ZVE32F-NEXT: bnez a5, .LBB48_18
; RV64ZVE32F-NEXT: .LBB48_16:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
; RV64ZVE32F-NEXT: j .LBB48_19
; RV64ZVE32F-NEXT: .LBB48_17:
; RV64ZVE32F-NEXT: ld t2, 48(a2)
; RV64ZVE32F-NEXT: andi a5, a5, -128
; RV64ZVE32F-NEXT: beqz a5, .LBB48_16
; RV64ZVE32F-NEXT: .LBB48_18: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: ld a1, 0(a1)
; RV64ZVE32F-NEXT: .LBB48_19: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a6, 16(a0)
; RV64ZVE32F-NEXT: sd a7, 24(a0)
; RV64ZVE32F-NEXT: sd t0, 32(a0)
; RV64ZVE32F-NEXT: sd t1, 40(a0)
; RV64ZVE32F-NEXT: sd t2, 48(a0)
; RV64ZVE32F-NEXT: sd a1, 56(a0)
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i64, ptr %base, <8 x i8> %idxs
%v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
ret <8 x i64> %v
}
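; Sign-extended i8 indices take the same path as the preceding unextended i8 index test:
; widen, shift by 3, and gather.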
define <8 x i64> @mgather_baseidx_sext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
; RV32V-LABEL: mgather_baseidx_sext_v8i8_v8i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsext.vf4 v10, v8
; RV32V-NEXT: vsll.vi v8, v10, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64V-NEXT: vsext.vf8 v16, v8
; RV64V-NEXT: vsll.vi v8, v16, 3
; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsext.vf4 v10, v8
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v0
; RV32ZVE32F-NEXT: andi a1, t0, 1
; RV32ZVE32F-NEXT: beqz a1, .LBB49_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a1, 4(a3)
; RV32ZVE32F-NEXT: lw a3, 0(a3)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: bnez a4, .LBB49_8
; RV32ZVE32F-NEXT: .LBB49_2:
; RV32ZVE32F-NEXT: lw a4, 12(a2)
; RV32ZVE32F-NEXT: lw a5, 8(a2)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: bnez a6, .LBB49_9
; RV32ZVE32F-NEXT: .LBB49_3:
; RV32ZVE32F-NEXT: lw a6, 20(a2)
; RV32ZVE32F-NEXT: lw a7, 16(a2)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: bnez t1, .LBB49_10
; RV32ZVE32F-NEXT: .LBB49_4:
; RV32ZVE32F-NEXT: lw t1, 28(a2)
; RV32ZVE32F-NEXT: lw t2, 24(a2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: bnez t3, .LBB49_11
; RV32ZVE32F-NEXT: .LBB49_5:
; RV32ZVE32F-NEXT: lw t3, 36(a2)
; RV32ZVE32F-NEXT: lw t4, 32(a2)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: bnez t5, .LBB49_12
; RV32ZVE32F-NEXT: .LBB49_6:
; RV32ZVE32F-NEXT: lw t5, 44(a2)
; RV32ZVE32F-NEXT: lw t6, 40(a2)
; RV32ZVE32F-NEXT: j .LBB49_13
; RV32ZVE32F-NEXT: .LBB49_7:
; RV32ZVE32F-NEXT: lw a1, 4(a2)
; RV32ZVE32F-NEXT: lw a3, 0(a2)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB49_2
; RV32ZVE32F-NEXT: .LBB49_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
; RV32ZVE32F-NEXT: lw a4, 4(a5)
; RV32ZVE32F-NEXT: lw a5, 0(a5)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: beqz a6, .LBB49_3
; RV32ZVE32F-NEXT: .LBB49_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
; RV32ZVE32F-NEXT: lw a6, 4(a7)
; RV32ZVE32F-NEXT: lw a7, 0(a7)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: beqz t1, .LBB49_4
; RV32ZVE32F-NEXT: .LBB49_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
; RV32ZVE32F-NEXT: lw t1, 4(t2)
; RV32ZVE32F-NEXT: lw t2, 0(t2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: beqz t3, .LBB49_5
; RV32ZVE32F-NEXT: .LBB49_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
; RV32ZVE32F-NEXT: lw t3, 4(t4)
; RV32ZVE32F-NEXT: lw t4, 0(t4)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: beqz t5, .LBB49_6
; RV32ZVE32F-NEXT: .LBB49_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
; RV32ZVE32F-NEXT: lw t5, 4(t6)
; RV32ZVE32F-NEXT: lw t6, 0(t6)
; RV32ZVE32F-NEXT: .LBB49_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
; RV32ZVE32F-NEXT: beqz s0, .LBB49_16
; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
; RV32ZVE32F-NEXT: lw s0, 4(s1)
; RV32ZVE32F-NEXT: lw s1, 0(s1)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: bnez t0, .LBB49_17
; RV32ZVE32F-NEXT: .LBB49_15:
; RV32ZVE32F-NEXT: lw t0, 60(a2)
; RV32ZVE32F-NEXT: lw a2, 56(a2)
; RV32ZVE32F-NEXT: j .LBB49_18
; RV32ZVE32F-NEXT: .LBB49_16:
; RV32ZVE32F-NEXT: lw s0, 52(a2)
; RV32ZVE32F-NEXT: lw s1, 48(a2)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: beqz t0, .LBB49_15
; RV32ZVE32F-NEXT: .LBB49_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw t0, 4(a2)
; RV32ZVE32F-NEXT: lw a2, 0(a2)
; RV32ZVE32F-NEXT: .LBB49_18: # %else20
; RV32ZVE32F-NEXT: sw a3, 0(a0)
; RV32ZVE32F-NEXT: sw a1, 4(a0)
; RV32ZVE32F-NEXT: sw a5, 8(a0)
; RV32ZVE32F-NEXT: sw a4, 12(a0)
; RV32ZVE32F-NEXT: sw a7, 16(a0)
; RV32ZVE32F-NEXT: sw a6, 20(a0)
; RV32ZVE32F-NEXT: sw t2, 24(a0)
; RV32ZVE32F-NEXT: sw t1, 28(a0)
; RV32ZVE32F-NEXT: sw t4, 32(a0)
; RV32ZVE32F-NEXT: sw t3, 36(a0)
; RV32ZVE32F-NEXT: sw t6, 40(a0)
; RV32ZVE32F-NEXT: sw t5, 44(a0)
; RV32ZVE32F-NEXT: sw s1, 48(a0)
; RV32ZVE32F-NEXT: sw s0, 52(a0)
; RV32ZVE32F-NEXT: sw a2, 56(a0)
; RV32ZVE32F-NEXT: sw t0, 60(a0)
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi a3, a5, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB49_3
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: ld a3, 0(a3)
; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: bnez a4, .LBB49_4
; RV64ZVE32F-NEXT: .LBB49_2:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
; RV64ZVE32F-NEXT: j .LBB49_5
; RV64ZVE32F-NEXT: .LBB49_3:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: beqz a4, .LBB49_2
; RV64ZVE32F-NEXT: .LBB49_4: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v9
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
; RV64ZVE32F-NEXT: .LBB49_5: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: beqz a6, .LBB49_10
; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a6, v8
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
; RV64ZVE32F-NEXT: andi a7, a5, 8
; RV64ZVE32F-NEXT: bnez a7, .LBB49_11
; RV64ZVE32F-NEXT: .LBB49_7:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: bnez t0, .LBB49_12
; RV64ZVE32F-NEXT: .LBB49_8:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: bnez t1, .LBB49_13
; RV64ZVE32F-NEXT: .LBB49_9:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
; RV64ZVE32F-NEXT: j .LBB49_14
; RV64ZVE32F-NEXT: .LBB49_10:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
; RV64ZVE32F-NEXT: andi a7, a5, 8
; RV64ZVE32F-NEXT: beqz a7, .LBB49_7
; RV64ZVE32F-NEXT: .LBB49_11: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a7, v8
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: beqz t0, .LBB49_8
; RV64ZVE32F-NEXT: .LBB49_12: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s t0, v9
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: beqz t1, .LBB49_9
; RV64ZVE32F-NEXT: .LBB49_13: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s t1, v8
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
; RV64ZVE32F-NEXT: .LBB49_14: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: beqz t2, .LBB49_17
; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
; RV64ZVE32F-NEXT: andi a5, a5, -128
; RV64ZVE32F-NEXT: bnez a5, .LBB49_18
; RV64ZVE32F-NEXT: .LBB49_16:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
; RV64ZVE32F-NEXT: j .LBB49_19
; RV64ZVE32F-NEXT: .LBB49_17:
; RV64ZVE32F-NEXT: ld t2, 48(a2)
; RV64ZVE32F-NEXT: andi a5, a5, -128
; RV64ZVE32F-NEXT: beqz a5, .LBB49_16
; RV64ZVE32F-NEXT: .LBB49_18: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: ld a1, 0(a1)
; RV64ZVE32F-NEXT: .LBB49_19: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a6, 16(a0)
; RV64ZVE32F-NEXT: sd a7, 24(a0)
; RV64ZVE32F-NEXT: sd t0, 32(a0)
; RV64ZVE32F-NEXT: sd t1, 40(a0)
; RV64ZVE32F-NEXT: sd t2, 48(a0)
; RV64ZVE32F-NEXT: sd a1, 56(a0)
; RV64ZVE32F-NEXT: ret
%eidxs = sext <8 x i8> %idxs to <8 x i64>
%ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
%v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
ret <8 x i64> %v
}
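; Zero-extended i8 indices fit in 16 bits, so the V targets can use vzext.vf2 and index the
; gather with vluxei16 instead of widening the indices all the way to the data width.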
define <8 x i64> @mgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
; RV32V-LABEL: mgather_baseidx_zext_v8i8_v8i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32V-NEXT: vzext.vf2 v9, v8
; RV32V-NEXT: vsll.vi v8, v9, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32V-NEXT: vluxei16.v v12, (a0), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64V-NEXT: vzext.vf2 v9, v8
; RV64V-NEXT: vsll.vi v8, v9, 3
; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV64V-NEXT: vluxei16.v v12, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vzext.vf4 v10, v8
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v0
; RV32ZVE32F-NEXT: andi a1, t0, 1
; RV32ZVE32F-NEXT: beqz a1, .LBB50_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a1, 4(a3)
; RV32ZVE32F-NEXT: lw a3, 0(a3)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: bnez a4, .LBB50_8
; RV32ZVE32F-NEXT: .LBB50_2:
; RV32ZVE32F-NEXT: lw a4, 12(a2)
; RV32ZVE32F-NEXT: lw a5, 8(a2)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: bnez a6, .LBB50_9
; RV32ZVE32F-NEXT: .LBB50_3:
; RV32ZVE32F-NEXT: lw a6, 20(a2)
; RV32ZVE32F-NEXT: lw a7, 16(a2)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: bnez t1, .LBB50_10
; RV32ZVE32F-NEXT: .LBB50_4:
; RV32ZVE32F-NEXT: lw t1, 28(a2)
; RV32ZVE32F-NEXT: lw t2, 24(a2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: bnez t3, .LBB50_11
; RV32ZVE32F-NEXT: .LBB50_5:
; RV32ZVE32F-NEXT: lw t3, 36(a2)
; RV32ZVE32F-NEXT: lw t4, 32(a2)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: bnez t5, .LBB50_12
; RV32ZVE32F-NEXT: .LBB50_6:
; RV32ZVE32F-NEXT: lw t5, 44(a2)
; RV32ZVE32F-NEXT: lw t6, 40(a2)
; RV32ZVE32F-NEXT: j .LBB50_13
; RV32ZVE32F-NEXT: .LBB50_7:
; RV32ZVE32F-NEXT: lw a1, 4(a2)
; RV32ZVE32F-NEXT: lw a3, 0(a2)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB50_2
; RV32ZVE32F-NEXT: .LBB50_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
; RV32ZVE32F-NEXT: lw a4, 4(a5)
; RV32ZVE32F-NEXT: lw a5, 0(a5)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: beqz a6, .LBB50_3
; RV32ZVE32F-NEXT: .LBB50_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
; RV32ZVE32F-NEXT: lw a6, 4(a7)
; RV32ZVE32F-NEXT: lw a7, 0(a7)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: beqz t1, .LBB50_4
; RV32ZVE32F-NEXT: .LBB50_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
; RV32ZVE32F-NEXT: lw t1, 4(t2)
; RV32ZVE32F-NEXT: lw t2, 0(t2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: beqz t3, .LBB50_5
; RV32ZVE32F-NEXT: .LBB50_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
; RV32ZVE32F-NEXT: lw t3, 4(t4)
; RV32ZVE32F-NEXT: lw t4, 0(t4)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: beqz t5, .LBB50_6
; RV32ZVE32F-NEXT: .LBB50_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
; RV32ZVE32F-NEXT: lw t5, 4(t6)
; RV32ZVE32F-NEXT: lw t6, 0(t6)
; RV32ZVE32F-NEXT: .LBB50_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
; RV32ZVE32F-NEXT: beqz s0, .LBB50_16
; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
; RV32ZVE32F-NEXT: lw s0, 4(s1)
; RV32ZVE32F-NEXT: lw s1, 0(s1)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: bnez t0, .LBB50_17
; RV32ZVE32F-NEXT: .LBB50_15:
; RV32ZVE32F-NEXT: lw t0, 60(a2)
; RV32ZVE32F-NEXT: lw a2, 56(a2)
; RV32ZVE32F-NEXT: j .LBB50_18
; RV32ZVE32F-NEXT: .LBB50_16:
; RV32ZVE32F-NEXT: lw s0, 52(a2)
; RV32ZVE32F-NEXT: lw s1, 48(a2)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: beqz t0, .LBB50_15
; RV32ZVE32F-NEXT: .LBB50_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw t0, 4(a2)
; RV32ZVE32F-NEXT: lw a2, 0(a2)
; RV32ZVE32F-NEXT: .LBB50_18: # %else20
; RV32ZVE32F-NEXT: sw a3, 0(a0)
; RV32ZVE32F-NEXT: sw a1, 4(a0)
; RV32ZVE32F-NEXT: sw a5, 8(a0)
; RV32ZVE32F-NEXT: sw a4, 12(a0)
; RV32ZVE32F-NEXT: sw a7, 16(a0)
; RV32ZVE32F-NEXT: sw a6, 20(a0)
; RV32ZVE32F-NEXT: sw t2, 24(a0)
; RV32ZVE32F-NEXT: sw t1, 28(a0)
; RV32ZVE32F-NEXT: sw t4, 32(a0)
; RV32ZVE32F-NEXT: sw t3, 36(a0)
; RV32ZVE32F-NEXT: sw t6, 40(a0)
; RV32ZVE32F-NEXT: sw t5, 44(a0)
; RV32ZVE32F-NEXT: sw s1, 48(a0)
; RV32ZVE32F-NEXT: sw s0, 52(a0)
; RV32ZVE32F-NEXT: sw a2, 56(a0)
; RV32ZVE32F-NEXT: sw t0, 60(a0)
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi a3, a5, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB50_3
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: andi a3, a3, 255
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: ld a3, 0(a3)
; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: bnez a4, .LBB50_4
; RV64ZVE32F-NEXT: .LBB50_2:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
; RV64ZVE32F-NEXT: j .LBB50_5
; RV64ZVE32F-NEXT: .LBB50_3:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: beqz a4, .LBB50_2
; RV64ZVE32F-NEXT: .LBB50_4: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v9
; RV64ZVE32F-NEXT: andi a4, a4, 255
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
; RV64ZVE32F-NEXT: .LBB50_5: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: beqz a6, .LBB50_10
; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a6, v8
; RV64ZVE32F-NEXT: andi a6, a6, 255
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
; RV64ZVE32F-NEXT: andi a7, a5, 8
; RV64ZVE32F-NEXT: bnez a7, .LBB50_11
; RV64ZVE32F-NEXT: .LBB50_7:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: bnez t0, .LBB50_12
; RV64ZVE32F-NEXT: .LBB50_8:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: bnez t1, .LBB50_13
; RV64ZVE32F-NEXT: .LBB50_9:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
; RV64ZVE32F-NEXT: j .LBB50_14
; RV64ZVE32F-NEXT: .LBB50_10:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
; RV64ZVE32F-NEXT: andi a7, a5, 8
; RV64ZVE32F-NEXT: beqz a7, .LBB50_7
; RV64ZVE32F-NEXT: .LBB50_11: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a7, v8
; RV64ZVE32F-NEXT: andi a7, a7, 255
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: beqz t0, .LBB50_8
; RV64ZVE32F-NEXT: .LBB50_12: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s t0, v9
; RV64ZVE32F-NEXT: andi t0, t0, 255
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: beqz t1, .LBB50_9
; RV64ZVE32F-NEXT: .LBB50_13: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s t1, v8
; RV64ZVE32F-NEXT: andi t1, t1, 255
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
; RV64ZVE32F-NEXT: .LBB50_14: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: beqz t2, .LBB50_17
; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: andi t2, t2, 255
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
; RV64ZVE32F-NEXT: andi a5, a5, -128
; RV64ZVE32F-NEXT: bnez a5, .LBB50_18
; RV64ZVE32F-NEXT: .LBB50_16:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
; RV64ZVE32F-NEXT: j .LBB50_19
; RV64ZVE32F-NEXT: .LBB50_17:
; RV64ZVE32F-NEXT: ld t2, 48(a2)
; RV64ZVE32F-NEXT: andi a5, a5, -128
; RV64ZVE32F-NEXT: beqz a5, .LBB50_16
; RV64ZVE32F-NEXT: .LBB50_18: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: ld a1, 0(a1)
; RV64ZVE32F-NEXT: .LBB50_19: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a6, 16(a0)
; RV64ZVE32F-NEXT: sd a7, 24(a0)
; RV64ZVE32F-NEXT: sd t0, 32(a0)
; RV64ZVE32F-NEXT: sd t1, 40(a0)
; RV64ZVE32F-NEXT: sd t2, 48(a0)
; RV64ZVE32F-NEXT: sd a1, 56(a0)
; RV64ZVE32F-NEXT: ret
%eidxs = zext <8 x i8> %idxs to <8 x i64>
%ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
%v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
ret <8 x i64> %v
}
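; i16 indices used directly as the GEP vector index (implicitly sign-extended to
; pointer width), scaled by 8 for the i64 element size: RV32 sign-extends to e32
; for vluxei32, RV64 to e64 for vluxei64.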
define <8 x i64> @mgather_baseidx_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
; RV32V-LABEL: mgather_baseidx_v8i16_v8i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsext.vf2 v10, v8
; RV32V-NEXT: vsll.vi v8, v10, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v8i16_v8i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64V-NEXT: vsext.vf4 v16, v8
; RV64V-NEXT: vsll.vi v8, v16, 3
; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_baseidx_v8i16_v8i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsext.vf2 v10, v8
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v0
; RV32ZVE32F-NEXT: andi a1, t0, 1
; RV32ZVE32F-NEXT: beqz a1, .LBB51_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a1, 4(a3)
; RV32ZVE32F-NEXT: lw a3, 0(a3)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: bnez a4, .LBB51_8
; RV32ZVE32F-NEXT: .LBB51_2:
; RV32ZVE32F-NEXT: lw a4, 12(a2)
; RV32ZVE32F-NEXT: lw a5, 8(a2)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: bnez a6, .LBB51_9
; RV32ZVE32F-NEXT: .LBB51_3:
; RV32ZVE32F-NEXT: lw a6, 20(a2)
; RV32ZVE32F-NEXT: lw a7, 16(a2)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: bnez t1, .LBB51_10
; RV32ZVE32F-NEXT: .LBB51_4:
; RV32ZVE32F-NEXT: lw t1, 28(a2)
; RV32ZVE32F-NEXT: lw t2, 24(a2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: bnez t3, .LBB51_11
; RV32ZVE32F-NEXT: .LBB51_5:
; RV32ZVE32F-NEXT: lw t3, 36(a2)
; RV32ZVE32F-NEXT: lw t4, 32(a2)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: bnez t5, .LBB51_12
; RV32ZVE32F-NEXT: .LBB51_6:
; RV32ZVE32F-NEXT: lw t5, 44(a2)
; RV32ZVE32F-NEXT: lw t6, 40(a2)
; RV32ZVE32F-NEXT: j .LBB51_13
; RV32ZVE32F-NEXT: .LBB51_7:
; RV32ZVE32F-NEXT: lw a1, 4(a2)
; RV32ZVE32F-NEXT: lw a3, 0(a2)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB51_2
; RV32ZVE32F-NEXT: .LBB51_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
; RV32ZVE32F-NEXT: lw a4, 4(a5)
; RV32ZVE32F-NEXT: lw a5, 0(a5)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: beqz a6, .LBB51_3
; RV32ZVE32F-NEXT: .LBB51_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
; RV32ZVE32F-NEXT: lw a6, 4(a7)
; RV32ZVE32F-NEXT: lw a7, 0(a7)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: beqz t1, .LBB51_4
; RV32ZVE32F-NEXT: .LBB51_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
; RV32ZVE32F-NEXT: lw t1, 4(t2)
; RV32ZVE32F-NEXT: lw t2, 0(t2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: beqz t3, .LBB51_5
; RV32ZVE32F-NEXT: .LBB51_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
; RV32ZVE32F-NEXT: lw t3, 4(t4)
; RV32ZVE32F-NEXT: lw t4, 0(t4)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: beqz t5, .LBB51_6
; RV32ZVE32F-NEXT: .LBB51_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
; RV32ZVE32F-NEXT: lw t5, 4(t6)
; RV32ZVE32F-NEXT: lw t6, 0(t6)
; RV32ZVE32F-NEXT: .LBB51_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
; RV32ZVE32F-NEXT: beqz s0, .LBB51_16
; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
; RV32ZVE32F-NEXT: lw s0, 4(s1)
; RV32ZVE32F-NEXT: lw s1, 0(s1)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: bnez t0, .LBB51_17
; RV32ZVE32F-NEXT: .LBB51_15:
; RV32ZVE32F-NEXT: lw t0, 60(a2)
; RV32ZVE32F-NEXT: lw a2, 56(a2)
; RV32ZVE32F-NEXT: j .LBB51_18
; RV32ZVE32F-NEXT: .LBB51_16:
; RV32ZVE32F-NEXT: lw s0, 52(a2)
; RV32ZVE32F-NEXT: lw s1, 48(a2)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: beqz t0, .LBB51_15
; RV32ZVE32F-NEXT: .LBB51_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw t0, 4(a2)
; RV32ZVE32F-NEXT: lw a2, 0(a2)
; RV32ZVE32F-NEXT: .LBB51_18: # %else20
; RV32ZVE32F-NEXT: sw a3, 0(a0)
; RV32ZVE32F-NEXT: sw a1, 4(a0)
; RV32ZVE32F-NEXT: sw a5, 8(a0)
; RV32ZVE32F-NEXT: sw a4, 12(a0)
; RV32ZVE32F-NEXT: sw a7, 16(a0)
; RV32ZVE32F-NEXT: sw a6, 20(a0)
; RV32ZVE32F-NEXT: sw t2, 24(a0)
; RV32ZVE32F-NEXT: sw t1, 28(a0)
; RV32ZVE32F-NEXT: sw t4, 32(a0)
; RV32ZVE32F-NEXT: sw t3, 36(a0)
; RV32ZVE32F-NEXT: sw t6, 40(a0)
; RV32ZVE32F-NEXT: sw t5, 44(a0)
; RV32ZVE32F-NEXT: sw s1, 48(a0)
; RV32ZVE32F-NEXT: sw s0, 52(a0)
; RV32ZVE32F-NEXT: sw a2, 56(a0)
; RV32ZVE32F-NEXT: sw t0, 60(a0)
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i16_v8i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi a3, a5, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB51_3
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: ld a3, 0(a3)
; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: bnez a4, .LBB51_4
; RV64ZVE32F-NEXT: .LBB51_2:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
; RV64ZVE32F-NEXT: j .LBB51_5
; RV64ZVE32F-NEXT: .LBB51_3:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: beqz a4, .LBB51_2
; RV64ZVE32F-NEXT: .LBB51_4: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v9
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
; RV64ZVE32F-NEXT: .LBB51_5: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: beqz a6, .LBB51_10
; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a6, v8
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
; RV64ZVE32F-NEXT: andi a7, a5, 8
; RV64ZVE32F-NEXT: bnez a7, .LBB51_11
; RV64ZVE32F-NEXT: .LBB51_7:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: bnez t0, .LBB51_12
; RV64ZVE32F-NEXT: .LBB51_8:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: bnez t1, .LBB51_13
; RV64ZVE32F-NEXT: .LBB51_9:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
; RV64ZVE32F-NEXT: j .LBB51_14
; RV64ZVE32F-NEXT: .LBB51_10:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
; RV64ZVE32F-NEXT: andi a7, a5, 8
; RV64ZVE32F-NEXT: beqz a7, .LBB51_7
; RV64ZVE32F-NEXT: .LBB51_11: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a7, v8
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: beqz t0, .LBB51_8
; RV64ZVE32F-NEXT: .LBB51_12: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s t0, v9
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: beqz t1, .LBB51_9
; RV64ZVE32F-NEXT: .LBB51_13: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s t1, v8
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
; RV64ZVE32F-NEXT: .LBB51_14: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: beqz t2, .LBB51_17
; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
; RV64ZVE32F-NEXT: andi a5, a5, -128
; RV64ZVE32F-NEXT: bnez a5, .LBB51_18
; RV64ZVE32F-NEXT: .LBB51_16:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
; RV64ZVE32F-NEXT: j .LBB51_19
; RV64ZVE32F-NEXT: .LBB51_17:
; RV64ZVE32F-NEXT: ld t2, 48(a2)
; RV64ZVE32F-NEXT: andi a5, a5, -128
; RV64ZVE32F-NEXT: beqz a5, .LBB51_16
; RV64ZVE32F-NEXT: .LBB51_18: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: ld a1, 0(a1)
; RV64ZVE32F-NEXT: .LBB51_19: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a6, 16(a0)
; RV64ZVE32F-NEXT: sd a7, 24(a0)
; RV64ZVE32F-NEXT: sd t0, 32(a0)
; RV64ZVE32F-NEXT: sd t1, 40(a0)
; RV64ZVE32F-NEXT: sd t2, 48(a0)
; RV64ZVE32F-NEXT: sd a1, 56(a0)
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i64, ptr %base, <8 x i16> %idxs
%v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
ret <8 x i64> %v
}
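; Explicit sext of the i16 indices to i64. This is equivalent to the plain v8i16
; index test above, so the expected lowering is the same.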
define <8 x i64> @mgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
; RV32V-LABEL: mgather_baseidx_sext_v8i16_v8i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsext.vf2 v10, v8
; RV32V-NEXT: vsll.vi v8, v10, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_sext_v8i16_v8i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64V-NEXT: vsext.vf4 v16, v8
; RV64V-NEXT: vsll.vi v8, v16, 3
; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsext.vf2 v10, v8
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v0
; RV32ZVE32F-NEXT: andi a1, t0, 1
; RV32ZVE32F-NEXT: beqz a1, .LBB52_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a1, 4(a3)
; RV32ZVE32F-NEXT: lw a3, 0(a3)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: bnez a4, .LBB52_8
; RV32ZVE32F-NEXT: .LBB52_2:
; RV32ZVE32F-NEXT: lw a4, 12(a2)
; RV32ZVE32F-NEXT: lw a5, 8(a2)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: bnez a6, .LBB52_9
; RV32ZVE32F-NEXT: .LBB52_3:
; RV32ZVE32F-NEXT: lw a6, 20(a2)
; RV32ZVE32F-NEXT: lw a7, 16(a2)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: bnez t1, .LBB52_10
; RV32ZVE32F-NEXT: .LBB52_4:
; RV32ZVE32F-NEXT: lw t1, 28(a2)
; RV32ZVE32F-NEXT: lw t2, 24(a2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: bnez t3, .LBB52_11
; RV32ZVE32F-NEXT: .LBB52_5:
; RV32ZVE32F-NEXT: lw t3, 36(a2)
; RV32ZVE32F-NEXT: lw t4, 32(a2)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: bnez t5, .LBB52_12
; RV32ZVE32F-NEXT: .LBB52_6:
; RV32ZVE32F-NEXT: lw t5, 44(a2)
; RV32ZVE32F-NEXT: lw t6, 40(a2)
; RV32ZVE32F-NEXT: j .LBB52_13
; RV32ZVE32F-NEXT: .LBB52_7:
; RV32ZVE32F-NEXT: lw a1, 4(a2)
; RV32ZVE32F-NEXT: lw a3, 0(a2)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB52_2
; RV32ZVE32F-NEXT: .LBB52_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
; RV32ZVE32F-NEXT: lw a4, 4(a5)
; RV32ZVE32F-NEXT: lw a5, 0(a5)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: beqz a6, .LBB52_3
; RV32ZVE32F-NEXT: .LBB52_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
; RV32ZVE32F-NEXT: lw a6, 4(a7)
; RV32ZVE32F-NEXT: lw a7, 0(a7)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: beqz t1, .LBB52_4
; RV32ZVE32F-NEXT: .LBB52_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
; RV32ZVE32F-NEXT: lw t1, 4(t2)
; RV32ZVE32F-NEXT: lw t2, 0(t2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: beqz t3, .LBB52_5
; RV32ZVE32F-NEXT: .LBB52_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
; RV32ZVE32F-NEXT: lw t3, 4(t4)
; RV32ZVE32F-NEXT: lw t4, 0(t4)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: beqz t5, .LBB52_6
; RV32ZVE32F-NEXT: .LBB52_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
; RV32ZVE32F-NEXT: lw t5, 4(t6)
; RV32ZVE32F-NEXT: lw t6, 0(t6)
; RV32ZVE32F-NEXT: .LBB52_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
; RV32ZVE32F-NEXT: beqz s0, .LBB52_16
; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
; RV32ZVE32F-NEXT: lw s0, 4(s1)
; RV32ZVE32F-NEXT: lw s1, 0(s1)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: bnez t0, .LBB52_17
; RV32ZVE32F-NEXT: .LBB52_15:
; RV32ZVE32F-NEXT: lw t0, 60(a2)
; RV32ZVE32F-NEXT: lw a2, 56(a2)
; RV32ZVE32F-NEXT: j .LBB52_18
; RV32ZVE32F-NEXT: .LBB52_16:
; RV32ZVE32F-NEXT: lw s0, 52(a2)
; RV32ZVE32F-NEXT: lw s1, 48(a2)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: beqz t0, .LBB52_15
; RV32ZVE32F-NEXT: .LBB52_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw t0, 4(a2)
; RV32ZVE32F-NEXT: lw a2, 0(a2)
; RV32ZVE32F-NEXT: .LBB52_18: # %else20
; RV32ZVE32F-NEXT: sw a3, 0(a0)
; RV32ZVE32F-NEXT: sw a1, 4(a0)
; RV32ZVE32F-NEXT: sw a5, 8(a0)
; RV32ZVE32F-NEXT: sw a4, 12(a0)
; RV32ZVE32F-NEXT: sw a7, 16(a0)
; RV32ZVE32F-NEXT: sw a6, 20(a0)
; RV32ZVE32F-NEXT: sw t2, 24(a0)
; RV32ZVE32F-NEXT: sw t1, 28(a0)
; RV32ZVE32F-NEXT: sw t4, 32(a0)
; RV32ZVE32F-NEXT: sw t3, 36(a0)
; RV32ZVE32F-NEXT: sw t6, 40(a0)
; RV32ZVE32F-NEXT: sw t5, 44(a0)
; RV32ZVE32F-NEXT: sw s1, 48(a0)
; RV32ZVE32F-NEXT: sw s0, 52(a0)
; RV32ZVE32F-NEXT: sw a2, 56(a0)
; RV32ZVE32F-NEXT: sw t0, 60(a0)
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi a3, a5, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB52_3
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: ld a3, 0(a3)
; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: bnez a4, .LBB52_4
; RV64ZVE32F-NEXT: .LBB52_2:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
; RV64ZVE32F-NEXT: j .LBB52_5
; RV64ZVE32F-NEXT: .LBB52_3:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: beqz a4, .LBB52_2
; RV64ZVE32F-NEXT: .LBB52_4: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v9
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
; RV64ZVE32F-NEXT: .LBB52_5: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: beqz a6, .LBB52_10
; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a6, v8
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
; RV64ZVE32F-NEXT: andi a7, a5, 8
; RV64ZVE32F-NEXT: bnez a7, .LBB52_11
; RV64ZVE32F-NEXT: .LBB52_7:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: bnez t0, .LBB52_12
; RV64ZVE32F-NEXT: .LBB52_8:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: bnez t1, .LBB52_13
; RV64ZVE32F-NEXT: .LBB52_9:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
; RV64ZVE32F-NEXT: j .LBB52_14
; RV64ZVE32F-NEXT: .LBB52_10:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
; RV64ZVE32F-NEXT: andi a7, a5, 8
; RV64ZVE32F-NEXT: beqz a7, .LBB52_7
; RV64ZVE32F-NEXT: .LBB52_11: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a7, v8
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: beqz t0, .LBB52_8
; RV64ZVE32F-NEXT: .LBB52_12: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s t0, v9
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: beqz t1, .LBB52_9
; RV64ZVE32F-NEXT: .LBB52_13: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s t1, v8
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
; RV64ZVE32F-NEXT: .LBB52_14: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: beqz t2, .LBB52_17
; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
; RV64ZVE32F-NEXT: andi a5, a5, -128
; RV64ZVE32F-NEXT: bnez a5, .LBB52_18
; RV64ZVE32F-NEXT: .LBB52_16:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
; RV64ZVE32F-NEXT: j .LBB52_19
; RV64ZVE32F-NEXT: .LBB52_17:
; RV64ZVE32F-NEXT: ld t2, 48(a2)
; RV64ZVE32F-NEXT: andi a5, a5, -128
; RV64ZVE32F-NEXT: beqz a5, .LBB52_16
; RV64ZVE32F-NEXT: .LBB52_18: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: ld a1, 0(a1)
; RV64ZVE32F-NEXT: .LBB52_19: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a6, 16(a0)
; RV64ZVE32F-NEXT: sd a7, 24(a0)
; RV64ZVE32F-NEXT: sd t0, 32(a0)
; RV64ZVE32F-NEXT: sd t1, 40(a0)
; RV64ZVE32F-NEXT: sd t2, 48(a0)
; RV64ZVE32F-NEXT: sd a1, 56(a0)
; RV64ZVE32F-NEXT: ret
%eidxs = sext <8 x i16> %idxs to <8 x i64>
%ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
%v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
ret <8 x i64> %v
}
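; zext of the i16 indices to i64: the RVV lowerings zero-extend to e32 and use
; vluxei32, while the scalar RV64ZVE32F fallback masks each extracted index with
; 0xffff (materialized via lui/addiw) before scaling.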
define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
; RV32V-LABEL: mgather_baseidx_zext_v8i16_v8i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vzext.vf2 v10, v8
; RV32V-NEXT: vsll.vi v8, v10, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_zext_v8i16_v8i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64V-NEXT: vzext.vf2 v10, v8
; RV64V-NEXT: vsll.vi v8, v10, 3
; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV64V-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vzext.vf2 v10, v8
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v0
; RV32ZVE32F-NEXT: andi a1, t0, 1
; RV32ZVE32F-NEXT: beqz a1, .LBB53_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a1, 4(a3)
; RV32ZVE32F-NEXT: lw a3, 0(a3)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: bnez a4, .LBB53_8
; RV32ZVE32F-NEXT: .LBB53_2:
; RV32ZVE32F-NEXT: lw a4, 12(a2)
; RV32ZVE32F-NEXT: lw a5, 8(a2)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: bnez a6, .LBB53_9
; RV32ZVE32F-NEXT: .LBB53_3:
; RV32ZVE32F-NEXT: lw a6, 20(a2)
; RV32ZVE32F-NEXT: lw a7, 16(a2)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: bnez t1, .LBB53_10
; RV32ZVE32F-NEXT: .LBB53_4:
; RV32ZVE32F-NEXT: lw t1, 28(a2)
; RV32ZVE32F-NEXT: lw t2, 24(a2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: bnez t3, .LBB53_11
; RV32ZVE32F-NEXT: .LBB53_5:
; RV32ZVE32F-NEXT: lw t3, 36(a2)
; RV32ZVE32F-NEXT: lw t4, 32(a2)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: bnez t5, .LBB53_12
; RV32ZVE32F-NEXT: .LBB53_6:
; RV32ZVE32F-NEXT: lw t5, 44(a2)
; RV32ZVE32F-NEXT: lw t6, 40(a2)
; RV32ZVE32F-NEXT: j .LBB53_13
; RV32ZVE32F-NEXT: .LBB53_7:
; RV32ZVE32F-NEXT: lw a1, 4(a2)
; RV32ZVE32F-NEXT: lw a3, 0(a2)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB53_2
; RV32ZVE32F-NEXT: .LBB53_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
; RV32ZVE32F-NEXT: lw a4, 4(a5)
; RV32ZVE32F-NEXT: lw a5, 0(a5)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: beqz a6, .LBB53_3
; RV32ZVE32F-NEXT: .LBB53_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
; RV32ZVE32F-NEXT: lw a6, 4(a7)
; RV32ZVE32F-NEXT: lw a7, 0(a7)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: beqz t1, .LBB53_4
; RV32ZVE32F-NEXT: .LBB53_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
; RV32ZVE32F-NEXT: lw t1, 4(t2)
; RV32ZVE32F-NEXT: lw t2, 0(t2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: beqz t3, .LBB53_5
; RV32ZVE32F-NEXT: .LBB53_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
; RV32ZVE32F-NEXT: lw t3, 4(t4)
; RV32ZVE32F-NEXT: lw t4, 0(t4)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: beqz t5, .LBB53_6
; RV32ZVE32F-NEXT: .LBB53_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
; RV32ZVE32F-NEXT: lw t5, 4(t6)
; RV32ZVE32F-NEXT: lw t6, 0(t6)
; RV32ZVE32F-NEXT: .LBB53_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
; RV32ZVE32F-NEXT: beqz s0, .LBB53_16
; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
; RV32ZVE32F-NEXT: lw s0, 4(s1)
; RV32ZVE32F-NEXT: lw s1, 0(s1)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: bnez t0, .LBB53_17
; RV32ZVE32F-NEXT: .LBB53_15:
; RV32ZVE32F-NEXT: lw t0, 60(a2)
; RV32ZVE32F-NEXT: lw a2, 56(a2)
; RV32ZVE32F-NEXT: j .LBB53_18
; RV32ZVE32F-NEXT: .LBB53_16:
; RV32ZVE32F-NEXT: lw s0, 52(a2)
; RV32ZVE32F-NEXT: lw s1, 48(a2)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: beqz t0, .LBB53_15
; RV32ZVE32F-NEXT: .LBB53_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw t0, 4(a2)
; RV32ZVE32F-NEXT: lw a2, 0(a2)
; RV32ZVE32F-NEXT: .LBB53_18: # %else20
; RV32ZVE32F-NEXT: sw a3, 0(a0)
; RV32ZVE32F-NEXT: sw a1, 4(a0)
; RV32ZVE32F-NEXT: sw a5, 8(a0)
; RV32ZVE32F-NEXT: sw a4, 12(a0)
; RV32ZVE32F-NEXT: sw a7, 16(a0)
; RV32ZVE32F-NEXT: sw a6, 20(a0)
; RV32ZVE32F-NEXT: sw t2, 24(a0)
; RV32ZVE32F-NEXT: sw t1, 28(a0)
; RV32ZVE32F-NEXT: sw t4, 32(a0)
; RV32ZVE32F-NEXT: sw t3, 36(a0)
; RV32ZVE32F-NEXT: sw t6, 40(a0)
; RV32ZVE32F-NEXT: sw t5, 44(a0)
; RV32ZVE32F-NEXT: sw s1, 48(a0)
; RV32ZVE32F-NEXT: sw s0, 52(a0)
; RV32ZVE32F-NEXT: sw a2, 56(a0)
; RV32ZVE32F-NEXT: sw t0, 60(a0)
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: lui a5, 16
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a6, v0
; RV64ZVE32F-NEXT: andi a3, a6, 1
; RV64ZVE32F-NEXT: addiw a5, a5, -1
; RV64ZVE32F-NEXT: beqz a3, .LBB53_3
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: and a3, a3, a5
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: ld a3, 0(a3)
; RV64ZVE32F-NEXT: andi a4, a6, 2
; RV64ZVE32F-NEXT: bnez a4, .LBB53_4
; RV64ZVE32F-NEXT: .LBB53_2:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
; RV64ZVE32F-NEXT: j .LBB53_5
; RV64ZVE32F-NEXT: .LBB53_3:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
; RV64ZVE32F-NEXT: andi a4, a6, 2
; RV64ZVE32F-NEXT: beqz a4, .LBB53_2
; RV64ZVE32F-NEXT: .LBB53_4: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v9
; RV64ZVE32F-NEXT: and a4, a4, a5
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
; RV64ZVE32F-NEXT: .LBB53_5: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a7, a6, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: beqz a7, .LBB53_10
; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a7, v8
; RV64ZVE32F-NEXT: and a7, a7, a5
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
; RV64ZVE32F-NEXT: andi t0, a6, 8
; RV64ZVE32F-NEXT: bnez t0, .LBB53_11
; RV64ZVE32F-NEXT: .LBB53_7:
; RV64ZVE32F-NEXT: ld t0, 24(a2)
; RV64ZVE32F-NEXT: andi t1, a6, 16
; RV64ZVE32F-NEXT: bnez t1, .LBB53_12
; RV64ZVE32F-NEXT: .LBB53_8:
; RV64ZVE32F-NEXT: ld t1, 32(a2)
; RV64ZVE32F-NEXT: andi t2, a6, 32
; RV64ZVE32F-NEXT: bnez t2, .LBB53_13
; RV64ZVE32F-NEXT: .LBB53_9:
; RV64ZVE32F-NEXT: ld t2, 40(a2)
; RV64ZVE32F-NEXT: j .LBB53_14
; RV64ZVE32F-NEXT: .LBB53_10:
; RV64ZVE32F-NEXT: ld a7, 16(a2)
; RV64ZVE32F-NEXT: andi t0, a6, 8
; RV64ZVE32F-NEXT: beqz t0, .LBB53_7
; RV64ZVE32F-NEXT: .LBB53_11: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s t0, v8
; RV64ZVE32F-NEXT: and t0, t0, a5
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
; RV64ZVE32F-NEXT: andi t1, a6, 16
; RV64ZVE32F-NEXT: beqz t1, .LBB53_8
; RV64ZVE32F-NEXT: .LBB53_12: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s t1, v9
; RV64ZVE32F-NEXT: and t1, t1, a5
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
; RV64ZVE32F-NEXT: andi t2, a6, 32
; RV64ZVE32F-NEXT: beqz t2, .LBB53_9
; RV64ZVE32F-NEXT: .LBB53_13: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: and t2, t2, a5
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
; RV64ZVE32F-NEXT: .LBB53_14: # %else14
; RV64ZVE32F-NEXT: andi t3, a6, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: beqz t3, .LBB53_17
; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t3, v8
; RV64ZVE32F-NEXT: and t3, t3, a5
; RV64ZVE32F-NEXT: slli t3, t3, 3
; RV64ZVE32F-NEXT: add t3, a1, t3
; RV64ZVE32F-NEXT: ld t3, 0(t3)
; RV64ZVE32F-NEXT: andi a6, a6, -128
; RV64ZVE32F-NEXT: bnez a6, .LBB53_18
; RV64ZVE32F-NEXT: .LBB53_16:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
; RV64ZVE32F-NEXT: j .LBB53_19
; RV64ZVE32F-NEXT: .LBB53_17:
; RV64ZVE32F-NEXT: ld t3, 48(a2)
; RV64ZVE32F-NEXT: andi a6, a6, -128
; RV64ZVE32F-NEXT: beqz a6, .LBB53_16
; RV64ZVE32F-NEXT: .LBB53_18: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: and a2, a2, a5
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: ld a1, 0(a1)
; RV64ZVE32F-NEXT: .LBB53_19: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a7, 16(a0)
; RV64ZVE32F-NEXT: sd t0, 24(a0)
; RV64ZVE32F-NEXT: sd t1, 32(a0)
; RV64ZVE32F-NEXT: sd t2, 40(a0)
; RV64ZVE32F-NEXT: sd t3, 48(a0)
; RV64ZVE32F-NEXT: sd a1, 56(a0)
; RV64ZVE32F-NEXT: ret
%eidxs = zext <8 x i16> %idxs to <8 x i64>
%ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
%v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
ret <8 x i64> %v
}
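; i32 indices used directly: RV32 feeds them straight into vluxei32 after
; scaling, while RV64 sign-extends them to i64 for vluxei64.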
define <8 x i64> @mgather_baseidx_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
; RV32V-LABEL: mgather_baseidx_v8i32_v8i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsll.vi v8, v8, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v8i32_v8i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64V-NEXT: vsext.vf2 v16, v8
; RV64V-NEXT: vsll.vi v8, v16, 3
; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_baseidx_v8i32_v8i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v0
; RV32ZVE32F-NEXT: andi a1, t0, 1
; RV32ZVE32F-NEXT: beqz a1, .LBB54_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a1, 4(a3)
; RV32ZVE32F-NEXT: lw a3, 0(a3)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: bnez a4, .LBB54_8
; RV32ZVE32F-NEXT: .LBB54_2:
; RV32ZVE32F-NEXT: lw a4, 12(a2)
; RV32ZVE32F-NEXT: lw a5, 8(a2)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: bnez a6, .LBB54_9
; RV32ZVE32F-NEXT: .LBB54_3:
; RV32ZVE32F-NEXT: lw a6, 20(a2)
; RV32ZVE32F-NEXT: lw a7, 16(a2)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: bnez t1, .LBB54_10
; RV32ZVE32F-NEXT: .LBB54_4:
; RV32ZVE32F-NEXT: lw t1, 28(a2)
; RV32ZVE32F-NEXT: lw t2, 24(a2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: bnez t3, .LBB54_11
; RV32ZVE32F-NEXT: .LBB54_5:
; RV32ZVE32F-NEXT: lw t3, 36(a2)
; RV32ZVE32F-NEXT: lw t4, 32(a2)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: bnez t5, .LBB54_12
; RV32ZVE32F-NEXT: .LBB54_6:
; RV32ZVE32F-NEXT: lw t5, 44(a2)
; RV32ZVE32F-NEXT: lw t6, 40(a2)
; RV32ZVE32F-NEXT: j .LBB54_13
; RV32ZVE32F-NEXT: .LBB54_7:
; RV32ZVE32F-NEXT: lw a1, 4(a2)
; RV32ZVE32F-NEXT: lw a3, 0(a2)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB54_2
; RV32ZVE32F-NEXT: .LBB54_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
; RV32ZVE32F-NEXT: lw a4, 4(a5)
; RV32ZVE32F-NEXT: lw a5, 0(a5)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: beqz a6, .LBB54_3
; RV32ZVE32F-NEXT: .LBB54_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
; RV32ZVE32F-NEXT: lw a6, 4(a7)
; RV32ZVE32F-NEXT: lw a7, 0(a7)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: beqz t1, .LBB54_4
; RV32ZVE32F-NEXT: .LBB54_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
; RV32ZVE32F-NEXT: lw t1, 4(t2)
; RV32ZVE32F-NEXT: lw t2, 0(t2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: beqz t3, .LBB54_5
; RV32ZVE32F-NEXT: .LBB54_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
; RV32ZVE32F-NEXT: lw t3, 4(t4)
; RV32ZVE32F-NEXT: lw t4, 0(t4)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: beqz t5, .LBB54_6
; RV32ZVE32F-NEXT: .LBB54_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
; RV32ZVE32F-NEXT: lw t5, 4(t6)
; RV32ZVE32F-NEXT: lw t6, 0(t6)
; RV32ZVE32F-NEXT: .LBB54_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
; RV32ZVE32F-NEXT: beqz s0, .LBB54_16
; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
; RV32ZVE32F-NEXT: lw s0, 4(s1)
; RV32ZVE32F-NEXT: lw s1, 0(s1)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: bnez t0, .LBB54_17
; RV32ZVE32F-NEXT: .LBB54_15:
; RV32ZVE32F-NEXT: lw t0, 60(a2)
; RV32ZVE32F-NEXT: lw a2, 56(a2)
; RV32ZVE32F-NEXT: j .LBB54_18
; RV32ZVE32F-NEXT: .LBB54_16:
; RV32ZVE32F-NEXT: lw s0, 52(a2)
; RV32ZVE32F-NEXT: lw s1, 48(a2)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: beqz t0, .LBB54_15
; RV32ZVE32F-NEXT: .LBB54_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw t0, 4(a2)
; RV32ZVE32F-NEXT: lw a2, 0(a2)
; RV32ZVE32F-NEXT: .LBB54_18: # %else20
; RV32ZVE32F-NEXT: sw a3, 0(a0)
; RV32ZVE32F-NEXT: sw a1, 4(a0)
; RV32ZVE32F-NEXT: sw a5, 8(a0)
; RV32ZVE32F-NEXT: sw a4, 12(a0)
; RV32ZVE32F-NEXT: sw a7, 16(a0)
; RV32ZVE32F-NEXT: sw a6, 20(a0)
; RV32ZVE32F-NEXT: sw t2, 24(a0)
; RV32ZVE32F-NEXT: sw t1, 28(a0)
; RV32ZVE32F-NEXT: sw t4, 32(a0)
; RV32ZVE32F-NEXT: sw t3, 36(a0)
; RV32ZVE32F-NEXT: sw t6, 40(a0)
; RV32ZVE32F-NEXT: sw t5, 44(a0)
; RV32ZVE32F-NEXT: sw s1, 48(a0)
; RV32ZVE32F-NEXT: sw s0, 52(a0)
; RV32ZVE32F-NEXT: sw a2, 56(a0)
; RV32ZVE32F-NEXT: sw t0, 60(a0)
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i32_v8i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi a3, a5, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB54_3
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: ld a3, 0(a3)
; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: bnez a4, .LBB54_4
; RV64ZVE32F-NEXT: .LBB54_2:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
; RV64ZVE32F-NEXT: j .LBB54_5
; RV64ZVE32F-NEXT: .LBB54_3:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: beqz a4, .LBB54_2
; RV64ZVE32F-NEXT: .LBB54_4: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v10
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
; RV64ZVE32F-NEXT: .LBB54_5: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: beqz a6, .LBB54_10
; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a6, v8
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
; RV64ZVE32F-NEXT: andi a7, a5, 8
; RV64ZVE32F-NEXT: bnez a7, .LBB54_11
; RV64ZVE32F-NEXT: .LBB54_7:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: bnez t0, .LBB54_12
; RV64ZVE32F-NEXT: .LBB54_8:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: bnez t1, .LBB54_13
; RV64ZVE32F-NEXT: .LBB54_9:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
; RV64ZVE32F-NEXT: j .LBB54_14
; RV64ZVE32F-NEXT: .LBB54_10:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
; RV64ZVE32F-NEXT: andi a7, a5, 8
; RV64ZVE32F-NEXT: beqz a7, .LBB54_7
; RV64ZVE32F-NEXT: .LBB54_11: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a7, v8
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: beqz t0, .LBB54_8
; RV64ZVE32F-NEXT: .LBB54_12: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s t0, v10
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: beqz t1, .LBB54_9
; RV64ZVE32F-NEXT: .LBB54_13: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s t1, v8
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
; RV64ZVE32F-NEXT: .LBB54_14: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: beqz t2, .LBB54_17
; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
; RV64ZVE32F-NEXT: andi a5, a5, -128
; RV64ZVE32F-NEXT: bnez a5, .LBB54_18
; RV64ZVE32F-NEXT: .LBB54_16:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
; RV64ZVE32F-NEXT: j .LBB54_19
; RV64ZVE32F-NEXT: .LBB54_17:
; RV64ZVE32F-NEXT: ld t2, 48(a2)
; RV64ZVE32F-NEXT: andi a5, a5, -128
; RV64ZVE32F-NEXT: beqz a5, .LBB54_16
; RV64ZVE32F-NEXT: .LBB54_18: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: ld a1, 0(a1)
; RV64ZVE32F-NEXT: .LBB54_19: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a6, 16(a0)
; RV64ZVE32F-NEXT: sd a7, 24(a0)
; RV64ZVE32F-NEXT: sd t0, 32(a0)
; RV64ZVE32F-NEXT: sd t1, 40(a0)
; RV64ZVE32F-NEXT: sd t2, 48(a0)
; RV64ZVE32F-NEXT: sd a1, 56(a0)
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i64, ptr %base, <8 x i32> %idxs
%v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
ret <8 x i64> %v
}
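; Explicit sext of the i32 indices to i64; since GEP indices are sign-extended
; anyway, the expected lowering matches the plain v8i32 index test above.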
define <8 x i64> @mgather_baseidx_sext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
; RV32V-LABEL: mgather_baseidx_sext_v8i32_v8i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsll.vi v8, v8, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_sext_v8i32_v8i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64V-NEXT: vsext.vf2 v16, v8
; RV64V-NEXT: vsll.vi v8, v16, 3
; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_baseidx_sext_v8i32_v8i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v0
; RV32ZVE32F-NEXT: andi a1, t0, 1
; RV32ZVE32F-NEXT: beqz a1, .LBB55_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a1, 4(a3)
; RV32ZVE32F-NEXT: lw a3, 0(a3)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: bnez a4, .LBB55_8
; RV32ZVE32F-NEXT: .LBB55_2:
; RV32ZVE32F-NEXT: lw a4, 12(a2)
; RV32ZVE32F-NEXT: lw a5, 8(a2)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: bnez a6, .LBB55_9
; RV32ZVE32F-NEXT: .LBB55_3:
; RV32ZVE32F-NEXT: lw a6, 20(a2)
; RV32ZVE32F-NEXT: lw a7, 16(a2)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: bnez t1, .LBB55_10
; RV32ZVE32F-NEXT: .LBB55_4:
; RV32ZVE32F-NEXT: lw t1, 28(a2)
; RV32ZVE32F-NEXT: lw t2, 24(a2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: bnez t3, .LBB55_11
; RV32ZVE32F-NEXT: .LBB55_5:
; RV32ZVE32F-NEXT: lw t3, 36(a2)
; RV32ZVE32F-NEXT: lw t4, 32(a2)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: bnez t5, .LBB55_12
; RV32ZVE32F-NEXT: .LBB55_6:
; RV32ZVE32F-NEXT: lw t5, 44(a2)
; RV32ZVE32F-NEXT: lw t6, 40(a2)
; RV32ZVE32F-NEXT: j .LBB55_13
; RV32ZVE32F-NEXT: .LBB55_7:
; RV32ZVE32F-NEXT: lw a1, 4(a2)
; RV32ZVE32F-NEXT: lw a3, 0(a2)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB55_2
; RV32ZVE32F-NEXT: .LBB55_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
; RV32ZVE32F-NEXT: lw a4, 4(a5)
; RV32ZVE32F-NEXT: lw a5, 0(a5)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: beqz a6, .LBB55_3
; RV32ZVE32F-NEXT: .LBB55_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
; RV32ZVE32F-NEXT: lw a6, 4(a7)
; RV32ZVE32F-NEXT: lw a7, 0(a7)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: beqz t1, .LBB55_4
; RV32ZVE32F-NEXT: .LBB55_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
; RV32ZVE32F-NEXT: lw t1, 4(t2)
; RV32ZVE32F-NEXT: lw t2, 0(t2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: beqz t3, .LBB55_5
; RV32ZVE32F-NEXT: .LBB55_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
; RV32ZVE32F-NEXT: lw t3, 4(t4)
; RV32ZVE32F-NEXT: lw t4, 0(t4)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: beqz t5, .LBB55_6
; RV32ZVE32F-NEXT: .LBB55_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
; RV32ZVE32F-NEXT: lw t5, 4(t6)
; RV32ZVE32F-NEXT: lw t6, 0(t6)
; RV32ZVE32F-NEXT: .LBB55_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
; RV32ZVE32F-NEXT: beqz s0, .LBB55_16
; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
; RV32ZVE32F-NEXT: lw s0, 4(s1)
; RV32ZVE32F-NEXT: lw s1, 0(s1)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: bnez t0, .LBB55_17
; RV32ZVE32F-NEXT: .LBB55_15:
; RV32ZVE32F-NEXT: lw t0, 60(a2)
; RV32ZVE32F-NEXT: lw a2, 56(a2)
; RV32ZVE32F-NEXT: j .LBB55_18
; RV32ZVE32F-NEXT: .LBB55_16:
; RV32ZVE32F-NEXT: lw s0, 52(a2)
; RV32ZVE32F-NEXT: lw s1, 48(a2)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: beqz t0, .LBB55_15
; RV32ZVE32F-NEXT: .LBB55_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw t0, 4(a2)
; RV32ZVE32F-NEXT: lw a2, 0(a2)
; RV32ZVE32F-NEXT: .LBB55_18: # %else20
; RV32ZVE32F-NEXT: sw a3, 0(a0)
; RV32ZVE32F-NEXT: sw a1, 4(a0)
; RV32ZVE32F-NEXT: sw a5, 8(a0)
; RV32ZVE32F-NEXT: sw a4, 12(a0)
; RV32ZVE32F-NEXT: sw a7, 16(a0)
; RV32ZVE32F-NEXT: sw a6, 20(a0)
; RV32ZVE32F-NEXT: sw t2, 24(a0)
; RV32ZVE32F-NEXT: sw t1, 28(a0)
; RV32ZVE32F-NEXT: sw t4, 32(a0)
; RV32ZVE32F-NEXT: sw t3, 36(a0)
; RV32ZVE32F-NEXT: sw t6, 40(a0)
; RV32ZVE32F-NEXT: sw t5, 44(a0)
; RV32ZVE32F-NEXT: sw s1, 48(a0)
; RV32ZVE32F-NEXT: sw s0, 52(a0)
; RV32ZVE32F-NEXT: sw a2, 56(a0)
; RV32ZVE32F-NEXT: sw t0, 60(a0)
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i32_v8i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi a3, a5, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB55_3
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: ld a3, 0(a3)
; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: bnez a4, .LBB55_4
; RV64ZVE32F-NEXT: .LBB55_2:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
; RV64ZVE32F-NEXT: j .LBB55_5
; RV64ZVE32F-NEXT: .LBB55_3:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: beqz a4, .LBB55_2
; RV64ZVE32F-NEXT: .LBB55_4: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v10
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
; RV64ZVE32F-NEXT: .LBB55_5: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: beqz a6, .LBB55_10
; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a6, v8
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
; RV64ZVE32F-NEXT: andi a7, a5, 8
; RV64ZVE32F-NEXT: bnez a7, .LBB55_11
; RV64ZVE32F-NEXT: .LBB55_7:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: bnez t0, .LBB55_12
; RV64ZVE32F-NEXT: .LBB55_8:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: bnez t1, .LBB55_13
; RV64ZVE32F-NEXT: .LBB55_9:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
; RV64ZVE32F-NEXT: j .LBB55_14
; RV64ZVE32F-NEXT: .LBB55_10:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
; RV64ZVE32F-NEXT: andi a7, a5, 8
; RV64ZVE32F-NEXT: beqz a7, .LBB55_7
; RV64ZVE32F-NEXT: .LBB55_11: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a7, v8
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: beqz t0, .LBB55_8
; RV64ZVE32F-NEXT: .LBB55_12: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s t0, v10
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: beqz t1, .LBB55_9
; RV64ZVE32F-NEXT: .LBB55_13: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s t1, v8
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
; RV64ZVE32F-NEXT: .LBB55_14: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: beqz t2, .LBB55_17
; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
; RV64ZVE32F-NEXT: andi a5, a5, -128
; RV64ZVE32F-NEXT: bnez a5, .LBB55_18
; RV64ZVE32F-NEXT: .LBB55_16:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
; RV64ZVE32F-NEXT: j .LBB55_19
; RV64ZVE32F-NEXT: .LBB55_17:
; RV64ZVE32F-NEXT: ld t2, 48(a2)
; RV64ZVE32F-NEXT: andi a5, a5, -128
; RV64ZVE32F-NEXT: beqz a5, .LBB55_16
; RV64ZVE32F-NEXT: .LBB55_18: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: ld a1, 0(a1)
; RV64ZVE32F-NEXT: .LBB55_19: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a6, 16(a0)
; RV64ZVE32F-NEXT: sd a7, 24(a0)
; RV64ZVE32F-NEXT: sd t0, 32(a0)
; RV64ZVE32F-NEXT: sd t1, 40(a0)
; RV64ZVE32F-NEXT: sd t2, 48(a0)
; RV64ZVE32F-NEXT: sd a1, 56(a0)
; RV64ZVE32F-NEXT: ret
%eidxs = sext <8 x i32> %idxs to <8 x i64>
%ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
%v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
ret <8 x i64> %v
}
define <8 x i64> @mgather_baseidx_zext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
; RV32V-LABEL: mgather_baseidx_zext_v8i32_v8i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsll.vi v8, v8, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_zext_v8i32_v8i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64V-NEXT: vzext.vf2 v16, v8
; RV64V-NEXT: vsll.vi v8, v16, 3
; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_baseidx_zext_v8i32_v8i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v0
; RV32ZVE32F-NEXT: andi a1, t0, 1
; RV32ZVE32F-NEXT: beqz a1, .LBB56_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a1, 4(a3)
; RV32ZVE32F-NEXT: lw a3, 0(a3)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: bnez a4, .LBB56_8
; RV32ZVE32F-NEXT: .LBB56_2:
; RV32ZVE32F-NEXT: lw a4, 12(a2)
; RV32ZVE32F-NEXT: lw a5, 8(a2)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: bnez a6, .LBB56_9
; RV32ZVE32F-NEXT: .LBB56_3:
; RV32ZVE32F-NEXT: lw a6, 20(a2)
; RV32ZVE32F-NEXT: lw a7, 16(a2)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: bnez t1, .LBB56_10
; RV32ZVE32F-NEXT: .LBB56_4:
; RV32ZVE32F-NEXT: lw t1, 28(a2)
; RV32ZVE32F-NEXT: lw t2, 24(a2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: bnez t3, .LBB56_11
; RV32ZVE32F-NEXT: .LBB56_5:
; RV32ZVE32F-NEXT: lw t3, 36(a2)
; RV32ZVE32F-NEXT: lw t4, 32(a2)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: bnez t5, .LBB56_12
; RV32ZVE32F-NEXT: .LBB56_6:
; RV32ZVE32F-NEXT: lw t5, 44(a2)
; RV32ZVE32F-NEXT: lw t6, 40(a2)
; RV32ZVE32F-NEXT: j .LBB56_13
; RV32ZVE32F-NEXT: .LBB56_7:
; RV32ZVE32F-NEXT: lw a1, 4(a2)
; RV32ZVE32F-NEXT: lw a3, 0(a2)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB56_2
; RV32ZVE32F-NEXT: .LBB56_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
; RV32ZVE32F-NEXT: lw a4, 4(a5)
; RV32ZVE32F-NEXT: lw a5, 0(a5)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: beqz a6, .LBB56_3
; RV32ZVE32F-NEXT: .LBB56_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
; RV32ZVE32F-NEXT: lw a6, 4(a7)
; RV32ZVE32F-NEXT: lw a7, 0(a7)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: beqz t1, .LBB56_4
; RV32ZVE32F-NEXT: .LBB56_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
; RV32ZVE32F-NEXT: lw t1, 4(t2)
; RV32ZVE32F-NEXT: lw t2, 0(t2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: beqz t3, .LBB56_5
; RV32ZVE32F-NEXT: .LBB56_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
; RV32ZVE32F-NEXT: lw t3, 4(t4)
; RV32ZVE32F-NEXT: lw t4, 0(t4)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: beqz t5, .LBB56_6
; RV32ZVE32F-NEXT: .LBB56_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
; RV32ZVE32F-NEXT: lw t5, 4(t6)
; RV32ZVE32F-NEXT: lw t6, 0(t6)
; RV32ZVE32F-NEXT: .LBB56_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
; RV32ZVE32F-NEXT: beqz s0, .LBB56_16
; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
; RV32ZVE32F-NEXT: lw s0, 4(s1)
; RV32ZVE32F-NEXT: lw s1, 0(s1)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: bnez t0, .LBB56_17
; RV32ZVE32F-NEXT: .LBB56_15:
; RV32ZVE32F-NEXT: lw t0, 60(a2)
; RV32ZVE32F-NEXT: lw a2, 56(a2)
; RV32ZVE32F-NEXT: j .LBB56_18
; RV32ZVE32F-NEXT: .LBB56_16:
; RV32ZVE32F-NEXT: lw s0, 52(a2)
; RV32ZVE32F-NEXT: lw s1, 48(a2)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: beqz t0, .LBB56_15
; RV32ZVE32F-NEXT: .LBB56_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw t0, 4(a2)
; RV32ZVE32F-NEXT: lw a2, 0(a2)
; RV32ZVE32F-NEXT: .LBB56_18: # %else20
; RV32ZVE32F-NEXT: sw a3, 0(a0)
; RV32ZVE32F-NEXT: sw a1, 4(a0)
; RV32ZVE32F-NEXT: sw a5, 8(a0)
; RV32ZVE32F-NEXT: sw a4, 12(a0)
; RV32ZVE32F-NEXT: sw a7, 16(a0)
; RV32ZVE32F-NEXT: sw a6, 20(a0)
; RV32ZVE32F-NEXT: sw t2, 24(a0)
; RV32ZVE32F-NEXT: sw t1, 28(a0)
; RV32ZVE32F-NEXT: sw t4, 32(a0)
; RV32ZVE32F-NEXT: sw t3, 36(a0)
; RV32ZVE32F-NEXT: sw t6, 40(a0)
; RV32ZVE32F-NEXT: sw t5, 44(a0)
; RV32ZVE32F-NEXT: sw s1, 48(a0)
; RV32ZVE32F-NEXT: sw s0, 52(a0)
; RV32ZVE32F-NEXT: sw a2, 56(a0)
; RV32ZVE32F-NEXT: sw t0, 60(a0)
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i32_v8i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi a3, a5, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB56_3
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 32
; RV64ZVE32F-NEXT: srli a3, a3, 29
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: ld a3, 0(a3)
; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: bnez a4, .LBB56_4
; RV64ZVE32F-NEXT: .LBB56_2:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
; RV64ZVE32F-NEXT: j .LBB56_5
; RV64ZVE32F-NEXT: .LBB56_3:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: beqz a4, .LBB56_2
; RV64ZVE32F-NEXT: .LBB56_4: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v10
; RV64ZVE32F-NEXT: slli a4, a4, 32
; RV64ZVE32F-NEXT: srli a4, a4, 29
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
; RV64ZVE32F-NEXT: .LBB56_5: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: beqz a6, .LBB56_10
; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a6, v8
; RV64ZVE32F-NEXT: slli a6, a6, 32
; RV64ZVE32F-NEXT: srli a6, a6, 29
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
; RV64ZVE32F-NEXT: andi a7, a5, 8
; RV64ZVE32F-NEXT: bnez a7, .LBB56_11
; RV64ZVE32F-NEXT: .LBB56_7:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: bnez t0, .LBB56_12
; RV64ZVE32F-NEXT: .LBB56_8:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: bnez t1, .LBB56_13
; RV64ZVE32F-NEXT: .LBB56_9:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
; RV64ZVE32F-NEXT: j .LBB56_14
; RV64ZVE32F-NEXT: .LBB56_10:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
; RV64ZVE32F-NEXT: andi a7, a5, 8
; RV64ZVE32F-NEXT: beqz a7, .LBB56_7
; RV64ZVE32F-NEXT: .LBB56_11: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a7, v8
; RV64ZVE32F-NEXT: slli a7, a7, 32
; RV64ZVE32F-NEXT: srli a7, a7, 29
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: beqz t0, .LBB56_8
; RV64ZVE32F-NEXT: .LBB56_12: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s t0, v10
; RV64ZVE32F-NEXT: slli t0, t0, 32
; RV64ZVE32F-NEXT: srli t0, t0, 29
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: beqz t1, .LBB56_9
; RV64ZVE32F-NEXT: .LBB56_13: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s t1, v8
; RV64ZVE32F-NEXT: slli t1, t1, 32
; RV64ZVE32F-NEXT: srli t1, t1, 29
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
; RV64ZVE32F-NEXT: .LBB56_14: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: beqz t2, .LBB56_17
; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 32
; RV64ZVE32F-NEXT: srli t2, t2, 29
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
; RV64ZVE32F-NEXT: andi a5, a5, -128
; RV64ZVE32F-NEXT: bnez a5, .LBB56_18
; RV64ZVE32F-NEXT: .LBB56_16:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
; RV64ZVE32F-NEXT: j .LBB56_19
; RV64ZVE32F-NEXT: .LBB56_17:
; RV64ZVE32F-NEXT: ld t2, 48(a2)
; RV64ZVE32F-NEXT: andi a5, a5, -128
; RV64ZVE32F-NEXT: beqz a5, .LBB56_16
; RV64ZVE32F-NEXT: .LBB56_18: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 32
; RV64ZVE32F-NEXT: srli a2, a2, 29
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: ld a1, 0(a1)
; RV64ZVE32F-NEXT: .LBB56_19: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a6, 16(a0)
; RV64ZVE32F-NEXT: sd a7, 24(a0)
; RV64ZVE32F-NEXT: sd t0, 32(a0)
; RV64ZVE32F-NEXT: sd t1, 40(a0)
; RV64ZVE32F-NEXT: sd t2, 48(a0)
; RV64ZVE32F-NEXT: sd a1, 56(a0)
; RV64ZVE32F-NEXT: ret
%eidxs = zext <8 x i32> %idxs to <8 x i64>
%ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
%v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
ret <8 x i64> %v
}
define <8 x i64> @mgather_baseidx_v8i64(ptr %base, <8 x i64> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
; RV32V-LABEL: mgather_baseidx_v8i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vnsrl.wi v16, v8, 0
; RV32V-NEXT: vsll.vi v8, v16, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v8i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64V-NEXT: vsll.vi v8, v8, 3
; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_baseidx_v8i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: lw a4, 56(a2)
; RV32ZVE32F-NEXT: lw a5, 48(a2)
; RV32ZVE32F-NEXT: lw a6, 40(a2)
; RV32ZVE32F-NEXT: lw a7, 32(a2)
; RV32ZVE32F-NEXT: lw t0, 24(a2)
; RV32ZVE32F-NEXT: lw t1, 16(a2)
; RV32ZVE32F-NEXT: lw t2, 8(a2)
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vlse32.v v8, (a2), zero
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t2
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t1
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t0
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a7
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a6
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a4
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v0
; RV32ZVE32F-NEXT: andi a1, t0, 1
; RV32ZVE32F-NEXT: beqz a1, .LBB57_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw a1, 4(a2)
; RV32ZVE32F-NEXT: lw a2, 0(a2)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: bnez a4, .LBB57_8
; RV32ZVE32F-NEXT: .LBB57_2:
; RV32ZVE32F-NEXT: lw a4, 12(a3)
; RV32ZVE32F-NEXT: lw a5, 8(a3)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: bnez a6, .LBB57_9
; RV32ZVE32F-NEXT: .LBB57_3:
; RV32ZVE32F-NEXT: lw a6, 20(a3)
; RV32ZVE32F-NEXT: lw a7, 16(a3)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: bnez t1, .LBB57_10
; RV32ZVE32F-NEXT: .LBB57_4:
; RV32ZVE32F-NEXT: lw t1, 28(a3)
; RV32ZVE32F-NEXT: lw t2, 24(a3)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: bnez t3, .LBB57_11
; RV32ZVE32F-NEXT: .LBB57_5:
; RV32ZVE32F-NEXT: lw t3, 36(a3)
; RV32ZVE32F-NEXT: lw t4, 32(a3)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: bnez t5, .LBB57_12
; RV32ZVE32F-NEXT: .LBB57_6:
; RV32ZVE32F-NEXT: lw t5, 44(a3)
; RV32ZVE32F-NEXT: lw t6, 40(a3)
; RV32ZVE32F-NEXT: j .LBB57_13
; RV32ZVE32F-NEXT: .LBB57_7:
; RV32ZVE32F-NEXT: lw a1, 4(a3)
; RV32ZVE32F-NEXT: lw a2, 0(a3)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB57_2
; RV32ZVE32F-NEXT: .LBB57_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
; RV32ZVE32F-NEXT: lw a4, 4(a5)
; RV32ZVE32F-NEXT: lw a5, 0(a5)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: beqz a6, .LBB57_3
; RV32ZVE32F-NEXT: .LBB57_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
; RV32ZVE32F-NEXT: lw a6, 4(a7)
; RV32ZVE32F-NEXT: lw a7, 0(a7)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: beqz t1, .LBB57_4
; RV32ZVE32F-NEXT: .LBB57_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
; RV32ZVE32F-NEXT: lw t1, 4(t2)
; RV32ZVE32F-NEXT: lw t2, 0(t2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: beqz t3, .LBB57_5
; RV32ZVE32F-NEXT: .LBB57_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
; RV32ZVE32F-NEXT: lw t3, 4(t4)
; RV32ZVE32F-NEXT: lw t4, 0(t4)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: beqz t5, .LBB57_6
; RV32ZVE32F-NEXT: .LBB57_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
; RV32ZVE32F-NEXT: lw t5, 4(t6)
; RV32ZVE32F-NEXT: lw t6, 0(t6)
; RV32ZVE32F-NEXT: .LBB57_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
; RV32ZVE32F-NEXT: beqz s0, .LBB57_16
; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
; RV32ZVE32F-NEXT: lw s0, 4(s1)
; RV32ZVE32F-NEXT: lw s1, 0(s1)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: bnez t0, .LBB57_17
; RV32ZVE32F-NEXT: .LBB57_15:
; RV32ZVE32F-NEXT: lw t0, 60(a3)
; RV32ZVE32F-NEXT: lw a3, 56(a3)
; RV32ZVE32F-NEXT: j .LBB57_18
; RV32ZVE32F-NEXT: .LBB57_16:
; RV32ZVE32F-NEXT: lw s0, 52(a3)
; RV32ZVE32F-NEXT: lw s1, 48(a3)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: beqz t0, .LBB57_15
; RV32ZVE32F-NEXT: .LBB57_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw t0, 4(a3)
; RV32ZVE32F-NEXT: lw a3, 0(a3)
; RV32ZVE32F-NEXT: .LBB57_18: # %else20
; RV32ZVE32F-NEXT: sw a2, 0(a0)
; RV32ZVE32F-NEXT: sw a1, 4(a0)
; RV32ZVE32F-NEXT: sw a5, 8(a0)
; RV32ZVE32F-NEXT: sw a4, 12(a0)
; RV32ZVE32F-NEXT: sw a7, 16(a0)
; RV32ZVE32F-NEXT: sw a6, 20(a0)
; RV32ZVE32F-NEXT: sw t2, 24(a0)
; RV32ZVE32F-NEXT: sw t1, 28(a0)
; RV32ZVE32F-NEXT: sw t4, 32(a0)
; RV32ZVE32F-NEXT: sw t3, 36(a0)
; RV32ZVE32F-NEXT: sw t6, 40(a0)
; RV32ZVE32F-NEXT: sw t5, 44(a0)
; RV32ZVE32F-NEXT: sw s1, 48(a0)
; RV32ZVE32F-NEXT: sw s0, 52(a0)
; RV32ZVE32F-NEXT: sw a3, 56(a0)
; RV32ZVE32F-NEXT: sw t0, 60(a0)
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a6, v0
; RV64ZVE32F-NEXT: andi a4, a6, 1
; RV64ZVE32F-NEXT: beqz a4, .LBB57_9
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: ld a4, 0(a2)
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
; RV64ZVE32F-NEXT: andi a5, a6, 2
; RV64ZVE32F-NEXT: bnez a5, .LBB57_10
; RV64ZVE32F-NEXT: .LBB57_2:
; RV64ZVE32F-NEXT: ld a5, 8(a3)
; RV64ZVE32F-NEXT: andi a7, a6, 4
; RV64ZVE32F-NEXT: bnez a7, .LBB57_11
; RV64ZVE32F-NEXT: .LBB57_3:
; RV64ZVE32F-NEXT: ld a7, 16(a3)
; RV64ZVE32F-NEXT: andi t0, a6, 8
; RV64ZVE32F-NEXT: bnez t0, .LBB57_12
; RV64ZVE32F-NEXT: .LBB57_4:
; RV64ZVE32F-NEXT: ld t0, 24(a3)
; RV64ZVE32F-NEXT: andi t1, a6, 16
; RV64ZVE32F-NEXT: bnez t1, .LBB57_13
; RV64ZVE32F-NEXT: .LBB57_5:
; RV64ZVE32F-NEXT: ld t1, 32(a3)
; RV64ZVE32F-NEXT: andi t2, a6, 32
; RV64ZVE32F-NEXT: bnez t2, .LBB57_14
; RV64ZVE32F-NEXT: .LBB57_6:
; RV64ZVE32F-NEXT: ld t2, 40(a3)
; RV64ZVE32F-NEXT: andi t3, a6, 64
; RV64ZVE32F-NEXT: bnez t3, .LBB57_15
; RV64ZVE32F-NEXT: .LBB57_7:
; RV64ZVE32F-NEXT: ld t3, 48(a3)
; RV64ZVE32F-NEXT: andi a6, a6, -128
; RV64ZVE32F-NEXT: bnez a6, .LBB57_16
; RV64ZVE32F-NEXT: .LBB57_8:
; RV64ZVE32F-NEXT: ld a1, 56(a3)
; RV64ZVE32F-NEXT: j .LBB57_17
; RV64ZVE32F-NEXT: .LBB57_9:
; RV64ZVE32F-NEXT: ld a4, 0(a3)
; RV64ZVE32F-NEXT: andi a5, a6, 2
; RV64ZVE32F-NEXT: beqz a5, .LBB57_2
; RV64ZVE32F-NEXT: .LBB57_10: # %cond.load1
; RV64ZVE32F-NEXT: ld a5, 8(a2)
; RV64ZVE32F-NEXT: slli a5, a5, 3
; RV64ZVE32F-NEXT: add a5, a1, a5
; RV64ZVE32F-NEXT: ld a5, 0(a5)
; RV64ZVE32F-NEXT: andi a7, a6, 4
; RV64ZVE32F-NEXT: beqz a7, .LBB57_3
; RV64ZVE32F-NEXT: .LBB57_11: # %cond.load4
; RV64ZVE32F-NEXT: ld a7, 16(a2)
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
; RV64ZVE32F-NEXT: andi t0, a6, 8
; RV64ZVE32F-NEXT: beqz t0, .LBB57_4
; RV64ZVE32F-NEXT: .LBB57_12: # %cond.load7
; RV64ZVE32F-NEXT: ld t0, 24(a2)
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
; RV64ZVE32F-NEXT: andi t1, a6, 16
; RV64ZVE32F-NEXT: beqz t1, .LBB57_5
; RV64ZVE32F-NEXT: .LBB57_13: # %cond.load10
; RV64ZVE32F-NEXT: ld t1, 32(a2)
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
; RV64ZVE32F-NEXT: andi t2, a6, 32
; RV64ZVE32F-NEXT: beqz t2, .LBB57_6
; RV64ZVE32F-NEXT: .LBB57_14: # %cond.load13
; RV64ZVE32F-NEXT: ld t2, 40(a2)
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
; RV64ZVE32F-NEXT: andi t3, a6, 64
; RV64ZVE32F-NEXT: beqz t3, .LBB57_7
; RV64ZVE32F-NEXT: .LBB57_15: # %cond.load16
; RV64ZVE32F-NEXT: ld t3, 48(a2)
; RV64ZVE32F-NEXT: slli t3, t3, 3
; RV64ZVE32F-NEXT: add t3, a1, t3
; RV64ZVE32F-NEXT: ld t3, 0(t3)
; RV64ZVE32F-NEXT: andi a6, a6, -128
; RV64ZVE32F-NEXT: beqz a6, .LBB57_8
; RV64ZVE32F-NEXT: .LBB57_16: # %cond.load19
; RV64ZVE32F-NEXT: ld a2, 56(a2)
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: ld a1, 0(a1)
; RV64ZVE32F-NEXT: .LBB57_17: # %else20
; RV64ZVE32F-NEXT: sd a4, 0(a0)
; RV64ZVE32F-NEXT: sd a5, 8(a0)
; RV64ZVE32F-NEXT: sd a7, 16(a0)
; RV64ZVE32F-NEXT: sd t0, 24(a0)
; RV64ZVE32F-NEXT: sd t1, 32(a0)
; RV64ZVE32F-NEXT: sd t2, 40(a0)
; RV64ZVE32F-NEXT: sd t3, 48(a0)
; RV64ZVE32F-NEXT: sd a1, 56(a0)
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %idxs
%v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
ret <8 x i64> %v
}
declare <1 x half> @llvm.masked.gather.v1f16.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x half>)
define <1 x half> @mgather_v1f16(<1 x ptr> %ptrs, <1 x i1> %m, <1 x half> %passthru) {
; RV32V-LABEL: mgather_v1f16:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vmv1r.v v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v1f16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v1f16:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vmv1r.v v8, v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v1f16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vfirst.m a1, v0
; RV64ZVE32F-NEXT: bnez a1, .LBB58_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vle16.v v8, (a0)
; RV64ZVE32F-NEXT: .LBB58_2: # %else
; RV64ZVE32F-NEXT: ret
%v = call <1 x half> @llvm.masked.gather.v1f16.v1p0(<1 x ptr> %ptrs, i32 2, <1 x i1> %m, <1 x half> %passthru)
ret <1 x half> %v
}
declare <2 x half> @llvm.masked.gather.v2f16.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x half>)
define <2 x half> @mgather_v2f16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x half> %passthru) {
; RV32V-LABEL: mgather_v2f16:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vmv1r.v v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2f16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2f16:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vmv1r.v v8, v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2f16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB59_3
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB59_4
; RV64ZVE32F-NEXT: .LBB59_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB59_3: # %cond.load
; RV64ZVE32F-NEXT: flh fa5, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB59_2
; RV64ZVE32F-NEXT: .LBB59_4: # %cond.load1
; RV64ZVE32F-NEXT: flh fa5, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: ret
%v = call <2 x half> @llvm.masked.gather.v2f16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x half> %passthru)
ret <2 x half> %v
}
declare <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x half>)
define <4 x half> @mgather_v4f16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x half> %passthru) {
; RV32-LABEL: mgather_v4f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_v4f16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v4f16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB60_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB60_6
; RV64ZVE32F-NEXT: .LBB60_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB60_7
; RV64ZVE32F-NEXT: .LBB60_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB60_8
; RV64ZVE32F-NEXT: .LBB60_4: # %else8
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB60_5: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB60_2
; RV64ZVE32F-NEXT: .LBB60_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB60_3
; RV64ZVE32F-NEXT: .LBB60_7: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: beqz a1, .LBB60_4
; RV64ZVE32F-NEXT: .LBB60_8: # %cond.load7
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: flh fa5, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: ret
%v = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> %m, <4 x half> %passthru)
ret <4 x half> %v
}
define <4 x half> @mgather_truemask_v4f16(<4 x ptr> %ptrs, <4 x half> %passthru) {
; RV32-LABEL: mgather_truemask_v4f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV32-NEXT: vluxei32.v v9, (zero), v8
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_truemask_v4f16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV64V-NEXT: vluxei64.v v10, (zero), v8
; RV64V-NEXT: vmv1r.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_truemask_v4f16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a1, 8(a0)
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: ld a3, 24(a0)
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: flh fa5, 0(a1)
; RV64ZVE32F-NEXT: flh fa4, 0(a2)
; RV64ZVE32F-NEXT: flh fa3, 0(a3)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero
; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa5
; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa4
; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa3
; RV64ZVE32F-NEXT: ret
%v = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1), <4 x half> %passthru)
ret <4 x half> %v
}
define <4 x half> @mgather_falsemask_v4f16(<4 x ptr> %ptrs, <4 x half> %passthru) {
; RV32-LABEL: mgather_falsemask_v4f16:
; RV32: # %bb.0:
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_falsemask_v4f16:
; RV64V: # %bb.0:
; RV64V-NEXT: vmv1r.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_falsemask_v4f16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ret
%v = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> zeroinitializer, <4 x half> %passthru)
ret <4 x half> %v
}
declare <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x half>)
define <8 x half> @mgather_v8f16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x half> %passthru) {
; RV32-LABEL: mgather_v8f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_v8f16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v8f16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB63_9
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB63_10
; RV64ZVE32F-NEXT: .LBB63_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB63_11
; RV64ZVE32F-NEXT: .LBB63_3: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB63_12
; RV64ZVE32F-NEXT: .LBB63_4: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB63_13
; RV64ZVE32F-NEXT: .LBB63_5: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB63_14
; RV64ZVE32F-NEXT: .LBB63_6: # %else14
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: bnez a2, .LBB63_15
; RV64ZVE32F-NEXT: .LBB63_7: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB63_16
; RV64ZVE32F-NEXT: .LBB63_8: # %else20
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB63_9: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB63_2
; RV64ZVE32F-NEXT: .LBB63_10: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB63_3
; RV64ZVE32F-NEXT: .LBB63_11: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB63_4
; RV64ZVE32F-NEXT: .LBB63_12: # %cond.load7
; RV64ZVE32F-NEXT: ld a2, 24(a0)
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB63_5
; RV64ZVE32F-NEXT: .LBB63_13: # %cond.load10
; RV64ZVE32F-NEXT: ld a2, 32(a0)
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB63_6
; RV64ZVE32F-NEXT: .LBB63_14: # %cond.load13
; RV64ZVE32F-NEXT: ld a2, 40(a0)
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: beqz a2, .LBB63_7
; RV64ZVE32F-NEXT: .LBB63_15: # %cond.load16
; RV64ZVE32F-NEXT: ld a2, 48(a0)
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB63_8
; RV64ZVE32F-NEXT: .LBB63_16: # %cond.load19
; RV64ZVE32F-NEXT: ld a0, 56(a0)
; RV64ZVE32F-NEXT: flh fa5, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7
; RV64ZVE32F-NEXT: ret
%v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
ret <8 x half> %v
}
define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x half> %passthru) {
; RV32-LABEL: mgather_baseidx_v8i8_v8f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v10, v8
; RV32-NEXT: vadd.vv v10, v10, v10
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
; RV32-NEXT: vmv.v.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v8i8_v8f16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v8
; RV64V-NEXT: vadd.vv v12, v12, v12
; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
; RV64V-NEXT: vmv.v.v v8, v9
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8f16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB64_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: .LBB64_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB64_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
; RV64ZVE32F-NEXT: .LBB64_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB64_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB64_13
; RV64ZVE32F-NEXT: .LBB64_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB64_14
; RV64ZVE32F-NEXT: .LBB64_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB64_9
; RV64ZVE32F-NEXT: .LBB64_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
; RV64ZVE32F-NEXT: .LBB64_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB64_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB64_16
; RV64ZVE32F-NEXT: .LBB64_11: # %else20
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB64_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v11, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB64_6
; RV64ZVE32F-NEXT: .LBB64_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB64_7
; RV64ZVE32F-NEXT: .LBB64_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB64_8
; RV64ZVE32F-NEXT: j .LBB64_9
; RV64ZVE32F-NEXT: .LBB64_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB64_11
; RV64ZVE32F-NEXT: .LBB64_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 1
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: flh fa5, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds half, ptr %base, <8 x i8> %idxs
%v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
ret <8 x half> %v
}
define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x half> %passthru) {
; RV32-LABEL: mgather_baseidx_sext_v8i8_v8f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v10, v8
; RV32-NEXT: vadd.vv v10, v10, v10
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
; RV32-NEXT: vmv.v.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8f16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v8
; RV64V-NEXT: vadd.vv v12, v12, v12
; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
; RV64V-NEXT: vmv.v.v v8, v9
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8f16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB65_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: .LBB65_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB65_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
; RV64ZVE32F-NEXT: .LBB65_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB65_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB65_13
; RV64ZVE32F-NEXT: .LBB65_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB65_14
; RV64ZVE32F-NEXT: .LBB65_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB65_9
; RV64ZVE32F-NEXT: .LBB65_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
; RV64ZVE32F-NEXT: .LBB65_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB65_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB65_16
; RV64ZVE32F-NEXT: .LBB65_11: # %else20
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB65_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v11, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB65_6
; RV64ZVE32F-NEXT: .LBB65_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB65_7
; RV64ZVE32F-NEXT: .LBB65_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB65_8
; RV64ZVE32F-NEXT: j .LBB65_9
; RV64ZVE32F-NEXT: .LBB65_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB65_11
; RV64ZVE32F-NEXT: .LBB65_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 1
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: flh fa5, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
%eidxs = sext <8 x i8> %idxs to <8 x i16>
%ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
%v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
ret <8 x half> %v
}
define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x half> %passthru) {
; RV32-LABEL: mgather_baseidx_zext_v8i8_v8f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT: vwaddu.vv v10, v8, v8
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; RV32-NEXT: vluxei16.v v9, (a0), v10, v0.t
; RV32-NEXT: vmv.v.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8f16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64V-NEXT: vwaddu.vv v10, v8, v8
; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; RV64V-NEXT: vluxei16.v v9, (a0), v10, v0.t
; RV64V-NEXT: vmv.v.v v8, v9
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8f16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB66_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: .LBB66_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB66_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
; RV64ZVE32F-NEXT: .LBB66_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB66_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB66_13
; RV64ZVE32F-NEXT: .LBB66_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB66_14
; RV64ZVE32F-NEXT: .LBB66_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB66_9
; RV64ZVE32F-NEXT: .LBB66_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
; RV64ZVE32F-NEXT: .LBB66_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB66_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB66_16
; RV64ZVE32F-NEXT: .LBB66_11: # %else20
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB66_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v11, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB66_6
; RV64ZVE32F-NEXT: .LBB66_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB66_7
; RV64ZVE32F-NEXT: .LBB66_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB66_8
; RV64ZVE32F-NEXT: j .LBB66_9
; RV64ZVE32F-NEXT: .LBB66_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB66_11
; RV64ZVE32F-NEXT: .LBB66_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: andi a1, a1, 255
; RV64ZVE32F-NEXT: slli a1, a1, 1
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: flh fa5, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
%eidxs = zext <8 x i8> %idxs to <8 x i16>
%ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
%v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
ret <8 x half> %v
}
define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x half> %passthru) {
; RV32-LABEL: mgather_baseidx_v8f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV32-NEXT: vwadd.vv v10, v8, v8
; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
; RV32-NEXT: vmv.v.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v8f16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf4 v12, v8
; RV64V-NEXT: vadd.vv v12, v12, v12
; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
; RV64V-NEXT: vmv.v.v v8, v9
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8f16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB67_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: .LBB67_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB67_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
; RV64ZVE32F-NEXT: .LBB67_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB67_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB67_13
; RV64ZVE32F-NEXT: .LBB67_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB67_14
; RV64ZVE32F-NEXT: .LBB67_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB67_9
; RV64ZVE32F-NEXT: .LBB67_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
; RV64ZVE32F-NEXT: .LBB67_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB67_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB67_16
; RV64ZVE32F-NEXT: .LBB67_11: # %else20
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB67_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vfmv.s.f v11, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB67_6
; RV64ZVE32F-NEXT: .LBB67_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB67_7
; RV64ZVE32F-NEXT: .LBB67_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB67_8
; RV64ZVE32F-NEXT: j .LBB67_9
; RV64ZVE32F-NEXT: .LBB67_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB67_11
; RV64ZVE32F-NEXT: .LBB67_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 1
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: flh fa5, 0(a0)
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %idxs
%v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
ret <8 x half> %v
}
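; The tests below cover masked gathers of f32 elements. RV64ZVE32F has no
; 64-bit index vectors, so it scalarizes the gather: each mask bit is tested
; and every active lane is loaded with a scalar flw and inserted via
; vfmv.s.f/vslideup.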
declare <1 x float> @llvm.masked.gather.v1f32.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x float>)
define <1 x float> @mgather_v1f32(<1 x ptr> %ptrs, <1 x i1> %m, <1 x float> %passthru) {
; RV32V-LABEL: mgather_v1f32:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 1, e32, mf2, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vmv1r.v v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v1f32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 1, e32, mf2, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v1f32:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vmv.v.v v8, v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v1f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vfirst.m a1, v0
; RV64ZVE32F-NEXT: bnez a1, .LBB68_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vle32.v v8, (a0)
; RV64ZVE32F-NEXT: .LBB68_2: # %else
; RV64ZVE32F-NEXT: ret
%v = call <1 x float> @llvm.masked.gather.v1f32.v1p0(<1 x ptr> %ptrs, i32 4, <1 x i1> %m, <1 x float> %passthru)
ret <1 x float> %v
}
declare <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x float>)
define <2 x float> @mgather_v2f32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x float> %passthru) {
; RV32V-LABEL: mgather_v2f32:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vmv1r.v v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2f32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2f32:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vmv.v.v v8, v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB69_3
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB69_4
; RV64ZVE32F-NEXT: .LBB69_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB69_3: # %cond.load
; RV64ZVE32F-NEXT: flw fa5, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB69_2
; RV64ZVE32F-NEXT: .LBB69_4: # %cond.load1
; RV64ZVE32F-NEXT: flw fa5, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: ret
%v = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %m, <2 x float> %passthru)
ret <2 x float> %v
}
declare <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x float>)
define <4 x float> @mgather_v4f32(<4 x ptr> %ptrs, <4 x i1> %m, <4 x float> %passthru) {
; RV32-LABEL: mgather_v4f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_v4f32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v4f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB70_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB70_6
; RV64ZVE32F-NEXT: .LBB70_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB70_7
; RV64ZVE32F-NEXT: .LBB70_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB70_8
; RV64ZVE32F-NEXT: .LBB70_4: # %else8
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB70_5: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB70_2
; RV64ZVE32F-NEXT: .LBB70_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB70_3
; RV64ZVE32F-NEXT: .LBB70_7: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: beqz a1, .LBB70_4
; RV64ZVE32F-NEXT: .LBB70_8: # %cond.load7
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: ret
%v = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %m, <4 x float> %passthru)
ret <4 x float> %v
}
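; With an all-ones mask the gather lowers to an unmasked indexed load on
; RV32/RV64V and to unconditional scalar loads on RV64ZVE32F.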
define <4 x float> @mgather_truemask_v4f32(<4 x ptr> %ptrs, <4 x float> %passthru) {
; RV32-LABEL: mgather_truemask_v4f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vluxei32.v v8, (zero), v8
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_truemask_v4f32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64V-NEXT: vluxei64.v v10, (zero), v8
; RV64V-NEXT: vmv.v.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_truemask_v4f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a1, 8(a0)
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: ld a3, 24(a0)
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a1)
; RV64ZVE32F-NEXT: flw fa4, 0(a2)
; RV64ZVE32F-NEXT: flw fa3, 0(a3)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vlse32.v v8, (a0), zero
; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa5
; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa4
; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa3
; RV64ZVE32F-NEXT: ret
%v = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 1), <4 x float> %passthru)
ret <4 x float> %v
}
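; With an all-zeros mask the gather folds to the passthru operand.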
define <4 x float> @mgather_falsemask_v4f32(<4 x ptr> %ptrs, <4 x float> %passthru) {
; RV32-LABEL: mgather_falsemask_v4f32:
; RV32: # %bb.0:
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_falsemask_v4f32:
; RV64V: # %bb.0:
; RV64V-NEXT: vmv1r.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_falsemask_v4f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ret
%v = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> zeroinitializer, <4 x float> %passthru)
ret <4 x float> %v
}
declare <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x float>)
define <8 x float> @mgather_v8f32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x float> %passthru) {
; RV32-LABEL: mgather_v8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_v8f32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v8f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB73_9
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB73_10
; RV64ZVE32F-NEXT: .LBB73_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB73_11
; RV64ZVE32F-NEXT: .LBB73_3: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB73_12
; RV64ZVE32F-NEXT: .LBB73_4: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB73_13
; RV64ZVE32F-NEXT: .LBB73_5: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB73_14
; RV64ZVE32F-NEXT: .LBB73_6: # %else14
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: bnez a2, .LBB73_15
; RV64ZVE32F-NEXT: .LBB73_7: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB73_16
; RV64ZVE32F-NEXT: .LBB73_8: # %else20
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB73_9: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB73_2
; RV64ZVE32F-NEXT: .LBB73_10: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB73_3
; RV64ZVE32F-NEXT: .LBB73_11: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB73_4
; RV64ZVE32F-NEXT: .LBB73_12: # %cond.load7
; RV64ZVE32F-NEXT: ld a2, 24(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB73_5
; RV64ZVE32F-NEXT: .LBB73_13: # %cond.load10
; RV64ZVE32F-NEXT: ld a2, 32(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB73_6
; RV64ZVE32F-NEXT: .LBB73_14: # %cond.load13
; RV64ZVE32F-NEXT: ld a2, 40(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 5
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: beqz a2, .LBB73_7
; RV64ZVE32F-NEXT: .LBB73_15: # %cond.load16
; RV64ZVE32F-NEXT: ld a2, 48(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB73_8
; RV64ZVE32F-NEXT: .LBB73_16: # %cond.load19
; RV64ZVE32F-NEXT: ld a0, 56(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 7
; RV64ZVE32F-NEXT: ret
%v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
ret <8 x float> %v
}
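; Indexed forms: %ptrs is a GEP of %base by a vector of indices. The indices
; are sign-extended to pointer width and scaled by the element size (shifted
; left by 2 for f32) before the indexed load; RV64ZVE32F performs the same
; address arithmetic on extracted scalars.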
define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x float> %passthru) {
; RV32-LABEL: mgather_baseidx_v8i8_v8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV32-NEXT: vsext.vf4 v12, v8
; RV32-NEXT: vsll.vi v8, v12, 2
; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v8i8_v8f32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v8
; RV64V-NEXT: vsll.vi v12, v12, 2
; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
; RV64V-NEXT: vmv.v.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB74_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: .LBB74_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB74_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
; RV64ZVE32F-NEXT: .LBB74_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB74_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB74_13
; RV64ZVE32F-NEXT: .LBB74_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB74_14
; RV64ZVE32F-NEXT: .LBB74_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB74_9
; RV64ZVE32F-NEXT: .LBB74_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB74_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB74_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB74_16
; RV64ZVE32F-NEXT: .LBB74_11: # %else20
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB74_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB74_6
; RV64ZVE32F-NEXT: .LBB74_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB74_7
; RV64ZVE32F-NEXT: .LBB74_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB74_8
; RV64ZVE32F-NEXT: j .LBB74_9
; RV64ZVE32F-NEXT: .LBB74_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB74_11
; RV64ZVE32F-NEXT: .LBB74_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: flw fa5, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds float, ptr %base, <8 x i8> %idxs
%v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
ret <8 x float> %v
}
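; Explicitly sign-extending the i8 indices gives the same lowering as the
; plain i8 GEP above.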
define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x float> %passthru) {
; RV32-LABEL: mgather_baseidx_sext_v8i8_v8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV32-NEXT: vsext.vf4 v12, v8
; RV32-NEXT: vsll.vi v8, v12, 2
; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8f32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v8
; RV64V-NEXT: vsll.vi v12, v12, 2
; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
; RV64V-NEXT: vmv.v.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB75_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: .LBB75_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB75_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
; RV64ZVE32F-NEXT: .LBB75_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB75_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB75_13
; RV64ZVE32F-NEXT: .LBB75_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB75_14
; RV64ZVE32F-NEXT: .LBB75_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB75_9
; RV64ZVE32F-NEXT: .LBB75_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB75_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB75_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB75_16
; RV64ZVE32F-NEXT: .LBB75_11: # %else20
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB75_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB75_6
; RV64ZVE32F-NEXT: .LBB75_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB75_7
; RV64ZVE32F-NEXT: .LBB75_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB75_8
; RV64ZVE32F-NEXT: j .LBB75_9
; RV64ZVE32F-NEXT: .LBB75_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB75_11
; RV64ZVE32F-NEXT: .LBB75_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: flw fa5, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
%eidxs = sext <8 x i8> %idxs to <8 x i32>
%ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
%v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
ret <8 x float> %v
}
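; Zero-extended i8 indices only need 16-bit index vectors (vluxei16 after
; vzext.vf2); RV64ZVE32F masks each scalar index with 255 before scaling.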
define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x float> %passthru) {
; RV32-LABEL: mgather_baseidx_zext_v8i8_v8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vzext.vf2 v9, v8
; RV32-NEXT: vsll.vi v8, v9, 2
; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; RV32-NEXT: vluxei16.v v10, (a0), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8f32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64V-NEXT: vzext.vf2 v9, v8
; RV64V-NEXT: vsll.vi v8, v9, 2
; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; RV64V-NEXT: vluxei16.v v10, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB76_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: .LBB76_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB76_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
; RV64ZVE32F-NEXT: .LBB76_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB76_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB76_13
; RV64ZVE32F-NEXT: .LBB76_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB76_14
; RV64ZVE32F-NEXT: .LBB76_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB76_9
; RV64ZVE32F-NEXT: .LBB76_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB76_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB76_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB76_16
; RV64ZVE32F-NEXT: .LBB76_11: # %else20
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB76_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB76_6
; RV64ZVE32F-NEXT: .LBB76_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB76_7
; RV64ZVE32F-NEXT: .LBB76_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB76_8
; RV64ZVE32F-NEXT: j .LBB76_9
; RV64ZVE32F-NEXT: .LBB76_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB76_11
; RV64ZVE32F-NEXT: .LBB76_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: andi a1, a1, 255
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: flw fa5, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
%eidxs = zext <8 x i8> %idxs to <8 x i32>
%ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
%v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
ret <8 x float> %v
}
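; i16 indices, sign-extended by default before scaling.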
define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x float> %passthru) {
; RV32-LABEL: mgather_baseidx_v8i16_v8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV32-NEXT: vsext.vf2 v12, v8
; RV32-NEXT: vsll.vi v8, v12, 2
; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v8i16_v8f32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf4 v12, v8
; RV64V-NEXT: vsll.vi v12, v12, 2
; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
; RV64V-NEXT: vmv.v.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i16_v8f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB77_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: .LBB77_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB77_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
; RV64ZVE32F-NEXT: .LBB77_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB77_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB77_13
; RV64ZVE32F-NEXT: .LBB77_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB77_14
; RV64ZVE32F-NEXT: .LBB77_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB77_9
; RV64ZVE32F-NEXT: .LBB77_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB77_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB77_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB77_16
; RV64ZVE32F-NEXT: .LBB77_11: # %else20
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB77_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB77_6
; RV64ZVE32F-NEXT: .LBB77_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB77_7
; RV64ZVE32F-NEXT: .LBB77_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB77_8
; RV64ZVE32F-NEXT: j .LBB77_9
; RV64ZVE32F-NEXT: .LBB77_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB77_11
; RV64ZVE32F-NEXT: .LBB77_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: flw fa5, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds float, ptr %base, <8 x i16> %idxs
%v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
ret <8 x float> %v
}
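; Explicit sext of the i16 indices; same lowering as the plain i16 case.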
define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x float> %passthru) {
; RV32-LABEL: mgather_baseidx_sext_v8i16_v8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV32-NEXT: vsext.vf2 v12, v8
; RV32-NEXT: vsll.vi v8, v12, 2
; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_sext_v8i16_v8f32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf4 v12, v8
; RV64V-NEXT: vsll.vi v12, v12, 2
; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
; RV64V-NEXT: vmv.v.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB78_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: .LBB78_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB78_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
; RV64ZVE32F-NEXT: .LBB78_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB78_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB78_13
; RV64ZVE32F-NEXT: .LBB78_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB78_14
; RV64ZVE32F-NEXT: .LBB78_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB78_9
; RV64ZVE32F-NEXT: .LBB78_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB78_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB78_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB78_16
; RV64ZVE32F-NEXT: .LBB78_11: # %else20
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB78_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB78_6
; RV64ZVE32F-NEXT: .LBB78_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB78_7
; RV64ZVE32F-NEXT: .LBB78_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB78_8
; RV64ZVE32F-NEXT: j .LBB78_9
; RV64ZVE32F-NEXT: .LBB78_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB78_11
; RV64ZVE32F-NEXT: .LBB78_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: flw fa5, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
%eidxs = sext <8 x i16> %idxs to <8 x i32>
%ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
%v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
ret <8 x float> %v
}
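; Zero-extended i16 indices: RV64V can stay at 32-bit index vectors after
; vzext.vf2, and RV64ZVE32F masks each scalar index with 0xffff
; (lui 16 / addiw -1) before scaling.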
define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x float> %passthru) {
; RV32-LABEL: mgather_baseidx_zext_v8i16_v8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV32-NEXT: vzext.vf2 v12, v8
; RV32-NEXT: vsll.vi v8, v12, 2
; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_zext_v8i16_v8f32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV64V-NEXT: vzext.vf2 v12, v8
; RV64V-NEXT: vsll.vi v8, v12, 2
; RV64V-NEXT: vluxei32.v v10, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: lui a1, 16
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: addiw a1, a1, -1
; RV64ZVE32F-NEXT: beqz a3, .LBB79_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: flw fa5, 0(a3)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: .LBB79_2: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB79_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: flw fa5, 0(a3)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
; RV64ZVE32F-NEXT: .LBB79_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a3, .LBB79_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB79_13
; RV64ZVE32F-NEXT: .LBB79_6: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: bnez a3, .LBB79_14
; RV64ZVE32F-NEXT: .LBB79_7: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB79_9
; RV64ZVE32F-NEXT: .LBB79_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: flw fa5, 0(a3)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB79_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a3, a2, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a3, .LBB79_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a2, a2, -128
; RV64ZVE32F-NEXT: bnez a2, .LBB79_16
; RV64ZVE32F-NEXT: .LBB79_11: # %else20
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB79_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: flw fa5, 0(a3)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: beqz a3, .LBB79_6
; RV64ZVE32F-NEXT: .LBB79_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: flw fa5, 0(a3)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: beqz a3, .LBB79_7
; RV64ZVE32F-NEXT: .LBB79_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: flw fa5, 0(a3)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: bnez a3, .LBB79_8
; RV64ZVE32F-NEXT: j .LBB79_9
; RV64ZVE32F-NEXT: .LBB79_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: flw fa5, 0(a3)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a2, a2, -128
; RV64ZVE32F-NEXT: beqz a2, .LBB79_11
; RV64ZVE32F-NEXT: .LBB79_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: and a1, a2, a1
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: flw fa5, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
%eidxs = zext <8 x i16> %idxs to <8 x i32>
%ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
%v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
ret <8 x float> %v
}
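; i32 indices: RV32 uses them directly, RV64V sign-extends them to e64 with
; vsext.vf2, and RV64ZVE32F extracts and scales them as scalars.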
define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x float> %passthru) {
; RV32-LABEL: mgather_baseidx_v8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV32-NEXT: vsll.vi v8, v8, 2
; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v8f32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf2 v12, v8
; RV64V-NEXT: vsll.vi v12, v12, 2
; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
; RV64V-NEXT: vmv.v.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB80_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: .LBB80_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB80_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB80_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB80_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB80_13
; RV64ZVE32F-NEXT: .LBB80_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB80_14
; RV64ZVE32F-NEXT: .LBB80_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB80_9
; RV64ZVE32F-NEXT: .LBB80_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB80_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v12, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB80_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB80_16
; RV64ZVE32F-NEXT: .LBB80_11: # %else20
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB80_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB80_6
; RV64ZVE32F-NEXT: .LBB80_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB80_7
; RV64ZVE32F-NEXT: .LBB80_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB80_8
; RV64ZVE32F-NEXT: j .LBB80_9
; RV64ZVE32F-NEXT: .LBB80_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB80_11
; RV64ZVE32F-NEXT: .LBB80_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: flw fa5, 0(a0)
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %idxs
%v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
ret <8 x float> %v
}
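; Masked gathers of f64 elements. The ZVE32F configurations have no e64 vector
; support, so each active element is loaded into a scalar FPR with fld; the
; wider results are returned indirectly through the pointer in a0.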
declare <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x double>)
define <1 x double> @mgather_v1f64(<1 x ptr> %ptrs, <1 x i1> %m, <1 x double> %passthru) {
; RV32V-LABEL: mgather_v1f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 1, e64, m1, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v1f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v1f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vfirst.m a0, v0
; RV32ZVE32F-NEXT: bnez a0, .LBB81_2
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a0)
; RV32ZVE32F-NEXT: .LBB81_2: # %else
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v1f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vfirst.m a1, v0
; RV64ZVE32F-NEXT: bnez a1, .LBB81_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: fld fa0, 0(a0)
; RV64ZVE32F-NEXT: .LBB81_2: # %else
; RV64ZVE32F-NEXT: ret
%v = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> %ptrs, i32 8, <1 x i1> %m, <1 x double> %passthru)
ret <1 x double> %v
}
declare <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x double>)
define <2 x double> @mgather_v2f64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x double> %passthru) {
; RV32V-LABEL: mgather_v2f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e64, m1, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a0, v0
; RV32ZVE32F-NEXT: andi a1, a0, 1
; RV32ZVE32F-NEXT: bnez a1, .LBB82_3
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a0, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB82_4
; RV32ZVE32F-NEXT: .LBB82_2: # %else2
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB82_3: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a1)
; RV32ZVE32F-NEXT: andi a0, a0, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB82_2
; RV32ZVE32F-NEXT: .LBB82_4: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fld fa1, 0(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB82_3
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB82_4
; RV64ZVE32F-NEXT: .LBB82_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB82_3: # %cond.load
; RV64ZVE32F-NEXT: fld fa0, 0(a0)
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB82_2
; RV64ZVE32F-NEXT: .LBB82_4: # %cond.load1
; RV64ZVE32F-NEXT: fld fa1, 0(a1)
; RV64ZVE32F-NEXT: ret
%v = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 8, <2 x i1> %m, <2 x double> %passthru)
ret <2 x double> %v
}
declare <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x double>)
define <4 x double> @mgather_v4f64(<4 x ptr> %ptrs, <4 x i1> %m, <4 x double> %passthru) {
; RV32V-LABEL: mgather_v4f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; RV32V-NEXT: vluxei32.v v10, (zero), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v10
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v4f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v10
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v4f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v0
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: bnez a2, .LBB83_6
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: bnez a2, .LBB83_7
; RV32ZVE32F-NEXT: .LBB83_2: # %else2
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: bnez a2, .LBB83_8
; RV32ZVE32F-NEXT: .LBB83_3: # %else5
; RV32ZVE32F-NEXT: andi a1, a1, 8
; RV32ZVE32F-NEXT: beqz a1, .LBB83_5
; RV32ZVE32F-NEXT: .LBB83_4: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa3, 0(a1)
; RV32ZVE32F-NEXT: .LBB83_5: # %else8
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB83_6: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: beqz a2, .LBB83_2
; RV32ZVE32F-NEXT: .LBB83_7: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a2, v9
; RV32ZVE32F-NEXT: fld fa1, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: beqz a2, .LBB83_3
; RV32ZVE32F-NEXT: .LBB83_8: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a2, v9
; RV32ZVE32F-NEXT: fld fa2, 0(a2)
; RV32ZVE32F-NEXT: andi a1, a1, 8
; RV32ZVE32F-NEXT: bnez a1, .LBB83_4
; RV32ZVE32F-NEXT: j .LBB83_5
;
; RV64ZVE32F-LABEL: mgather_v4f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB83_6
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
; RV64ZVE32F-NEXT: bnez a3, .LBB83_7
; RV64ZVE32F-NEXT: .LBB83_2: # %else2
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: bnez a3, .LBB83_8
; RV64ZVE32F-NEXT: .LBB83_3: # %else5
; RV64ZVE32F-NEXT: andi a2, a2, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB83_5
; RV64ZVE32F-NEXT: .LBB83_4: # %cond.load7
; RV64ZVE32F-NEXT: ld a1, 24(a1)
; RV64ZVE32F-NEXT: fld fa3, 0(a1)
; RV64ZVE32F-NEXT: .LBB83_5: # %else8
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB83_6: # %cond.load
; RV64ZVE32F-NEXT: ld a3, 0(a1)
; RV64ZVE32F-NEXT: fld fa0, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB83_2
; RV64ZVE32F-NEXT: .LBB83_7: # %cond.load1
; RV64ZVE32F-NEXT: ld a3, 8(a1)
; RV64ZVE32F-NEXT: fld fa1, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: beqz a3, .LBB83_3
; RV64ZVE32F-NEXT: .LBB83_8: # %cond.load4
; RV64ZVE32F-NEXT: ld a3, 16(a1)
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
; RV64ZVE32F-NEXT: andi a2, a2, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB83_4
; RV64ZVE32F-NEXT: j .LBB83_5
%v = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> %m, <4 x double> %passthru)
ret <4 x double> %v
}
define <4 x double> @mgather_truemask_v4f64(<4 x ptr> %ptrs, <4 x double> %passthru) {
; RV32V-LABEL: mgather_truemask_v4f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32V-NEXT: vluxei32.v v10, (zero), v8
; RV32V-NEXT: vmv.v.v v8, v10
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_truemask_v4f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64V-NEXT: vluxei64.v v8, (zero), v8
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_truemask_v4f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa5, 0(a1)
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a1, v9
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV32ZVE32F-NEXT: fld fa4, 0(a1)
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa3, 0(a1)
; RV32ZVE32F-NEXT: vmv.x.s a1, v9
; RV32ZVE32F-NEXT: fld fa2, 0(a1)
; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
; RV32ZVE32F-NEXT: fsd fa4, 16(a0)
; RV32ZVE32F-NEXT: fsd fa2, 8(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_truemask_v4f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a2, 24(a1)
; RV64ZVE32F-NEXT: ld a3, 16(a1)
; RV64ZVE32F-NEXT: ld a4, 8(a1)
; RV64ZVE32F-NEXT: ld a1, 0(a1)
; RV64ZVE32F-NEXT: fld fa5, 0(a2)
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
; RV64ZVE32F-NEXT: fld fa3, 0(a4)
; RV64ZVE32F-NEXT: fld fa2, 0(a1)
; RV64ZVE32F-NEXT: fsd fa5, 24(a0)
; RV64ZVE32F-NEXT: fsd fa4, 16(a0)
; RV64ZVE32F-NEXT: fsd fa3, 8(a0)
; RV64ZVE32F-NEXT: fsd fa2, 0(a0)
; RV64ZVE32F-NEXT: ret
%v = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> splat (i1 1), <4 x double> %passthru)
ret <4 x double> %v
}
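; Note: with an all-false mask the gather below is expected to fold to its passthru
; operand — the V configurations reduce to a register move and the ZVE32F
; configurations just store the passthru values to the sret buffer.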
define <4 x double> @mgather_falsemask_v4f64(<4 x ptr> %ptrs, <4 x double> %passthru) {
; RV32V-LABEL: mgather_falsemask_v4f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vmv2r.v v8, v10
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_falsemask_v4f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vmv2r.v v8, v10
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_falsemask_v4f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_falsemask_v4f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: ret
%v = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> zeroinitializer, <4 x double> %passthru)
ret <4 x double> %v
}
declare <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x double>)
define <8 x double> @mgather_v8f64(<8 x ptr> %ptrs, <8 x i1> %m, <8 x double> %passthru) {
; RV32V-LABEL: mgather_v8f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV32V-NEXT: vluxei32.v v12, (zero), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v8f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v8f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v0
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: bnez a2, .LBB86_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: bnez a2, .LBB86_11
; RV32ZVE32F-NEXT: .LBB86_2: # %else2
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: bnez a2, .LBB86_12
; RV32ZVE32F-NEXT: .LBB86_3: # %else5
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: bnez a2, .LBB86_13
; RV32ZVE32F-NEXT: .LBB86_4: # %else8
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: bnez a2, .LBB86_14
; RV32ZVE32F-NEXT: .LBB86_5: # %else11
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: bnez a2, .LBB86_15
; RV32ZVE32F-NEXT: .LBB86_6: # %else14
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: bnez a2, .LBB86_16
; RV32ZVE32F-NEXT: .LBB86_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB86_9
; RV32ZVE32F-NEXT: .LBB86_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa7, 0(a1)
; RV32ZVE32F-NEXT: .LBB86_9: # %else20
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB86_10: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: beqz a2, .LBB86_2
; RV32ZVE32F-NEXT: .LBB86_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: beqz a2, .LBB86_3
; RV32ZVE32F-NEXT: .LBB86_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: beqz a2, .LBB86_4
; RV32ZVE32F-NEXT: .LBB86_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: beqz a2, .LBB86_5
; RV32ZVE32F-NEXT: .LBB86_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: beqz a2, .LBB86_6
; RV32ZVE32F-NEXT: .LBB86_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: beqz a2, .LBB86_7
; RV32ZVE32F-NEXT: .LBB86_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a2)
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: bnez a1, .LBB86_8
; RV32ZVE32F-NEXT: j .LBB86_9
;
; RV64ZVE32F-LABEL: mgather_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB86_10
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
; RV64ZVE32F-NEXT: bnez a3, .LBB86_11
; RV64ZVE32F-NEXT: .LBB86_2: # %else2
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: bnez a3, .LBB86_12
; RV64ZVE32F-NEXT: .LBB86_3: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB86_13
; RV64ZVE32F-NEXT: .LBB86_4: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: bnez a3, .LBB86_14
; RV64ZVE32F-NEXT: .LBB86_5: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: bnez a3, .LBB86_15
; RV64ZVE32F-NEXT: .LBB86_6: # %else14
; RV64ZVE32F-NEXT: andi a3, a2, 64
; RV64ZVE32F-NEXT: bnez a3, .LBB86_16
; RV64ZVE32F-NEXT: .LBB86_7: # %else17
; RV64ZVE32F-NEXT: andi a2, a2, -128
; RV64ZVE32F-NEXT: beqz a2, .LBB86_9
; RV64ZVE32F-NEXT: .LBB86_8: # %cond.load19
; RV64ZVE32F-NEXT: ld a1, 56(a1)
; RV64ZVE32F-NEXT: fld fa7, 0(a1)
; RV64ZVE32F-NEXT: .LBB86_9: # %else20
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB86_10: # %cond.load
; RV64ZVE32F-NEXT: ld a3, 0(a1)
; RV64ZVE32F-NEXT: fld fa0, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB86_2
; RV64ZVE32F-NEXT: .LBB86_11: # %cond.load1
; RV64ZVE32F-NEXT: ld a3, 8(a1)
; RV64ZVE32F-NEXT: fld fa1, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: beqz a3, .LBB86_3
; RV64ZVE32F-NEXT: .LBB86_12: # %cond.load4
; RV64ZVE32F-NEXT: ld a3, 16(a1)
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: beqz a3, .LBB86_4
; RV64ZVE32F-NEXT: .LBB86_13: # %cond.load7
; RV64ZVE32F-NEXT: ld a3, 24(a1)
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: beqz a3, .LBB86_5
; RV64ZVE32F-NEXT: .LBB86_14: # %cond.load10
; RV64ZVE32F-NEXT: ld a3, 32(a1)
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB86_6
; RV64ZVE32F-NEXT: .LBB86_15: # %cond.load13
; RV64ZVE32F-NEXT: ld a3, 40(a1)
; RV64ZVE32F-NEXT: fld fa5, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 64
; RV64ZVE32F-NEXT: beqz a3, .LBB86_7
; RV64ZVE32F-NEXT: .LBB86_16: # %cond.load16
; RV64ZVE32F-NEXT: ld a3, 48(a1)
; RV64ZVE32F-NEXT: fld fa6, 0(a3)
; RV64ZVE32F-NEXT: andi a2, a2, -128
; RV64ZVE32F-NEXT: bnez a2, .LBB86_8
; RV64ZVE32F-NEXT: j .LBB86_9
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
ret <8 x double> %v
}
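; Note: for the indexed f64 gathers below, the ZVE32F configurations cannot hold
; 64-bit elements. RV32ZVE32F builds the addresses with 32-bit vector arithmetic
; (vsext/vsll/vadd.vx) and scalarizes the loads; RV64ZVE32F scalarizes both the
; address computation and the loads, keyed off the mask bits.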
define <8 x double> @mgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x double> %passthru) {
; RV32V-LABEL: mgather_baseidx_v8i8_v8f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsext.vf4 v10, v8
; RV32V-NEXT: vsll.vi v8, v10, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v8i8_v8f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64V-NEXT: vsext.vf8 v16, v8
; RV64V-NEXT: vsll.vi v8, v16, 3
; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_baseidx_v8i8_v8f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsext.vf4 v10, v8
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v0
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: bnez a2, .LBB87_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: bnez a2, .LBB87_11
; RV32ZVE32F-NEXT: .LBB87_2: # %else2
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: bnez a2, .LBB87_12
; RV32ZVE32F-NEXT: .LBB87_3: # %else5
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: bnez a2, .LBB87_13
; RV32ZVE32F-NEXT: .LBB87_4: # %else8
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: bnez a2, .LBB87_14
; RV32ZVE32F-NEXT: .LBB87_5: # %else11
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: bnez a2, .LBB87_15
; RV32ZVE32F-NEXT: .LBB87_6: # %else14
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: bnez a2, .LBB87_16
; RV32ZVE32F-NEXT: .LBB87_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB87_9
; RV32ZVE32F-NEXT: .LBB87_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa7, 0(a1)
; RV32ZVE32F-NEXT: .LBB87_9: # %else20
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB87_10: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: beqz a2, .LBB87_2
; RV32ZVE32F-NEXT: .LBB87_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: beqz a2, .LBB87_3
; RV32ZVE32F-NEXT: .LBB87_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: beqz a2, .LBB87_4
; RV32ZVE32F-NEXT: .LBB87_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: beqz a2, .LBB87_5
; RV32ZVE32F-NEXT: .LBB87_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: beqz a2, .LBB87_6
; RV32ZVE32F-NEXT: .LBB87_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: beqz a2, .LBB87_7
; RV32ZVE32F-NEXT: .LBB87_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a2)
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: bnez a1, .LBB87_8
; RV32ZVE32F-NEXT: j .LBB87_9
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB87_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa0, 0(a3)
; RV64ZVE32F-NEXT: .LBB87_2: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB87_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa1, 0(a3)
; RV64ZVE32F-NEXT: .LBB87_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a3, .LBB87_14
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB87_15
; RV64ZVE32F-NEXT: .LBB87_6: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: bnez a3, .LBB87_16
; RV64ZVE32F-NEXT: .LBB87_7: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB87_9
; RV64ZVE32F-NEXT: .LBB87_8: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa5, 0(a3)
; RV64ZVE32F-NEXT: .LBB87_9: # %else14
; RV64ZVE32F-NEXT: andi a3, a2, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB87_11
; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa6, 0(a3)
; RV64ZVE32F-NEXT: .LBB87_11: # %else17
; RV64ZVE32F-NEXT: andi a2, a2, -128
; RV64ZVE32F-NEXT: beqz a2, .LBB87_13
; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: fld fa7, 0(a1)
; RV64ZVE32F-NEXT: .LBB87_13: # %else20
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB87_14: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: beqz a3, .LBB87_6
; RV64ZVE32F-NEXT: .LBB87_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: beqz a3, .LBB87_7
; RV64ZVE32F-NEXT: .LBB87_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: bnez a3, .LBB87_8
; RV64ZVE32F-NEXT: j .LBB87_9
%ptrs = getelementptr inbounds double, ptr %base, <8 x i8> %idxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
ret <8 x double> %v
}
define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x double> %passthru) {
; RV32V-LABEL: mgather_baseidx_sext_v8i8_v8f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsext.vf4 v10, v8
; RV32V-NEXT: vsll.vi v8, v10, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64V-NEXT: vsext.vf8 v16, v8
; RV64V-NEXT: vsll.vi v8, v16, 3
; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsext.vf4 v10, v8
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v0
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: bnez a2, .LBB88_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: bnez a2, .LBB88_11
; RV32ZVE32F-NEXT: .LBB88_2: # %else2
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: bnez a2, .LBB88_12
; RV32ZVE32F-NEXT: .LBB88_3: # %else5
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: bnez a2, .LBB88_13
; RV32ZVE32F-NEXT: .LBB88_4: # %else8
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: bnez a2, .LBB88_14
; RV32ZVE32F-NEXT: .LBB88_5: # %else11
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: bnez a2, .LBB88_15
; RV32ZVE32F-NEXT: .LBB88_6: # %else14
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: bnez a2, .LBB88_16
; RV32ZVE32F-NEXT: .LBB88_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB88_9
; RV32ZVE32F-NEXT: .LBB88_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa7, 0(a1)
; RV32ZVE32F-NEXT: .LBB88_9: # %else20
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB88_10: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: beqz a2, .LBB88_2
; RV32ZVE32F-NEXT: .LBB88_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: beqz a2, .LBB88_3
; RV32ZVE32F-NEXT: .LBB88_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: beqz a2, .LBB88_4
; RV32ZVE32F-NEXT: .LBB88_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: beqz a2, .LBB88_5
; RV32ZVE32F-NEXT: .LBB88_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: beqz a2, .LBB88_6
; RV32ZVE32F-NEXT: .LBB88_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: beqz a2, .LBB88_7
; RV32ZVE32F-NEXT: .LBB88_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a2)
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: bnez a1, .LBB88_8
; RV32ZVE32F-NEXT: j .LBB88_9
;
; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB88_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa0, 0(a3)
; RV64ZVE32F-NEXT: .LBB88_2: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB88_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa1, 0(a3)
; RV64ZVE32F-NEXT: .LBB88_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a3, .LBB88_14
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB88_15
; RV64ZVE32F-NEXT: .LBB88_6: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: bnez a3, .LBB88_16
; RV64ZVE32F-NEXT: .LBB88_7: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB88_9
; RV64ZVE32F-NEXT: .LBB88_8: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa5, 0(a3)
; RV64ZVE32F-NEXT: .LBB88_9: # %else14
; RV64ZVE32F-NEXT: andi a3, a2, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB88_11
; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa6, 0(a3)
; RV64ZVE32F-NEXT: .LBB88_11: # %else17
; RV64ZVE32F-NEXT: andi a2, a2, -128
; RV64ZVE32F-NEXT: beqz a2, .LBB88_13
; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: fld fa7, 0(a1)
; RV64ZVE32F-NEXT: .LBB88_13: # %else20
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB88_14: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: beqz a3, .LBB88_6
; RV64ZVE32F-NEXT: .LBB88_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: beqz a3, .LBB88_7
; RV64ZVE32F-NEXT: .LBB88_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: bnez a3, .LBB88_8
; RV64ZVE32F-NEXT: j .LBB88_9
%eidxs = sext <8 x i8> %idxs to <8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
ret <8 x double> %v
}
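; Note: in the zero-extended i8 index case below, RV64ZVE32F masks each extracted
; index with "andi ..., 255" before scaling, matching the zext in the IR.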
define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x double> %passthru) {
; RV32V-LABEL: mgather_baseidx_zext_v8i8_v8f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32V-NEXT: vzext.vf2 v9, v8
; RV32V-NEXT: vsll.vi v8, v9, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32V-NEXT: vluxei16.v v12, (a0), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64V-NEXT: vzext.vf2 v9, v8
; RV64V-NEXT: vsll.vi v8, v9, 3
; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV64V-NEXT: vluxei16.v v12, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vzext.vf4 v10, v8
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v0
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: bnez a2, .LBB89_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: bnez a2, .LBB89_11
; RV32ZVE32F-NEXT: .LBB89_2: # %else2
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: bnez a2, .LBB89_12
; RV32ZVE32F-NEXT: .LBB89_3: # %else5
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: bnez a2, .LBB89_13
; RV32ZVE32F-NEXT: .LBB89_4: # %else8
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: bnez a2, .LBB89_14
; RV32ZVE32F-NEXT: .LBB89_5: # %else11
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: bnez a2, .LBB89_15
; RV32ZVE32F-NEXT: .LBB89_6: # %else14
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: bnez a2, .LBB89_16
; RV32ZVE32F-NEXT: .LBB89_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB89_9
; RV32ZVE32F-NEXT: .LBB89_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa7, 0(a1)
; RV32ZVE32F-NEXT: .LBB89_9: # %else20
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB89_10: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: beqz a2, .LBB89_2
; RV32ZVE32F-NEXT: .LBB89_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: beqz a2, .LBB89_3
; RV32ZVE32F-NEXT: .LBB89_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: beqz a2, .LBB89_4
; RV32ZVE32F-NEXT: .LBB89_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: beqz a2, .LBB89_5
; RV32ZVE32F-NEXT: .LBB89_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: beqz a2, .LBB89_6
; RV32ZVE32F-NEXT: .LBB89_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: beqz a2, .LBB89_7
; RV32ZVE32F-NEXT: .LBB89_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a2)
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: bnez a1, .LBB89_8
; RV32ZVE32F-NEXT: j .LBB89_9
;
; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB89_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: andi a3, a3, 255
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa0, 0(a3)
; RV64ZVE32F-NEXT: .LBB89_2: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB89_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: andi a3, a3, 255
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa1, 0(a3)
; RV64ZVE32F-NEXT: .LBB89_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a3, .LBB89_14
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB89_15
; RV64ZVE32F-NEXT: .LBB89_6: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: bnez a3, .LBB89_16
; RV64ZVE32F-NEXT: .LBB89_7: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB89_9
; RV64ZVE32F-NEXT: .LBB89_8: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: andi a3, a3, 255
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa5, 0(a3)
; RV64ZVE32F-NEXT: .LBB89_9: # %else14
; RV64ZVE32F-NEXT: andi a3, a2, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB89_11
; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: andi a3, a3, 255
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa6, 0(a3)
; RV64ZVE32F-NEXT: .LBB89_11: # %else17
; RV64ZVE32F-NEXT: andi a2, a2, -128
; RV64ZVE32F-NEXT: beqz a2, .LBB89_13
; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: fld fa7, 0(a1)
; RV64ZVE32F-NEXT: .LBB89_13: # %else20
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB89_14: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: andi a3, a3, 255
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: beqz a3, .LBB89_6
; RV64ZVE32F-NEXT: .LBB89_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: andi a3, a3, 255
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: beqz a3, .LBB89_7
; RV64ZVE32F-NEXT: .LBB89_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: andi a3, a3, 255
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: bnez a3, .LBB89_8
; RV64ZVE32F-NEXT: j .LBB89_9
%eidxs = zext <8 x i8> %idxs to <8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
ret <8 x double> %v
}
define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x double> %passthru) {
; RV32V-LABEL: mgather_baseidx_v8i16_v8f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsext.vf2 v10, v8
; RV32V-NEXT: vsll.vi v8, v10, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v8i16_v8f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64V-NEXT: vsext.vf4 v16, v8
; RV64V-NEXT: vsll.vi v8, v16, 3
; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_baseidx_v8i16_v8f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsext.vf2 v10, v8
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v0
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: bnez a2, .LBB90_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: bnez a2, .LBB90_11
; RV32ZVE32F-NEXT: .LBB90_2: # %else2
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: bnez a2, .LBB90_12
; RV32ZVE32F-NEXT: .LBB90_3: # %else5
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: bnez a2, .LBB90_13
; RV32ZVE32F-NEXT: .LBB90_4: # %else8
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: bnez a2, .LBB90_14
; RV32ZVE32F-NEXT: .LBB90_5: # %else11
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: bnez a2, .LBB90_15
; RV32ZVE32F-NEXT: .LBB90_6: # %else14
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: bnez a2, .LBB90_16
; RV32ZVE32F-NEXT: .LBB90_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB90_9
; RV32ZVE32F-NEXT: .LBB90_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa7, 0(a1)
; RV32ZVE32F-NEXT: .LBB90_9: # %else20
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB90_10: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: beqz a2, .LBB90_2
; RV32ZVE32F-NEXT: .LBB90_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: beqz a2, .LBB90_3
; RV32ZVE32F-NEXT: .LBB90_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: beqz a2, .LBB90_4
; RV32ZVE32F-NEXT: .LBB90_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: beqz a2, .LBB90_5
; RV32ZVE32F-NEXT: .LBB90_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: beqz a2, .LBB90_6
; RV32ZVE32F-NEXT: .LBB90_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: beqz a2, .LBB90_7
; RV32ZVE32F-NEXT: .LBB90_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a2)
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: bnez a1, .LBB90_8
; RV32ZVE32F-NEXT: j .LBB90_9
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i16_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB90_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa0, 0(a3)
; RV64ZVE32F-NEXT: .LBB90_2: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB90_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa1, 0(a3)
; RV64ZVE32F-NEXT: .LBB90_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a3, .LBB90_14
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB90_15
; RV64ZVE32F-NEXT: .LBB90_6: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: bnez a3, .LBB90_16
; RV64ZVE32F-NEXT: .LBB90_7: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB90_9
; RV64ZVE32F-NEXT: .LBB90_8: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa5, 0(a3)
; RV64ZVE32F-NEXT: .LBB90_9: # %else14
; RV64ZVE32F-NEXT: andi a3, a2, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB90_11
; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa6, 0(a3)
; RV64ZVE32F-NEXT: .LBB90_11: # %else17
; RV64ZVE32F-NEXT: andi a2, a2, -128
; RV64ZVE32F-NEXT: beqz a2, .LBB90_13
; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: fld fa7, 0(a1)
; RV64ZVE32F-NEXT: .LBB90_13: # %else20
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB90_14: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: beqz a3, .LBB90_6
; RV64ZVE32F-NEXT: .LBB90_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: beqz a3, .LBB90_7
; RV64ZVE32F-NEXT: .LBB90_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: bnez a3, .LBB90_8
; RV64ZVE32F-NEXT: j .LBB90_9
%ptrs = getelementptr inbounds double, ptr %base, <8 x i16> %idxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
ret <8 x double> %v
}
define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x double> %passthru) {
; RV32V-LABEL: mgather_baseidx_sext_v8i16_v8f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsext.vf2 v10, v8
; RV32V-NEXT: vsll.vi v8, v10, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_sext_v8i16_v8f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64V-NEXT: vsext.vf4 v16, v8
; RV64V-NEXT: vsll.vi v8, v16, 3
; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsext.vf2 v10, v8
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v0
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: bnez a2, .LBB91_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: bnez a2, .LBB91_11
; RV32ZVE32F-NEXT: .LBB91_2: # %else2
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: bnez a2, .LBB91_12
; RV32ZVE32F-NEXT: .LBB91_3: # %else5
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: bnez a2, .LBB91_13
; RV32ZVE32F-NEXT: .LBB91_4: # %else8
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: bnez a2, .LBB91_14
; RV32ZVE32F-NEXT: .LBB91_5: # %else11
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: bnez a2, .LBB91_15
; RV32ZVE32F-NEXT: .LBB91_6: # %else14
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: bnez a2, .LBB91_16
; RV32ZVE32F-NEXT: .LBB91_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB91_9
; RV32ZVE32F-NEXT: .LBB91_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa7, 0(a1)
; RV32ZVE32F-NEXT: .LBB91_9: # %else20
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB91_10: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: beqz a2, .LBB91_2
; RV32ZVE32F-NEXT: .LBB91_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: beqz a2, .LBB91_3
; RV32ZVE32F-NEXT: .LBB91_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: beqz a2, .LBB91_4
; RV32ZVE32F-NEXT: .LBB91_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: beqz a2, .LBB91_5
; RV32ZVE32F-NEXT: .LBB91_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: beqz a2, .LBB91_6
; RV32ZVE32F-NEXT: .LBB91_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: beqz a2, .LBB91_7
; RV32ZVE32F-NEXT: .LBB91_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a2)
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: bnez a1, .LBB91_8
; RV32ZVE32F-NEXT: j .LBB91_9
;
; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB91_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa0, 0(a3)
; RV64ZVE32F-NEXT: .LBB91_2: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB91_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa1, 0(a3)
; RV64ZVE32F-NEXT: .LBB91_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a3, .LBB91_14
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB91_15
; RV64ZVE32F-NEXT: .LBB91_6: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: bnez a3, .LBB91_16
; RV64ZVE32F-NEXT: .LBB91_7: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB91_9
; RV64ZVE32F-NEXT: .LBB91_8: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa5, 0(a3)
; RV64ZVE32F-NEXT: .LBB91_9: # %else14
; RV64ZVE32F-NEXT: andi a3, a2, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB91_11
; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa6, 0(a3)
; RV64ZVE32F-NEXT: .LBB91_11: # %else17
; RV64ZVE32F-NEXT: andi a2, a2, -128
; RV64ZVE32F-NEXT: beqz a2, .LBB91_13
; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: fld fa7, 0(a1)
; RV64ZVE32F-NEXT: .LBB91_13: # %else20
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB91_14: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: beqz a3, .LBB91_6
; RV64ZVE32F-NEXT: .LBB91_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: beqz a3, .LBB91_7
; RV64ZVE32F-NEXT: .LBB91_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: bnez a3, .LBB91_8
; RV64ZVE32F-NEXT: j .LBB91_9
%eidxs = sext <8 x i16> %idxs to <8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
ret <8 x double> %v
}
define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x double> %passthru) {
; RV32V-LABEL: mgather_baseidx_zext_v8i16_v8f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vzext.vf2 v10, v8
; RV32V-NEXT: vsll.vi v8, v10, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_zext_v8i16_v8f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64V-NEXT: vzext.vf2 v10, v8
; RV64V-NEXT: vsll.vi v8, v10, 3
; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV64V-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vzext.vf2 v10, v8
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v0
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: bnez a2, .LBB92_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: bnez a2, .LBB92_11
; RV32ZVE32F-NEXT: .LBB92_2: # %else2
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: bnez a2, .LBB92_12
; RV32ZVE32F-NEXT: .LBB92_3: # %else5
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: bnez a2, .LBB92_13
; RV32ZVE32F-NEXT: .LBB92_4: # %else8
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: bnez a2, .LBB92_14
; RV32ZVE32F-NEXT: .LBB92_5: # %else11
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: bnez a2, .LBB92_15
; RV32ZVE32F-NEXT: .LBB92_6: # %else14
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: bnez a2, .LBB92_16
; RV32ZVE32F-NEXT: .LBB92_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB92_9
; RV32ZVE32F-NEXT: .LBB92_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa7, 0(a1)
; RV32ZVE32F-NEXT: .LBB92_9: # %else20
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB92_10: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: beqz a2, .LBB92_2
; RV32ZVE32F-NEXT: .LBB92_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: beqz a2, .LBB92_3
; RV32ZVE32F-NEXT: .LBB92_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: beqz a2, .LBB92_4
; RV32ZVE32F-NEXT: .LBB92_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: beqz a2, .LBB92_5
; RV32ZVE32F-NEXT: .LBB92_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: beqz a2, .LBB92_6
; RV32ZVE32F-NEXT: .LBB92_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: beqz a2, .LBB92_7
; RV32ZVE32F-NEXT: .LBB92_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a2)
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: bnez a1, .LBB92_8
; RV32ZVE32F-NEXT: j .LBB92_9
;
; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: lui a2, 16
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-NEXT: andi a4, a3, 1
; RV64ZVE32F-NEXT: addiw a2, a2, -1
; RV64ZVE32F-NEXT: beqz a4, .LBB92_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a4, v8
; RV64ZVE32F-NEXT: and a4, a4, a2
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa0, 0(a4)
; RV64ZVE32F-NEXT: .LBB92_2: # %else
; RV64ZVE32F-NEXT: andi a4, a3, 2
; RV64ZVE32F-NEXT: beqz a4, .LBB92_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v9
; RV64ZVE32F-NEXT: and a4, a4, a2
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa1, 0(a4)
; RV64ZVE32F-NEXT: .LBB92_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a4, a3, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a4, .LBB92_14
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a4, a3, 8
; RV64ZVE32F-NEXT: bnez a4, .LBB92_15
; RV64ZVE32F-NEXT: .LBB92_6: # %else8
; RV64ZVE32F-NEXT: andi a4, a3, 16
; RV64ZVE32F-NEXT: bnez a4, .LBB92_16
; RV64ZVE32F-NEXT: .LBB92_7: # %else11
; RV64ZVE32F-NEXT: andi a4, a3, 32
; RV64ZVE32F-NEXT: beqz a4, .LBB92_9
; RV64ZVE32F-NEXT: .LBB92_8: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v8
; RV64ZVE32F-NEXT: and a4, a4, a2
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa5, 0(a4)
; RV64ZVE32F-NEXT: .LBB92_9: # %else14
; RV64ZVE32F-NEXT: andi a4, a3, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: beqz a4, .LBB92_11
; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a4, v8
; RV64ZVE32F-NEXT: and a4, a4, a2
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa6, 0(a4)
; RV64ZVE32F-NEXT: .LBB92_11: # %else17
; RV64ZVE32F-NEXT: andi a3, a3, -128
; RV64ZVE32F-NEXT: beqz a3, .LBB92_13
; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: and a2, a3, a2
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: fld fa7, 0(a1)
; RV64ZVE32F-NEXT: .LBB92_13: # %else20
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB92_14: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a4, v8
; RV64ZVE32F-NEXT: and a4, a4, a2
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa2, 0(a4)
; RV64ZVE32F-NEXT: andi a4, a3, 8
; RV64ZVE32F-NEXT: beqz a4, .LBB92_6
; RV64ZVE32F-NEXT: .LBB92_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v8
; RV64ZVE32F-NEXT: and a4, a4, a2
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa3, 0(a4)
; RV64ZVE32F-NEXT: andi a4, a3, 16
; RV64ZVE32F-NEXT: beqz a4, .LBB92_7
; RV64ZVE32F-NEXT: .LBB92_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a4, v9
; RV64ZVE32F-NEXT: and a4, a4, a2
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa4, 0(a4)
; RV64ZVE32F-NEXT: andi a4, a3, 32
; RV64ZVE32F-NEXT: bnez a4, .LBB92_8
; RV64ZVE32F-NEXT: j .LBB92_9
%eidxs = zext <8 x i16> %idxs to <8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
ret <8 x double> %v
}
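; Masked gather of <8 x double> indexed by <8 x i32> offsets from %base.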
define <8 x double> @mgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x double> %passthru) {
; RV32V-LABEL: mgather_baseidx_v8i32_v8f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsll.vi v8, v8, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v8i32_v8f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64V-NEXT: vsext.vf2 v16, v8
; RV64V-NEXT: vsll.vi v8, v16, 3
; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_baseidx_v8i32_v8f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v0
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: bnez a2, .LBB93_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: bnez a2, .LBB93_11
; RV32ZVE32F-NEXT: .LBB93_2: # %else2
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: bnez a2, .LBB93_12
; RV32ZVE32F-NEXT: .LBB93_3: # %else5
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: bnez a2, .LBB93_13
; RV32ZVE32F-NEXT: .LBB93_4: # %else8
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: bnez a2, .LBB93_14
; RV32ZVE32F-NEXT: .LBB93_5: # %else11
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: bnez a2, .LBB93_15
; RV32ZVE32F-NEXT: .LBB93_6: # %else14
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: bnez a2, .LBB93_16
; RV32ZVE32F-NEXT: .LBB93_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB93_9
; RV32ZVE32F-NEXT: .LBB93_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa7, 0(a1)
; RV32ZVE32F-NEXT: .LBB93_9: # %else20
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB93_10: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: beqz a2, .LBB93_2
; RV32ZVE32F-NEXT: .LBB93_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: beqz a2, .LBB93_3
; RV32ZVE32F-NEXT: .LBB93_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: beqz a2, .LBB93_4
; RV32ZVE32F-NEXT: .LBB93_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: beqz a2, .LBB93_5
; RV32ZVE32F-NEXT: .LBB93_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: beqz a2, .LBB93_6
; RV32ZVE32F-NEXT: .LBB93_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: beqz a2, .LBB93_7
; RV32ZVE32F-NEXT: .LBB93_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a2)
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: bnez a1, .LBB93_8
; RV32ZVE32F-NEXT: j .LBB93_9
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i32_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB93_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa0, 0(a3)
; RV64ZVE32F-NEXT: .LBB93_2: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB93_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa1, 0(a3)
; RV64ZVE32F-NEXT: .LBB93_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a3, .LBB93_14
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB93_15
; RV64ZVE32F-NEXT: .LBB93_6: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: bnez a3, .LBB93_16
; RV64ZVE32F-NEXT: .LBB93_7: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB93_9
; RV64ZVE32F-NEXT: .LBB93_8: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa5, 0(a3)
; RV64ZVE32F-NEXT: .LBB93_9: # %else14
; RV64ZVE32F-NEXT: andi a3, a2, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB93_11
; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa6, 0(a3)
; RV64ZVE32F-NEXT: .LBB93_11: # %else17
; RV64ZVE32F-NEXT: andi a2, a2, -128
; RV64ZVE32F-NEXT: beqz a2, .LBB93_13
; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: fld fa7, 0(a1)
; RV64ZVE32F-NEXT: .LBB93_13: # %else20
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB93_14: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: beqz a3, .LBB93_6
; RV64ZVE32F-NEXT: .LBB93_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: beqz a3, .LBB93_7
; RV64ZVE32F-NEXT: .LBB93_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: bnez a3, .LBB93_8
; RV64ZVE32F-NEXT: j .LBB93_9
%ptrs = getelementptr inbounds double, ptr %base, <8 x i32> %idxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
ret <8 x double> %v
}
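; Masked gather of <8 x double>: i32 indices are sign-extended to i64 before indexing %base.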
define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x double> %passthru) {
; RV32V-LABEL: mgather_baseidx_sext_v8i32_v8f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsll.vi v8, v8, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_sext_v8i32_v8f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64V-NEXT: vsext.vf2 v16, v8
; RV64V-NEXT: vsll.vi v8, v16, 3
; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_baseidx_sext_v8i32_v8f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v0
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: bnez a2, .LBB94_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: bnez a2, .LBB94_11
; RV32ZVE32F-NEXT: .LBB94_2: # %else2
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: bnez a2, .LBB94_12
; RV32ZVE32F-NEXT: .LBB94_3: # %else5
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: bnez a2, .LBB94_13
; RV32ZVE32F-NEXT: .LBB94_4: # %else8
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: bnez a2, .LBB94_14
; RV32ZVE32F-NEXT: .LBB94_5: # %else11
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: bnez a2, .LBB94_15
; RV32ZVE32F-NEXT: .LBB94_6: # %else14
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: bnez a2, .LBB94_16
; RV32ZVE32F-NEXT: .LBB94_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB94_9
; RV32ZVE32F-NEXT: .LBB94_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa7, 0(a1)
; RV32ZVE32F-NEXT: .LBB94_9: # %else20
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB94_10: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: beqz a2, .LBB94_2
; RV32ZVE32F-NEXT: .LBB94_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: beqz a2, .LBB94_3
; RV32ZVE32F-NEXT: .LBB94_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: beqz a2, .LBB94_4
; RV32ZVE32F-NEXT: .LBB94_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: beqz a2, .LBB94_5
; RV32ZVE32F-NEXT: .LBB94_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: beqz a2, .LBB94_6
; RV32ZVE32F-NEXT: .LBB94_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: beqz a2, .LBB94_7
; RV32ZVE32F-NEXT: .LBB94_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a2)
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: bnez a1, .LBB94_8
; RV32ZVE32F-NEXT: j .LBB94_9
;
; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i32_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB94_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa0, 0(a3)
; RV64ZVE32F-NEXT: .LBB94_2: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB94_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa1, 0(a3)
; RV64ZVE32F-NEXT: .LBB94_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a3, .LBB94_14
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB94_15
; RV64ZVE32F-NEXT: .LBB94_6: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: bnez a3, .LBB94_16
; RV64ZVE32F-NEXT: .LBB94_7: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB94_9
; RV64ZVE32F-NEXT: .LBB94_8: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa5, 0(a3)
; RV64ZVE32F-NEXT: .LBB94_9: # %else14
; RV64ZVE32F-NEXT: andi a3, a2, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB94_11
; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa6, 0(a3)
; RV64ZVE32F-NEXT: .LBB94_11: # %else17
; RV64ZVE32F-NEXT: andi a2, a2, -128
; RV64ZVE32F-NEXT: beqz a2, .LBB94_13
; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: fld fa7, 0(a1)
; RV64ZVE32F-NEXT: .LBB94_13: # %else20
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB94_14: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: beqz a3, .LBB94_6
; RV64ZVE32F-NEXT: .LBB94_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: beqz a3, .LBB94_7
; RV64ZVE32F-NEXT: .LBB94_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: bnez a3, .LBB94_8
; RV64ZVE32F-NEXT: j .LBB94_9
%eidxs = sext <8 x i32> %idxs to <8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
ret <8 x double> %v
}
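; Masked gather of <8 x double>: i32 indices are zero-extended to i64 before indexing %base.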
define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x double> %passthru) {
; RV32V-LABEL: mgather_baseidx_zext_v8i32_v8f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsll.vi v8, v8, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_zext_v8i32_v8f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64V-NEXT: vzext.vf2 v16, v8
; RV64V-NEXT: vsll.vi v8, v16, 3
; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_baseidx_zext_v8i32_v8f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v0
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: bnez a2, .LBB95_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: bnez a2, .LBB95_11
; RV32ZVE32F-NEXT: .LBB95_2: # %else2
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: bnez a2, .LBB95_12
; RV32ZVE32F-NEXT: .LBB95_3: # %else5
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: bnez a2, .LBB95_13
; RV32ZVE32F-NEXT: .LBB95_4: # %else8
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: bnez a2, .LBB95_14
; RV32ZVE32F-NEXT: .LBB95_5: # %else11
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: bnez a2, .LBB95_15
; RV32ZVE32F-NEXT: .LBB95_6: # %else14
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: bnez a2, .LBB95_16
; RV32ZVE32F-NEXT: .LBB95_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB95_9
; RV32ZVE32F-NEXT: .LBB95_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa7, 0(a1)
; RV32ZVE32F-NEXT: .LBB95_9: # %else20
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB95_10: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: beqz a2, .LBB95_2
; RV32ZVE32F-NEXT: .LBB95_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: beqz a2, .LBB95_3
; RV32ZVE32F-NEXT: .LBB95_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: beqz a2, .LBB95_4
; RV32ZVE32F-NEXT: .LBB95_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: beqz a2, .LBB95_5
; RV32ZVE32F-NEXT: .LBB95_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: beqz a2, .LBB95_6
; RV32ZVE32F-NEXT: .LBB95_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: beqz a2, .LBB95_7
; RV32ZVE32F-NEXT: .LBB95_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a2)
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: bnez a1, .LBB95_8
; RV32ZVE32F-NEXT: j .LBB95_9
;
; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i32_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB95_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 32
; RV64ZVE32F-NEXT: srli a3, a3, 29
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa0, 0(a3)
; RV64ZVE32F-NEXT: .LBB95_2: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB95_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: slli a3, a3, 32
; RV64ZVE32F-NEXT: srli a3, a3, 29
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa1, 0(a3)
; RV64ZVE32F-NEXT: .LBB95_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a3, .LBB95_14
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB95_15
; RV64ZVE32F-NEXT: .LBB95_6: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: bnez a3, .LBB95_16
; RV64ZVE32F-NEXT: .LBB95_7: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB95_9
; RV64ZVE32F-NEXT: .LBB95_8: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 32
; RV64ZVE32F-NEXT: srli a3, a3, 29
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa5, 0(a3)
; RV64ZVE32F-NEXT: .LBB95_9: # %else14
; RV64ZVE32F-NEXT: andi a3, a2, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB95_11
; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 32
; RV64ZVE32F-NEXT: srli a3, a3, 29
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa6, 0(a3)
; RV64ZVE32F-NEXT: .LBB95_11: # %else17
; RV64ZVE32F-NEXT: andi a2, a2, -128
; RV64ZVE32F-NEXT: beqz a2, .LBB95_13
; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 32
; RV64ZVE32F-NEXT: srli a2, a2, 29
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: fld fa7, 0(a1)
; RV64ZVE32F-NEXT: .LBB95_13: # %else20
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB95_14: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 32
; RV64ZVE32F-NEXT: srli a3, a3, 29
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: beqz a3, .LBB95_6
; RV64ZVE32F-NEXT: .LBB95_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 32
; RV64ZVE32F-NEXT: srli a3, a3, 29
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: beqz a3, .LBB95_7
; RV64ZVE32F-NEXT: .LBB95_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: slli a3, a3, 32
; RV64ZVE32F-NEXT: srli a3, a3, 29
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: bnez a3, .LBB95_8
; RV64ZVE32F-NEXT: j .LBB95_9
%eidxs = zext <8 x i32> %idxs to <8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
ret <8 x double> %v
}
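; Masked gather of <8 x double> indexed by <8 x i64> offsets from %base.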
define <8 x double> @mgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1> %m, <8 x double> %passthru) {
; RV32V-LABEL: mgather_baseidx_v8f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vnsrl.wi v16, v8, 0
; RV32V-NEXT: vsll.vi v8, v16, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v8f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64V-NEXT: vsll.vi v8, v8, 3
; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_baseidx_v8f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: lw a3, 56(a2)
; RV32ZVE32F-NEXT: lw a4, 48(a2)
; RV32ZVE32F-NEXT: lw a5, 40(a2)
; RV32ZVE32F-NEXT: lw a6, 32(a2)
; RV32ZVE32F-NEXT: lw a7, 24(a2)
; RV32ZVE32F-NEXT: lw t0, 16(a2)
; RV32ZVE32F-NEXT: lw t1, 8(a2)
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vlse32.v v8, (a2), zero
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t1
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t0
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a7
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a6
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a4
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a3
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v0
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: bnez a2, .LBB96_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: bnez a2, .LBB96_11
; RV32ZVE32F-NEXT: .LBB96_2: # %else2
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: bnez a2, .LBB96_12
; RV32ZVE32F-NEXT: .LBB96_3: # %else5
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: bnez a2, .LBB96_13
; RV32ZVE32F-NEXT: .LBB96_4: # %else8
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: bnez a2, .LBB96_14
; RV32ZVE32F-NEXT: .LBB96_5: # %else11
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: bnez a2, .LBB96_15
; RV32ZVE32F-NEXT: .LBB96_6: # %else14
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: bnez a2, .LBB96_16
; RV32ZVE32F-NEXT: .LBB96_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB96_9
; RV32ZVE32F-NEXT: .LBB96_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa7, 0(a1)
; RV32ZVE32F-NEXT: .LBB96_9: # %else20
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB96_10: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: beqz a2, .LBB96_2
; RV32ZVE32F-NEXT: .LBB96_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: beqz a2, .LBB96_3
; RV32ZVE32F-NEXT: .LBB96_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: beqz a2, .LBB96_4
; RV32ZVE32F-NEXT: .LBB96_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: beqz a2, .LBB96_5
; RV32ZVE32F-NEXT: .LBB96_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: beqz a2, .LBB96_6
; RV32ZVE32F-NEXT: .LBB96_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: beqz a2, .LBB96_7
; RV32ZVE32F-NEXT: .LBB96_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a2)
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: bnez a1, .LBB96_8
; RV32ZVE32F-NEXT: j .LBB96_9
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-NEXT: andi a4, a3, 1
; RV64ZVE32F-NEXT: bnez a4, .LBB96_10
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a4, a3, 2
; RV64ZVE32F-NEXT: bnez a4, .LBB96_11
; RV64ZVE32F-NEXT: .LBB96_2: # %else2
; RV64ZVE32F-NEXT: andi a4, a3, 4
; RV64ZVE32F-NEXT: bnez a4, .LBB96_12
; RV64ZVE32F-NEXT: .LBB96_3: # %else5
; RV64ZVE32F-NEXT: andi a4, a3, 8
; RV64ZVE32F-NEXT: bnez a4, .LBB96_13
; RV64ZVE32F-NEXT: .LBB96_4: # %else8
; RV64ZVE32F-NEXT: andi a4, a3, 16
; RV64ZVE32F-NEXT: bnez a4, .LBB96_14
; RV64ZVE32F-NEXT: .LBB96_5: # %else11
; RV64ZVE32F-NEXT: andi a4, a3, 32
; RV64ZVE32F-NEXT: bnez a4, .LBB96_15
; RV64ZVE32F-NEXT: .LBB96_6: # %else14
; RV64ZVE32F-NEXT: andi a4, a3, 64
; RV64ZVE32F-NEXT: bnez a4, .LBB96_16
; RV64ZVE32F-NEXT: .LBB96_7: # %else17
; RV64ZVE32F-NEXT: andi a3, a3, -128
; RV64ZVE32F-NEXT: beqz a3, .LBB96_9
; RV64ZVE32F-NEXT: .LBB96_8: # %cond.load19
; RV64ZVE32F-NEXT: ld a2, 56(a2)
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: fld fa7, 0(a1)
; RV64ZVE32F-NEXT: .LBB96_9: # %else20
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB96_10: # %cond.load
; RV64ZVE32F-NEXT: ld a4, 0(a2)
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa0, 0(a4)
; RV64ZVE32F-NEXT: andi a4, a3, 2
; RV64ZVE32F-NEXT: beqz a4, .LBB96_2
; RV64ZVE32F-NEXT: .LBB96_11: # %cond.load1
; RV64ZVE32F-NEXT: ld a4, 8(a2)
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa1, 0(a4)
; RV64ZVE32F-NEXT: andi a4, a3, 4
; RV64ZVE32F-NEXT: beqz a4, .LBB96_3
; RV64ZVE32F-NEXT: .LBB96_12: # %cond.load4
; RV64ZVE32F-NEXT: ld a4, 16(a2)
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa2, 0(a4)
; RV64ZVE32F-NEXT: andi a4, a3, 8
; RV64ZVE32F-NEXT: beqz a4, .LBB96_4
; RV64ZVE32F-NEXT: .LBB96_13: # %cond.load7
; RV64ZVE32F-NEXT: ld a4, 24(a2)
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa3, 0(a4)
; RV64ZVE32F-NEXT: andi a4, a3, 16
; RV64ZVE32F-NEXT: beqz a4, .LBB96_5
; RV64ZVE32F-NEXT: .LBB96_14: # %cond.load10
; RV64ZVE32F-NEXT: ld a4, 32(a2)
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa4, 0(a4)
; RV64ZVE32F-NEXT: andi a4, a3, 32
; RV64ZVE32F-NEXT: beqz a4, .LBB96_6
; RV64ZVE32F-NEXT: .LBB96_15: # %cond.load13
; RV64ZVE32F-NEXT: ld a4, 40(a2)
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa5, 0(a4)
; RV64ZVE32F-NEXT: andi a4, a3, 64
; RV64ZVE32F-NEXT: beqz a4, .LBB96_7
; RV64ZVE32F-NEXT: .LBB96_16: # %cond.load16
; RV64ZVE32F-NEXT: ld a4, 48(a2)
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa6, 0(a4)
; RV64ZVE32F-NEXT: andi a3, a3, -128
; RV64ZVE32F-NEXT: bnez a3, .LBB96_8
; RV64ZVE32F-NEXT: j .LBB96_9
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %idxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
ret <8 x double> %v
}
declare <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i8>)
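; Masked gather of <16 x i8> indexed by <16 x i8> offsets from %base.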
define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m, <16 x i8> %passthru) {
; RV32-LABEL: mgather_baseidx_v16i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v12, v8
; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, mu
; RV32-NEXT: vluxei32.v v9, (a0), v12, v0.t
; RV32-NEXT: vmv.v.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v16i8:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64V-NEXT: vsext.vf8 v16, v8
; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, mu
; RV64V-NEXT: vluxei64.v v9, (a0), v16, v0.t
; RV64V-NEXT: vmv.v.v v8, v9
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_v16i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB97_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: .LBB97_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB97_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
; RV64ZVE32F-NEXT: .LBB97_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB97_25
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB97_26
; RV64ZVE32F-NEXT: .LBB97_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB97_8
; RV64ZVE32F-NEXT: .LBB97_7: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 4
; RV64ZVE32F-NEXT: .LBB97_8: # %else11
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB97_10
; RV64ZVE32F-NEXT: # %bb.9: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 5
; RV64ZVE32F-NEXT: .LBB97_10: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB97_27
; RV64ZVE32F-NEXT: # %bb.11: # %else17
; RV64ZVE32F-NEXT: andi a2, a1, 128
; RV64ZVE32F-NEXT: bnez a2, .LBB97_28
; RV64ZVE32F-NEXT: .LBB97_12: # %else20
; RV64ZVE32F-NEXT: andi a2, a1, 256
; RV64ZVE32F-NEXT: bnez a2, .LBB97_29
; RV64ZVE32F-NEXT: .LBB97_13: # %else23
; RV64ZVE32F-NEXT: andi a2, a1, 512
; RV64ZVE32F-NEXT: beqz a2, .LBB97_15
; RV64ZVE32F-NEXT: .LBB97_14: # %cond.load25
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 10, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 9
; RV64ZVE32F-NEXT: .LBB97_15: # %else26
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 1024
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB97_30
; RV64ZVE32F-NEXT: # %bb.16: # %else29
; RV64ZVE32F-NEXT: slli a2, a1, 52
; RV64ZVE32F-NEXT: bltz a2, .LBB97_31
; RV64ZVE32F-NEXT: .LBB97_17: # %else32
; RV64ZVE32F-NEXT: slli a2, a1, 51
; RV64ZVE32F-NEXT: bltz a2, .LBB97_32
; RV64ZVE32F-NEXT: .LBB97_18: # %else35
; RV64ZVE32F-NEXT: slli a2, a1, 50
; RV64ZVE32F-NEXT: bgez a2, .LBB97_20
; RV64ZVE32F-NEXT: .LBB97_19: # %cond.load37
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 14, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 13
; RV64ZVE32F-NEXT: .LBB97_20: # %else38
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: slli a2, a1, 49
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bgez a2, .LBB97_22
; RV64ZVE32F-NEXT: # %bb.21: # %cond.load40
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 15, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 14
; RV64ZVE32F-NEXT: .LBB97_22: # %else41
; RV64ZVE32F-NEXT: lui a2, 1048568
; RV64ZVE32F-NEXT: and a1, a1, a2
; RV64ZVE32F-NEXT: beqz a1, .LBB97_24
; RV64ZVE32F-NEXT: # %bb.23: # %cond.load43
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 15
; RV64ZVE32F-NEXT: .LBB97_24: # %else44
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB97_25: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB97_6
; RV64ZVE32F-NEXT: .LBB97_26: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB97_7
; RV64ZVE32F-NEXT: j .LBB97_8
; RV64ZVE32F-NEXT: .LBB97_27: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 6
; RV64ZVE32F-NEXT: andi a2, a1, 128
; RV64ZVE32F-NEXT: beqz a2, .LBB97_12
; RV64ZVE32F-NEXT: .LBB97_28: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 7
; RV64ZVE32F-NEXT: andi a2, a1, 256
; RV64ZVE32F-NEXT: beqz a2, .LBB97_13
; RV64ZVE32F-NEXT: .LBB97_29: # %cond.load22
; RV64ZVE32F-NEXT: vsetivli zero, 9, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 8
; RV64ZVE32F-NEXT: andi a2, a1, 512
; RV64ZVE32F-NEXT: bnez a2, .LBB97_14
; RV64ZVE32F-NEXT: j .LBB97_15
; RV64ZVE32F-NEXT: .LBB97_30: # %cond.load28
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-NEXT: vsetivli zero, 11, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 10
; RV64ZVE32F-NEXT: slli a2, a1, 52
; RV64ZVE32F-NEXT: bgez a2, .LBB97_17
; RV64ZVE32F-NEXT: .LBB97_31: # %cond.load31
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 12, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 11
; RV64ZVE32F-NEXT: slli a2, a1, 51
; RV64ZVE32F-NEXT: bgez a2, .LBB97_18
; RV64ZVE32F-NEXT: .LBB97_32: # %cond.load34
; RV64ZVE32F-NEXT: vsetivli zero, 13, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 12
; RV64ZVE32F-NEXT: slli a2, a1, 50
; RV64ZVE32F-NEXT: bltz a2, .LBB97_19
; RV64ZVE32F-NEXT: j .LBB97_20
%ptrs = getelementptr inbounds i8, ptr %base, <16 x i8> %idxs
%v = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %ptrs, i32 2, <16 x i1> %m, <16 x i8> %passthru)
ret <16 x i8> %v
}
declare <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr>, i32, <32 x i1>, <32 x i8>)
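; Masked gather of <32 x i8> indexed by <32 x i8> offsets from %base.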
define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m, <32 x i8> %passthru) {
; RV32-LABEL: mgather_baseidx_v32i8:
; RV32: # %bb.0:
; RV32-NEXT: li a1, 32
; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; RV32-NEXT: vsext.vf4 v16, v8
; RV32-NEXT: vsetvli zero, zero, e8, m2, ta, mu
; RV32-NEXT: vluxei32.v v10, (a0), v16, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v32i8:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64V-NEXT: vsext.vf8 v16, v8
; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, mu
; RV64V-NEXT: vmv1r.v v12, v10
; RV64V-NEXT: vluxei64.v v12, (a0), v16, v0.t
; RV64V-NEXT: vsetivli zero, 16, e8, m2, ta, ma
; RV64V-NEXT: vslidedown.vi v10, v10, 16
; RV64V-NEXT: vslidedown.vi v8, v8, 16
; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64V-NEXT: vsext.vf8 v16, v8
; RV64V-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64V-NEXT: vslidedown.vi v0, v0, 2
; RV64V-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; RV64V-NEXT: vluxei64.v v10, (a0), v16, v0.t
; RV64V-NEXT: li a0, 32
; RV64V-NEXT: vsetvli zero, a0, e8, m2, ta, ma
; RV64V-NEXT: vslideup.vi v12, v10, 16
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_v32i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB98_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: .LBB98_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB98_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB98_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB98_49
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB98_50
; RV64ZVE32F-NEXT: .LBB98_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB98_8
; RV64ZVE32F-NEXT: .LBB98_7: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
; RV64ZVE32F-NEXT: .LBB98_8: # %else11
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB98_10
; RV64ZVE32F-NEXT: # %bb.9: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v14, v13, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v14
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 5
; RV64ZVE32F-NEXT: .LBB98_10: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB98_51
; RV64ZVE32F-NEXT: # %bb.11: # %else17
; RV64ZVE32F-NEXT: andi a2, a1, 128
; RV64ZVE32F-NEXT: bnez a2, .LBB98_52
; RV64ZVE32F-NEXT: .LBB98_12: # %else20
; RV64ZVE32F-NEXT: andi a2, a1, 256
; RV64ZVE32F-NEXT: bnez a2, .LBB98_53
; RV64ZVE32F-NEXT: .LBB98_13: # %else23
; RV64ZVE32F-NEXT: andi a2, a1, 512
; RV64ZVE32F-NEXT: beqz a2, .LBB98_15
; RV64ZVE32F-NEXT: .LBB98_14: # %cond.load25
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v13, a2
; RV64ZVE32F-NEXT: vsetivli zero, 10, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v13, 9
; RV64ZVE32F-NEXT: .LBB98_15: # %else26
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 1024
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB98_17
; RV64ZVE32F-NEXT: # %bb.16: # %cond.load28
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
; RV64ZVE32F-NEXT: vsetivli zero, 11, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 10
; RV64ZVE32F-NEXT: .LBB98_17: # %else29
; RV64ZVE32F-NEXT: slli a2, a1, 52
; RV64ZVE32F-NEXT: bgez a2, .LBB98_19
; RV64ZVE32F-NEXT: # %bb.18: # %cond.load31
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 12, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 11
; RV64ZVE32F-NEXT: .LBB98_19: # %else32
; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, ta, ma
; RV64ZVE32F-NEXT: slli a2, a1, 51
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 16
; RV64ZVE32F-NEXT: bgez a2, .LBB98_21
; RV64ZVE32F-NEXT: # %bb.20: # %cond.load34
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 13, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 12
; RV64ZVE32F-NEXT: .LBB98_21: # %else35
; RV64ZVE32F-NEXT: slli a2, a1, 50
; RV64ZVE32F-NEXT: bgez a2, .LBB98_23
; RV64ZVE32F-NEXT: # %bb.22: # %cond.load37
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v13, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 14, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 13
; RV64ZVE32F-NEXT: .LBB98_23: # %else38
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: slli a2, a1, 49
; RV64ZVE32F-NEXT: vslidedown.vi v9, v13, 2
; RV64ZVE32F-NEXT: bltz a2, .LBB98_54
; RV64ZVE32F-NEXT: # %bb.24: # %else41
; RV64ZVE32F-NEXT: slli a2, a1, 48
; RV64ZVE32F-NEXT: bltz a2, .LBB98_55
; RV64ZVE32F-NEXT: .LBB98_25: # %else44
; RV64ZVE32F-NEXT: slli a2, a1, 47
; RV64ZVE32F-NEXT: bltz a2, .LBB98_56
; RV64ZVE32F-NEXT: .LBB98_26: # %else47
; RV64ZVE32F-NEXT: slli a2, a1, 46
; RV64ZVE32F-NEXT: bgez a2, .LBB98_28
; RV64ZVE32F-NEXT: .LBB98_27: # %cond.load49
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 18, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 17
; RV64ZVE32F-NEXT: .LBB98_28: # %else50
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: slli a2, a1, 45
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: bltz a2, .LBB98_57
; RV64ZVE32F-NEXT: # %bb.29: # %else53
; RV64ZVE32F-NEXT: slli a2, a1, 44
; RV64ZVE32F-NEXT: bltz a2, .LBB98_58
; RV64ZVE32F-NEXT: .LBB98_30: # %else56
; RV64ZVE32F-NEXT: slli a2, a1, 43
; RV64ZVE32F-NEXT: bgez a2, .LBB98_32
; RV64ZVE32F-NEXT: .LBB98_31: # %cond.load58
; RV64ZVE32F-NEXT: vsetivli zero, 21, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 20
; RV64ZVE32F-NEXT: .LBB98_32: # %else59
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma
; RV64ZVE32F-NEXT: slli a2, a1, 42
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 8
; RV64ZVE32F-NEXT: bgez a2, .LBB98_34
; RV64ZVE32F-NEXT: # %bb.33: # %cond.load61
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 22, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 21
; RV64ZVE32F-NEXT: .LBB98_34: # %else62
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: slli a2, a1, 41
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT: bltz a2, .LBB98_59
; RV64ZVE32F-NEXT: # %bb.35: # %else65
; RV64ZVE32F-NEXT: slli a2, a1, 40
; RV64ZVE32F-NEXT: bltz a2, .LBB98_60
; RV64ZVE32F-NEXT: .LBB98_36: # %else68
; RV64ZVE32F-NEXT: slli a2, a1, 39
; RV64ZVE32F-NEXT: bltz a2, .LBB98_61
; RV64ZVE32F-NEXT: .LBB98_37: # %else71
; RV64ZVE32F-NEXT: slli a2, a1, 38
; RV64ZVE32F-NEXT: bgez a2, .LBB98_39
; RV64ZVE32F-NEXT: .LBB98_38: # %cond.load73
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 26, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 25
; RV64ZVE32F-NEXT: .LBB98_39: # %else74
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: slli a2, a1, 37
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bltz a2, .LBB98_62
; RV64ZVE32F-NEXT: # %bb.40: # %else77
; RV64ZVE32F-NEXT: slli a2, a1, 36
; RV64ZVE32F-NEXT: bltz a2, .LBB98_63
; RV64ZVE32F-NEXT: .LBB98_41: # %else80
; RV64ZVE32F-NEXT: slli a2, a1, 35
; RV64ZVE32F-NEXT: bltz a2, .LBB98_64
; RV64ZVE32F-NEXT: .LBB98_42: # %else83
; RV64ZVE32F-NEXT: slli a2, a1, 34
; RV64ZVE32F-NEXT: bgez a2, .LBB98_44
; RV64ZVE32F-NEXT: .LBB98_43: # %cond.load85
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 30, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 29
; RV64ZVE32F-NEXT: .LBB98_44: # %else86
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: slli a2, a1, 33
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bgez a2, .LBB98_46
; RV64ZVE32F-NEXT: # %bb.45: # %cond.load88
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 31, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 30
; RV64ZVE32F-NEXT: .LBB98_46: # %else89
; RV64ZVE32F-NEXT: lui a2, 524288
; RV64ZVE32F-NEXT: and a1, a1, a2
; RV64ZVE32F-NEXT: beqz a1, .LBB98_48
; RV64ZVE32F-NEXT: # %bb.47: # %cond.load91
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: li a1, 32
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 31
; RV64ZVE32F-NEXT: .LBB98_48: # %else92
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB98_49: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB98_6
; RV64ZVE32F-NEXT: .LBB98_50: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB98_7
; RV64ZVE32F-NEXT: j .LBB98_8
; RV64ZVE32F-NEXT: .LBB98_51: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 6
; RV64ZVE32F-NEXT: andi a2, a1, 128
; RV64ZVE32F-NEXT: beqz a2, .LBB98_12
; RV64ZVE32F-NEXT: .LBB98_52: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v13, a2
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v13, 7
; RV64ZVE32F-NEXT: andi a2, a1, 256
; RV64ZVE32F-NEXT: beqz a2, .LBB98_13
; RV64ZVE32F-NEXT: .LBB98_53: # %cond.load22
; RV64ZVE32F-NEXT: vsetivli zero, 9, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v13, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v13, 8
; RV64ZVE32F-NEXT: andi a2, a1, 512
; RV64ZVE32F-NEXT: bnez a2, .LBB98_14
; RV64ZVE32F-NEXT: j .LBB98_15
; RV64ZVE32F-NEXT: .LBB98_54: # %cond.load40
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 15, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 14
; RV64ZVE32F-NEXT: slli a2, a1, 48
; RV64ZVE32F-NEXT: bgez a2, .LBB98_25
; RV64ZVE32F-NEXT: .LBB98_55: # %cond.load43
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 15
; RV64ZVE32F-NEXT: slli a2, a1, 47
; RV64ZVE32F-NEXT: bgez a2, .LBB98_26
; RV64ZVE32F-NEXT: .LBB98_56: # %cond.load46
; RV64ZVE32F-NEXT: vsetivli zero, 17, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 16
; RV64ZVE32F-NEXT: slli a2, a1, 46
; RV64ZVE32F-NEXT: bltz a2, .LBB98_27
; RV64ZVE32F-NEXT: j .LBB98_28
; RV64ZVE32F-NEXT: .LBB98_57: # %cond.load52
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
; RV64ZVE32F-NEXT: vsetivli zero, 19, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 18
; RV64ZVE32F-NEXT: slli a2, a1, 44
; RV64ZVE32F-NEXT: bgez a2, .LBB98_30
; RV64ZVE32F-NEXT: .LBB98_58: # %cond.load55
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 20, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 19
; RV64ZVE32F-NEXT: slli a2, a1, 43
; RV64ZVE32F-NEXT: bltz a2, .LBB98_31
; RV64ZVE32F-NEXT: j .LBB98_32
; RV64ZVE32F-NEXT: .LBB98_59: # %cond.load64
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 23, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 22
; RV64ZVE32F-NEXT: slli a2, a1, 40
; RV64ZVE32F-NEXT: bgez a2, .LBB98_36
; RV64ZVE32F-NEXT: .LBB98_60: # %cond.load67
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 24, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 23
; RV64ZVE32F-NEXT: slli a2, a1, 39
; RV64ZVE32F-NEXT: bgez a2, .LBB98_37
; RV64ZVE32F-NEXT: .LBB98_61: # %cond.load70
; RV64ZVE32F-NEXT: vsetivli zero, 25, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 24
; RV64ZVE32F-NEXT: slli a2, a1, 38
; RV64ZVE32F-NEXT: bltz a2, .LBB98_38
; RV64ZVE32F-NEXT: j .LBB98_39
; RV64ZVE32F-NEXT: .LBB98_62: # %cond.load76
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 27, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 26
; RV64ZVE32F-NEXT: slli a2, a1, 36
; RV64ZVE32F-NEXT: bgez a2, .LBB98_41
; RV64ZVE32F-NEXT: .LBB98_63: # %cond.load79
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 28, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 27
; RV64ZVE32F-NEXT: slli a2, a1, 35
; RV64ZVE32F-NEXT: bgez a2, .LBB98_42
; RV64ZVE32F-NEXT: .LBB98_64: # %cond.load82
; RV64ZVE32F-NEXT: vsetivli zero, 29, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 28
; RV64ZVE32F-NEXT: slli a2, a1, 34
; RV64ZVE32F-NEXT: bltz a2, .LBB98_43
; RV64ZVE32F-NEXT: j .LBB98_44
%ptrs = getelementptr inbounds i8, ptr %base, <32 x i8> %idxs
%v = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> %ptrs, i32 2, <32 x i1> %m, <32 x i8> %passthru)
ret <32 x i8> %v
}
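; The pointer vector is a splat-via-gep (all indices are zero), so every lane
; reads from %base and this lowers to a zero-stride load.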
define <4 x i32> @mgather_broadcast_load_unmasked(ptr %base) {
; CHECK-LABEL: mgather_broadcast_load_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vlse32.v v8, (a0), zero
; CHECK-NEXT: ret
%ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> zeroinitializer
%v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
ret <4 x i32> %v
}
; Same as the previous test, but uses an explicit splat instead of a splat-via-gep.
define <4 x i32> @mgather_broadcast_load_unmasked2(ptr %base) {
; CHECK-LABEL: mgather_broadcast_load_unmasked2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vlse32.v v8, (a0), zero
; CHECK-NEXT: ret
%ptrhead = insertelement <4 x ptr> poison, ptr %base, i32 0
%ptrs = shufflevector <4 x ptr> %ptrhead, <4 x ptr> poison, <4 x i32> zeroinitializer
%v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
ret <4 x i32> %v
}
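; Masked variant of the broadcast load; the mask is carried through to the
; zero-stride load.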
define <4 x i32> @mgather_broadcast_load_masked(ptr %base, <4 x i1> %m) {
; CHECK-LABEL: mgather_broadcast_load_masked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vlse32.v v8, (a0), zero, v0.t
; CHECK-NEXT: ret
%ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> zeroinitializer
%v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %m, <4 x i32> poison)
ret <4 x i32> %v
}
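; Consecutive i32 elements starting at %base form a unit-stride access, so
; this folds to a plain contiguous load.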
define <4 x i32> @mgather_unit_stride_load(ptr %base) {
; CHECK-LABEL: mgather_unit_stride_load:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: ret
%ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
ret <4 x i32> %v
}
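; Same unit-stride pattern starting at element 4, so the constant 16 byte
; offset is folded into the base address.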
define <4 x i32> @mgather_unit_stride_load_with_offset(ptr %base) {
; CHECK-LABEL: mgather_unit_stride_load_with_offset:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: ret
%ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
ret <4 x i32> %v
}
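; Unit-stride access should still be recognized when the indices are narrower
; than the pointer (i8 here).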
define <4 x i32> @mgather_unit_stride_load_narrow_idx(ptr %base) {
; CHECK-LABEL: mgather_unit_stride_load_narrow_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: ret
%ptrs = getelementptr inbounds i32, ptr %base, <4 x i8> <i8 0, i8 1, i8 2, i8 3>
%v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
ret <4 x i32> %v
}
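; Unit-stride access should also be recognized when the indices are wider
; than the pointer (i128 here).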
define <4 x i32> @mgather_unit_stride_load_wide_idx(ptr %base) {
; CHECK-LABEL: mgather_unit_stride_load_wide_idx:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: ret
%ptrs = getelementptr inbounds i32, ptr %base, <4 x i128> <i128 0, i128 1, i128 2, i128 3>
%v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
ret <4 x i32> %v
}
; This looks like a strided load at i8 width (the indices wrap), but it isn't one at the index type.
define <4 x i32> @mgather_narrow_edge_case(ptr %base) {
; RV32-LABEL: mgather_narrow_edge_case:
; RV32: # %bb.0:
; RV32-NEXT: li a1, -512
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vmv.v.i v0, 5
; RV32-NEXT: vmv.v.x v8, a1
; RV32-NEXT: vmerge.vim v8, v8, 0, v0
; RV32-NEXT: vluxei32.v v8, (a0), v8
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_narrow_edge_case:
; RV64V: # %bb.0:
; RV64V-NEXT: li a1, -512
; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64V-NEXT: vmv.v.x v8, a1
; RV64V-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; RV64V-NEXT: vmv.v.i v0, 5
; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64V-NEXT: vmerge.vim v10, v8, 0, v0
; RV64V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64V-NEXT: vluxei64.v v8, (a0), v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_narrow_edge_case:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: addi a1, a0, -512
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vlse32.v v8, (a1), zero
; RV64ZVE32F-NEXT: vmv.v.i v0, 5
; RV64ZVE32F-NEXT: vmerge.vxm v8, v8, a0, v0
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i32, ptr %base, <4 x i8> <i8 0, i8 128, i8 0, i8 128>
%v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
ret <4 x i32> %v
}
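; Strided access with only byte alignment: each i16 element has to be
; reassembled from two byte loads.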
define <8 x i16> @mgather_strided_unaligned(ptr %base) {
; RV32-LABEL: mgather_strided_unaligned:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vid.v v8
; RV32-NEXT: vsll.vi v8, v8, 2
; RV32-NEXT: vadd.vx v8, v8, a0
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: lbu a1, 0(a0)
; RV32-NEXT: lbu a0, 1(a0)
; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT: vslidedown.vi v10, v8, 1
; RV32-NEXT: vmv.x.s a2, v10
; RV32-NEXT: lbu a3, 1(a2)
; RV32-NEXT: lbu a2, 0(a2)
; RV32-NEXT: slli a0, a0, 8
; RV32-NEXT: or a0, a0, a1
; RV32-NEXT: slli a3, a3, 8
; RV32-NEXT: or a2, a3, a2
; RV32-NEXT: vslidedown.vi v10, v8, 2
; RV32-NEXT: vmv.x.s a1, v10
; RV32-NEXT: lbu a3, 0(a1)
; RV32-NEXT: lbu a1, 1(a1)
; RV32-NEXT: vslidedown.vi v10, v8, 3
; RV32-NEXT: vmv.x.s a4, v10
; RV32-NEXT: lbu a5, 1(a4)
; RV32-NEXT: lbu a4, 0(a4)
; RV32-NEXT: slli a1, a1, 8
; RV32-NEXT: or a1, a1, a3
; RV32-NEXT: slli a5, a5, 8
; RV32-NEXT: or a4, a5, a4
; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT: vslidedown.vi v10, v8, 4
; RV32-NEXT: vmv.x.s a3, v10
; RV32-NEXT: lbu a5, 0(a3)
; RV32-NEXT: lbu a3, 1(a3)
; RV32-NEXT: vslidedown.vi v10, v8, 5
; RV32-NEXT: vmv.x.s a6, v10
; RV32-NEXT: lbu a7, 1(a6)
; RV32-NEXT: lbu a6, 0(a6)
; RV32-NEXT: slli a3, a3, 8
; RV32-NEXT: or a3, a3, a5
; RV32-NEXT: slli a7, a7, 8
; RV32-NEXT: or a5, a7, a6
; RV32-NEXT: vslidedown.vi v10, v8, 6
; RV32-NEXT: vmv.x.s a6, v10
; RV32-NEXT: lbu a7, 0(a6)
; RV32-NEXT: lbu a6, 1(a6)
; RV32-NEXT: vslidedown.vi v8, v8, 7
; RV32-NEXT: vmv.x.s t0, v8
; RV32-NEXT: lbu t1, 1(t0)
; RV32-NEXT: lbu t0, 0(t0)
; RV32-NEXT: slli a6, a6, 8
; RV32-NEXT: or a6, a6, a7
; RV32-NEXT: slli t1, t1, 8
; RV32-NEXT: or a7, t1, t0
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV32-NEXT: vmv.v.x v8, a0
; RV32-NEXT: vslide1down.vx v8, v8, a2
; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: vslide1down.vx v9, v8, a4
; RV32-NEXT: vmv.v.x v8, a3
; RV32-NEXT: vslide1down.vx v8, v8, a5
; RV32-NEXT: vslide1down.vx v8, v8, a6
; RV32-NEXT: vslide1down.vx v8, v8, a7
; RV32-NEXT: vmv.v.i v0, 15
; RV32-NEXT: vslidedown.vi v8, v9, 4, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_strided_unaligned:
; RV64V: # %bb.0:
; RV64V-NEXT: addi sp, sp, -128
; RV64V-NEXT: .cfi_def_cfa_offset 128
; RV64V-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
; RV64V-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
; RV64V-NEXT: .cfi_offset ra, -8
; RV64V-NEXT: .cfi_offset s0, -16
; RV64V-NEXT: addi s0, sp, 128
; RV64V-NEXT: .cfi_def_cfa s0, 0
; RV64V-NEXT: andi sp, sp, -64
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vid.v v8
; RV64V-NEXT: vsll.vi v8, v8, 2
; RV64V-NEXT: vadd.vx v8, v8, a0
; RV64V-NEXT: vmv.x.s a0, v8
; RV64V-NEXT: lbu a1, 0(a0)
; RV64V-NEXT: lbu a0, 1(a0)
; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64V-NEXT: vslidedown.vi v12, v8, 1
; RV64V-NEXT: vmv.x.s a2, v12
; RV64V-NEXT: lbu a3, 1(a2)
; RV64V-NEXT: lbu a2, 0(a2)
; RV64V-NEXT: slli a0, a0, 8
; RV64V-NEXT: or a0, a0, a1
; RV64V-NEXT: slli a1, a3, 8
; RV64V-NEXT: or a1, a1, a2
; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV64V-NEXT: vslidedown.vi v12, v8, 2
; RV64V-NEXT: vmv.x.s a2, v12
; RV64V-NEXT: lbu a3, 0(a2)
; RV64V-NEXT: lbu a2, 1(a2)
; RV64V-NEXT: vslidedown.vi v12, v8, 3
; RV64V-NEXT: vmv.x.s a4, v12
; RV64V-NEXT: lbu a5, 0(a4)
; RV64V-NEXT: lbu a4, 1(a4)
; RV64V-NEXT: mv a6, sp
; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV64V-NEXT: vse64.v v8, (a6)
; RV64V-NEXT: ld a6, 32(sp)
; RV64V-NEXT: slli a2, a2, 8
; RV64V-NEXT: or a2, a2, a3
; RV64V-NEXT: slli a4, a4, 8
; RV64V-NEXT: lbu a3, 1(a6)
; RV64V-NEXT: ld a7, 40(sp)
; RV64V-NEXT: lbu a6, 0(a6)
; RV64V-NEXT: or a4, a4, a5
; RV64V-NEXT: slli a3, a3, 8
; RV64V-NEXT: lbu a5, 1(a7)
; RV64V-NEXT: or a3, a3, a6
; RV64V-NEXT: lbu a6, 0(a7)
; RV64V-NEXT: ld a7, 48(sp)
; RV64V-NEXT: slli a5, a5, 8
; RV64V-NEXT: ld t0, 56(sp)
; RV64V-NEXT: or a5, a5, a6
; RV64V-NEXT: lbu a6, 1(a7)
; RV64V-NEXT: lbu a7, 0(a7)
; RV64V-NEXT: lbu t1, 1(t0)
; RV64V-NEXT: lbu t0, 0(t0)
; RV64V-NEXT: slli a6, a6, 8
; RV64V-NEXT: or a6, a6, a7
; RV64V-NEXT: slli t1, t1, 8
; RV64V-NEXT: or a7, t1, t0
; RV64V-NEXT: vmv.v.x v8, a0
; RV64V-NEXT: vslide1down.vx v8, v8, a1
; RV64V-NEXT: vslide1down.vx v8, v8, a2
; RV64V-NEXT: vslide1down.vx v9, v8, a4
; RV64V-NEXT: vmv.v.x v8, a3
; RV64V-NEXT: vslide1down.vx v8, v8, a5
; RV64V-NEXT: vslide1down.vx v8, v8, a6
; RV64V-NEXT: vslide1down.vx v8, v8, a7
; RV64V-NEXT: vmv.v.i v0, 15
; RV64V-NEXT: vslidedown.vi v8, v9, 4, v0.t
; RV64V-NEXT: addi sp, s0, -128
; RV64V-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
; RV64V-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
; RV64V-NEXT: addi sp, sp, 128
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_strided_unaligned:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: lbu a1, 1(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a0)
; RV64ZVE32F-NEXT: lbu a3, 5(a0)
; RV64ZVE32F-NEXT: lbu a4, 4(a0)
; RV64ZVE32F-NEXT: slli a1, a1, 8
; RV64ZVE32F-NEXT: or a1, a1, a2
; RV64ZVE32F-NEXT: slli a3, a3, 8
; RV64ZVE32F-NEXT: or a3, a3, a4
; RV64ZVE32F-NEXT: lbu a2, 9(a0)
; RV64ZVE32F-NEXT: lbu a4, 8(a0)
; RV64ZVE32F-NEXT: lbu a5, 13(a0)
; RV64ZVE32F-NEXT: lbu a6, 12(a0)
; RV64ZVE32F-NEXT: slli a2, a2, 8
; RV64ZVE32F-NEXT: or a2, a2, a4
; RV64ZVE32F-NEXT: slli a5, a5, 8
; RV64ZVE32F-NEXT: or a4, a5, a6
; RV64ZVE32F-NEXT: lbu a5, 17(a0)
; RV64ZVE32F-NEXT: lbu a6, 16(a0)
; RV64ZVE32F-NEXT: lbu a7, 21(a0)
; RV64ZVE32F-NEXT: lbu t0, 20(a0)
; RV64ZVE32F-NEXT: slli a5, a5, 8
; RV64ZVE32F-NEXT: or a5, a5, a6
; RV64ZVE32F-NEXT: slli a7, a7, 8
; RV64ZVE32F-NEXT: or a6, a7, t0
; RV64ZVE32F-NEXT: lbu a7, 25(a0)
; RV64ZVE32F-NEXT: lbu t0, 24(a0)
; RV64ZVE32F-NEXT: lbu t1, 29(a0)
; RV64ZVE32F-NEXT: lbu a0, 28(a0)
; RV64ZVE32F-NEXT: slli a7, a7, 8
; RV64ZVE32F-NEXT: or a7, a7, t0
; RV64ZVE32F-NEXT: slli t1, t1, 8
; RV64ZVE32F-NEXT: or a0, t1, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV64ZVE32F-NEXT: vmv.v.x v8, a1
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
; RV64ZVE32F-NEXT: vslide1down.vx v9, v8, a4
; RV64ZVE32F-NEXT: vmv.v.x v8, a5
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0
; RV64ZVE32F-NEXT: vmv.v.i v0, 15
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 1, <8 x i1> splat (i1 true), <8 x i16> poison)
ret <8 x i16> %v
}
; TODO: Recognize as strided load with SEW=32
define <8 x i16> @mgather_strided_2xSEW(ptr %base) {
; RV32-LABEL: mgather_strided_2xSEW:
; RV32: # %bb.0:
; RV32-NEXT: li a1, 8
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vlse32.v v8, (a0), a1
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_strided_2xSEW:
; RV64V: # %bb.0:
; RV64V-NEXT: li a1, 8
; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64V-NEXT: vlse32.v v8, (a0), a1
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_strided_2xSEW:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: lh a1, 2(a0)
; RV64ZVE32F-NEXT: lh a2, 8(a0)
; RV64ZVE32F-NEXT: lh a3, 10(a0)
; RV64ZVE32F-NEXT: lh a4, 18(a0)
; RV64ZVE32F-NEXT: lh a5, 24(a0)
; RV64ZVE32F-NEXT: lh a6, 26(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero
; RV64ZVE32F-NEXT: addi a0, a0, 16
; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3
; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32F-NEXT: vmv.v.i v0, 15
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13>
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison)
ret <8 x i16> %v
}
; TODO: Recognize as strided load with SEW=32
define <8 x i16> @mgather_strided_2xSEW_with_offset(ptr %base) {
; RV32-LABEL: mgather_strided_2xSEW_with_offset:
; RV32: # %bb.0:
; RV32-NEXT: addi a0, a0, 4
; RV32-NEXT: li a1, 8
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vlse32.v v8, (a0), a1
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_strided_2xSEW_with_offset:
; RV64V: # %bb.0:
; RV64V-NEXT: addi a0, a0, 4
; RV64V-NEXT: li a1, 8
; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64V-NEXT: vlse32.v v8, (a0), a1
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_strided_2xSEW_with_offset:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: addi a1, a0, 4
; RV64ZVE32F-NEXT: lh a2, 6(a0)
; RV64ZVE32F-NEXT: lh a3, 12(a0)
; RV64ZVE32F-NEXT: lh a4, 14(a0)
; RV64ZVE32F-NEXT: lh a5, 22(a0)
; RV64ZVE32F-NEXT: lh a6, 28(a0)
; RV64ZVE32F-NEXT: lh a7, 30(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV64ZVE32F-NEXT: vlse16.v v8, (a1), zero
; RV64ZVE32F-NEXT: addi a0, a0, 20
; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4
; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7
; RV64ZVE32F-NEXT: vmv.v.i v0, 15
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 2, i64 3, i64 6, i64 7, i64 10, i64 11, i64 14, i64 15>
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison)
ret <8 x i16> %v
}
; TODO: Recognize as strided load with SEW=32
define <8 x i16> @mgather_reverse_unit_strided_2xSEW(ptr %base) {
; RV32-LABEL: mgather_reverse_unit_strided_2xSEW:
; RV32: # %bb.0:
; RV32-NEXT: addi a0, a0, 28
; RV32-NEXT: li a1, -4
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vlse32.v v8, (a0), a1
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_reverse_unit_strided_2xSEW:
; RV64V: # %bb.0:
; RV64V-NEXT: addi a0, a0, 28
; RV64V-NEXT: li a1, -4
; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64V-NEXT: vlse32.v v8, (a0), a1
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_reverse_unit_strided_2xSEW:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: addi a1, a0, 28
; RV64ZVE32F-NEXT: lh a2, 30(a0)
; RV64ZVE32F-NEXT: lh a3, 24(a0)
; RV64ZVE32F-NEXT: lh a4, 26(a0)
; RV64ZVE32F-NEXT: lh a5, 22(a0)
; RV64ZVE32F-NEXT: lh a6, 16(a0)
; RV64ZVE32F-NEXT: lh a7, 18(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV64ZVE32F-NEXT: vlse16.v v8, (a1), zero
; RV64ZVE32F-NEXT: addi a0, a0, 20
; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4
; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7
; RV64ZVE32F-NEXT: vmv.v.i v0, 15
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 14, i64 15, i64 12, i64 13, i64 10, i64 11, i64 8, i64 9>
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison)
ret <8 x i16> %v
}
; TODO: Recognize as strided load with SEW=32
define <8 x i16> @mgather_reverse_strided_2xSEW(ptr %base) {
; RV32-LABEL: mgather_reverse_strided_2xSEW:
; RV32: # %bb.0:
; RV32-NEXT: addi a0, a0, 28
; RV32-NEXT: li a1, -8
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vlse32.v v8, (a0), a1
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_reverse_strided_2xSEW:
; RV64V: # %bb.0:
; RV64V-NEXT: addi a0, a0, 28
; RV64V-NEXT: li a1, -8
; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64V-NEXT: vlse32.v v8, (a0), a1
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_reverse_strided_2xSEW:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: addi a1, a0, 28
; RV64ZVE32F-NEXT: lh a2, 30(a0)
; RV64ZVE32F-NEXT: lh a3, 20(a0)
; RV64ZVE32F-NEXT: lh a4, 22(a0)
; RV64ZVE32F-NEXT: lh a5, 14(a0)
; RV64ZVE32F-NEXT: lh a6, 4(a0)
; RV64ZVE32F-NEXT: lh a7, 6(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV64ZVE32F-NEXT: vlse16.v v8, (a1), zero
; RV64ZVE32F-NEXT: addi a0, a0, 12
; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4
; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7
; RV64ZVE32F-NEXT: vmv.v.i v0, 15
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 14, i64 15, i64 10, i64 11, i64 6, i64 7, i64 2, i64 3>
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison)
ret <8 x i16> %v
}
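; Each pair of i16 elements is consecutive and i32-aligned, but the pairs have
; no common stride, so with full V this becomes an e32 gather with small
; constant offsets.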
define <8 x i16> @mgather_gather_2xSEW(ptr %base) {
; RV32-LABEL: mgather_gather_2xSEW:
; RV32: # %bb.0:
; RV32-NEXT: lui a1, 16513
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vmv.s.x v9, a1
; RV32-NEXT: vluxei8.v v8, (a0), v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_gather_2xSEW:
; RV64V: # %bb.0:
; RV64V-NEXT: lui a1, 16513
; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64V-NEXT: vmv.s.x v9, a1
; RV64V-NEXT: vluxei8.v v8, (a0), v9
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_gather_2xSEW:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: lh a1, 2(a0)
; RV64ZVE32F-NEXT: lh a2, 16(a0)
; RV64ZVE32F-NEXT: lh a3, 18(a0)
; RV64ZVE32F-NEXT: lh a4, 10(a0)
; RV64ZVE32F-NEXT: lh a5, 4(a0)
; RV64ZVE32F-NEXT: lh a6, 6(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero
; RV64ZVE32F-NEXT: addi a0, a0, 8
; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3
; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32F-NEXT: vmv.v.i v0, 15
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 2, i32 3>
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison)
ret <8 x i16> %v
}
; The base pointer isn't sufficiently aligned to form a gather with e32.
define <8 x i16> @mgather_gather_2xSEW_unaligned(ptr %base) {
; RV32-LABEL: mgather_gather_2xSEW_unaligned:
; RV32: # %bb.0:
; RV32-NEXT: lui a1, %hi(.LCPI113_0)
; RV32-NEXT: addi a1, a1, %lo(.LCPI113_0)
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vle8.v v9, (a1)
; RV32-NEXT: vluxei8.v v8, (a0), v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_gather_2xSEW_unaligned:
; RV64V: # %bb.0:
; RV64V-NEXT: lui a1, %hi(.LCPI113_0)
; RV64V-NEXT: addi a1, a1, %lo(.LCPI113_0)
; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64V-NEXT: vle8.v v9, (a1)
; RV64V-NEXT: vluxei8.v v8, (a0), v9
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_gather_2xSEW_unaligned:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: lh a1, 2(a0)
; RV64ZVE32F-NEXT: lh a2, 18(a0)
; RV64ZVE32F-NEXT: lh a3, 20(a0)
; RV64ZVE32F-NEXT: lh a4, 10(a0)
; RV64ZVE32F-NEXT: lh a5, 4(a0)
; RV64ZVE32F-NEXT: lh a6, 6(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero
; RV64ZVE32F-NEXT: addi a0, a0, 8
; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3
; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32F-NEXT: vmv.v.i v0, 15
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 1, i32 9, i32 10, i32 4, i32 5, i32 2, i32 3>
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> splat (i1 true), <8 x i16> poison)
ret <8 x i16> %v
}
; Despite sufficient starting alignment, the index values aren't properly
; aligned for e32.
define <8 x i16> @mgather_gather_2xSEW_unaligned2(ptr %base) {
; RV32-LABEL: mgather_gather_2xSEW_unaligned2:
; RV32: # %bb.0:
; RV32-NEXT: lui a1, %hi(.LCPI114_0)
; RV32-NEXT: addi a1, a1, %lo(.LCPI114_0)
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vle8.v v9, (a1)
; RV32-NEXT: vluxei8.v v8, (a0), v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_gather_2xSEW_unaligned2:
; RV64V: # %bb.0:
; RV64V-NEXT: lui a1, %hi(.LCPI114_0)
; RV64V-NEXT: addi a1, a1, %lo(.LCPI114_0)
; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64V-NEXT: vle8.v v9, (a1)
; RV64V-NEXT: vluxei8.v v8, (a0), v9
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_gather_2xSEW_unaligned2:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: addi a1, a0, 2
; RV64ZVE32F-NEXT: lh a2, 4(a0)
; RV64ZVE32F-NEXT: lh a3, 18(a0)
; RV64ZVE32F-NEXT: lh a4, 20(a0)
; RV64ZVE32F-NEXT: lh a5, 10(a0)
; RV64ZVE32F-NEXT: lh a6, 6(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV64ZVE32F-NEXT: vlse16.v v8, (a1), zero
; RV64ZVE32F-NEXT: addi a0, a0, 8
; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4
; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32F-NEXT: vmv.v.i v0, 15
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 1, i32 2, i32 9, i32 10, i32 4, i32 5, i32 2, i32 3>
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison)
ret <8 x i16> %v
}
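; Two groups of four consecutive i16 elements with 8 byte alignment: with e64
; vectors this becomes a stride-16 e64 load, otherwise an e32 gather or scalar
; loads.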
define <8 x i16> @mgather_gather_4xSEW(ptr %base) {
; RV32V-LABEL: mgather_gather_4xSEW:
; RV32V: # %bb.0:
; RV32V-NEXT: li a1, 16
; RV32V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32V-NEXT: vlse64.v v8, (a0), a1
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_gather_4xSEW:
; RV64V: # %bb.0:
; RV64V-NEXT: li a1, 16
; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64V-NEXT: vlse64.v v8, (a0), a1
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_gather_4xSEW:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: lui a1, 82176
; RV32ZVE32F-NEXT: addi a1, a1, 1024
; RV32ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.s.x v9, a1
; RV32ZVE32F-NEXT: vluxei8.v v8, (a0), v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_gather_4xSEW:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: lh a1, 2(a0)
; RV64ZVE32F-NEXT: lh a2, 4(a0)
; RV64ZVE32F-NEXT: lh a3, 6(a0)
; RV64ZVE32F-NEXT: lh a4, 18(a0)
; RV64ZVE32F-NEXT: lh a5, 20(a0)
; RV64ZVE32F-NEXT: lh a6, 22(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero
; RV64ZVE32F-NEXT: addi a0, a0, 16
; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3
; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32F-NEXT: vmv.v.i v0, 15
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> splat (i1 true), <8 x i16> poison)
ret <8 x i16> %v
}
; This is a case where we'd be able to do 4xSEW if we had proper alignment,
; but we only have sufficient alignment for 2xSEW.
define <8 x i16> @mgather_gather_4xSEW_partial_align(ptr %base) {
; RV32-LABEL: mgather_gather_4xSEW_partial_align:
; RV32: # %bb.0:
; RV32-NEXT: lui a1, 82176
; RV32-NEXT: addi a1, a1, 1024
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vmv.s.x v9, a1
; RV32-NEXT: vluxei8.v v8, (a0), v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_gather_4xSEW_partial_align:
; RV64V: # %bb.0:
; RV64V-NEXT: lui a1, 82176
; RV64V-NEXT: addi a1, a1, 1024
; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64V-NEXT: vmv.s.x v9, a1
; RV64V-NEXT: vluxei8.v v8, (a0), v9
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_gather_4xSEW_partial_align:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: lh a1, 2(a0)
; RV64ZVE32F-NEXT: lh a2, 4(a0)
; RV64ZVE32F-NEXT: lh a3, 6(a0)
; RV64ZVE32F-NEXT: lh a4, 18(a0)
; RV64ZVE32F-NEXT: lh a5, 20(a0)
; RV64ZVE32F-NEXT: lh a6, 22(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero
; RV64ZVE32F-NEXT: addi a0, a0, 16
; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3
; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32F-NEXT: vmv.v.i v0, 15
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison)
ret <8 x i16> %v
}
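; Fully reversed unit-stride access: load from the last element with a
; negative stride.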
define <8 x i16> @mgather_shuffle_reverse(ptr %base) {
; CHECK-LABEL: mgather_shuffle_reverse:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a0, a0, 14
; CHECK-NEXT: li a1, -2
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vlse16.v v8, (a0), a1
; CHECK-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison)
ret <8 x i16> %v
}
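; The indices rotate a contiguous block by four elements, so a contiguous load
; plus slides suffices.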
define <8 x i16> @mgather_shuffle_rotate(ptr %base) {
; RV32-LABEL: mgather_shuffle_rotate:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vle16.v v9, (a0)
; RV32-NEXT: vslidedown.vi v8, v9, 4
; RV32-NEXT: vslideup.vi v8, v9, 4
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_shuffle_rotate:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64V-NEXT: vle16.v v9, (a0)
; RV64V-NEXT: vslidedown.vi v8, v9, 4
; RV64V-NEXT: vslideup.vi v8, v9, 4
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_shuffle_rotate:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: lh a1, 10(a0)
; RV64ZVE32F-NEXT: lh a2, 12(a0)
; RV64ZVE32F-NEXT: lh a3, 14(a0)
; RV64ZVE32F-NEXT: lh a4, 2(a0)
; RV64ZVE32F-NEXT: lh a5, 4(a0)
; RV64ZVE32F-NEXT: lh a6, 6(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero
; RV64ZVE32F-NEXT: addi a0, a0, 8
; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a4
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a1
; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a2
; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a3
; RV64ZVE32F-NEXT: vmv.v.i v0, 15
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 4, i64 5, i64 6, i64 7, i64 0, i64 1, i64 2, i64 3>
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison)
ret <8 x i16> %v
}
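; A general permutation of a contiguous block: load the block once and apply a
; vrgather.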
define <8 x i16> @mgather_shuffle_vrgather(ptr %base) {
; RV32-LABEL: mgather_shuffle_vrgather:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vle16.v v9, (a0)
; RV32-NEXT: lui a0, %hi(.LCPI119_0)
; RV32-NEXT: addi a0, a0, %lo(.LCPI119_0)
; RV32-NEXT: vle16.v v10, (a0)
; RV32-NEXT: vrgather.vv v8, v9, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_shuffle_vrgather:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64V-NEXT: vle16.v v9, (a0)
; RV64V-NEXT: lui a0, %hi(.LCPI119_0)
; RV64V-NEXT: addi a0, a0, %lo(.LCPI119_0)
; RV64V-NEXT: vle16.v v10, (a0)
; RV64V-NEXT: vrgather.vv v8, v9, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_shuffle_vrgather:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: lh a1, 4(a0)
; RV64ZVE32F-NEXT: lh a2, 6(a0)
; RV64ZVE32F-NEXT: lh a3, 2(a0)
; RV64ZVE32F-NEXT: lh a4, 10(a0)
; RV64ZVE32F-NEXT: lh a5, 12(a0)
; RV64ZVE32F-NEXT: lh a6, 14(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero
; RV64ZVE32F-NEXT: addi a0, a0, 8
; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3
; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6
; RV64ZVE32F-NEXT: vmv.v.i v0, 15
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 0, i64 2, i64 3, i64 1, i64 4, i64 5, i64 6, i64 7>
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison)
ret <8 x i16> %v
}
; v32i64 is not a legal type, so make sure we don't try to combine the mgather
; to a vlse intrinsic until it is legalized and split.
define <32 x i64> @mgather_strided_split(ptr %base) {
; RV32V-LABEL: mgather_strided_split:
; RV32V: # %bb.0:
; RV32V-NEXT: li a1, 16
; RV32V-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32V-NEXT: vlse64.v v8, (a0), a1
; RV32V-NEXT: addi a0, a0, 256
; RV32V-NEXT: vlse64.v v16, (a0), a1
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_strided_split:
; RV64V: # %bb.0:
; RV64V-NEXT: li a1, 16
; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64V-NEXT: vlse64.v v8, (a0), a1
; RV64V-NEXT: addi a0, a0, 256
; RV64V-NEXT: vlse64.v v16, (a0), a1
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_strided_split:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: addi sp, sp, -512
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 512
; RV32ZVE32F-NEXT: sw ra, 508(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s0, 504(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s2, 500(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s3, 496(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s4, 492(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s5, 488(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s6, 484(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s7, 480(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s8, 476(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s9, 472(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s10, 468(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s11, 464(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: .cfi_offset ra, -4
; RV32ZVE32F-NEXT: .cfi_offset s0, -8
; RV32ZVE32F-NEXT: .cfi_offset s2, -12
; RV32ZVE32F-NEXT: .cfi_offset s3, -16
; RV32ZVE32F-NEXT: .cfi_offset s4, -20
; RV32ZVE32F-NEXT: .cfi_offset s5, -24
; RV32ZVE32F-NEXT: .cfi_offset s6, -28
; RV32ZVE32F-NEXT: .cfi_offset s7, -32
; RV32ZVE32F-NEXT: .cfi_offset s8, -36
; RV32ZVE32F-NEXT: .cfi_offset s9, -40
; RV32ZVE32F-NEXT: .cfi_offset s10, -44
; RV32ZVE32F-NEXT: .cfi_offset s11, -48
; RV32ZVE32F-NEXT: addi s0, sp, 512
; RV32ZVE32F-NEXT: .cfi_def_cfa s0, 0
; RV32ZVE32F-NEXT: andi sp, sp, -128
; RV32ZVE32F-NEXT: li a2, 32
; RV32ZVE32F-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32ZVE32F-NEXT: vid.v v8
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 4
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: lw a3, 0(a1)
; RV32ZVE32F-NEXT: sw a3, 216(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a1, 4(a1)
; RV32ZVE32F-NEXT: sw a1, 208(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v16
; RV32ZVE32F-NEXT: lw a3, 0(a1)
; RV32ZVE32F-NEXT: sw a3, 252(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a1, 4(a1)
; RV32ZVE32F-NEXT: sw a1, 248(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a1, v16
; RV32ZVE32F-NEXT: lw a3, 0(a1)
; RV32ZVE32F-NEXT: sw a3, 244(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a1, 4(a1)
; RV32ZVE32F-NEXT: sw a1, 236(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v16
; RV32ZVE32F-NEXT: lw a3, 0(a1)
; RV32ZVE32F-NEXT: sw a3, 228(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a1, 4(a1)
; RV32ZVE32F-NEXT: sw a1, 220(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a1, v16
; RV32ZVE32F-NEXT: lw a3, 0(a1)
; RV32ZVE32F-NEXT: sw a3, 240(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a1, 4(a1)
; RV32ZVE32F-NEXT: sw a1, 232(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a1, v16
; RV32ZVE32F-NEXT: lw a3, 0(a1)
; RV32ZVE32F-NEXT: sw a3, 224(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a1, 4(a1)
; RV32ZVE32F-NEXT: sw a1, 212(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a1, v16
; RV32ZVE32F-NEXT: lw a3, 0(a1)
; RV32ZVE32F-NEXT: sw a3, 204(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a1, 4(a1)
; RV32ZVE32F-NEXT: sw a1, 200(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: vslidedown.vi v16, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v16
; RV32ZVE32F-NEXT: lw a3, 0(a1)
; RV32ZVE32F-NEXT: sw a3, 196(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a1, 4(a1)
; RV32ZVE32F-NEXT: sw a1, 192(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: addi a1, sp, 256
; RV32ZVE32F-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; RV32ZVE32F-NEXT: vse32.v v8, (a1)
; RV32ZVE32F-NEXT: lw a1, 288(sp)
; RV32ZVE32F-NEXT: lw a2, 292(sp)
; RV32ZVE32F-NEXT: lw a3, 0(a1)
; RV32ZVE32F-NEXT: sw a3, 188(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a1, 4(a1)
; RV32ZVE32F-NEXT: sw a1, 184(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a1, 296(sp)
; RV32ZVE32F-NEXT: lw a3, 0(a2)
; RV32ZVE32F-NEXT: sw a3, 180(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a2, 4(a2)
; RV32ZVE32F-NEXT: sw a2, 176(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a2, 300(sp)
; RV32ZVE32F-NEXT: lw a3, 0(a1)
; RV32ZVE32F-NEXT: sw a3, 172(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a1, 4(a1)
; RV32ZVE32F-NEXT: sw a1, 168(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a1, 304(sp)
; RV32ZVE32F-NEXT: lw a3, 0(a2)
; RV32ZVE32F-NEXT: sw a3, 164(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a2, 4(a2)
; RV32ZVE32F-NEXT: sw a2, 160(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a2, 308(sp)
; RV32ZVE32F-NEXT: lw a3, 0(a1)
; RV32ZVE32F-NEXT: sw a3, 156(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a1, 4(a1)
; RV32ZVE32F-NEXT: sw a1, 152(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a1, 312(sp)
; RV32ZVE32F-NEXT: lw a3, 0(a2)
; RV32ZVE32F-NEXT: sw a3, 148(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a2, 4(a2)
; RV32ZVE32F-NEXT: sw a2, 144(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a2, 316(sp)
; RV32ZVE32F-NEXT: lw a3, 0(a1)
; RV32ZVE32F-NEXT: sw a3, 140(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a1, 4(a1)
; RV32ZVE32F-NEXT: sw a1, 136(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a1, 320(sp)
; RV32ZVE32F-NEXT: lw a3, 0(a2)
; RV32ZVE32F-NEXT: sw a3, 132(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a2, 4(a2)
; RV32ZVE32F-NEXT: sw a2, 128(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a2, 324(sp)
; RV32ZVE32F-NEXT: lw a3, 0(a1)
; RV32ZVE32F-NEXT: sw a3, 124(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a1, 4(a1)
; RV32ZVE32F-NEXT: sw a1, 120(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a1, 328(sp)
; RV32ZVE32F-NEXT: lw a3, 0(a2)
; RV32ZVE32F-NEXT: sw a3, 116(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a2, 4(a2)
; RV32ZVE32F-NEXT: sw a2, 112(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a2, 332(sp)
; RV32ZVE32F-NEXT: lw a3, 0(a1)
; RV32ZVE32F-NEXT: sw a3, 104(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw ra, 4(a1)
; RV32ZVE32F-NEXT: lw a1, 336(sp)
; RV32ZVE32F-NEXT: lw s10, 0(a2)
; RV32ZVE32F-NEXT: lw s8, 4(a2)
; RV32ZVE32F-NEXT: lw a2, 340(sp)
; RV32ZVE32F-NEXT: lw s6, 0(a1)
; RV32ZVE32F-NEXT: lw s4, 4(a1)
; RV32ZVE32F-NEXT: lw a4, 344(sp)
; RV32ZVE32F-NEXT: lw s2, 0(a2)
; RV32ZVE32F-NEXT: lw t5, 4(a2)
; RV32ZVE32F-NEXT: lw a2, 348(sp)
; RV32ZVE32F-NEXT: lw t3, 0(a4)
; RV32ZVE32F-NEXT: lw t2, 4(a4)
; RV32ZVE32F-NEXT: lw a4, 352(sp)
; RV32ZVE32F-NEXT: lw t0, 0(a2)
; RV32ZVE32F-NEXT: lw a7, 4(a2)
; RV32ZVE32F-NEXT: lw a2, 356(sp)
; RV32ZVE32F-NEXT: lw a6, 0(a4)
; RV32ZVE32F-NEXT: lw a5, 4(a4)
; RV32ZVE32F-NEXT: lw a4, 360(sp)
; RV32ZVE32F-NEXT: lw a1, 0(a2)
; RV32ZVE32F-NEXT: sw a1, 108(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a1, 4(a2)
; RV32ZVE32F-NEXT: sw a1, 100(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: lw a2, 364(sp)
; RV32ZVE32F-NEXT: lw s11, 0(a4)
; RV32ZVE32F-NEXT: lw s9, 4(a4)
; RV32ZVE32F-NEXT: lw a1, 368(sp)
; RV32ZVE32F-NEXT: lw s7, 0(a2)
; RV32ZVE32F-NEXT: lw s5, 4(a2)
; RV32ZVE32F-NEXT: lw a3, 372(sp)
; RV32ZVE32F-NEXT: lw s3, 0(a1)
; RV32ZVE32F-NEXT: lw t6, 4(a1)
; RV32ZVE32F-NEXT: lw a2, 376(sp)
; RV32ZVE32F-NEXT: lw t4, 0(a3)
; RV32ZVE32F-NEXT: lw a1, 380(sp)
; RV32ZVE32F-NEXT: lw t1, 4(a3)
; RV32ZVE32F-NEXT: lw a4, 0(a2)
; RV32ZVE32F-NEXT: lw a3, 4(a2)
; RV32ZVE32F-NEXT: lw a2, 0(a1)
; RV32ZVE32F-NEXT: lw a1, 4(a1)
; RV32ZVE32F-NEXT: sw a5, 196(a0)
; RV32ZVE32F-NEXT: sw a6, 192(a0)
; RV32ZVE32F-NEXT: sw a7, 188(a0)
; RV32ZVE32F-NEXT: sw t0, 184(a0)
; RV32ZVE32F-NEXT: sw t2, 180(a0)
; RV32ZVE32F-NEXT: sw t3, 176(a0)
; RV32ZVE32F-NEXT: sw t5, 172(a0)
; RV32ZVE32F-NEXT: sw s2, 168(a0)
; RV32ZVE32F-NEXT: sw s4, 164(a0)
; RV32ZVE32F-NEXT: sw s6, 160(a0)
; RV32ZVE32F-NEXT: sw s8, 156(a0)
; RV32ZVE32F-NEXT: sw s10, 152(a0)
; RV32ZVE32F-NEXT: sw ra, 148(a0)
; RV32ZVE32F-NEXT: lw a5, 104(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a5, 144(a0)
; RV32ZVE32F-NEXT: lw a5, 112(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a5, 140(a0)
; RV32ZVE32F-NEXT: lw a5, 116(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a5, 136(a0)
; RV32ZVE32F-NEXT: lw a5, 120(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a5, 132(a0)
; RV32ZVE32F-NEXT: lw a5, 124(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a5, 128(a0)
; RV32ZVE32F-NEXT: lw a5, 128(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a5, 124(a0)
; RV32ZVE32F-NEXT: lw a5, 132(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a5, 120(a0)
; RV32ZVE32F-NEXT: lw a5, 136(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a5, 116(a0)
; RV32ZVE32F-NEXT: lw a5, 140(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a5, 112(a0)
; RV32ZVE32F-NEXT: lw a5, 144(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a5, 108(a0)
; RV32ZVE32F-NEXT: lw a5, 148(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a5, 104(a0)
; RV32ZVE32F-NEXT: lw a5, 152(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a5, 100(a0)
; RV32ZVE32F-NEXT: lw a5, 156(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a5, 96(a0)
; RV32ZVE32F-NEXT: lw a5, 160(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a5, 92(a0)
; RV32ZVE32F-NEXT: lw a5, 164(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a5, 88(a0)
; RV32ZVE32F-NEXT: lw a5, 168(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a5, 84(a0)
; RV32ZVE32F-NEXT: lw a5, 172(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a5, 80(a0)
; RV32ZVE32F-NEXT: lw a5, 176(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a5, 76(a0)
; RV32ZVE32F-NEXT: lw a5, 180(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a5, 72(a0)
; RV32ZVE32F-NEXT: lw a5, 184(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a5, 68(a0)
; RV32ZVE32F-NEXT: lw a5, 188(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a5, 64(a0)
; RV32ZVE32F-NEXT: lw a5, 208(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a5, 4(a0)
; RV32ZVE32F-NEXT: lw a5, 216(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a5, 0(a0)
; RV32ZVE32F-NEXT: sw a1, 252(a0)
; RV32ZVE32F-NEXT: sw a2, 248(a0)
; RV32ZVE32F-NEXT: sw a3, 244(a0)
; RV32ZVE32F-NEXT: sw a4, 240(a0)
; RV32ZVE32F-NEXT: sw t1, 236(a0)
; RV32ZVE32F-NEXT: sw t4, 232(a0)
; RV32ZVE32F-NEXT: sw t6, 228(a0)
; RV32ZVE32F-NEXT: sw s3, 224(a0)
; RV32ZVE32F-NEXT: sw s5, 220(a0)
; RV32ZVE32F-NEXT: sw s7, 216(a0)
; RV32ZVE32F-NEXT: sw s9, 212(a0)
; RV32ZVE32F-NEXT: sw s11, 208(a0)
; RV32ZVE32F-NEXT: lw a1, 100(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a1, 204(a0)
; RV32ZVE32F-NEXT: lw a1, 108(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a1, 200(a0)
; RV32ZVE32F-NEXT: lw a1, 220(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a1, 28(a0)
; RV32ZVE32F-NEXT: lw a1, 228(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a1, 24(a0)
; RV32ZVE32F-NEXT: lw a1, 236(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a1, 20(a0)
; RV32ZVE32F-NEXT: lw a1, 244(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a1, 16(a0)
; RV32ZVE32F-NEXT: lw a1, 248(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a1, 12(a0)
; RV32ZVE32F-NEXT: lw a1, 252(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a1, 8(a0)
; RV32ZVE32F-NEXT: lw a1, 192(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a1, 60(a0)
; RV32ZVE32F-NEXT: lw a1, 196(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a1, 56(a0)
; RV32ZVE32F-NEXT: lw a1, 200(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a1, 52(a0)
; RV32ZVE32F-NEXT: lw a1, 204(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a1, 48(a0)
; RV32ZVE32F-NEXT: lw a1, 212(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a1, 44(a0)
; RV32ZVE32F-NEXT: lw a1, 224(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a1, 40(a0)
; RV32ZVE32F-NEXT: lw a1, 232(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a1, 36(a0)
; RV32ZVE32F-NEXT: lw a1, 240(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: sw a1, 32(a0)
; RV32ZVE32F-NEXT: addi sp, s0, -512
; RV32ZVE32F-NEXT: lw ra, 508(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s0, 504(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s2, 500(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s3, 496(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s4, 492(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s5, 488(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s6, 484(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s7, 480(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s8, 476(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s9, 472(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s10, 468(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s11, 464(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: addi sp, sp, 512
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_strided_split:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: addi sp, sp, -144
; RV64ZVE32F-NEXT: .cfi_def_cfa_offset 144
; RV64ZVE32F-NEXT: sd ra, 136(sp) # 8-byte Folded Spill
; RV64ZVE32F-NEXT: sd s0, 128(sp) # 8-byte Folded Spill
; RV64ZVE32F-NEXT: sd s1, 120(sp) # 8-byte Folded Spill
; RV64ZVE32F-NEXT: sd s2, 112(sp) # 8-byte Folded Spill
; RV64ZVE32F-NEXT: sd s3, 104(sp) # 8-byte Folded Spill
; RV64ZVE32F-NEXT: sd s4, 96(sp) # 8-byte Folded Spill
; RV64ZVE32F-NEXT: sd s5, 88(sp) # 8-byte Folded Spill
; RV64ZVE32F-NEXT: sd s6, 80(sp) # 8-byte Folded Spill
; RV64ZVE32F-NEXT: sd s7, 72(sp) # 8-byte Folded Spill
; RV64ZVE32F-NEXT: sd s8, 64(sp) # 8-byte Folded Spill
; RV64ZVE32F-NEXT: sd s9, 56(sp) # 8-byte Folded Spill
; RV64ZVE32F-NEXT: sd s10, 48(sp) # 8-byte Folded Spill
; RV64ZVE32F-NEXT: sd s11, 40(sp) # 8-byte Folded Spill
; RV64ZVE32F-NEXT: .cfi_offset ra, -8
; RV64ZVE32F-NEXT: .cfi_offset s0, -16
; RV64ZVE32F-NEXT: .cfi_offset s1, -24
; RV64ZVE32F-NEXT: .cfi_offset s2, -32
; RV64ZVE32F-NEXT: .cfi_offset s3, -40
; RV64ZVE32F-NEXT: .cfi_offset s4, -48
; RV64ZVE32F-NEXT: .cfi_offset s5, -56
; RV64ZVE32F-NEXT: .cfi_offset s6, -64
; RV64ZVE32F-NEXT: .cfi_offset s7, -72
; RV64ZVE32F-NEXT: .cfi_offset s8, -80
; RV64ZVE32F-NEXT: .cfi_offset s9, -88
; RV64ZVE32F-NEXT: .cfi_offset s10, -96
; RV64ZVE32F-NEXT: .cfi_offset s11, -104
; RV64ZVE32F-NEXT: ld a2, 0(a1)
; RV64ZVE32F-NEXT: sd a2, 32(sp) # 8-byte Folded Spill
; RV64ZVE32F-NEXT: ld a2, 16(a1)
; RV64ZVE32F-NEXT: sd a2, 24(sp) # 8-byte Folded Spill
; RV64ZVE32F-NEXT: ld a2, 32(a1)
; RV64ZVE32F-NEXT: sd a2, 16(sp) # 8-byte Folded Spill
; RV64ZVE32F-NEXT: ld a2, 48(a1)
; RV64ZVE32F-NEXT: sd a2, 8(sp) # 8-byte Folded Spill
; RV64ZVE32F-NEXT: ld a2, 64(a1)
; RV64ZVE32F-NEXT: sd a2, 0(sp) # 8-byte Folded Spill
; RV64ZVE32F-NEXT: ld a7, 80(a1)
; RV64ZVE32F-NEXT: ld t0, 96(a1)
; RV64ZVE32F-NEXT: ld t1, 112(a1)
; RV64ZVE32F-NEXT: ld t2, 128(a1)
; RV64ZVE32F-NEXT: ld t3, 144(a1)
; RV64ZVE32F-NEXT: ld t4, 160(a1)
; RV64ZVE32F-NEXT: ld t5, 176(a1)
; RV64ZVE32F-NEXT: ld t6, 192(a1)
; RV64ZVE32F-NEXT: ld s0, 208(a1)
; RV64ZVE32F-NEXT: ld s1, 224(a1)
; RV64ZVE32F-NEXT: ld s2, 240(a1)
; RV64ZVE32F-NEXT: ld s3, 256(a1)
; RV64ZVE32F-NEXT: ld s4, 272(a1)
; RV64ZVE32F-NEXT: ld s5, 288(a1)
; RV64ZVE32F-NEXT: ld s6, 304(a1)
; RV64ZVE32F-NEXT: ld s7, 320(a1)
; RV64ZVE32F-NEXT: ld s8, 336(a1)
; RV64ZVE32F-NEXT: ld s9, 352(a1)
; RV64ZVE32F-NEXT: ld s10, 368(a1)
; RV64ZVE32F-NEXT: ld s11, 384(a1)
; RV64ZVE32F-NEXT: ld ra, 400(a1)
; RV64ZVE32F-NEXT: ld a6, 416(a1)
; RV64ZVE32F-NEXT: ld a5, 432(a1)
; RV64ZVE32F-NEXT: ld a2, 496(a1)
; RV64ZVE32F-NEXT: ld a3, 480(a1)
; RV64ZVE32F-NEXT: ld a4, 464(a1)
; RV64ZVE32F-NEXT: ld a1, 448(a1)
; RV64ZVE32F-NEXT: sd a2, 248(a0)
; RV64ZVE32F-NEXT: sd a3, 240(a0)
; RV64ZVE32F-NEXT: sd a4, 232(a0)
; RV64ZVE32F-NEXT: sd a1, 224(a0)
; RV64ZVE32F-NEXT: sd a5, 216(a0)
; RV64ZVE32F-NEXT: sd a6, 208(a0)
; RV64ZVE32F-NEXT: sd ra, 200(a0)
; RV64ZVE32F-NEXT: sd s11, 192(a0)
; RV64ZVE32F-NEXT: sd s10, 184(a0)
; RV64ZVE32F-NEXT: sd s9, 176(a0)
; RV64ZVE32F-NEXT: sd s8, 168(a0)
; RV64ZVE32F-NEXT: sd s7, 160(a0)
; RV64ZVE32F-NEXT: sd s6, 152(a0)
; RV64ZVE32F-NEXT: sd s5, 144(a0)
; RV64ZVE32F-NEXT: sd s4, 136(a0)
; RV64ZVE32F-NEXT: sd s3, 128(a0)
; RV64ZVE32F-NEXT: sd s2, 120(a0)
; RV64ZVE32F-NEXT: sd s1, 112(a0)
; RV64ZVE32F-NEXT: sd s0, 104(a0)
; RV64ZVE32F-NEXT: sd t6, 96(a0)
; RV64ZVE32F-NEXT: sd t5, 88(a0)
; RV64ZVE32F-NEXT: sd t4, 80(a0)
; RV64ZVE32F-NEXT: sd t3, 72(a0)
; RV64ZVE32F-NEXT: sd t2, 64(a0)
; RV64ZVE32F-NEXT: sd t1, 56(a0)
; RV64ZVE32F-NEXT: sd t0, 48(a0)
; RV64ZVE32F-NEXT: sd a7, 40(a0)
; RV64ZVE32F-NEXT: ld a1, 0(sp) # 8-byte Folded Reload
; RV64ZVE32F-NEXT: sd a1, 32(a0)
; RV64ZVE32F-NEXT: ld a1, 8(sp) # 8-byte Folded Reload
; RV64ZVE32F-NEXT: sd a1, 24(a0)
; RV64ZVE32F-NEXT: ld a1, 16(sp) # 8-byte Folded Reload
; RV64ZVE32F-NEXT: sd a1, 16(a0)
; RV64ZVE32F-NEXT: ld a1, 24(sp) # 8-byte Folded Reload
; RV64ZVE32F-NEXT: sd a1, 8(a0)
; RV64ZVE32F-NEXT: ld a1, 32(sp) # 8-byte Folded Reload
; RV64ZVE32F-NEXT: sd a1, 0(a0)
; RV64ZVE32F-NEXT: ld ra, 136(sp) # 8-byte Folded Reload
; RV64ZVE32F-NEXT: ld s0, 128(sp) # 8-byte Folded Reload
; RV64ZVE32F-NEXT: ld s1, 120(sp) # 8-byte Folded Reload
; RV64ZVE32F-NEXT: ld s2, 112(sp) # 8-byte Folded Reload
; RV64ZVE32F-NEXT: ld s3, 104(sp) # 8-byte Folded Reload
; RV64ZVE32F-NEXT: ld s4, 96(sp) # 8-byte Folded Reload
; RV64ZVE32F-NEXT: ld s5, 88(sp) # 8-byte Folded Reload
; RV64ZVE32F-NEXT: ld s6, 80(sp) # 8-byte Folded Reload
; RV64ZVE32F-NEXT: ld s7, 72(sp) # 8-byte Folded Reload
; RV64ZVE32F-NEXT: ld s8, 64(sp) # 8-byte Folded Reload
; RV64ZVE32F-NEXT: ld s9, 56(sp) # 8-byte Folded Reload
; RV64ZVE32F-NEXT: ld s10, 48(sp) # 8-byte Folded Reload
; RV64ZVE32F-NEXT: ld s11, 40(sp) # 8-byte Folded Reload
; RV64ZVE32F-NEXT: addi sp, sp, 144
; RV64ZVE32F-NEXT: ret
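; Descriptive note (editorial): the IR below gathers every other i64 from %base
; (indices 0, 2, ..., 62, i.e. a 16-byte stride) with an all-true mask. The
; ZVE32F configurations cannot use 64-bit vector elements, so the gather is
; fully scalarized into plain loads/stores, which is why the checks above spill
; and reload so many scalar registers.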
%ptrs = getelementptr inbounds i64, ptr %base, <32 x i64> <i64 0, i64 2, i64 4, i64 6, i64 8, i64 10, i64 12, i64 14, i64 16, i64 18, i64 20, i64 22, i64 24, i64 26, i64 28, i64 30, i64 32, i64 34, i64 36, i64 38, i64 40, i64 42, i64 44, i64 46, i64 48, i64 50, i64 52, i64 54, i64 56, i64 58, i64 60, i64 62>
%x = call <32 x i64> @llvm.masked.gather.v32i64.v32p0(<32 x ptr> %ptrs, i32 8, <32 x i1> splat (i1 true), <32 x i64> poison)
ret <32 x i64> %x
}
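; Descriptive note (editorial): the test below gathers adjacent i32 pairs at
; indices <34, 35, 0, 1> with 8-byte alignment, so with +V the gather can be
; widened to SEW=64 and lowered as a 2-element strided load with a negative
; stride of -136 bytes, as the RV32V/RV64V checks show.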
define <4 x i32> @masked_gather_widen_sew_negative_stride(ptr %base) {
; RV32V-LABEL: masked_gather_widen_sew_negative_stride:
; RV32V: # %bb.0:
; RV32V-NEXT: addi a0, a0, 136
; RV32V-NEXT: li a1, -136
; RV32V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32V-NEXT: vlse64.v v8, (a0), a1
; RV32V-NEXT: ret
;
; RV64V-LABEL: masked_gather_widen_sew_negative_stride:
; RV64V: # %bb.0:
; RV64V-NEXT: addi a0, a0, 136
; RV64V-NEXT: li a1, -136
; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64V-NEXT: vlse64.v v8, (a0), a1
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: masked_gather_widen_sew_negative_stride:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: lui a1, 16393
; RV32ZVE32F-NEXT: addi a1, a1, -888
; RV32ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.s.x v9, a1
; RV32ZVE32F-NEXT: vluxei8.v v8, (a0), v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: masked_gather_widen_sew_negative_stride:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: addi a1, a0, 136
; RV64ZVE32F-NEXT: lw a2, 140(a0)
; RV64ZVE32F-NEXT: lw a3, 0(a0)
; RV64ZVE32F-NEXT: lw a0, 4(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vlse32.v v8, (a1), zero
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr i32, ptr %base, <4 x i64> <i64 34, i64 35, i64 0, i64 1>
  %x = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> splat (i1 true), <4 x i32> poison)
ret <4 x i32> %x
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; RV64: {{.*}}