| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+experimental-zvfh,+v -target-abi=ilp32d \ |
| ; RUN: -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32V |
| ; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+experimental-zvfh,+v -target-abi=lp64d \ |
| ; RUN: -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 |
| ; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+experimental-zvfh,+zve32f -target-abi=ilp32d \ |
| ; RUN: -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVE32F |
| ; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+experimental-zvfh,+zve32f -target-abi=lp64d \ |
| ; RUN: -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64ZVE32F |
| |
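; Lowering of llvm.masked.scatter to RVV indexed stores. Configurations with
; full V use vsoxei32/vsoxei64 with the pointer vector as the index operand.
; With zve32f, 64-bit pointers cannot live in vector registers, so the
; RV64ZVE32F lowering scalarizes the scatter into one conditional store per
; mask bit.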
| declare void @llvm.masked.scatter.v1i8.v1p0i8(<1 x i8>, <1 x i8*>, i32, <1 x i1>) |
| |
| define void @mscatter_v1i8(<1 x i8> %val, <1 x i8*> %ptrs, <1 x i1> %m) { |
| ; RV32V-LABEL: mscatter_v1i8: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 1, e8, mf8, ta, mu |
| ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_v1i8: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_v1i8: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t |
| ; RV32ZVE32F-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_v1i8: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.v.i v9, 0 |
| ; RV64ZVE32F-NEXT: vmerge.vim v9, v9, 1, v0 |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v9 |
| ; RV64ZVE32F-NEXT: andi a1, a1, 1 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB0_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vse8.v v8, (a0) |
| ; RV64ZVE32F-NEXT: .LBB0_2: # %else |
| ; RV64ZVE32F-NEXT: ret |
| call void @llvm.masked.scatter.v1i8.v1p0i8(<1 x i8> %val, <1 x i8*> %ptrs, i32 1, <1 x i1> %m) |
| ret void |
| } |
| |
| declare void @llvm.masked.scatter.v2i8.v2p0i8(<2 x i8>, <2 x i8*>, i32, <2 x i1>) |
| |
| define void @mscatter_v2i8(<2 x i8> %val, <2 x i8*> %ptrs, <2 x i1> %m) { |
| ; RV32V-LABEL: mscatter_v2i8: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu |
| ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_v2i8: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_v2i8: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t |
| ; RV32ZVE32F-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_v2i8: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v0 |
| ; RV64ZVE32F-NEXT: andi a3, a2, 1 |
| ; RV64ZVE32F-NEXT: bnez a3, .LBB1_3 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a2, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB1_4 |
| ; RV64ZVE32F-NEXT: .LBB1_2: # %else2 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB1_3: # %cond.store |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vse8.v v8, (a0) |
| ; RV64ZVE32F-NEXT: andi a0, a2, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB1_2 |
| ; RV64ZVE32F-NEXT: .LBB1_4: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 |
| ; RV64ZVE32F-NEXT: vse8.v v8, (a1) |
| ; RV64ZVE32F-NEXT: ret |
| call void @llvm.masked.scatter.v2i8.v2p0i8(<2 x i8> %val, <2 x i8*> %ptrs, i32 1, <2 x i1> %m) |
| ret void |
| } |
| |
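; Truncating scatters: the value operand is narrowed before the store. The
; vector configurations narrow in-register with vncvt.x.x.w, one halving step
; per factor of two.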
| define void @mscatter_v2i16_truncstore_v2i8(<2 x i16> %val, <2 x i8*> %ptrs, <2 x i1> %m) { |
| ; RV32V-LABEL: mscatter_v2i16_truncstore_v2i8: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu |
| ; RV32V-NEXT: vncvt.x.x.w v8, v8 |
| ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_v2i16_truncstore_v2i8: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, mu |
| ; RV64-NEXT: vncvt.x.x.w v8, v8 |
| ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_v2i16_truncstore_v2i8: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV32ZVE32F-NEXT: vncvt.x.x.w v8, v8 |
| ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t |
| ; RV32ZVE32F-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_v2i16_truncstore_v2i8: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v0 |
| ; RV64ZVE32F-NEXT: andi a3, a2, 1 |
| ; RV64ZVE32F-NEXT: vncvt.x.x.w v8, v8 |
| ; RV64ZVE32F-NEXT: bnez a3, .LBB2_3 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a2, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB2_4 |
| ; RV64ZVE32F-NEXT: .LBB2_2: # %else2 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB2_3: # %cond.store |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vse8.v v8, (a0) |
| ; RV64ZVE32F-NEXT: andi a0, a2, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB2_2 |
| ; RV64ZVE32F-NEXT: .LBB2_4: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 |
| ; RV64ZVE32F-NEXT: vse8.v v8, (a1) |
| ; RV64ZVE32F-NEXT: ret |
| %tval = trunc <2 x i16> %val to <2 x i8> |
| call void @llvm.masked.scatter.v2i8.v2p0i8(<2 x i8> %tval, <2 x i8*> %ptrs, i32 1, <2 x i1> %m) |
| ret void |
| } |
| |
| define void @mscatter_v2i32_truncstore_v2i8(<2 x i32> %val, <2 x i8*> %ptrs, <2 x i1> %m) { |
| ; RV32V-LABEL: mscatter_v2i32_truncstore_v2i8: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu |
| ; RV32V-NEXT: vncvt.x.x.w v8, v8 |
| ; RV32V-NEXT: vsetvli zero, zero, e8, mf8, ta, mu |
| ; RV32V-NEXT: vncvt.x.x.w v8, v8 |
| ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_v2i32_truncstore_v2i8: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, mu |
| ; RV64-NEXT: vncvt.x.x.w v8, v8 |
| ; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, mu |
| ; RV64-NEXT: vncvt.x.x.w v8, v8 |
| ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_v2i32_truncstore_v2i8: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu |
| ; RV32ZVE32F-NEXT: vncvt.x.x.w v8, v8 |
| ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, mu |
| ; RV32ZVE32F-NEXT: vncvt.x.x.w v8, v8 |
| ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t |
| ; RV32ZVE32F-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_v2i32_truncstore_v2i8: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vncvt.x.x.w v8, v8 |
| ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v0 |
| ; RV64ZVE32F-NEXT: andi a3, a2, 1 |
| ; RV64ZVE32F-NEXT: vncvt.x.x.w v8, v8 |
| ; RV64ZVE32F-NEXT: bnez a3, .LBB3_3 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a2, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB3_4 |
| ; RV64ZVE32F-NEXT: .LBB3_2: # %else2 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB3_3: # %cond.store |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vse8.v v8, (a0) |
| ; RV64ZVE32F-NEXT: andi a0, a2, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB3_2 |
| ; RV64ZVE32F-NEXT: .LBB3_4: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 |
| ; RV64ZVE32F-NEXT: vse8.v v8, (a1) |
| ; RV64ZVE32F-NEXT: ret |
| %tval = trunc <2 x i32> %val to <2 x i8> |
| call void @llvm.masked.scatter.v2i8.v2p0i8(<2 x i8> %tval, <2 x i8*> %ptrs, i32 1, <2 x i1> %m) |
| ret void |
| } |
| |
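; With zve32f, i64 elements cannot be held in vector registers, so the
; <2 x i64> source arrives in scalar registers (indirectly on RV32) and the
; truncated elements are reassembled through the stack.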
| define void @mscatter_v2i64_truncstore_v2i8(<2 x i64> %val, <2 x i8*> %ptrs, <2 x i1> %m) { |
| ; RV32V-LABEL: mscatter_v2i64_truncstore_v2i8: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu |
| ; RV32V-NEXT: vncvt.x.x.w v8, v8 |
| ; RV32V-NEXT: vsetvli zero, zero, e16, mf4, ta, mu |
| ; RV32V-NEXT: vncvt.x.x.w v8, v8 |
| ; RV32V-NEXT: vsetvli zero, zero, e8, mf8, ta, mu |
| ; RV32V-NEXT: vncvt.x.x.w v8, v8 |
| ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_v2i64_truncstore_v2i8: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu |
| ; RV64-NEXT: vncvt.x.x.w v8, v8 |
| ; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, mu |
| ; RV64-NEXT: vncvt.x.x.w v8, v8 |
| ; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, mu |
| ; RV64-NEXT: vncvt.x.x.w v8, v8 |
| ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i8: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: addi sp, sp, -16 |
| ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 |
| ; RV32ZVE32F-NEXT: lw a1, 8(a0) |
| ; RV32ZVE32F-NEXT: lw a0, 0(a0) |
| ; RV32ZVE32F-NEXT: sb a1, 15(sp) |
| ; RV32ZVE32F-NEXT: sb a0, 14(sp) |
| ; RV32ZVE32F-NEXT: addi a0, sp, 15 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV32ZVE32F-NEXT: vle8.v v9, (a0) |
| ; RV32ZVE32F-NEXT: addi a0, sp, 14 |
| ; RV32ZVE32F-NEXT: vle8.v v10, (a0) |
| ; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, mu |
| ; RV32ZVE32F-NEXT: vslideup.vi v10, v9, 1 |
| ; RV32ZVE32F-NEXT: vsoxei32.v v10, (zero), v8, v0.t |
| ; RV32ZVE32F-NEXT: addi sp, sp, 16 |
| ; RV32ZVE32F-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i8: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: addi sp, sp, -16 |
| ; RV64ZVE32F-NEXT: .cfi_def_cfa_offset 16 |
| ; RV64ZVE32F-NEXT: sb a1, 15(sp) |
| ; RV64ZVE32F-NEXT: sb a0, 14(sp) |
| ; RV64ZVE32F-NEXT: addi a0, sp, 15 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vle8.v v9, (a0) |
| ; RV64ZVE32F-NEXT: addi a0, sp, 14 |
| ; RV64ZVE32F-NEXT: vle8.v v8, (a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v0 |
| ; RV64ZVE32F-NEXT: andi a1, a0, 1 |
| ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 |
| ; RV64ZVE32F-NEXT: bnez a1, .LBB4_3 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a0, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB4_4 |
| ; RV64ZVE32F-NEXT: .LBB4_2: # %else2 |
| ; RV64ZVE32F-NEXT: addi sp, sp, 16 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB4_3: # %cond.store |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vse8.v v8, (a2) |
| ; RV64ZVE32F-NEXT: andi a0, a0, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB4_2 |
| ; RV64ZVE32F-NEXT: .LBB4_4: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 |
| ; RV64ZVE32F-NEXT: vse8.v v8, (a3) |
| ; RV64ZVE32F-NEXT: addi sp, sp, 16 |
| ; RV64ZVE32F-NEXT: ret |
| %tval = trunc <2 x i64> %val to <2 x i8> |
| call void @llvm.masked.scatter.v2i8.v2p0i8(<2 x i8> %tval, <2 x i8*> %ptrs, i32 1, <2 x i1> %m) |
| ret void |
| } |
| |
| declare void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8>, <4 x i8*>, i32, <4 x i1>) |
| |
| define void @mscatter_v4i8(<4 x i8> %val, <4 x i8*> %ptrs, <4 x i1> %m) { |
| ; RV32-LABEL: mscatter_v4i8: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, mu |
| ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_v4i8: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_v4i8: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: ld a1, 24(a0) |
| ; RV64ZVE32F-NEXT: ld a2, 16(a0) |
| ; RV64ZVE32F-NEXT: ld a4, 8(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a3, v0 |
| ; RV64ZVE32F-NEXT: andi a5, a3, 1 |
| ; RV64ZVE32F-NEXT: bnez a5, .LBB5_5 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a3, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB5_6 |
| ; RV64ZVE32F-NEXT: .LBB5_2: # %else2 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 4 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB5_7 |
| ; RV64ZVE32F-NEXT: .LBB5_3: # %else4 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 8 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB5_8 |
| ; RV64ZVE32F-NEXT: .LBB5_4: # %else6 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB5_5: # %cond.store |
| ; RV64ZVE32F-NEXT: ld a0, 0(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vse8.v v8, (a0) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB5_2 |
| ; RV64ZVE32F-NEXT: .LBB5_6: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 |
| ; RV64ZVE32F-NEXT: vse8.v v9, (a4) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 4 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB5_3 |
| ; RV64ZVE32F-NEXT: .LBB5_7: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 |
| ; RV64ZVE32F-NEXT: vse8.v v9, (a2) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 8 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB5_4 |
| ; RV64ZVE32F-NEXT: .LBB5_8: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 |
| ; RV64ZVE32F-NEXT: vse8.v v8, (a1) |
| ; RV64ZVE32F-NEXT: ret |
| call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> %val, <4 x i8*> %ptrs, i32 1, <4 x i1> %m) |
| ret void |
| } |
| |
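; A known all-ones mask should become an unmasked vsoxei. The RV64ZVE32F
; lowering still materializes the mask with vmset.m, but its first branch
; (beqz zero) is trivially taken.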
| define void @mscatter_truemask_v4i8(<4 x i8> %val, <4 x i8*> %ptrs) { |
| ; RV32-LABEL: mscatter_truemask_v4i8: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, mu |
| ; RV32-NEXT: vsoxei32.v v8, (zero), v9 |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_truemask_v4i8: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (zero), v10 |
| ; RV64-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_truemask_v4i8: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: ld a1, 24(a0) |
| ; RV64ZVE32F-NEXT: ld a2, 16(a0) |
| ; RV64ZVE32F-NEXT: ld a4, 8(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vmset.m v9 |
| ; RV64ZVE32F-NEXT: vmv.x.s a3, v9 |
| ; RV64ZVE32F-NEXT: beqz zero, .LBB6_5 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a3, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB6_6 |
| ; RV64ZVE32F-NEXT: .LBB6_2: # %else2 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 4 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB6_7 |
| ; RV64ZVE32F-NEXT: .LBB6_3: # %else4 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 8 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB6_8 |
| ; RV64ZVE32F-NEXT: .LBB6_4: # %else6 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB6_5: # %cond.store |
| ; RV64ZVE32F-NEXT: ld a0, 0(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vse8.v v8, (a0) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB6_2 |
| ; RV64ZVE32F-NEXT: .LBB6_6: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 |
| ; RV64ZVE32F-NEXT: vse8.v v9, (a4) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 4 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB6_3 |
| ; RV64ZVE32F-NEXT: .LBB6_7: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 |
| ; RV64ZVE32F-NEXT: vse8.v v9, (a2) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 8 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB6_4 |
| ; RV64ZVE32F-NEXT: .LBB6_8: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 |
| ; RV64ZVE32F-NEXT: vse8.v v8, (a1) |
| ; RV64ZVE32F-NEXT: ret |
| %mhead = insertelement <4 x i1> poison, i1 1, i32 0 |
| %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer |
| call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> %val, <4 x i8*> %ptrs, i32 1, <4 x i1> %mtrue) |
| ret void |
| } |
| |
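; A known all-zeros mask folds the scatter away entirely.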
| define void @mscatter_falsemask_v4i8(<4 x i8> %val, <4 x i8*> %ptrs) { |
| ; CHECK-LABEL: mscatter_falsemask_v4i8: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: ret |
| call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> %val, <4 x i8*> %ptrs, i32 1, <4 x i1> zeroinitializer) |
| ret void |
| } |
| |
| declare void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8>, <8 x i8*>, i32, <8 x i1>) |
| |
| define void @mscatter_v8i8(<8 x i8> %val, <8 x i8*> %ptrs, <8 x i1> %m) { |
| ; RV32-LABEL: mscatter_v8i8: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu |
| ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_v8i8: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_v8i8: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: ld a1, 56(a0) |
| ; RV64ZVE32F-NEXT: ld a2, 48(a0) |
| ; RV64ZVE32F-NEXT: ld a4, 40(a0) |
| ; RV64ZVE32F-NEXT: ld a5, 32(a0) |
| ; RV64ZVE32F-NEXT: ld a6, 24(a0) |
| ; RV64ZVE32F-NEXT: ld a7, 16(a0) |
| ; RV64ZVE32F-NEXT: ld t0, 8(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a3, v0 |
| ; RV64ZVE32F-NEXT: andi t1, a3, 1 |
| ; RV64ZVE32F-NEXT: bnez t1, .LBB8_9 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a3, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB8_10 |
| ; RV64ZVE32F-NEXT: .LBB8_2: # %else2 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 4 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB8_11 |
| ; RV64ZVE32F-NEXT: .LBB8_3: # %else4 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 8 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB8_12 |
| ; RV64ZVE32F-NEXT: .LBB8_4: # %else6 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 16 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB8_13 |
| ; RV64ZVE32F-NEXT: .LBB8_5: # %else8 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 32 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB8_14 |
| ; RV64ZVE32F-NEXT: .LBB8_6: # %else10 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 64 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB8_15 |
| ; RV64ZVE32F-NEXT: .LBB8_7: # %else12 |
| ; RV64ZVE32F-NEXT: andi a0, a3, -128 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB8_16 |
| ; RV64ZVE32F-NEXT: .LBB8_8: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB8_9: # %cond.store |
| ; RV64ZVE32F-NEXT: ld a0, 0(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vse8.v v8, (a0) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB8_2 |
| ; RV64ZVE32F-NEXT: .LBB8_10: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 |
| ; RV64ZVE32F-NEXT: vse8.v v9, (t0) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 4 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB8_3 |
| ; RV64ZVE32F-NEXT: .LBB8_11: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 |
| ; RV64ZVE32F-NEXT: vse8.v v9, (a7) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 8 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB8_4 |
| ; RV64ZVE32F-NEXT: .LBB8_12: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3 |
| ; RV64ZVE32F-NEXT: vse8.v v9, (a6) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 16 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB8_5 |
| ; RV64ZVE32F-NEXT: .LBB8_13: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 |
| ; RV64ZVE32F-NEXT: vse8.v v9, (a5) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 32 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB8_6 |
| ; RV64ZVE32F-NEXT: .LBB8_14: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5 |
| ; RV64ZVE32F-NEXT: vse8.v v9, (a4) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 64 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB8_7 |
| ; RV64ZVE32F-NEXT: .LBB8_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 6 |
| ; RV64ZVE32F-NEXT: vse8.v v9, (a2) |
| ; RV64ZVE32F-NEXT: andi a0, a3, -128 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB8_8 |
| ; RV64ZVE32F-NEXT: .LBB8_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV64ZVE32F-NEXT: vse8.v v8, (a1) |
| ; RV64ZVE32F-NEXT: ret |
| call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> %val, <8 x i8*> %ptrs, i32 1, <8 x i1> %m) |
| ret void |
| } |
| |
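; Scatter to a base pointer plus a vector of i8 indices. RV32/RV64
; sign-extend the indices to XLEN (vsext.vf4/vf8) and use an indexed store;
; RV64ZVE32F extracts each index and adds it to the base scalar-by-scalar.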
| define void @mscatter_baseidx_v8i8(<8 x i8> %val, i8* %base, <8 x i8> %idxs, <8 x i1> %m) { |
| ; RV32-LABEL: mscatter_baseidx_v8i8: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32-NEXT: vsext.vf4 v10, v9 |
| ; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, mu |
| ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_v8i8: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vsext.vf8 v12, v9 |
| ; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 1 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB9_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vse8.v v8, (a2) |
| ; RV64ZVE32F-NEXT: .LBB9_2: # %else |
| ; RV64ZVE32F-NEXT: andi a2, a1, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB9_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 |
| ; RV64ZVE32F-NEXT: vse8.v v10, (a2) |
| ; RV64ZVE32F-NEXT: .LBB9_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB9_6 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2 |
| ; RV64ZVE32F-NEXT: vse8.v v11, (a2) |
| ; RV64ZVE32F-NEXT: .LBB9_6: # %else4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 8 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 4 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB9_13 |
| ; RV64ZVE32F-NEXT: # %bb.7: # %else6 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB9_14 |
| ; RV64ZVE32F-NEXT: .LBB9_8: # %else8 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB9_10 |
| ; RV64ZVE32F-NEXT: .LBB9_9: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5 |
| ; RV64ZVE32F-NEXT: vse8.v v10, (a2) |
| ; RV64ZVE32F-NEXT: .LBB9_10: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB9_15 |
| ; RV64ZVE32F-NEXT: # %bb.11: # %else12 |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: bnez a1, .LBB9_16 |
| ; RV64ZVE32F-NEXT: .LBB9_12: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB9_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 |
| ; RV64ZVE32F-NEXT: vse8.v v10, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB9_8 |
| ; RV64ZVE32F-NEXT: .LBB9_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 |
| ; RV64ZVE32F-NEXT: vse8.v v10, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB9_9 |
| ; RV64ZVE32F-NEXT: j .LBB9_10 |
| ; RV64ZVE32F-NEXT: .LBB9_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6 |
| ; RV64ZVE32F-NEXT: vse8.v v10, (a2) |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB9_12 |
| ; RV64ZVE32F-NEXT: .LBB9_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v9 |
| ; RV64ZVE32F-NEXT: add a0, a0, a1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV64ZVE32F-NEXT: vse8.v v8, (a0) |
| ; RV64ZVE32F-NEXT: ret |
| %ptrs = getelementptr inbounds i8, i8* %base, <8 x i8> %idxs |
| call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> %val, <8 x i8*> %ptrs, i32 1, <8 x i1> %m) |
| ret void |
| } |
| |
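; i16 variants of the tests above. The conditional stores use vse16.v, and
; indexed forms scale each index by the 2-byte element size.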
| declare void @llvm.masked.scatter.v1i16.v1p0i16(<1 x i16>, <1 x i16*>, i32, <1 x i1>) |
| |
| define void @mscatter_v1i16(<1 x i16> %val, <1 x i16*> %ptrs, <1 x i1> %m) { |
| ; RV32V-LABEL: mscatter_v1i16: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 1, e16, mf4, ta, mu |
| ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_v1i16: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_v1i16: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t |
| ; RV32ZVE32F-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_v1i16: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.v.i v9, 0 |
| ; RV64ZVE32F-NEXT: vmerge.vim v9, v9, 1, v0 |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v9 |
| ; RV64ZVE32F-NEXT: andi a1, a1, 1 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB10_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vse16.v v8, (a0) |
| ; RV64ZVE32F-NEXT: .LBB10_2: # %else |
| ; RV64ZVE32F-NEXT: ret |
| call void @llvm.masked.scatter.v1i16.v1p0i16(<1 x i16> %val, <1 x i16*> %ptrs, i32 2, <1 x i1> %m) |
| ret void |
| } |
| |
| declare void @llvm.masked.scatter.v2i16.v2p0i16(<2 x i16>, <2 x i16*>, i32, <2 x i1>) |
| |
| define void @mscatter_v2i16(<2 x i16> %val, <2 x i16*> %ptrs, <2 x i1> %m) { |
| ; RV32V-LABEL: mscatter_v2i16: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu |
| ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_v2i16: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_v2i16: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu |
| ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t |
| ; RV32ZVE32F-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_v2i16: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v0 |
| ; RV64ZVE32F-NEXT: andi a3, a2, 1 |
| ; RV64ZVE32F-NEXT: bnez a3, .LBB11_3 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a2, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB11_4 |
| ; RV64ZVE32F-NEXT: .LBB11_2: # %else2 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB11_3: # %cond.store |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vse16.v v8, (a0) |
| ; RV64ZVE32F-NEXT: andi a0, a2, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB11_2 |
| ; RV64ZVE32F-NEXT: .LBB11_4: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 |
| ; RV64ZVE32F-NEXT: vse16.v v8, (a1) |
| ; RV64ZVE32F-NEXT: ret |
| call void @llvm.masked.scatter.v2i16.v2p0i16(<2 x i16> %val, <2 x i16*> %ptrs, i32 2, <2 x i1> %m) |
| ret void |
| } |
| |
| define void @mscatter_v2i32_truncstore_v2i16(<2 x i32> %val, <2 x i16*> %ptrs, <2 x i1> %m) { |
| ; RV32V-LABEL: mscatter_v2i32_truncstore_v2i16: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu |
| ; RV32V-NEXT: vncvt.x.x.w v8, v8 |
| ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_v2i32_truncstore_v2i16: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, mu |
| ; RV64-NEXT: vncvt.x.x.w v8, v8 |
| ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_v2i32_truncstore_v2i16: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu |
| ; RV32ZVE32F-NEXT: vncvt.x.x.w v8, v8 |
| ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t |
| ; RV32ZVE32F-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_v2i32_truncstore_v2i16: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vncvt.x.x.w v8, v8 |
| ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v0 |
| ; RV64ZVE32F-NEXT: andi a3, a2, 1 |
| ; RV64ZVE32F-NEXT: bnez a3, .LBB12_3 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a2, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB12_4 |
| ; RV64ZVE32F-NEXT: .LBB12_2: # %else2 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB12_3: # %cond.store |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vse16.v v8, (a0) |
| ; RV64ZVE32F-NEXT: andi a0, a2, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB12_2 |
| ; RV64ZVE32F-NEXT: .LBB12_4: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 |
| ; RV64ZVE32F-NEXT: vse16.v v8, (a1) |
| ; RV64ZVE32F-NEXT: ret |
| %tval = trunc <2 x i32> %val to <2 x i16> |
| call void @llvm.masked.scatter.v2i16.v2p0i16(<2 x i16> %tval, <2 x i16*> %ptrs, i32 2, <2 x i1> %m) |
| ret void |
| } |
| |
| define void @mscatter_v2i64_truncstore_v2i16(<2 x i64> %val, <2 x i16*> %ptrs, <2 x i1> %m) { |
| ; RV32V-LABEL: mscatter_v2i64_truncstore_v2i16: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu |
| ; RV32V-NEXT: vncvt.x.x.w v8, v8 |
| ; RV32V-NEXT: vsetvli zero, zero, e16, mf4, ta, mu |
| ; RV32V-NEXT: vncvt.x.x.w v8, v8 |
| ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_v2i64_truncstore_v2i16: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu |
| ; RV64-NEXT: vncvt.x.x.w v8, v8 |
| ; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, mu |
| ; RV64-NEXT: vncvt.x.x.w v8, v8 |
| ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i16: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: addi sp, sp, -16 |
| ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 |
| ; RV32ZVE32F-NEXT: lw a1, 8(a0) |
| ; RV32ZVE32F-NEXT: lw a0, 0(a0) |
| ; RV32ZVE32F-NEXT: sh a1, 14(sp) |
| ; RV32ZVE32F-NEXT: sh a0, 12(sp) |
| ; RV32ZVE32F-NEXT: addi a0, sp, 14 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV32ZVE32F-NEXT: vle16.v v9, (a0) |
| ; RV32ZVE32F-NEXT: addi a0, sp, 12 |
| ; RV32ZVE32F-NEXT: vle16.v v10, (a0) |
| ; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, mu |
| ; RV32ZVE32F-NEXT: vslideup.vi v10, v9, 1 |
| ; RV32ZVE32F-NEXT: vsoxei32.v v10, (zero), v8, v0.t |
| ; RV32ZVE32F-NEXT: addi sp, sp, 16 |
| ; RV32ZVE32F-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i16: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: addi sp, sp, -16 |
| ; RV64ZVE32F-NEXT: .cfi_def_cfa_offset 16 |
| ; RV64ZVE32F-NEXT: sh a1, 14(sp) |
| ; RV64ZVE32F-NEXT: sh a0, 12(sp) |
| ; RV64ZVE32F-NEXT: addi a0, sp, 14 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vle16.v v9, (a0) |
| ; RV64ZVE32F-NEXT: addi a0, sp, 12 |
| ; RV64ZVE32F-NEXT: vle16.v v8, (a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, mu |
| ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 |
| ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v0 |
| ; RV64ZVE32F-NEXT: andi a1, a0, 1 |
| ; RV64ZVE32F-NEXT: bnez a1, .LBB13_3 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a0, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB13_4 |
| ; RV64ZVE32F-NEXT: .LBB13_2: # %else2 |
| ; RV64ZVE32F-NEXT: addi sp, sp, 16 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB13_3: # %cond.store |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vse16.v v8, (a2) |
| ; RV64ZVE32F-NEXT: andi a0, a0, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB13_2 |
| ; RV64ZVE32F-NEXT: .LBB13_4: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 |
| ; RV64ZVE32F-NEXT: vse16.v v8, (a3) |
| ; RV64ZVE32F-NEXT: addi sp, sp, 16 |
| ; RV64ZVE32F-NEXT: ret |
| %tval = trunc <2 x i64> %val to <2 x i16> |
| call void @llvm.masked.scatter.v2i16.v2p0i16(<2 x i16> %tval, <2 x i16*> %ptrs, i32 2, <2 x i1> %m) |
| ret void |
| } |
| |
| declare void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16>, <4 x i16*>, i32, <4 x i1>) |
| |
| define void @mscatter_v4i16(<4 x i16> %val, <4 x i16*> %ptrs, <4 x i1> %m) { |
| ; RV32-LABEL: mscatter_v4i16: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, mu |
| ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_v4i16: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_v4i16: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: ld a1, 24(a0) |
| ; RV64ZVE32F-NEXT: ld a2, 16(a0) |
| ; RV64ZVE32F-NEXT: ld a4, 8(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a3, v0 |
| ; RV64ZVE32F-NEXT: andi a5, a3, 1 |
| ; RV64ZVE32F-NEXT: bnez a5, .LBB14_5 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a3, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB14_6 |
| ; RV64ZVE32F-NEXT: .LBB14_2: # %else2 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 4 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB14_7 |
| ; RV64ZVE32F-NEXT: .LBB14_3: # %else4 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 8 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB14_8 |
| ; RV64ZVE32F-NEXT: .LBB14_4: # %else6 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB14_5: # %cond.store |
| ; RV64ZVE32F-NEXT: ld a0, 0(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vse16.v v8, (a0) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB14_2 |
| ; RV64ZVE32F-NEXT: .LBB14_6: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 |
| ; RV64ZVE32F-NEXT: vse16.v v9, (a4) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 4 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB14_3 |
| ; RV64ZVE32F-NEXT: .LBB14_7: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 |
| ; RV64ZVE32F-NEXT: vse16.v v9, (a2) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 8 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB14_4 |
| ; RV64ZVE32F-NEXT: .LBB14_8: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 |
| ; RV64ZVE32F-NEXT: vse16.v v8, (a1) |
| ; RV64ZVE32F-NEXT: ret |
| call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> %val, <4 x i16*> %ptrs, i32 2, <4 x i1> %m) |
| ret void |
| } |
| |
| define void @mscatter_truemask_v4i16(<4 x i16> %val, <4 x i16*> %ptrs) { |
| ; RV32-LABEL: mscatter_truemask_v4i16: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, mu |
| ; RV32-NEXT: vsoxei32.v v8, (zero), v9 |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_truemask_v4i16: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (zero), v10 |
| ; RV64-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_truemask_v4i16: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: ld a1, 24(a0) |
| ; RV64ZVE32F-NEXT: ld a2, 16(a0) |
| ; RV64ZVE32F-NEXT: ld a4, 8(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vmset.m v9 |
| ; RV64ZVE32F-NEXT: vmv.x.s a3, v9 |
| ; RV64ZVE32F-NEXT: beqz zero, .LBB15_5 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a3, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB15_6 |
| ; RV64ZVE32F-NEXT: .LBB15_2: # %else2 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 4 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB15_7 |
| ; RV64ZVE32F-NEXT: .LBB15_3: # %else4 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 8 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB15_8 |
| ; RV64ZVE32F-NEXT: .LBB15_4: # %else6 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB15_5: # %cond.store |
| ; RV64ZVE32F-NEXT: ld a0, 0(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vse16.v v8, (a0) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB15_2 |
| ; RV64ZVE32F-NEXT: .LBB15_6: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 |
| ; RV64ZVE32F-NEXT: vse16.v v9, (a4) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 4 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB15_3 |
| ; RV64ZVE32F-NEXT: .LBB15_7: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 |
| ; RV64ZVE32F-NEXT: vse16.v v9, (a2) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 8 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB15_4 |
| ; RV64ZVE32F-NEXT: .LBB15_8: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 |
| ; RV64ZVE32F-NEXT: vse16.v v8, (a1) |
| ; RV64ZVE32F-NEXT: ret |
| %mhead = insertelement <4 x i1> poison, i1 1, i32 0 |
| %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer |
| call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> %val, <4 x i16*> %ptrs, i32 2, <4 x i1> %mtrue) |
| ret void |
| } |
| |
| define void @mscatter_falsemask_v4i16(<4 x i16> %val, <4 x i16*> %ptrs) { |
| ; CHECK-LABEL: mscatter_falsemask_v4i16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: ret |
| call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> %val, <4 x i16*> %ptrs, i32 2, <4 x i1> zeroinitializer) |
| ret void |
| } |
| |
| declare void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16>, <8 x i16*>, i32, <8 x i1>) |
| |
| define void @mscatter_v8i16(<8 x i16> %val, <8 x i16*> %ptrs, <8 x i1> %m) { |
| ; RV32-LABEL: mscatter_v8i16: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu |
| ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_v8i16: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_v8i16: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: ld a1, 56(a0) |
| ; RV64ZVE32F-NEXT: ld a2, 48(a0) |
| ; RV64ZVE32F-NEXT: ld a4, 40(a0) |
| ; RV64ZVE32F-NEXT: ld a5, 32(a0) |
| ; RV64ZVE32F-NEXT: ld a6, 24(a0) |
| ; RV64ZVE32F-NEXT: ld a7, 16(a0) |
| ; RV64ZVE32F-NEXT: ld t0, 8(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a3, v0 |
| ; RV64ZVE32F-NEXT: andi t1, a3, 1 |
| ; RV64ZVE32F-NEXT: bnez t1, .LBB17_9 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a3, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB17_10 |
| ; RV64ZVE32F-NEXT: .LBB17_2: # %else2 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 4 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB17_11 |
| ; RV64ZVE32F-NEXT: .LBB17_3: # %else4 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 8 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB17_12 |
| ; RV64ZVE32F-NEXT: .LBB17_4: # %else6 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 16 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB17_13 |
| ; RV64ZVE32F-NEXT: .LBB17_5: # %else8 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 32 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB17_14 |
| ; RV64ZVE32F-NEXT: .LBB17_6: # %else10 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 64 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB17_15 |
| ; RV64ZVE32F-NEXT: .LBB17_7: # %else12 |
| ; RV64ZVE32F-NEXT: andi a0, a3, -128 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB17_16 |
| ; RV64ZVE32F-NEXT: .LBB17_8: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB17_9: # %cond.store |
| ; RV64ZVE32F-NEXT: ld a0, 0(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vse16.v v8, (a0) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB17_2 |
| ; RV64ZVE32F-NEXT: .LBB17_10: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 |
| ; RV64ZVE32F-NEXT: vse16.v v9, (t0) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 4 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB17_3 |
| ; RV64ZVE32F-NEXT: .LBB17_11: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 |
| ; RV64ZVE32F-NEXT: vse16.v v9, (a7) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 8 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB17_4 |
| ; RV64ZVE32F-NEXT: .LBB17_12: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3 |
| ; RV64ZVE32F-NEXT: vse16.v v9, (a6) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 16 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB17_5 |
| ; RV64ZVE32F-NEXT: .LBB17_13: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 |
| ; RV64ZVE32F-NEXT: vse16.v v9, (a5) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 32 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB17_6 |
| ; RV64ZVE32F-NEXT: .LBB17_14: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5 |
| ; RV64ZVE32F-NEXT: vse16.v v9, (a4) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 64 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB17_7 |
| ; RV64ZVE32F-NEXT: .LBB17_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 6 |
| ; RV64ZVE32F-NEXT: vse16.v v9, (a2) |
| ; RV64ZVE32F-NEXT: andi a0, a3, -128 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB17_8 |
| ; RV64ZVE32F-NEXT: .LBB17_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV64ZVE32F-NEXT: vse16.v v8, (a1) |
| ; RV64ZVE32F-NEXT: ret |
| call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %val, <8 x i16*> %ptrs, i32 2, <8 x i1> %m) |
| ret void |
| } |
| |
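; Indexed i16 scatters with i8 indices. The GEP scales by the element size,
; so the scalarized code shifts each index left by 1 before adding the base.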
| define void @mscatter_baseidx_v8i8_v8i16(<8 x i16> %val, i16* %base, <8 x i8> %idxs, <8 x i1> %m) { |
| ; RV32-LABEL: mscatter_baseidx_v8i8_v8i16: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32-NEXT: vsext.vf4 v10, v9 |
| ; RV32-NEXT: vadd.vv v10, v10, v10 |
| ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu |
| ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_v8i8_v8i16: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vsext.vf8 v12, v9 |
| ; RV64-NEXT: vadd.vv v12, v12, v12 |
| ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8i16: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 1 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB18_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vse16.v v8, (a2) |
| ; RV64ZVE32F-NEXT: .LBB18_2: # %else |
| ; RV64ZVE32F-NEXT: andi a2, a1, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB18_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 |
| ; RV64ZVE32F-NEXT: vse16.v v10, (a2) |
| ; RV64ZVE32F-NEXT: .LBB18_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB18_6 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2 |
| ; RV64ZVE32F-NEXT: vse16.v v11, (a2) |
| ; RV64ZVE32F-NEXT: .LBB18_6: # %else4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 8 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 4 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB18_13 |
| ; RV64ZVE32F-NEXT: # %bb.7: # %else6 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB18_14 |
| ; RV64ZVE32F-NEXT: .LBB18_8: # %else8 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB18_10 |
| ; RV64ZVE32F-NEXT: .LBB18_9: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5 |
| ; RV64ZVE32F-NEXT: vse16.v v10, (a2) |
| ; RV64ZVE32F-NEXT: .LBB18_10: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB18_15 |
| ; RV64ZVE32F-NEXT: # %bb.11: # %else12 |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: bnez a1, .LBB18_16 |
| ; RV64ZVE32F-NEXT: .LBB18_12: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB18_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 |
| ; RV64ZVE32F-NEXT: vse16.v v10, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB18_8 |
| ; RV64ZVE32F-NEXT: .LBB18_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 |
| ; RV64ZVE32F-NEXT: vse16.v v10, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB18_9 |
| ; RV64ZVE32F-NEXT: j .LBB18_10 |
| ; RV64ZVE32F-NEXT: .LBB18_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6 |
| ; RV64ZVE32F-NEXT: vse16.v v10, (a2) |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB18_12 |
| ; RV64ZVE32F-NEXT: .LBB18_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v9 |
| ; RV64ZVE32F-NEXT: slli a1, a1, 1 |
| ; RV64ZVE32F-NEXT: add a0, a0, a1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV64ZVE32F-NEXT: vse16.v v8, (a0) |
| ; RV64ZVE32F-NEXT: ret |
| %ptrs = getelementptr inbounds i16, i16* %base, <8 x i8> %idxs |
| call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %val, <8 x i16*> %ptrs, i32 2, <8 x i1> %m) |
| ret void |
| } |
| |
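; Same as above with the indices explicitly sign-extended in the IR; this
; should lower identically to the plain i8-index form.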
| define void @mscatter_baseidx_sext_v8i8_v8i16(<8 x i16> %val, i16* %base, <8 x i8> %idxs, <8 x i1> %m) { |
| ; RV32-LABEL: mscatter_baseidx_sext_v8i8_v8i16: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32-NEXT: vsext.vf4 v10, v9 |
| ; RV32-NEXT: vadd.vv v10, v10, v10 |
| ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu |
| ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_sext_v8i8_v8i16: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vsext.vf8 v12, v9 |
| ; RV64-NEXT: vadd.vv v12, v12, v12 |
| ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8i16: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 1 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB19_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vse16.v v8, (a2) |
| ; RV64ZVE32F-NEXT: .LBB19_2: # %else |
| ; RV64ZVE32F-NEXT: andi a2, a1, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB19_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 |
| ; RV64ZVE32F-NEXT: vse16.v v10, (a2) |
| ; RV64ZVE32F-NEXT: .LBB19_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB19_6 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2 |
| ; RV64ZVE32F-NEXT: vse16.v v11, (a2) |
| ; RV64ZVE32F-NEXT: .LBB19_6: # %else4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 8 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 4 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB19_13 |
| ; RV64ZVE32F-NEXT: # %bb.7: # %else6 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB19_14 |
| ; RV64ZVE32F-NEXT: .LBB19_8: # %else8 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB19_10 |
| ; RV64ZVE32F-NEXT: .LBB19_9: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5 |
| ; RV64ZVE32F-NEXT: vse16.v v10, (a2) |
| ; RV64ZVE32F-NEXT: .LBB19_10: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB19_15 |
| ; RV64ZVE32F-NEXT: # %bb.11: # %else12 |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: bnez a1, .LBB19_16 |
| ; RV64ZVE32F-NEXT: .LBB19_12: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB19_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 |
| ; RV64ZVE32F-NEXT: vse16.v v10, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB19_8 |
| ; RV64ZVE32F-NEXT: .LBB19_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 |
| ; RV64ZVE32F-NEXT: vse16.v v10, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB19_9 |
| ; RV64ZVE32F-NEXT: j .LBB19_10 |
| ; RV64ZVE32F-NEXT: .LBB19_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6 |
| ; RV64ZVE32F-NEXT: vse16.v v10, (a2) |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB19_12 |
| ; RV64ZVE32F-NEXT: .LBB19_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v9 |
| ; RV64ZVE32F-NEXT: slli a1, a1, 1 |
| ; RV64ZVE32F-NEXT: add a0, a0, a1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV64ZVE32F-NEXT: vse16.v v8, (a0) |
| ; RV64ZVE32F-NEXT: ret |
| %eidxs = sext <8 x i8> %idxs to <8 x i16> |
| %ptrs = getelementptr inbounds i16, i16* %base, <8 x i16> %eidxs |
| call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %val, <8 x i16*> %ptrs, i32 2, <8 x i1> %m) |
| ret void |
| } |
| |
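; Zero-extended indices: the scalarized code masks each index with andi 255
; before scaling.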
| define void @mscatter_baseidx_zext_v8i8_v8i16(<8 x i16> %val, i16* %base, <8 x i8> %idxs, <8 x i1> %m) { |
| ; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8i16: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32-NEXT: vzext.vf4 v10, v9 |
| ; RV32-NEXT: vadd.vv v10, v10, v10 |
| ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu |
| ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8i16: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vzext.vf8 v12, v9 |
| ; RV64-NEXT: vadd.vv v12, v12, v12 |
| ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i16: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 1 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB20_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: andi a2, a2, 255 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vse16.v v8, (a2) |
| ; RV64ZVE32F-NEXT: .LBB20_2: # %else |
| ; RV64ZVE32F-NEXT: andi a2, a1, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB20_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: andi a2, a2, 255 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 |
| ; RV64ZVE32F-NEXT: vse16.v v10, (a2) |
| ; RV64ZVE32F-NEXT: .LBB20_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB20_6 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: andi a2, a2, 255 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2 |
| ; RV64ZVE32F-NEXT: vse16.v v11, (a2) |
| ; RV64ZVE32F-NEXT: .LBB20_6: # %else4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 8 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 4 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB20_13 |
| ; RV64ZVE32F-NEXT: # %bb.7: # %else6 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB20_14 |
| ; RV64ZVE32F-NEXT: .LBB20_8: # %else8 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB20_10 |
| ; RV64ZVE32F-NEXT: .LBB20_9: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: andi a2, a2, 255 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5 |
| ; RV64ZVE32F-NEXT: vse16.v v10, (a2) |
| ; RV64ZVE32F-NEXT: .LBB20_10: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB20_15 |
| ; RV64ZVE32F-NEXT: # %bb.11: # %else12 |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: bnez a1, .LBB20_16 |
| ; RV64ZVE32F-NEXT: .LBB20_12: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB20_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: andi a2, a2, 255 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 |
| ; RV64ZVE32F-NEXT: vse16.v v10, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB20_8 |
| ; RV64ZVE32F-NEXT: .LBB20_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: andi a2, a2, 255 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 |
| ; RV64ZVE32F-NEXT: vse16.v v10, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB20_9 |
| ; RV64ZVE32F-NEXT: j .LBB20_10 |
| ; RV64ZVE32F-NEXT: .LBB20_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: andi a2, a2, 255 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6 |
| ; RV64ZVE32F-NEXT: vse16.v v10, (a2) |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB20_12 |
| ; RV64ZVE32F-NEXT: .LBB20_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v9 |
| ; RV64ZVE32F-NEXT: andi a1, a1, 255 |
| ; RV64ZVE32F-NEXT: slli a1, a1, 1 |
| ; RV64ZVE32F-NEXT: add a0, a0, a1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV64ZVE32F-NEXT: vse16.v v8, (a0) |
| ; RV64ZVE32F-NEXT: ret |
| %eidxs = zext <8 x i8> %idxs to <8 x i16> |
| %ptrs = getelementptr inbounds i16, i16* %base, <8 x i16> %eidxs |
| call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %val, <8 x i16*> %ptrs, i32 2, <8 x i1> %m) |
| ret void |
| } |
| |
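| ; Indices are already i16, so the RV64ZVE32F fallback extracts them at e16 and |
| ; only needs the slli-by-1 to turn an element index into a byte offset. |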
| define void @mscatter_baseidx_v8i16(<8 x i16> %val, i16* %base, <8 x i16> %idxs, <8 x i1> %m) { |
| ; RV32-LABEL: mscatter_baseidx_v8i16: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32-NEXT: vsext.vf2 v10, v9 |
| ; RV32-NEXT: vadd.vv v10, v10, v10 |
| ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu |
| ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_v8i16: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vsext.vf4 v12, v9 |
| ; RV64-NEXT: vadd.vv v12, v12, v12 |
| ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i16: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 1 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB21_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vse16.v v8, (a2) |
| ; RV64ZVE32F-NEXT: .LBB21_2: # %else |
| ; RV64ZVE32F-NEXT: andi a2, a1, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB21_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 |
| ; RV64ZVE32F-NEXT: vse16.v v10, (a2) |
| ; RV64ZVE32F-NEXT: .LBB21_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB21_6 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2 |
| ; RV64ZVE32F-NEXT: vse16.v v11, (a2) |
| ; RV64ZVE32F-NEXT: .LBB21_6: # %else4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 8 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 4 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB21_13 |
| ; RV64ZVE32F-NEXT: # %bb.7: # %else6 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB21_14 |
| ; RV64ZVE32F-NEXT: .LBB21_8: # %else8 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB21_10 |
| ; RV64ZVE32F-NEXT: .LBB21_9: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5 |
| ; RV64ZVE32F-NEXT: vse16.v v10, (a2) |
| ; RV64ZVE32F-NEXT: .LBB21_10: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB21_15 |
| ; RV64ZVE32F-NEXT: # %bb.11: # %else12 |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: bnez a1, .LBB21_16 |
| ; RV64ZVE32F-NEXT: .LBB21_12: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB21_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 |
| ; RV64ZVE32F-NEXT: vse16.v v10, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB21_8 |
| ; RV64ZVE32F-NEXT: .LBB21_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 |
| ; RV64ZVE32F-NEXT: vse16.v v10, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB21_9 |
| ; RV64ZVE32F-NEXT: j .LBB21_10 |
| ; RV64ZVE32F-NEXT: .LBB21_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6 |
| ; RV64ZVE32F-NEXT: vse16.v v10, (a2) |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB21_12 |
| ; RV64ZVE32F-NEXT: .LBB21_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v9 |
| ; RV64ZVE32F-NEXT: slli a1, a1, 1 |
| ; RV64ZVE32F-NEXT: add a0, a0, a1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV64ZVE32F-NEXT: vse16.v v8, (a0) |
| ; RV64ZVE32F-NEXT: ret |
| %ptrs = getelementptr inbounds i16, i16* %base, <8 x i16> %idxs |
| call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> %val, <8 x i16*> %ptrs, i32 2, <8 x i1> %m) |
| ret void |
| } |
| |
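| ; i32-element scatters. e32 is the widest element type Zve32f supports, so the |
| ; RV32ZVE32F configuration still handles these directly with vsoxei32. |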
| declare void @llvm.masked.scatter.v1i32.v1p0i32(<1 x i32>, <1 x i32*>, i32, <1 x i1>) |
| |
| define void @mscatter_v1i32(<1 x i32> %val, <1 x i32*> %ptrs, <1 x i1> %m) { |
| ; RV32V-LABEL: mscatter_v1i32: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 1, e32, mf2, ta, mu |
| ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_v1i32: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_v1i32: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t |
| ; RV32ZVE32F-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_v1i32: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.v.i v9, 0 |
| ; RV64ZVE32F-NEXT: vmerge.vim v9, v9, 1, v0 |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v9 |
| ; RV64ZVE32F-NEXT: andi a1, a1, 1 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB22_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a0) |
| ; RV64ZVE32F-NEXT: .LBB22_2: # %else |
| ; RV64ZVE32F-NEXT: ret |
| call void @llvm.masked.scatter.v1i32.v1p0i32(<1 x i32> %val, <1 x i32*> %ptrs, i32 4, <1 x i1> %m) |
| ret void |
| } |
| |
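| ; Two lanes: RV64ZVE32F reads the mask out of v0 with vmv.x.s and branches on |
| ; each bit, storing lane 1 after a vslidedown. |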
| declare void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32>, <2 x i32*>, i32, <2 x i1>) |
| |
| define void @mscatter_v2i32(<2 x i32> %val, <2 x i32*> %ptrs, <2 x i1> %m) { |
| ; RV32V-LABEL: mscatter_v2i32: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu |
| ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_v2i32: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_v2i32: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu |
| ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t |
| ; RV32ZVE32F-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_v2i32: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v0 |
| ; RV64ZVE32F-NEXT: andi a3, a2, 1 |
| ; RV64ZVE32F-NEXT: bnez a3, .LBB23_3 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a2, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB23_4 |
| ; RV64ZVE32F-NEXT: .LBB23_2: # %else2 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB23_3: # %cond.store |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a0) |
| ; RV64ZVE32F-NEXT: andi a0, a2, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB23_2 |
| ; RV64ZVE32F-NEXT: .LBB23_4: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a1) |
| ; RV64ZVE32F-NEXT: ret |
| call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %val, <2 x i32*> %ptrs, i32 4, <2 x i1> %m) |
| ret void |
| } |
| |
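| ; Truncating scatter of <2 x i64> to i32. With Zve32f the i64 source cannot |
| ; live in vector registers: RV64ZVE32F receives the two values in a0/a1 and |
| ; rebuilds a <2 x i32> through the stack, while RV32ZVE32F takes %val |
| ; indirectly and gathers the low words with vlse32/vmv.s.x. |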
| define void @mscatter_v2i64_truncstore_v2i32(<2 x i64> %val, <2 x i32*> %ptrs, <2 x i1> %m) { |
| ; RV32V-LABEL: mscatter_v2i64_truncstore_v2i32: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu |
| ; RV32V-NEXT: vncvt.x.x.w v8, v8 |
| ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_v2i64_truncstore_v2i32: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu |
| ; RV64-NEXT: vncvt.x.x.w v8, v8 |
| ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i32: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: lw a1, 0(a0) |
| ; RV32ZVE32F-NEXT: addi a0, a0, 8 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu |
| ; RV32ZVE32F-NEXT: vlse32.v v9, (a0), zero |
| ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, tu, mu |
| ; RV32ZVE32F-NEXT: vmv.s.x v9, a1 |
| ; RV32ZVE32F-NEXT: vsoxei32.v v9, (zero), v8, v0.t |
| ; RV32ZVE32F-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i32: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: addi sp, sp, -16 |
| ; RV64ZVE32F-NEXT: .cfi_def_cfa_offset 16 |
| ; RV64ZVE32F-NEXT: sw a1, 12(sp) |
| ; RV64ZVE32F-NEXT: sw a0, 8(sp) |
| ; RV64ZVE32F-NEXT: addi a0, sp, 12 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vle32.v v9, (a0) |
| ; RV64ZVE32F-NEXT: addi a0, sp, 8 |
| ; RV64ZVE32F-NEXT: vle32.v v8, (a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, mu |
| ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 |
| ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v0 |
| ; RV64ZVE32F-NEXT: andi a1, a0, 1 |
| ; RV64ZVE32F-NEXT: bnez a1, .LBB24_3 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a0, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB24_4 |
| ; RV64ZVE32F-NEXT: .LBB24_2: # %else2 |
| ; RV64ZVE32F-NEXT: addi sp, sp, 16 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB24_3: # %cond.store |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a2) |
| ; RV64ZVE32F-NEXT: andi a0, a0, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB24_2 |
| ; RV64ZVE32F-NEXT: .LBB24_4: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a3) |
| ; RV64ZVE32F-NEXT: addi sp, sp, 16 |
| ; RV64ZVE32F-NEXT: ret |
| %tval = trunc <2 x i64> %val to <2 x i32> |
| call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %tval, <2 x i32*> %ptrs, i32 4, <2 x i1> %m) |
| ret void |
| } |
| |
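| ; On RV64 with only Zve32f the <4 x i32*> operand cannot be a vector either, |
| ; so RV64ZVE32F loads the pointers from the indirectly passed array at a0. |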
| declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>, <4 x i32*>, i32, <4 x i1>) |
| |
| define void @mscatter_v4i32(<4 x i32> %val, <4 x i32*> %ptrs, <4 x i1> %m) { |
| ; RV32-LABEL: mscatter_v4i32: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu |
| ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_v4i32: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_v4i32: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: ld a1, 24(a0) |
| ; RV64ZVE32F-NEXT: ld a2, 16(a0) |
| ; RV64ZVE32F-NEXT: ld a4, 8(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a3, v0 |
| ; RV64ZVE32F-NEXT: andi a5, a3, 1 |
| ; RV64ZVE32F-NEXT: bnez a5, .LBB25_5 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a3, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB25_6 |
| ; RV64ZVE32F-NEXT: .LBB25_2: # %else2 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 4 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB25_7 |
| ; RV64ZVE32F-NEXT: .LBB25_3: # %else4 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 8 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB25_8 |
| ; RV64ZVE32F-NEXT: .LBB25_4: # %else6 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB25_5: # %cond.store |
| ; RV64ZVE32F-NEXT: ld a0, 0(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a0) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB25_2 |
| ; RV64ZVE32F-NEXT: .LBB25_6: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 |
| ; RV64ZVE32F-NEXT: vse32.v v9, (a4) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 4 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB25_3 |
| ; RV64ZVE32F-NEXT: .LBB25_7: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 |
| ; RV64ZVE32F-NEXT: vse32.v v9, (a2) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 8 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB25_4 |
| ; RV64ZVE32F-NEXT: .LBB25_8: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a1) |
| ; RV64ZVE32F-NEXT: ret |
| call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %val, <4 x i32*> %ptrs, i32 4, <4 x i1> %m) |
| ret void |
| } |
| |
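| ; All-true mask built by splatting i1 1. RV64ZVE32F still materializes the |
| ; mask (vmset.m) and the first test degenerates to "beqz zero", an |
| ; always-taken branch that is not folded away here. |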
| define void @mscatter_truemask_v4i32(<4 x i32> %val, <4 x i32*> %ptrs) { |
| ; RV32-LABEL: mscatter_truemask_v4i32: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu |
| ; RV32-NEXT: vsoxei32.v v8, (zero), v9 |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_truemask_v4i32: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (zero), v10 |
| ; RV64-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_truemask_v4i32: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: ld a1, 24(a0) |
| ; RV64ZVE32F-NEXT: ld a2, 16(a0) |
| ; RV64ZVE32F-NEXT: ld a4, 8(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vmset.m v9 |
| ; RV64ZVE32F-NEXT: vmv.x.s a3, v9 |
| ; RV64ZVE32F-NEXT: beqz zero, .LBB26_5 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a3, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB26_6 |
| ; RV64ZVE32F-NEXT: .LBB26_2: # %else2 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 4 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB26_7 |
| ; RV64ZVE32F-NEXT: .LBB26_3: # %else4 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 8 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB26_8 |
| ; RV64ZVE32F-NEXT: .LBB26_4: # %else6 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB26_5: # %cond.store |
| ; RV64ZVE32F-NEXT: ld a0, 0(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a0) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB26_2 |
| ; RV64ZVE32F-NEXT: .LBB26_6: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 |
| ; RV64ZVE32F-NEXT: vse32.v v9, (a4) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 4 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB26_3 |
| ; RV64ZVE32F-NEXT: .LBB26_7: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 |
| ; RV64ZVE32F-NEXT: vse32.v v9, (a2) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 8 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB26_4 |
| ; RV64ZVE32F-NEXT: .LBB26_8: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a1) |
| ; RV64ZVE32F-NEXT: ret |
| %mhead = insertelement <4 x i1> poison, i1 1, i32 0 |
| %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer |
| call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %val, <4 x i32*> %ptrs, i32 4, <4 x i1> %mtrue) |
| ret void |
| } |
| |
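| ; An all-false mask makes the scatter dead; every configuration folds it to a |
| ; plain ret. |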
| define void @mscatter_falsemask_v4i32(<4 x i32> %val, <4 x i32*> %ptrs) { |
| ; CHECK-LABEL: mscatter_falsemask_v4i32: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: ret |
| call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %val, <4 x i32*> %ptrs, i32 4, <4 x i1> zeroinitializer) |
| ret void |
| } |
| |
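| ; Eight lanes: RV64ZVE32F loads pointers 1-7 from the in-memory array up front |
| ; and fetches the lane-0 pointer lazily inside %cond.store. |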
| declare void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32>, <8 x i32*>, i32, <8 x i1>) |
| |
| define void @mscatter_v8i32(<8 x i32> %val, <8 x i32*> %ptrs, <8 x i1> %m) { |
| ; RV32-LABEL: mscatter_v8i32: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_v8i32: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_v8i32: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: ld a1, 56(a0) |
| ; RV64ZVE32F-NEXT: ld a2, 48(a0) |
| ; RV64ZVE32F-NEXT: ld a4, 40(a0) |
| ; RV64ZVE32F-NEXT: ld a5, 32(a0) |
| ; RV64ZVE32F-NEXT: ld a6, 24(a0) |
| ; RV64ZVE32F-NEXT: ld a7, 16(a0) |
| ; RV64ZVE32F-NEXT: ld t0, 8(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a3, v0 |
| ; RV64ZVE32F-NEXT: andi t1, a3, 1 |
| ; RV64ZVE32F-NEXT: bnez t1, .LBB28_9 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a3, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB28_10 |
| ; RV64ZVE32F-NEXT: .LBB28_2: # %else2 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 4 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB28_11 |
| ; RV64ZVE32F-NEXT: .LBB28_3: # %else4 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 8 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB28_12 |
| ; RV64ZVE32F-NEXT: .LBB28_4: # %else6 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 16 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB28_13 |
| ; RV64ZVE32F-NEXT: .LBB28_5: # %else8 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 32 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB28_14 |
| ; RV64ZVE32F-NEXT: .LBB28_6: # %else10 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 64 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB28_15 |
| ; RV64ZVE32F-NEXT: .LBB28_7: # %else12 |
| ; RV64ZVE32F-NEXT: andi a0, a3, -128 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB28_16 |
| ; RV64ZVE32F-NEXT: .LBB28_8: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB28_9: # %cond.store |
| ; RV64ZVE32F-NEXT: ld a0, 0(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a0) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB28_2 |
| ; RV64ZVE32F-NEXT: .LBB28_10: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 |
| ; RV64ZVE32F-NEXT: vse32.v v10, (t0) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 4 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB28_3 |
| ; RV64ZVE32F-NEXT: .LBB28_11: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2 |
| ; RV64ZVE32F-NEXT: vse32.v v10, (a7) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 8 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB28_4 |
| ; RV64ZVE32F-NEXT: .LBB28_12: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 |
| ; RV64ZVE32F-NEXT: vse32.v v10, (a6) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 16 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB28_5 |
| ; RV64ZVE32F-NEXT: .LBB28_13: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 |
| ; RV64ZVE32F-NEXT: vse32.v v10, (a5) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 32 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB28_6 |
| ; RV64ZVE32F-NEXT: .LBB28_14: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5 |
| ; RV64ZVE32F-NEXT: vse32.v v10, (a4) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 64 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB28_7 |
| ; RV64ZVE32F-NEXT: .LBB28_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6 |
| ; RV64ZVE32F-NEXT: vse32.v v10, (a2) |
| ; RV64ZVE32F-NEXT: andi a0, a3, -128 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB28_8 |
| ; RV64ZVE32F-NEXT: .LBB28_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a1) |
| ; RV64ZVE32F-NEXT: ret |
| call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %val, <8 x i32*> %ptrs, i32 4, <8 x i1> %m) |
| ret void |
| } |
| |
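| ; Indexed scatters against a shared base pointer. For i8 indices the vector |
| ; paths sign-extend (vsext.vf4/vf8) and shift by 2; the RV64ZVE32F fallback |
| ; slides each index out of v10, scales it with slli, and stores that lane. |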
| define void @mscatter_baseidx_v8i8_v8i32(<8 x i32> %val, i32* %base, <8 x i8> %idxs, <8 x i1> %m) { |
| ; RV32-LABEL: mscatter_baseidx_v8i8_v8i32: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32-NEXT: vsext.vf4 v12, v10 |
| ; RV32-NEXT: vsll.vi v10, v12, 2 |
| ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_v8i8_v8i32: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vsext.vf8 v12, v10 |
| ; RV64-NEXT: vsll.vi v12, v12, 2 |
| ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8i32: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 1 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB29_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a2) |
| ; RV64ZVE32F-NEXT: .LBB29_2: # %else |
| ; RV64ZVE32F-NEXT: andi a2, a1, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB29_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: .LBB29_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB29_6 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: .LBB29_6: # %else4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 8 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 4 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB29_13 |
| ; RV64ZVE32F-NEXT: # %bb.7: # %else6 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB29_14 |
| ; RV64ZVE32F-NEXT: .LBB29_8: # %else8 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB29_10 |
| ; RV64ZVE32F-NEXT: .LBB29_9: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: .LBB29_10: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB29_15 |
| ; RV64ZVE32F-NEXT: # %bb.11: # %else12 |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: bnez a1, .LBB29_16 |
| ; RV64ZVE32F-NEXT: .LBB29_12: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB29_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 3 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB29_8 |
| ; RV64ZVE32F-NEXT: .LBB29_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB29_9 |
| ; RV64ZVE32F-NEXT: j .LBB29_10 |
| ; RV64ZVE32F-NEXT: .LBB29_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB29_12 |
| ; RV64ZVE32F-NEXT: .LBB29_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV64ZVE32F-NEXT: slli a1, a1, 2 |
| ; RV64ZVE32F-NEXT: add a0, a0, a1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a0) |
| ; RV64ZVE32F-NEXT: ret |
| %ptrs = getelementptr inbounds i32, i32* %base, <8 x i8> %idxs |
| call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %val, <8 x i32*> %ptrs, i32 4, <8 x i1> %m) |
| ret void |
| } |
| |
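| ; Explicit sext of the i8 indices to i32; the generated code is identical to |
| ; the implicit-extension case above. |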
| define void @mscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, i32* %base, <8 x i8> %idxs, <8 x i1> %m) { |
| ; RV32-LABEL: mscatter_baseidx_sext_v8i8_v8i32: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32-NEXT: vsext.vf4 v12, v10 |
| ; RV32-NEXT: vsll.vi v10, v12, 2 |
| ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_sext_v8i8_v8i32: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vsext.vf8 v12, v10 |
| ; RV64-NEXT: vsll.vi v12, v12, 2 |
| ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8i32: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 1 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB30_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a2) |
| ; RV64ZVE32F-NEXT: .LBB30_2: # %else |
| ; RV64ZVE32F-NEXT: andi a2, a1, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB30_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: .LBB30_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB30_6 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: .LBB30_6: # %else4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 8 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 4 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB30_13 |
| ; RV64ZVE32F-NEXT: # %bb.7: # %else6 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB30_14 |
| ; RV64ZVE32F-NEXT: .LBB30_8: # %else8 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB30_10 |
| ; RV64ZVE32F-NEXT: .LBB30_9: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: .LBB30_10: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB30_15 |
| ; RV64ZVE32F-NEXT: # %bb.11: # %else12 |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: bnez a1, .LBB30_16 |
| ; RV64ZVE32F-NEXT: .LBB30_12: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB30_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 3 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB30_8 |
| ; RV64ZVE32F-NEXT: .LBB30_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB30_9 |
| ; RV64ZVE32F-NEXT: j .LBB30_10 |
| ; RV64ZVE32F-NEXT: .LBB30_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB30_12 |
| ; RV64ZVE32F-NEXT: .LBB30_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV64ZVE32F-NEXT: slli a1, a1, 2 |
| ; RV64ZVE32F-NEXT: add a0, a0, a1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a0) |
| ; RV64ZVE32F-NEXT: ret |
| %eidxs = sext <8 x i8> %idxs to <8 x i32> |
| %ptrs = getelementptr inbounds i32, i32* %base, <8 x i32> %eidxs |
| call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %val, <8 x i32*> %ptrs, i32 4, <8 x i1> %m) |
| ret void |
| } |
| |
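| ; Zero extension of the i8 indices: vzext on the vector paths, and an extra |
| ; "andi a2, a2, 255" on every index in the RV64ZVE32F fallback. |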
| define void @mscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, i32* %base, <8 x i8> %idxs, <8 x i1> %m) { |
| ; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8i32: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32-NEXT: vzext.vf4 v12, v10 |
| ; RV32-NEXT: vsll.vi v10, v12, 2 |
| ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8i32: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vzext.vf8 v12, v10 |
| ; RV64-NEXT: vsll.vi v12, v12, 2 |
| ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i32: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 1 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB31_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: andi a2, a2, 255 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a2) |
| ; RV64ZVE32F-NEXT: .LBB31_2: # %else |
| ; RV64ZVE32F-NEXT: andi a2, a1, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB31_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: andi a2, a2, 255 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: .LBB31_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB31_6 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: andi a2, a2, 255 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: .LBB31_6: # %else4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 8 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 4 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB31_13 |
| ; RV64ZVE32F-NEXT: # %bb.7: # %else6 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB31_14 |
| ; RV64ZVE32F-NEXT: .LBB31_8: # %else8 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB31_10 |
| ; RV64ZVE32F-NEXT: .LBB31_9: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: andi a2, a2, 255 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: .LBB31_10: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB31_15 |
| ; RV64ZVE32F-NEXT: # %bb.11: # %else12 |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: bnez a1, .LBB31_16 |
| ; RV64ZVE32F-NEXT: .LBB31_12: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB31_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: andi a2, a2, 255 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 3 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB31_8 |
| ; RV64ZVE32F-NEXT: .LBB31_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: andi a2, a2, 255 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB31_9 |
| ; RV64ZVE32F-NEXT: j .LBB31_10 |
| ; RV64ZVE32F-NEXT: .LBB31_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: andi a2, a2, 255 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB31_12 |
| ; RV64ZVE32F-NEXT: .LBB31_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV64ZVE32F-NEXT: andi a1, a1, 255 |
| ; RV64ZVE32F-NEXT: slli a1, a1, 2 |
| ; RV64ZVE32F-NEXT: add a0, a0, a1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a0) |
| ; RV64ZVE32F-NEXT: ret |
| %eidxs = zext <8 x i8> %idxs to <8 x i32> |
| %ptrs = getelementptr inbounds i32, i32* %base, <8 x i32> %eidxs |
| call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %val, <8 x i32*> %ptrs, i32 4, <8 x i1> %m) |
| ret void |
| } |
| |
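| ; i16 indices into i32 elements: the fallback extracts indices at e16, mf2 |
| ; and shifts by 2 to scale them to 4-byte element offsets. |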
| define void @mscatter_baseidx_v8i16_v8i32(<8 x i32> %val, i32* %base, <8 x i16> %idxs, <8 x i1> %m) { |
| ; RV32-LABEL: mscatter_baseidx_v8i16_v8i32: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32-NEXT: vsext.vf2 v12, v10 |
| ; RV32-NEXT: vsll.vi v10, v12, 2 |
| ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_v8i16_v8i32: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vsext.vf4 v12, v10 |
| ; RV64-NEXT: vsll.vi v12, v12, 2 |
| ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i16_v8i32: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 1 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB32_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a2) |
| ; RV64ZVE32F-NEXT: .LBB32_2: # %else |
| ; RV64ZVE32F-NEXT: andi a2, a1, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB32_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: .LBB32_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB32_6 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: .LBB32_6: # %else4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 8 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 4 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB32_13 |
| ; RV64ZVE32F-NEXT: # %bb.7: # %else6 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB32_14 |
| ; RV64ZVE32F-NEXT: .LBB32_8: # %else8 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB32_10 |
| ; RV64ZVE32F-NEXT: .LBB32_9: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: .LBB32_10: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB32_15 |
| ; RV64ZVE32F-NEXT: # %bb.11: # %else12 |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: bnez a1, .LBB32_16 |
| ; RV64ZVE32F-NEXT: .LBB32_12: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB32_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 3 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB32_8 |
| ; RV64ZVE32F-NEXT: .LBB32_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB32_9 |
| ; RV64ZVE32F-NEXT: j .LBB32_10 |
| ; RV64ZVE32F-NEXT: .LBB32_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB32_12 |
| ; RV64ZVE32F-NEXT: .LBB32_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV64ZVE32F-NEXT: slli a1, a1, 2 |
| ; RV64ZVE32F-NEXT: add a0, a0, a1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a0) |
| ; RV64ZVE32F-NEXT: ret |
| %ptrs = getelementptr inbounds i32, i32* %base, <8 x i16> %idxs |
| call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %val, <8 x i32*> %ptrs, i32 4, <8 x i1> %m) |
| ret void |
| } |
| |
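| ; Explicit sext of the i16 indices; again identical to the implicit case. |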
| define void @mscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, i32* %base, <8 x i16> %idxs, <8 x i1> %m) { |
| ; RV32-LABEL: mscatter_baseidx_sext_v8i16_v8i32: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32-NEXT: vsext.vf2 v12, v10 |
| ; RV32-NEXT: vsll.vi v10, v12, 2 |
| ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_sext_v8i16_v8i32: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vsext.vf4 v12, v10 |
| ; RV64-NEXT: vsll.vi v12, v12, 2 |
| ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8i32: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 1 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB33_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a2) |
| ; RV64ZVE32F-NEXT: .LBB33_2: # %else |
| ; RV64ZVE32F-NEXT: andi a2, a1, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB33_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: .LBB33_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB33_6 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: .LBB33_6: # %else4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 8 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 4 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB33_13 |
| ; RV64ZVE32F-NEXT: # %bb.7: # %else6 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB33_14 |
| ; RV64ZVE32F-NEXT: .LBB33_8: # %else8 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB33_10 |
| ; RV64ZVE32F-NEXT: .LBB33_9: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: .LBB33_10: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB33_15 |
| ; RV64ZVE32F-NEXT: # %bb.11: # %else12 |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: bnez a1, .LBB33_16 |
| ; RV64ZVE32F-NEXT: .LBB33_12: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB33_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 3 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB33_8 |
| ; RV64ZVE32F-NEXT: .LBB33_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB33_9 |
| ; RV64ZVE32F-NEXT: j .LBB33_10 |
| ; RV64ZVE32F-NEXT: .LBB33_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB33_12 |
| ; RV64ZVE32F-NEXT: .LBB33_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV64ZVE32F-NEXT: slli a1, a1, 2 |
| ; RV64ZVE32F-NEXT: add a0, a0, a1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a0) |
| ; RV64ZVE32F-NEXT: ret |
| %eidxs = sext <8 x i16> %idxs to <8 x i32> |
| %ptrs = getelementptr inbounds i32, i32* %base, <8 x i32> %eidxs |
| call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %val, <8 x i32*> %ptrs, i32 4, <8 x i1> %m) |
| ret void |
| } |
| |
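| ; The 0xffff mask for zero-extending i16 indices does not fit an andi |
| ; immediate, so the fallback materializes it once (lui/addiw into a1) and |
| ; ANDs each extracted index with it. |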
| define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, i32* %base, <8 x i16> %idxs, <8 x i1> %m) { |
| ; RV32-LABEL: mscatter_baseidx_zext_v8i16_v8i32: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32-NEXT: vzext.vf2 v12, v10 |
| ; RV32-NEXT: vsll.vi v10, v12, 2 |
| ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_zext_v8i16_v8i32: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vzext.vf4 v12, v10 |
| ; RV64-NEXT: vsll.vi v12, v12, 2 |
| ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8i32: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: lui a1, 16 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v0 |
| ; RV64ZVE32F-NEXT: andi a3, a2, 1 |
| ; RV64ZVE32F-NEXT: addiw a1, a1, -1 |
| ; RV64ZVE32F-NEXT: beqz a3, .LBB34_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a3, v10 |
| ; RV64ZVE32F-NEXT: and a3, a3, a1 |
| ; RV64ZVE32F-NEXT: slli a3, a3, 2 |
| ; RV64ZVE32F-NEXT: add a3, a0, a3 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a3) |
| ; RV64ZVE32F-NEXT: .LBB34_2: # %else |
| ; RV64ZVE32F-NEXT: andi a3, a2, 2 |
| ; RV64ZVE32F-NEXT: beqz a3, .LBB34_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a3, v11 |
| ; RV64ZVE32F-NEXT: and a3, a3, a1 |
| ; RV64ZVE32F-NEXT: slli a3, a3, 2 |
| ; RV64ZVE32F-NEXT: add a3, a0, a3 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a3) |
| ; RV64ZVE32F-NEXT: .LBB34_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a3, a2, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 2 |
| ; RV64ZVE32F-NEXT: beqz a3, .LBB34_6 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a3, v11 |
| ; RV64ZVE32F-NEXT: and a3, a3, a1 |
| ; RV64ZVE32F-NEXT: slli a3, a3, 2 |
| ; RV64ZVE32F-NEXT: add a3, a0, a3 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a3) |
| ; RV64ZVE32F-NEXT: .LBB34_6: # %else4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: andi a3, a2, 8 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 4 |
| ; RV64ZVE32F-NEXT: bnez a3, .LBB34_13 |
| ; RV64ZVE32F-NEXT: # %bb.7: # %else6 |
| ; RV64ZVE32F-NEXT: andi a3, a2, 16 |
| ; RV64ZVE32F-NEXT: bnez a3, .LBB34_14 |
| ; RV64ZVE32F-NEXT: .LBB34_8: # %else8 |
| ; RV64ZVE32F-NEXT: andi a3, a2, 32 |
| ; RV64ZVE32F-NEXT: beqz a3, .LBB34_10 |
| ; RV64ZVE32F-NEXT: .LBB34_9: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a3, v11 |
| ; RV64ZVE32F-NEXT: and a3, a3, a1 |
| ; RV64ZVE32F-NEXT: slli a3, a3, 2 |
| ; RV64ZVE32F-NEXT: add a3, a0, a3 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a3) |
| ; RV64ZVE32F-NEXT: .LBB34_10: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a3, a2, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 |
| ; RV64ZVE32F-NEXT: bnez a3, .LBB34_15 |
| ; RV64ZVE32F-NEXT: # %bb.11: # %else12 |
| ; RV64ZVE32F-NEXT: andi a2, a2, -128 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB34_16 |
| ; RV64ZVE32F-NEXT: .LBB34_12: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB34_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a3, v11 |
| ; RV64ZVE32F-NEXT: and a3, a3, a1 |
| ; RV64ZVE32F-NEXT: slli a3, a3, 2 |
| ; RV64ZVE32F-NEXT: add a3, a0, a3 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 3 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a3) |
| ; RV64ZVE32F-NEXT: andi a3, a2, 16 |
| ; RV64ZVE32F-NEXT: beqz a3, .LBB34_8 |
| ; RV64ZVE32F-NEXT: .LBB34_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a3, v10 |
| ; RV64ZVE32F-NEXT: and a3, a3, a1 |
| ; RV64ZVE32F-NEXT: slli a3, a3, 2 |
| ; RV64ZVE32F-NEXT: add a3, a0, a3 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a3) |
| ; RV64ZVE32F-NEXT: andi a3, a2, 32 |
| ; RV64ZVE32F-NEXT: bnez a3, .LBB34_9 |
| ; RV64ZVE32F-NEXT: j .LBB34_10 |
| ; RV64ZVE32F-NEXT: .LBB34_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a3, v10 |
| ; RV64ZVE32F-NEXT: and a3, a3, a1 |
| ; RV64ZVE32F-NEXT: slli a3, a3, 2 |
| ; RV64ZVE32F-NEXT: add a3, a0, a3 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a3) |
| ; RV64ZVE32F-NEXT: andi a2, a2, -128 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB34_12 |
| ; RV64ZVE32F-NEXT: .LBB34_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: and a1, a2, a1 |
| ; RV64ZVE32F-NEXT: slli a1, a1, 2 |
| ; RV64ZVE32F-NEXT: add a0, a0, a1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a0) |
| ; RV64ZVE32F-NEXT: ret |
| %eidxs = zext <8 x i16> %idxs to <8 x i32> |
| %ptrs = getelementptr inbounds i32, i32* %base, <8 x i32> %eidxs |
| call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %val, <8 x i32*> %ptrs, i32 4, <8 x i1> %m) |
| ret void |
| } |
| |
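| ; With native i32 indices no extension is needed on RV32; RV64 still |
| ; sign-extends them to the 64-bit pointer width (vsext.vf2) before scaling. |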
| define void @mscatter_baseidx_v8i32(<8 x i32> %val, i32* %base, <8 x i32> %idxs, <8 x i1> %m) { |
| ; RV32-LABEL: mscatter_baseidx_v8i32: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32-NEXT: vsll.vi v10, v10, 2 |
| ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_v8i32: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vsext.vf2 v12, v10 |
| ; RV64-NEXT: vsll.vi v12, v12, 2 |
| ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i32: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 1 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB35_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a2) |
| ; RV64ZVE32F-NEXT: .LBB35_2: # %else |
| ; RV64ZVE32F-NEXT: andi a2, a1, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB35_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: .LBB35_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB35_12 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %else4 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 8 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB35_13 |
| ; RV64ZVE32F-NEXT: .LBB35_6: # %else6 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB35_14 |
| ; RV64ZVE32F-NEXT: .LBB35_7: # %else8 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB35_9 |
| ; RV64ZVE32F-NEXT: .LBB35_8: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v12, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5 |
| ; RV64ZVE32F-NEXT: vse32.v v10, (a2) |
| ; RV64ZVE32F-NEXT: .LBB35_9: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v12, 2 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB35_15 |
| ; RV64ZVE32F-NEXT: # %bb.10: # %else12 |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: bnez a1, .LBB35_16 |
| ; RV64ZVE32F-NEXT: .LBB35_11: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB35_12: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 2 |
| ; RV64ZVE32F-NEXT: vse32.v v14, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 8 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB35_6 |
| ; RV64ZVE32F-NEXT: .LBB35_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 |
| ; RV64ZVE32F-NEXT: vse32.v v10, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB35_7 |
| ; RV64ZVE32F-NEXT: .LBB35_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 |
| ; RV64ZVE32F-NEXT: vse32.v v10, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB35_8 |
| ; RV64ZVE32F-NEXT: j .LBB35_9 |
| ; RV64ZVE32F-NEXT: .LBB35_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB35_11 |
| ; RV64ZVE32F-NEXT: .LBB35_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV64ZVE32F-NEXT: slli a1, a1, 2 |
| ; RV64ZVE32F-NEXT: add a0, a0, a1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a0) |
| ; RV64ZVE32F-NEXT: ret |
| %ptrs = getelementptr inbounds i32, i32* %base, <8 x i32> %idxs |
| call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %val, <8 x i32*> %ptrs, i32 4, <8 x i1> %m) |
| ret void |
| } |
| |
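| ; i64 element tests: the ZVE32F configurations have ELEN=32, so i64 vectors |
| ; cannot live in vector registers. The <N x i64> operands arrive in GPRs |
| ; (register pairs on RV32) or indirectly through memory, and the scatters |
| ; expand into conditional scalar sw/sd stores. |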
| declare void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64>, <1 x i64*>, i32, <1 x i1>) |
| |
| define void @mscatter_v1i64(<1 x i64> %val, <1 x i64*> %ptrs, <1 x i1> %m) { |
| ; RV32V-LABEL: mscatter_v1i64: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 1, e64, m1, ta, mu |
| ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_v1i64: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_v1i64: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: vsetvli a2, zero, e8, mf4, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.v.i v9, 0 |
| ; RV32ZVE32F-NEXT: vmerge.vim v9, v9, 1, v0 |
| ; RV32ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV32ZVE32F-NEXT: andi a2, a2, 1 |
| ; RV32ZVE32F-NEXT: beqz a2, .LBB36_2 |
| ; RV32ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a2, v8 |
| ; RV32ZVE32F-NEXT: sw a1, 4(a2) |
| ; RV32ZVE32F-NEXT: sw a0, 0(a2) |
| ; RV32ZVE32F-NEXT: .LBB36_2: # %else |
| ; RV32ZVE32F-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_v1i64: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetvli a2, zero, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.v.i v8, 0 |
| ; RV64ZVE32F-NEXT: vmerge.vim v8, v8, 1, v0 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 |
| ; RV64ZVE32F-NEXT: andi a2, a2, 1 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB36_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: sd a0, 0(a1) |
| ; RV64ZVE32F-NEXT: .LBB36_2: # %else |
| ; RV64ZVE32F-NEXT: ret |
| call void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64> %val, <1 x i64*> %ptrs, i32 8, <1 x i1> %m) |
| ret void |
| } |
| |
| declare void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64>, <2 x i64*>, i32, <2 x i1>) |
| |
| define void @mscatter_v2i64(<2 x i64> %val, <2 x i64*> %ptrs, <2 x i1> %m) { |
| ; RV32V-LABEL: mscatter_v2i64: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 2, e64, m1, ta, mu |
| ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_v2i64: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_v2i64: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: lw a2, 12(a0) |
| ; RV32ZVE32F-NEXT: lw a1, 8(a0) |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a3, v0 |
| ; RV32ZVE32F-NEXT: andi a4, a3, 1 |
| ; RV32ZVE32F-NEXT: bnez a4, .LBB37_3 |
| ; RV32ZVE32F-NEXT: # %bb.1: # %else |
| ; RV32ZVE32F-NEXT: andi a0, a3, 2 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB37_4 |
| ; RV32ZVE32F-NEXT: .LBB37_2: # %else2 |
| ; RV32ZVE32F-NEXT: ret |
| ; RV32ZVE32F-NEXT: .LBB37_3: # %cond.store |
| ; RV32ZVE32F-NEXT: lw a4, 4(a0) |
| ; RV32ZVE32F-NEXT: lw a0, 0(a0) |
| ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a5, v8 |
| ; RV32ZVE32F-NEXT: sw a4, 4(a5) |
| ; RV32ZVE32F-NEXT: sw a0, 0(a5) |
| ; RV32ZVE32F-NEXT: andi a0, a3, 2 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB37_2 |
| ; RV32ZVE32F-NEXT: .LBB37_4: # %cond.store1 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 1 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV32ZVE32F-NEXT: sw a2, 4(a0) |
| ; RV32ZVE32F-NEXT: sw a1, 0(a0) |
| ; RV32ZVE32F-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_v2i64: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a4, v0 |
| ; RV64ZVE32F-NEXT: andi a5, a4, 1 |
| ; RV64ZVE32F-NEXT: bnez a5, .LBB37_3 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a4, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB37_4 |
| ; RV64ZVE32F-NEXT: .LBB37_2: # %else2 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB37_3: # %cond.store |
| ; RV64ZVE32F-NEXT: sd a0, 0(a2) |
| ; RV64ZVE32F-NEXT: andi a0, a4, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB37_2 |
| ; RV64ZVE32F-NEXT: .LBB37_4: # %cond.store1 |
| ; RV64ZVE32F-NEXT: sd a1, 0(a3) |
| ; RV64ZVE32F-NEXT: ret |
| call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> %val, <2 x i64*> %ptrs, i32 8, <2 x i1> %m) |
| ret void |
| } |
| |
| declare void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64>, <4 x i64*>, i32, <4 x i1>) |
| |
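| ; At four elements everything still fits in argument registers, so the |
| ; ZVE32F expansions need no stack frame (contrast with the v8i64 case |
| ; further below, which spills s0-s2). |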
| define void @mscatter_v4i64(<4 x i64> %val, <4 x i64*> %ptrs, <4 x i1> %m) { |
| ; RV32V-LABEL: mscatter_v4i64: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, mu |
| ; RV32V-NEXT: vsoxei32.v v8, (zero), v10, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_v4i64: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_v4i64: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: lw a1, 28(a0) |
| ; RV32ZVE32F-NEXT: lw a2, 24(a0) |
| ; RV32ZVE32F-NEXT: lw a3, 20(a0) |
| ; RV32ZVE32F-NEXT: lw a4, 16(a0) |
| ; RV32ZVE32F-NEXT: lw a7, 12(a0) |
| ; RV32ZVE32F-NEXT: lw a6, 8(a0) |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a5, v0 |
| ; RV32ZVE32F-NEXT: andi t0, a5, 1 |
| ; RV32ZVE32F-NEXT: bnez t0, .LBB38_5 |
| ; RV32ZVE32F-NEXT: # %bb.1: # %else |
| ; RV32ZVE32F-NEXT: andi a0, a5, 2 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB38_6 |
| ; RV32ZVE32F-NEXT: .LBB38_2: # %else2 |
| ; RV32ZVE32F-NEXT: andi a0, a5, 4 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB38_7 |
| ; RV32ZVE32F-NEXT: .LBB38_3: # %else4 |
| ; RV32ZVE32F-NEXT: andi a0, a5, 8 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB38_8 |
| ; RV32ZVE32F-NEXT: .LBB38_4: # %else6 |
| ; RV32ZVE32F-NEXT: ret |
| ; RV32ZVE32F-NEXT: .LBB38_5: # %cond.store |
| ; RV32ZVE32F-NEXT: lw t0, 4(a0) |
| ; RV32ZVE32F-NEXT: lw a0, 0(a0) |
| ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s t1, v8 |
| ; RV32ZVE32F-NEXT: sw t0, 4(t1) |
| ; RV32ZVE32F-NEXT: sw a0, 0(t1) |
| ; RV32ZVE32F-NEXT: andi a0, a5, 2 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB38_2 |
| ; RV32ZVE32F-NEXT: .LBB38_6: # %cond.store1 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v9 |
| ; RV32ZVE32F-NEXT: sw a7, 4(a0) |
| ; RV32ZVE32F-NEXT: sw a6, 0(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a5, 4 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB38_3 |
| ; RV32ZVE32F-NEXT: .LBB38_7: # %cond.store3 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v9 |
| ; RV32ZVE32F-NEXT: sw a4, 0(a0) |
| ; RV32ZVE32F-NEXT: sw a3, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a5, 8 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB38_4 |
| ; RV32ZVE32F-NEXT: .LBB38_8: # %cond.store5 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV32ZVE32F-NEXT: sw a2, 0(a0) |
| ; RV32ZVE32F-NEXT: sw a1, 4(a0) |
| ; RV32ZVE32F-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_v4i64: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: ld a2, 24(a1) |
| ; RV64ZVE32F-NEXT: ld a4, 16(a1) |
| ; RV64ZVE32F-NEXT: ld a7, 8(a1) |
| ; RV64ZVE32F-NEXT: ld a3, 24(a0) |
| ; RV64ZVE32F-NEXT: ld a5, 16(a0) |
| ; RV64ZVE32F-NEXT: ld t0, 8(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a6, v0 |
| ; RV64ZVE32F-NEXT: andi t1, a6, 1 |
| ; RV64ZVE32F-NEXT: bnez t1, .LBB38_5 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a6, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB38_6 |
| ; RV64ZVE32F-NEXT: .LBB38_2: # %else2 |
| ; RV64ZVE32F-NEXT: andi a0, a6, 4 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB38_7 |
| ; RV64ZVE32F-NEXT: .LBB38_3: # %else4 |
| ; RV64ZVE32F-NEXT: andi a0, a6, 8 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB38_8 |
| ; RV64ZVE32F-NEXT: .LBB38_4: # %else6 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB38_5: # %cond.store |
| ; RV64ZVE32F-NEXT: ld a1, 0(a1) |
| ; RV64ZVE32F-NEXT: ld a0, 0(a0) |
| ; RV64ZVE32F-NEXT: sd a0, 0(a1) |
| ; RV64ZVE32F-NEXT: andi a0, a6, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB38_2 |
| ; RV64ZVE32F-NEXT: .LBB38_6: # %cond.store1 |
| ; RV64ZVE32F-NEXT: sd t0, 0(a7) |
| ; RV64ZVE32F-NEXT: andi a0, a6, 4 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB38_3 |
| ; RV64ZVE32F-NEXT: .LBB38_7: # %cond.store3 |
| ; RV64ZVE32F-NEXT: sd a5, 0(a4) |
| ; RV64ZVE32F-NEXT: andi a0, a6, 8 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB38_4 |
| ; RV64ZVE32F-NEXT: .LBB38_8: # %cond.store5 |
| ; RV64ZVE32F-NEXT: sd a3, 0(a2) |
| ; RV64ZVE32F-NEXT: ret |
| call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> %val, <4 x i64*> %ptrs, i32 8, <4 x i1> %m) |
| ret void |
| } |
| |
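| ; With a known all-ones mask the RV32V/RV64 lowerings drop the v0.t |
| ; predicate entirely. The ZVE32F paths still materialize the mask with |
| ; vmset.m and walk the branch chain, starting with an always-taken |
| ; `beqz zero`, which appears to be a missed optimization in this expansion. |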
| define void @mscatter_truemask_v4i64(<4 x i64> %val, <4 x i64*> %ptrs) { |
| ; RV32V-LABEL: mscatter_truemask_v4i64: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, mu |
| ; RV32V-NEXT: vsoxei32.v v8, (zero), v10 |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_truemask_v4i64: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (zero), v10 |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_truemask_v4i64: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: lw a1, 28(a0) |
| ; RV32ZVE32F-NEXT: lw a2, 24(a0) |
| ; RV32ZVE32F-NEXT: lw a3, 20(a0) |
| ; RV32ZVE32F-NEXT: lw a4, 16(a0) |
| ; RV32ZVE32F-NEXT: lw a7, 12(a0) |
| ; RV32ZVE32F-NEXT: lw a6, 8(a0) |
| ; RV32ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, mu |
| ; RV32ZVE32F-NEXT: vmset.m v9 |
| ; RV32ZVE32F-NEXT: vmv.x.s a5, v9 |
| ; RV32ZVE32F-NEXT: beqz zero, .LBB39_5 |
| ; RV32ZVE32F-NEXT: # %bb.1: # %else |
| ; RV32ZVE32F-NEXT: andi a0, a5, 2 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB39_6 |
| ; RV32ZVE32F-NEXT: .LBB39_2: # %else2 |
| ; RV32ZVE32F-NEXT: andi a0, a5, 4 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB39_7 |
| ; RV32ZVE32F-NEXT: .LBB39_3: # %else4 |
| ; RV32ZVE32F-NEXT: andi a0, a5, 8 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB39_8 |
| ; RV32ZVE32F-NEXT: .LBB39_4: # %else6 |
| ; RV32ZVE32F-NEXT: ret |
| ; RV32ZVE32F-NEXT: .LBB39_5: # %cond.store |
| ; RV32ZVE32F-NEXT: lw t0, 4(a0) |
| ; RV32ZVE32F-NEXT: lw a0, 0(a0) |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m1, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s t1, v8 |
| ; RV32ZVE32F-NEXT: sw t0, 4(t1) |
| ; RV32ZVE32F-NEXT: sw a0, 0(t1) |
| ; RV32ZVE32F-NEXT: andi a0, a5, 2 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB39_2 |
| ; RV32ZVE32F-NEXT: .LBB39_6: # %cond.store1 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v9 |
| ; RV32ZVE32F-NEXT: sw a7, 4(a0) |
| ; RV32ZVE32F-NEXT: sw a6, 0(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a5, 4 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB39_3 |
| ; RV32ZVE32F-NEXT: .LBB39_7: # %cond.store3 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v9 |
| ; RV32ZVE32F-NEXT: sw a4, 0(a0) |
| ; RV32ZVE32F-NEXT: sw a3, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a5, 8 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB39_4 |
| ; RV32ZVE32F-NEXT: .LBB39_8: # %cond.store5 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV32ZVE32F-NEXT: sw a2, 0(a0) |
| ; RV32ZVE32F-NEXT: sw a1, 4(a0) |
| ; RV32ZVE32F-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_truemask_v4i64: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: ld a2, 24(a1) |
| ; RV64ZVE32F-NEXT: ld a4, 16(a1) |
| ; RV64ZVE32F-NEXT: ld a7, 8(a1) |
| ; RV64ZVE32F-NEXT: ld a3, 24(a0) |
| ; RV64ZVE32F-NEXT: ld a5, 16(a0) |
| ; RV64ZVE32F-NEXT: ld t0, 8(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vmset.m v8 |
| ; RV64ZVE32F-NEXT: vmv.x.s a6, v8 |
| ; RV64ZVE32F-NEXT: beqz zero, .LBB39_5 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a6, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB39_6 |
| ; RV64ZVE32F-NEXT: .LBB39_2: # %else2 |
| ; RV64ZVE32F-NEXT: andi a0, a6, 4 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB39_7 |
| ; RV64ZVE32F-NEXT: .LBB39_3: # %else4 |
| ; RV64ZVE32F-NEXT: andi a0, a6, 8 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB39_8 |
| ; RV64ZVE32F-NEXT: .LBB39_4: # %else6 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB39_5: # %cond.store |
| ; RV64ZVE32F-NEXT: ld a1, 0(a1) |
| ; RV64ZVE32F-NEXT: ld a0, 0(a0) |
| ; RV64ZVE32F-NEXT: sd a0, 0(a1) |
| ; RV64ZVE32F-NEXT: andi a0, a6, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB39_2 |
| ; RV64ZVE32F-NEXT: .LBB39_6: # %cond.store1 |
| ; RV64ZVE32F-NEXT: sd t0, 0(a7) |
| ; RV64ZVE32F-NEXT: andi a0, a6, 4 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB39_3 |
| ; RV64ZVE32F-NEXT: .LBB39_7: # %cond.store3 |
| ; RV64ZVE32F-NEXT: sd a5, 0(a4) |
| ; RV64ZVE32F-NEXT: andi a0, a6, 8 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB39_4 |
| ; RV64ZVE32F-NEXT: .LBB39_8: # %cond.store5 |
| ; RV64ZVE32F-NEXT: sd a3, 0(a2) |
| ; RV64ZVE32F-NEXT: ret |
| %mhead = insertelement <4 x i1> poison, i1 1, i32 0 |
| %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer |
| call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> %val, <4 x i64*> %ptrs, i32 8, <4 x i1> %mtrue) |
| ret void |
| } |
| |
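| ; A constant all-false mask folds the scatter away completely; every |
| ; configuration emits nothing but `ret`. |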
| define void @mscatter_falsemask_v4i64(<4 x i64> %val, <4 x i64*> %ptrs) { |
| ; CHECK-LABEL: mscatter_falsemask_v4i64: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: ret |
| call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> %val, <4 x i64*> %ptrs, i32 8, <4 x i1> zeroinitializer) |
| ret void |
| } |
| |
| declare void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64>, <8 x i64*>, i32, <8 x i1>) |
| |
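| ; At eight i64 elements the scalarized ZVE32F expansion keeps more values |
| ; live than the argument/temporary registers can hold, so both the RV32 and |
| ; RV64 variants spill the callee-saved s0-s2 (note the .cfi_* directives). |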
| define void @mscatter_v8i64(<8 x i64> %val, <8 x i64*> %ptrs, <8 x i1> %m) { |
| ; RV32V-LABEL: mscatter_v8i64: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV32V-NEXT: vsoxei32.v v8, (zero), v12, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_v8i64: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_v8i64: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: addi sp, sp, -16 |
| ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 |
| ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: .cfi_offset s0, -4 |
| ; RV32ZVE32F-NEXT: .cfi_offset s1, -8 |
| ; RV32ZVE32F-NEXT: .cfi_offset s2, -12 |
| ; RV32ZVE32F-NEXT: lw a1, 60(a0) |
| ; RV32ZVE32F-NEXT: lw a2, 56(a0) |
| ; RV32ZVE32F-NEXT: lw a3, 52(a0) |
| ; RV32ZVE32F-NEXT: lw a4, 48(a0) |
| ; RV32ZVE32F-NEXT: lw a5, 44(a0) |
| ; RV32ZVE32F-NEXT: lw a7, 40(a0) |
| ; RV32ZVE32F-NEXT: lw t0, 36(a0) |
| ; RV32ZVE32F-NEXT: lw t1, 32(a0) |
| ; RV32ZVE32F-NEXT: lw t2, 28(a0) |
| ; RV32ZVE32F-NEXT: lw t3, 24(a0) |
| ; RV32ZVE32F-NEXT: lw t4, 20(a0) |
| ; RV32ZVE32F-NEXT: lw t5, 16(a0) |
| ; RV32ZVE32F-NEXT: lw s0, 12(a0) |
| ; RV32ZVE32F-NEXT: lw t6, 8(a0) |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a6, v0 |
| ; RV32ZVE32F-NEXT: andi s1, a6, 1 |
| ; RV32ZVE32F-NEXT: bnez s1, .LBB41_10 |
| ; RV32ZVE32F-NEXT: # %bb.1: # %else |
| ; RV32ZVE32F-NEXT: andi a0, a6, 2 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB41_11 |
| ; RV32ZVE32F-NEXT: .LBB41_2: # %else2 |
| ; RV32ZVE32F-NEXT: andi a0, a6, 4 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB41_12 |
| ; RV32ZVE32F-NEXT: .LBB41_3: # %else4 |
| ; RV32ZVE32F-NEXT: andi a0, a6, 8 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB41_13 |
| ; RV32ZVE32F-NEXT: .LBB41_4: # %else6 |
| ; RV32ZVE32F-NEXT: andi a0, a6, 16 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB41_14 |
| ; RV32ZVE32F-NEXT: .LBB41_5: # %else8 |
| ; RV32ZVE32F-NEXT: andi a0, a6, 32 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB41_15 |
| ; RV32ZVE32F-NEXT: .LBB41_6: # %else10 |
| ; RV32ZVE32F-NEXT: andi a0, a6, 64 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB41_16 |
| ; RV32ZVE32F-NEXT: .LBB41_7: # %else12 |
| ; RV32ZVE32F-NEXT: andi a0, a6, -128 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB41_9 |
| ; RV32ZVE32F-NEXT: .LBB41_8: # %cond.store13 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV32ZVE32F-NEXT: sw a2, 0(a0) |
| ; RV32ZVE32F-NEXT: sw a1, 4(a0) |
| ; RV32ZVE32F-NEXT: .LBB41_9: # %else14 |
| ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: addi sp, sp, 16 |
| ; RV32ZVE32F-NEXT: ret |
| ; RV32ZVE32F-NEXT: .LBB41_10: # %cond.store |
| ; RV32ZVE32F-NEXT: lw s1, 4(a0) |
| ; RV32ZVE32F-NEXT: lw a0, 0(a0) |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 |
| ; RV32ZVE32F-NEXT: sw s1, 4(s2) |
| ; RV32ZVE32F-NEXT: sw a0, 0(s2) |
| ; RV32ZVE32F-NEXT: andi a0, a6, 2 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB41_2 |
| ; RV32ZVE32F-NEXT: .LBB41_11: # %cond.store1 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw s0, 4(a0) |
| ; RV32ZVE32F-NEXT: sw t6, 0(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a6, 4 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB41_3 |
| ; RV32ZVE32F-NEXT: .LBB41_12: # %cond.store3 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw t5, 0(a0) |
| ; RV32ZVE32F-NEXT: sw t4, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a6, 8 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB41_4 |
| ; RV32ZVE32F-NEXT: .LBB41_13: # %cond.store5 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw t3, 0(a0) |
| ; RV32ZVE32F-NEXT: sw t2, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a6, 16 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB41_5 |
| ; RV32ZVE32F-NEXT: .LBB41_14: # %cond.store7 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw t1, 0(a0) |
| ; RV32ZVE32F-NEXT: sw t0, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a6, 32 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB41_6 |
| ; RV32ZVE32F-NEXT: .LBB41_15: # %cond.store9 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw a7, 0(a0) |
| ; RV32ZVE32F-NEXT: sw a5, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a6, 64 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB41_7 |
| ; RV32ZVE32F-NEXT: .LBB41_16: # %cond.store11 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw a4, 0(a0) |
| ; RV32ZVE32F-NEXT: sw a3, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a6, -128 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB41_8 |
| ; RV32ZVE32F-NEXT: j .LBB41_9 |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_v8i64: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: addi sp, sp, -32 |
| ; RV64ZVE32F-NEXT: .cfi_def_cfa_offset 32 |
| ; RV64ZVE32F-NEXT: sd s0, 24(sp) # 8-byte Folded Spill |
| ; RV64ZVE32F-NEXT: sd s1, 16(sp) # 8-byte Folded Spill |
| ; RV64ZVE32F-NEXT: sd s2, 8(sp) # 8-byte Folded Spill |
| ; RV64ZVE32F-NEXT: .cfi_offset s0, -8 |
| ; RV64ZVE32F-NEXT: .cfi_offset s1, -16 |
| ; RV64ZVE32F-NEXT: .cfi_offset s2, -24 |
| ; RV64ZVE32F-NEXT: ld a2, 56(a1) |
| ; RV64ZVE32F-NEXT: ld a4, 48(a1) |
| ; RV64ZVE32F-NEXT: ld a6, 40(a1) |
| ; RV64ZVE32F-NEXT: ld t1, 32(a1) |
| ; RV64ZVE32F-NEXT: ld t3, 24(a1) |
| ; RV64ZVE32F-NEXT: ld t5, 16(a1) |
| ; RV64ZVE32F-NEXT: ld s0, 8(a1) |
| ; RV64ZVE32F-NEXT: ld a3, 56(a0) |
| ; RV64ZVE32F-NEXT: ld a5, 48(a0) |
| ; RV64ZVE32F-NEXT: ld t0, 40(a0) |
| ; RV64ZVE32F-NEXT: ld t2, 32(a0) |
| ; RV64ZVE32F-NEXT: ld t4, 24(a0) |
| ; RV64ZVE32F-NEXT: ld t6, 16(a0) |
| ; RV64ZVE32F-NEXT: ld s1, 8(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a7, v0 |
| ; RV64ZVE32F-NEXT: andi s2, a7, 1 |
| ; RV64ZVE32F-NEXT: bnez s2, .LBB41_10 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a7, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB41_11 |
| ; RV64ZVE32F-NEXT: .LBB41_2: # %else2 |
| ; RV64ZVE32F-NEXT: andi a0, a7, 4 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB41_12 |
| ; RV64ZVE32F-NEXT: .LBB41_3: # %else4 |
| ; RV64ZVE32F-NEXT: andi a0, a7, 8 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB41_13 |
| ; RV64ZVE32F-NEXT: .LBB41_4: # %else6 |
| ; RV64ZVE32F-NEXT: andi a0, a7, 16 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB41_14 |
| ; RV64ZVE32F-NEXT: .LBB41_5: # %else8 |
| ; RV64ZVE32F-NEXT: andi a0, a7, 32 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB41_15 |
| ; RV64ZVE32F-NEXT: .LBB41_6: # %else10 |
| ; RV64ZVE32F-NEXT: andi a0, a7, 64 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB41_16 |
| ; RV64ZVE32F-NEXT: .LBB41_7: # %else12 |
| ; RV64ZVE32F-NEXT: andi a0, a7, -128 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB41_9 |
| ; RV64ZVE32F-NEXT: .LBB41_8: # %cond.store13 |
| ; RV64ZVE32F-NEXT: sd a3, 0(a2) |
| ; RV64ZVE32F-NEXT: .LBB41_9: # %else14 |
| ; RV64ZVE32F-NEXT: ld s0, 24(sp) # 8-byte Folded Reload |
| ; RV64ZVE32F-NEXT: ld s1, 16(sp) # 8-byte Folded Reload |
| ; RV64ZVE32F-NEXT: ld s2, 8(sp) # 8-byte Folded Reload |
| ; RV64ZVE32F-NEXT: addi sp, sp, 32 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB41_10: # %cond.store |
| ; RV64ZVE32F-NEXT: ld a1, 0(a1) |
| ; RV64ZVE32F-NEXT: ld a0, 0(a0) |
| ; RV64ZVE32F-NEXT: sd a0, 0(a1) |
| ; RV64ZVE32F-NEXT: andi a0, a7, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB41_2 |
| ; RV64ZVE32F-NEXT: .LBB41_11: # %cond.store1 |
| ; RV64ZVE32F-NEXT: sd s1, 0(s0) |
| ; RV64ZVE32F-NEXT: andi a0, a7, 4 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB41_3 |
| ; RV64ZVE32F-NEXT: .LBB41_12: # %cond.store3 |
| ; RV64ZVE32F-NEXT: sd t6, 0(t5) |
| ; RV64ZVE32F-NEXT: andi a0, a7, 8 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB41_4 |
| ; RV64ZVE32F-NEXT: .LBB41_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: sd t4, 0(t3) |
| ; RV64ZVE32F-NEXT: andi a0, a7, 16 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB41_5 |
| ; RV64ZVE32F-NEXT: .LBB41_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: sd t2, 0(t1) |
| ; RV64ZVE32F-NEXT: andi a0, a7, 32 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB41_6 |
| ; RV64ZVE32F-NEXT: .LBB41_15: # %cond.store9 |
| ; RV64ZVE32F-NEXT: sd t0, 0(a6) |
| ; RV64ZVE32F-NEXT: andi a0, a7, 64 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB41_7 |
| ; RV64ZVE32F-NEXT: .LBB41_16: # %cond.store11 |
| ; RV64ZVE32F-NEXT: sd a5, 0(a4) |
| ; RV64ZVE32F-NEXT: andi a0, a7, -128 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB41_8 |
| ; RV64ZVE32F-NEXT: j .LBB41_9 |
| call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> %val, <8 x i64*> %ptrs, i32 8, <8 x i1> %m) |
| ret void |
| } |
| |
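| ; i8-indexed i64 scatters: RV32V/RV64 widen the indices with vsext.vf4/vf8 |
| ; and scale by 8. RV32ZVE32F can still form the 32-bit addresses vectorially |
| ; (vsext.vf4 + vsll.vi + vadd.vx) and then extracts one address per lane; |
| ; RV64ZVE32F falls back to fully scalar address arithmetic. |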
| define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, i64* %base, <8 x i8> %idxs, <8 x i1> %m) { |
| ; RV32V-LABEL: mscatter_baseidx_v8i8_v8i64: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32V-NEXT: vsext.vf4 v14, v12 |
| ; RV32V-NEXT: vsll.vi v12, v14, 3 |
| ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu |
| ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_v8i8_v8i64: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vsext.vf8 v16, v12 |
| ; RV64-NEXT: vsll.vi v12, v16, 3 |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_baseidx_v8i8_v8i64: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: addi sp, sp, -16 |
| ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 |
| ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: .cfi_offset s0, -4 |
| ; RV32ZVE32F-NEXT: .cfi_offset s1, -8 |
| ; RV32ZVE32F-NEXT: .cfi_offset s2, -12 |
| ; RV32ZVE32F-NEXT: lw a2, 60(a0) |
| ; RV32ZVE32F-NEXT: lw a3, 56(a0) |
| ; RV32ZVE32F-NEXT: lw a4, 52(a0) |
| ; RV32ZVE32F-NEXT: lw a5, 48(a0) |
| ; RV32ZVE32F-NEXT: lw a6, 44(a0) |
| ; RV32ZVE32F-NEXT: lw a7, 40(a0) |
| ; RV32ZVE32F-NEXT: lw t0, 36(a0) |
| ; RV32ZVE32F-NEXT: lw t1, 32(a0) |
| ; RV32ZVE32F-NEXT: lw t2, 28(a0) |
| ; RV32ZVE32F-NEXT: lw t3, 24(a0) |
| ; RV32ZVE32F-NEXT: lw t4, 20(a0) |
| ; RV32ZVE32F-NEXT: lw t5, 16(a0) |
| ; RV32ZVE32F-NEXT: lw s0, 12(a0) |
| ; RV32ZVE32F-NEXT: lw t6, 8(a0) |
| ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8 |
| ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 |
| ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v0 |
| ; RV32ZVE32F-NEXT: andi s1, a1, 1 |
| ; RV32ZVE32F-NEXT: bnez s1, .LBB42_10 |
| ; RV32ZVE32F-NEXT: # %bb.1: # %else |
| ; RV32ZVE32F-NEXT: andi a0, a1, 2 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB42_11 |
| ; RV32ZVE32F-NEXT: .LBB42_2: # %else2 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 4 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB42_12 |
| ; RV32ZVE32F-NEXT: .LBB42_3: # %else4 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 8 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB42_13 |
| ; RV32ZVE32F-NEXT: .LBB42_4: # %else6 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 16 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB42_14 |
| ; RV32ZVE32F-NEXT: .LBB42_5: # %else8 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 32 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB42_15 |
| ; RV32ZVE32F-NEXT: .LBB42_6: # %else10 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 64 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB42_16 |
| ; RV32ZVE32F-NEXT: .LBB42_7: # %else12 |
| ; RV32ZVE32F-NEXT: andi a0, a1, -128 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB42_9 |
| ; RV32ZVE32F-NEXT: .LBB42_8: # %cond.store13 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV32ZVE32F-NEXT: sw a3, 0(a0) |
| ; RV32ZVE32F-NEXT: sw a2, 4(a0) |
| ; RV32ZVE32F-NEXT: .LBB42_9: # %else14 |
| ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: addi sp, sp, 16 |
| ; RV32ZVE32F-NEXT: ret |
| ; RV32ZVE32F-NEXT: .LBB42_10: # %cond.store |
| ; RV32ZVE32F-NEXT: lw s1, 4(a0) |
| ; RV32ZVE32F-NEXT: lw a0, 0(a0) |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 |
| ; RV32ZVE32F-NEXT: sw s1, 4(s2) |
| ; RV32ZVE32F-NEXT: sw a0, 0(s2) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 2 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB42_2 |
| ; RV32ZVE32F-NEXT: .LBB42_11: # %cond.store1 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw s0, 4(a0) |
| ; RV32ZVE32F-NEXT: sw t6, 0(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 4 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB42_3 |
| ; RV32ZVE32F-NEXT: .LBB42_12: # %cond.store3 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw t5, 0(a0) |
| ; RV32ZVE32F-NEXT: sw t4, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 8 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB42_4 |
| ; RV32ZVE32F-NEXT: .LBB42_13: # %cond.store5 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw t3, 0(a0) |
| ; RV32ZVE32F-NEXT: sw t2, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 16 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB42_5 |
| ; RV32ZVE32F-NEXT: .LBB42_14: # %cond.store7 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw t1, 0(a0) |
| ; RV32ZVE32F-NEXT: sw t0, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 32 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB42_6 |
| ; RV32ZVE32F-NEXT: .LBB42_15: # %cond.store9 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw a7, 0(a0) |
| ; RV32ZVE32F-NEXT: sw a6, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 64 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB42_7 |
| ; RV32ZVE32F-NEXT: .LBB42_16: # %cond.store11 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw a5, 0(a0) |
| ; RV32ZVE32F-NEXT: sw a4, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, -128 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB42_8 |
| ; RV32ZVE32F-NEXT: j .LBB42_9 |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8i64: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: ld a2, 56(a0) |
| ; RV64ZVE32F-NEXT: ld a3, 48(a0) |
| ; RV64ZVE32F-NEXT: ld a5, 40(a0) |
| ; RV64ZVE32F-NEXT: ld a6, 32(a0) |
| ; RV64ZVE32F-NEXT: ld a7, 24(a0) |
| ; RV64ZVE32F-NEXT: ld t0, 16(a0) |
| ; RV64ZVE32F-NEXT: ld t1, 8(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a4, v0 |
| ; RV64ZVE32F-NEXT: andi t2, a4, 1 |
| ; RV64ZVE32F-NEXT: beqz t2, .LBB42_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: ld a0, 0(a0) |
| ; RV64ZVE32F-NEXT: vmv.x.s t2, v8 |
| ; RV64ZVE32F-NEXT: slli t2, t2, 3 |
| ; RV64ZVE32F-NEXT: add t2, a1, t2 |
| ; RV64ZVE32F-NEXT: sd a0, 0(t2) |
| ; RV64ZVE32F-NEXT: .LBB42_2: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a4, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB42_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd t1, 0(a0) |
| ; RV64ZVE32F-NEXT: .LBB42_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a0, a4, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB42_6 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd t0, 0(a0) |
| ; RV64ZVE32F-NEXT: .LBB42_6: # %else4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a0, a4, 8 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB42_13 |
| ; RV64ZVE32F-NEXT: # %bb.7: # %else6 |
| ; RV64ZVE32F-NEXT: andi a0, a4, 16 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB42_14 |
| ; RV64ZVE32F-NEXT: .LBB42_8: # %else8 |
| ; RV64ZVE32F-NEXT: andi a0, a4, 32 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB42_10 |
| ; RV64ZVE32F-NEXT: .LBB42_9: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a5, 0(a0) |
| ; RV64ZVE32F-NEXT: .LBB42_10: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a0, a4, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB42_15 |
| ; RV64ZVE32F-NEXT: # %bb.11: # %else12 |
| ; RV64ZVE32F-NEXT: andi a0, a4, -128 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB42_16 |
| ; RV64ZVE32F-NEXT: .LBB42_12: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB42_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a7, 0(a0) |
| ; RV64ZVE32F-NEXT: andi a0, a4, 16 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB42_8 |
| ; RV64ZVE32F-NEXT: .LBB42_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a6, 0(a0) |
| ; RV64ZVE32F-NEXT: andi a0, a4, 32 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB42_9 |
| ; RV64ZVE32F-NEXT: j .LBB42_10 |
| ; RV64ZVE32F-NEXT: .LBB42_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a3, 0(a0) |
| ; RV64ZVE32F-NEXT: andi a0, a4, -128 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB42_12 |
| ; RV64ZVE32F-NEXT: .LBB42_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a2, 0(a0) |
| ; RV64ZVE32F-NEXT: ret |
| %ptrs = getelementptr inbounds i64, i64* %base, <8 x i8> %idxs |
| call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> %val, <8 x i64*> %ptrs, i32 8, <8 x i1> %m) |
| ret void |
| } |
| |
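| ; The explicit sext-to-i64 variant makes RV32V compute the offsets at e64 |
| ; (vsext.vf8 + vsll.vi) and then truncate them back to 32 bits with |
| ; vncvt.x.x.w so that vsoxei32 can consume them. |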
| define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, i64* %base, <8 x i8> %idxs, <8 x i1> %m) { |
| ; RV32V-LABEL: mscatter_baseidx_sext_v8i8_v8i64: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV32V-NEXT: vsext.vf8 v16, v12 |
| ; RV32V-NEXT: vsll.vi v12, v16, 3 |
| ; RV32V-NEXT: vsetvli zero, zero, e32, m2, ta, mu |
| ; RV32V-NEXT: vncvt.x.x.w v16, v12 |
| ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu |
| ; RV32V-NEXT: vsoxei32.v v8, (a0), v16, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_sext_v8i8_v8i64: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vsext.vf8 v16, v12 |
| ; RV64-NEXT: vsll.vi v12, v16, 3 |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8i64: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: addi sp, sp, -16 |
| ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 |
| ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: .cfi_offset s0, -4 |
| ; RV32ZVE32F-NEXT: .cfi_offset s1, -8 |
| ; RV32ZVE32F-NEXT: .cfi_offset s2, -12 |
| ; RV32ZVE32F-NEXT: lw a2, 60(a0) |
| ; RV32ZVE32F-NEXT: lw a3, 56(a0) |
| ; RV32ZVE32F-NEXT: lw a4, 52(a0) |
| ; RV32ZVE32F-NEXT: lw a5, 48(a0) |
| ; RV32ZVE32F-NEXT: lw a6, 44(a0) |
| ; RV32ZVE32F-NEXT: lw a7, 40(a0) |
| ; RV32ZVE32F-NEXT: lw t0, 36(a0) |
| ; RV32ZVE32F-NEXT: lw t1, 32(a0) |
| ; RV32ZVE32F-NEXT: lw t2, 28(a0) |
| ; RV32ZVE32F-NEXT: lw t3, 24(a0) |
| ; RV32ZVE32F-NEXT: lw t4, 20(a0) |
| ; RV32ZVE32F-NEXT: lw t5, 16(a0) |
| ; RV32ZVE32F-NEXT: lw s0, 12(a0) |
| ; RV32ZVE32F-NEXT: lw t6, 8(a0) |
| ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8 |
| ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 |
| ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v0 |
| ; RV32ZVE32F-NEXT: andi s1, a1, 1 |
| ; RV32ZVE32F-NEXT: bnez s1, .LBB43_10 |
| ; RV32ZVE32F-NEXT: # %bb.1: # %else |
| ; RV32ZVE32F-NEXT: andi a0, a1, 2 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB43_11 |
| ; RV32ZVE32F-NEXT: .LBB43_2: # %else2 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 4 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB43_12 |
| ; RV32ZVE32F-NEXT: .LBB43_3: # %else4 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 8 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB43_13 |
| ; RV32ZVE32F-NEXT: .LBB43_4: # %else6 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 16 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB43_14 |
| ; RV32ZVE32F-NEXT: .LBB43_5: # %else8 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 32 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB43_15 |
| ; RV32ZVE32F-NEXT: .LBB43_6: # %else10 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 64 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB43_16 |
| ; RV32ZVE32F-NEXT: .LBB43_7: # %else12 |
| ; RV32ZVE32F-NEXT: andi a0, a1, -128 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB43_9 |
| ; RV32ZVE32F-NEXT: .LBB43_8: # %cond.store13 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV32ZVE32F-NEXT: sw a3, 0(a0) |
| ; RV32ZVE32F-NEXT: sw a2, 4(a0) |
| ; RV32ZVE32F-NEXT: .LBB43_9: # %else14 |
| ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: addi sp, sp, 16 |
| ; RV32ZVE32F-NEXT: ret |
| ; RV32ZVE32F-NEXT: .LBB43_10: # %cond.store |
| ; RV32ZVE32F-NEXT: lw s1, 4(a0) |
| ; RV32ZVE32F-NEXT: lw a0, 0(a0) |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 |
| ; RV32ZVE32F-NEXT: sw s1, 4(s2) |
| ; RV32ZVE32F-NEXT: sw a0, 0(s2) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 2 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB43_2 |
| ; RV32ZVE32F-NEXT: .LBB43_11: # %cond.store1 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw s0, 4(a0) |
| ; RV32ZVE32F-NEXT: sw t6, 0(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 4 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB43_3 |
| ; RV32ZVE32F-NEXT: .LBB43_12: # %cond.store3 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw t5, 0(a0) |
| ; RV32ZVE32F-NEXT: sw t4, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 8 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB43_4 |
| ; RV32ZVE32F-NEXT: .LBB43_13: # %cond.store5 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw t3, 0(a0) |
| ; RV32ZVE32F-NEXT: sw t2, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 16 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB43_5 |
| ; RV32ZVE32F-NEXT: .LBB43_14: # %cond.store7 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw t1, 0(a0) |
| ; RV32ZVE32F-NEXT: sw t0, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 32 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB43_6 |
| ; RV32ZVE32F-NEXT: .LBB43_15: # %cond.store9 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw a7, 0(a0) |
| ; RV32ZVE32F-NEXT: sw a6, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 64 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB43_7 |
| ; RV32ZVE32F-NEXT: .LBB43_16: # %cond.store11 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw a5, 0(a0) |
| ; RV32ZVE32F-NEXT: sw a4, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, -128 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB43_8 |
| ; RV32ZVE32F-NEXT: j .LBB43_9 |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8i64: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: ld a2, 56(a0) |
| ; RV64ZVE32F-NEXT: ld a3, 48(a0) |
| ; RV64ZVE32F-NEXT: ld a5, 40(a0) |
| ; RV64ZVE32F-NEXT: ld a6, 32(a0) |
| ; RV64ZVE32F-NEXT: ld a7, 24(a0) |
| ; RV64ZVE32F-NEXT: ld t0, 16(a0) |
| ; RV64ZVE32F-NEXT: ld t1, 8(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a4, v0 |
| ; RV64ZVE32F-NEXT: andi t2, a4, 1 |
| ; RV64ZVE32F-NEXT: beqz t2, .LBB43_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: ld a0, 0(a0) |
| ; RV64ZVE32F-NEXT: vmv.x.s t2, v8 |
| ; RV64ZVE32F-NEXT: slli t2, t2, 3 |
| ; RV64ZVE32F-NEXT: add t2, a1, t2 |
| ; RV64ZVE32F-NEXT: sd a0, 0(t2) |
| ; RV64ZVE32F-NEXT: .LBB43_2: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a4, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB43_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd t1, 0(a0) |
| ; RV64ZVE32F-NEXT: .LBB43_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a0, a4, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB43_6 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd t0, 0(a0) |
| ; RV64ZVE32F-NEXT: .LBB43_6: # %else4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a0, a4, 8 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB43_13 |
| ; RV64ZVE32F-NEXT: # %bb.7: # %else6 |
| ; RV64ZVE32F-NEXT: andi a0, a4, 16 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB43_14 |
| ; RV64ZVE32F-NEXT: .LBB43_8: # %else8 |
| ; RV64ZVE32F-NEXT: andi a0, a4, 32 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB43_10 |
| ; RV64ZVE32F-NEXT: .LBB43_9: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a5, 0(a0) |
| ; RV64ZVE32F-NEXT: .LBB43_10: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a0, a4, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB43_15 |
| ; RV64ZVE32F-NEXT: # %bb.11: # %else12 |
| ; RV64ZVE32F-NEXT: andi a0, a4, -128 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB43_16 |
| ; RV64ZVE32F-NEXT: .LBB43_12: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB43_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a7, 0(a0) |
| ; RV64ZVE32F-NEXT: andi a0, a4, 16 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB43_8 |
| ; RV64ZVE32F-NEXT: .LBB43_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a6, 0(a0) |
| ; RV64ZVE32F-NEXT: andi a0, a4, 32 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB43_9 |
| ; RV64ZVE32F-NEXT: j .LBB43_10 |
| ; RV64ZVE32F-NEXT: .LBB43_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a3, 0(a0) |
| ; RV64ZVE32F-NEXT: andi a0, a4, -128 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB43_12 |
| ; RV64ZVE32F-NEXT: .LBB43_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a2, 0(a0) |
| ; RV64ZVE32F-NEXT: ret |
| %eidxs = sext <8 x i8> %idxs to <8 x i64> |
| %ptrs = getelementptr inbounds i64, i64* %base, <8 x i64> %eidxs |
| call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> %val, <8 x i64*> %ptrs, i32 8, <8 x i1> %m) |
| ret void |
| } |
| |
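| ; The zext-to-i64 variant mirrors the sext case but uses vzext.vf8; on the |
| ; RV64ZVE32F scalar path each extracted i8 index is masked with `andi 255` |
| ; before the shift-and-add address computation. |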
| define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, i64* %base, <8 x i8> %idxs, <8 x i1> %m) { |
| ; RV32V-LABEL: mscatter_baseidx_zext_v8i8_v8i64: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV32V-NEXT: vzext.vf8 v16, v12 |
| ; RV32V-NEXT: vsll.vi v12, v16, 3 |
| ; RV32V-NEXT: vsetvli zero, zero, e32, m2, ta, mu |
| ; RV32V-NEXT: vncvt.x.x.w v16, v12 |
| ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu |
| ; RV32V-NEXT: vsoxei32.v v8, (a0), v16, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8i64: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vzext.vf8 v16, v12 |
| ; RV64-NEXT: vsll.vi v12, v16, 3 |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i64: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: addi sp, sp, -16 |
| ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 |
| ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: .cfi_offset s0, -4 |
| ; RV32ZVE32F-NEXT: .cfi_offset s1, -8 |
| ; RV32ZVE32F-NEXT: .cfi_offset s2, -12 |
| ; RV32ZVE32F-NEXT: lw a2, 60(a0) |
| ; RV32ZVE32F-NEXT: lw a3, 56(a0) |
| ; RV32ZVE32F-NEXT: lw a4, 52(a0) |
| ; RV32ZVE32F-NEXT: lw a5, 48(a0) |
| ; RV32ZVE32F-NEXT: lw a6, 44(a0) |
| ; RV32ZVE32F-NEXT: lw a7, 40(a0) |
| ; RV32ZVE32F-NEXT: lw t0, 36(a0) |
| ; RV32ZVE32F-NEXT: lw t1, 32(a0) |
| ; RV32ZVE32F-NEXT: lw t2, 28(a0) |
| ; RV32ZVE32F-NEXT: lw t3, 24(a0) |
| ; RV32ZVE32F-NEXT: lw t4, 20(a0) |
| ; RV32ZVE32F-NEXT: lw t5, 16(a0) |
| ; RV32ZVE32F-NEXT: lw s0, 12(a0) |
| ; RV32ZVE32F-NEXT: lw t6, 8(a0) |
| ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vzext.vf4 v10, v8 |
| ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 |
| ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v0 |
| ; RV32ZVE32F-NEXT: andi s1, a1, 1 |
| ; RV32ZVE32F-NEXT: bnez s1, .LBB44_10 |
| ; RV32ZVE32F-NEXT: # %bb.1: # %else |
| ; RV32ZVE32F-NEXT: andi a0, a1, 2 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB44_11 |
| ; RV32ZVE32F-NEXT: .LBB44_2: # %else2 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 4 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB44_12 |
| ; RV32ZVE32F-NEXT: .LBB44_3: # %else4 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 8 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB44_13 |
| ; RV32ZVE32F-NEXT: .LBB44_4: # %else6 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 16 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB44_14 |
| ; RV32ZVE32F-NEXT: .LBB44_5: # %else8 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 32 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB44_15 |
| ; RV32ZVE32F-NEXT: .LBB44_6: # %else10 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 64 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB44_16 |
| ; RV32ZVE32F-NEXT: .LBB44_7: # %else12 |
| ; RV32ZVE32F-NEXT: andi a0, a1, -128 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB44_9 |
| ; RV32ZVE32F-NEXT: .LBB44_8: # %cond.store13 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV32ZVE32F-NEXT: sw a3, 0(a0) |
| ; RV32ZVE32F-NEXT: sw a2, 4(a0) |
| ; RV32ZVE32F-NEXT: .LBB44_9: # %else14 |
| ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: addi sp, sp, 16 |
| ; RV32ZVE32F-NEXT: ret |
| ; RV32ZVE32F-NEXT: .LBB44_10: # %cond.store |
| ; RV32ZVE32F-NEXT: lw s1, 4(a0) |
| ; RV32ZVE32F-NEXT: lw a0, 0(a0) |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 |
| ; RV32ZVE32F-NEXT: sw s1, 4(s2) |
| ; RV32ZVE32F-NEXT: sw a0, 0(s2) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 2 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB44_2 |
| ; RV32ZVE32F-NEXT: .LBB44_11: # %cond.store1 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw s0, 4(a0) |
| ; RV32ZVE32F-NEXT: sw t6, 0(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 4 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB44_3 |
| ; RV32ZVE32F-NEXT: .LBB44_12: # %cond.store3 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw t5, 0(a0) |
| ; RV32ZVE32F-NEXT: sw t4, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 8 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB44_4 |
| ; RV32ZVE32F-NEXT: .LBB44_13: # %cond.store5 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw t3, 0(a0) |
| ; RV32ZVE32F-NEXT: sw t2, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 16 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB44_5 |
| ; RV32ZVE32F-NEXT: .LBB44_14: # %cond.store7 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw t1, 0(a0) |
| ; RV32ZVE32F-NEXT: sw t0, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 32 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB44_6 |
| ; RV32ZVE32F-NEXT: .LBB44_15: # %cond.store9 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw a7, 0(a0) |
| ; RV32ZVE32F-NEXT: sw a6, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 64 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB44_7 |
| ; RV32ZVE32F-NEXT: .LBB44_16: # %cond.store11 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw a5, 0(a0) |
| ; RV32ZVE32F-NEXT: sw a4, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, -128 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB44_8 |
| ; RV32ZVE32F-NEXT: j .LBB44_9 |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i64: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: ld a2, 56(a0) |
| ; RV64ZVE32F-NEXT: ld a3, 48(a0) |
| ; RV64ZVE32F-NEXT: ld a5, 40(a0) |
| ; RV64ZVE32F-NEXT: ld a6, 32(a0) |
| ; RV64ZVE32F-NEXT: ld a7, 24(a0) |
| ; RV64ZVE32F-NEXT: ld t0, 16(a0) |
| ; RV64ZVE32F-NEXT: ld t1, 8(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a4, v0 |
| ; RV64ZVE32F-NEXT: andi t2, a4, 1 |
| ; RV64ZVE32F-NEXT: beqz t2, .LBB44_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: ld a0, 0(a0) |
| ; RV64ZVE32F-NEXT: vmv.x.s t2, v8 |
| ; RV64ZVE32F-NEXT: andi t2, t2, 255 |
| ; RV64ZVE32F-NEXT: slli t2, t2, 3 |
| ; RV64ZVE32F-NEXT: add t2, a1, t2 |
| ; RV64ZVE32F-NEXT: sd a0, 0(t2) |
| ; RV64ZVE32F-NEXT: .LBB44_2: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a4, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB44_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 |
| ; RV64ZVE32F-NEXT: andi a0, a0, 255 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd t1, 0(a0) |
| ; RV64ZVE32F-NEXT: .LBB44_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a0, a4, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB44_6 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 |
| ; RV64ZVE32F-NEXT: andi a0, a0, 255 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd t0, 0(a0) |
| ; RV64ZVE32F-NEXT: .LBB44_6: # %else4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a0, a4, 8 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB44_13 |
| ; RV64ZVE32F-NEXT: # %bb.7: # %else6 |
| ; RV64ZVE32F-NEXT: andi a0, a4, 16 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB44_14 |
| ; RV64ZVE32F-NEXT: .LBB44_8: # %else8 |
| ; RV64ZVE32F-NEXT: andi a0, a4, 32 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB44_10 |
| ; RV64ZVE32F-NEXT: .LBB44_9: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 |
| ; RV64ZVE32F-NEXT: andi a0, a0, 255 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a5, 0(a0) |
| ; RV64ZVE32F-NEXT: .LBB44_10: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a0, a4, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB44_15 |
| ; RV64ZVE32F-NEXT: # %bb.11: # %else12 |
| ; RV64ZVE32F-NEXT: andi a0, a4, -128 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB44_16 |
| ; RV64ZVE32F-NEXT: .LBB44_12: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB44_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 |
| ; RV64ZVE32F-NEXT: andi a0, a0, 255 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a7, 0(a0) |
| ; RV64ZVE32F-NEXT: andi a0, a4, 16 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB44_8 |
| ; RV64ZVE32F-NEXT: .LBB44_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV64ZVE32F-NEXT: andi a0, a0, 255 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a6, 0(a0) |
| ; RV64ZVE32F-NEXT: andi a0, a4, 32 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB44_9 |
| ; RV64ZVE32F-NEXT: j .LBB44_10 |
| ; RV64ZVE32F-NEXT: .LBB44_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV64ZVE32F-NEXT: andi a0, a0, 255 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a3, 0(a0) |
| ; RV64ZVE32F-NEXT: andi a0, a4, -128 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB44_12 |
| ; RV64ZVE32F-NEXT: .LBB44_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV64ZVE32F-NEXT: andi a0, a0, 255 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a2, 0(a0) |
| ; RV64ZVE32F-NEXT: ret |
| %eidxs = zext <8 x i8> %idxs to <8 x i64> |
| %ptrs = getelementptr inbounds i64, i64* %base, <8 x i64> %eidxs |
| call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> %val, <8 x i64*> %ptrs, i32 8, <8 x i1> %m) |
| ret void |
| } |
| |
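| ; The i16 indices below feed the GEP directly; per GEP semantics this is |
| ; equivalent to sign-extending them to i64 before scaling by 8 (the i64 |
| ; element size). RV64ZVE32F cannot hold 64-bit vector elements (ELEN=32), |
| ; so it expands into per-lane mask tests and scalar stores rather than a |
| ; single indexed store. |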
| define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, i64* %base, <8 x i16> %idxs, <8 x i1> %m) { |
| ; RV32V-LABEL: mscatter_baseidx_v8i16_v8i64: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32V-NEXT: vsext.vf2 v14, v12 |
| ; RV32V-NEXT: vsll.vi v12, v14, 3 |
| ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu |
| ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_v8i16_v8i64: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vsext.vf4 v16, v12 |
| ; RV64-NEXT: vsll.vi v12, v16, 3 |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_baseidx_v8i16_v8i64: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: addi sp, sp, -16 |
| ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 |
| ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: .cfi_offset s0, -4 |
| ; RV32ZVE32F-NEXT: .cfi_offset s1, -8 |
| ; RV32ZVE32F-NEXT: .cfi_offset s2, -12 |
| ; RV32ZVE32F-NEXT: lw a2, 60(a0) |
| ; RV32ZVE32F-NEXT: lw a3, 56(a0) |
| ; RV32ZVE32F-NEXT: lw a4, 52(a0) |
| ; RV32ZVE32F-NEXT: lw a5, 48(a0) |
| ; RV32ZVE32F-NEXT: lw a6, 44(a0) |
| ; RV32ZVE32F-NEXT: lw a7, 40(a0) |
| ; RV32ZVE32F-NEXT: lw t0, 36(a0) |
| ; RV32ZVE32F-NEXT: lw t1, 32(a0) |
| ; RV32ZVE32F-NEXT: lw t2, 28(a0) |
| ; RV32ZVE32F-NEXT: lw t3, 24(a0) |
| ; RV32ZVE32F-NEXT: lw t4, 20(a0) |
| ; RV32ZVE32F-NEXT: lw t5, 16(a0) |
| ; RV32ZVE32F-NEXT: lw s0, 12(a0) |
| ; RV32ZVE32F-NEXT: lw t6, 8(a0) |
| ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8 |
| ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 |
| ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v0 |
| ; RV32ZVE32F-NEXT: andi s1, a1, 1 |
| ; RV32ZVE32F-NEXT: bnez s1, .LBB45_10 |
| ; RV32ZVE32F-NEXT: # %bb.1: # %else |
| ; RV32ZVE32F-NEXT: andi a0, a1, 2 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB45_11 |
| ; RV32ZVE32F-NEXT: .LBB45_2: # %else2 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 4 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB45_12 |
| ; RV32ZVE32F-NEXT: .LBB45_3: # %else4 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 8 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB45_13 |
| ; RV32ZVE32F-NEXT: .LBB45_4: # %else6 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 16 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB45_14 |
| ; RV32ZVE32F-NEXT: .LBB45_5: # %else8 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 32 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB45_15 |
| ; RV32ZVE32F-NEXT: .LBB45_6: # %else10 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 64 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB45_16 |
| ; RV32ZVE32F-NEXT: .LBB45_7: # %else12 |
| ; RV32ZVE32F-NEXT: andi a0, a1, -128 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB45_9 |
| ; RV32ZVE32F-NEXT: .LBB45_8: # %cond.store13 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV32ZVE32F-NEXT: sw a3, 0(a0) |
| ; RV32ZVE32F-NEXT: sw a2, 4(a0) |
| ; RV32ZVE32F-NEXT: .LBB45_9: # %else14 |
| ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: addi sp, sp, 16 |
| ; RV32ZVE32F-NEXT: ret |
| ; RV32ZVE32F-NEXT: .LBB45_10: # %cond.store |
| ; RV32ZVE32F-NEXT: lw s1, 4(a0) |
| ; RV32ZVE32F-NEXT: lw a0, 0(a0) |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 |
| ; RV32ZVE32F-NEXT: sw s1, 4(s2) |
| ; RV32ZVE32F-NEXT: sw a0, 0(s2) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 2 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB45_2 |
| ; RV32ZVE32F-NEXT: .LBB45_11: # %cond.store1 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw s0, 4(a0) |
| ; RV32ZVE32F-NEXT: sw t6, 0(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 4 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB45_3 |
| ; RV32ZVE32F-NEXT: .LBB45_12: # %cond.store3 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw t5, 0(a0) |
| ; RV32ZVE32F-NEXT: sw t4, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 8 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB45_4 |
| ; RV32ZVE32F-NEXT: .LBB45_13: # %cond.store5 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw t3, 0(a0) |
| ; RV32ZVE32F-NEXT: sw t2, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 16 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB45_5 |
| ; RV32ZVE32F-NEXT: .LBB45_14: # %cond.store7 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw t1, 0(a0) |
| ; RV32ZVE32F-NEXT: sw t0, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 32 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB45_6 |
| ; RV32ZVE32F-NEXT: .LBB45_15: # %cond.store9 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw a7, 0(a0) |
| ; RV32ZVE32F-NEXT: sw a6, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 64 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB45_7 |
| ; RV32ZVE32F-NEXT: .LBB45_16: # %cond.store11 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw a5, 0(a0) |
| ; RV32ZVE32F-NEXT: sw a4, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, -128 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB45_8 |
| ; RV32ZVE32F-NEXT: j .LBB45_9 |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i16_v8i64: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: ld a2, 56(a0) |
| ; RV64ZVE32F-NEXT: ld a3, 48(a0) |
| ; RV64ZVE32F-NEXT: ld a5, 40(a0) |
| ; RV64ZVE32F-NEXT: ld a6, 32(a0) |
| ; RV64ZVE32F-NEXT: ld a7, 24(a0) |
| ; RV64ZVE32F-NEXT: ld t0, 16(a0) |
| ; RV64ZVE32F-NEXT: ld t1, 8(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a4, v0 |
| ; RV64ZVE32F-NEXT: andi t2, a4, 1 |
| ; RV64ZVE32F-NEXT: beqz t2, .LBB45_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: ld a0, 0(a0) |
| ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s t2, v8 |
| ; RV64ZVE32F-NEXT: slli t2, t2, 3 |
| ; RV64ZVE32F-NEXT: add t2, a1, t2 |
| ; RV64ZVE32F-NEXT: sd a0, 0(t2) |
| ; RV64ZVE32F-NEXT: .LBB45_2: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a4, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB45_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd t1, 0(a0) |
| ; RV64ZVE32F-NEXT: .LBB45_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a0, a4, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB45_6 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd t0, 0(a0) |
| ; RV64ZVE32F-NEXT: .LBB45_6: # %else4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: andi a0, a4, 8 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB45_13 |
| ; RV64ZVE32F-NEXT: # %bb.7: # %else6 |
| ; RV64ZVE32F-NEXT: andi a0, a4, 16 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB45_14 |
| ; RV64ZVE32F-NEXT: .LBB45_8: # %else8 |
| ; RV64ZVE32F-NEXT: andi a0, a4, 32 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB45_10 |
| ; RV64ZVE32F-NEXT: .LBB45_9: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a5, 0(a0) |
| ; RV64ZVE32F-NEXT: .LBB45_10: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a0, a4, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB45_15 |
| ; RV64ZVE32F-NEXT: # %bb.11: # %else12 |
| ; RV64ZVE32F-NEXT: andi a0, a4, -128 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB45_16 |
| ; RV64ZVE32F-NEXT: .LBB45_12: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB45_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a7, 0(a0) |
| ; RV64ZVE32F-NEXT: andi a0, a4, 16 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB45_8 |
| ; RV64ZVE32F-NEXT: .LBB45_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a6, 0(a0) |
| ; RV64ZVE32F-NEXT: andi a0, a4, 32 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB45_9 |
| ; RV64ZVE32F-NEXT: j .LBB45_10 |
| ; RV64ZVE32F-NEXT: .LBB45_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a3, 0(a0) |
| ; RV64ZVE32F-NEXT: andi a0, a4, -128 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB45_12 |
| ; RV64ZVE32F-NEXT: .LBB45_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a2, 0(a0) |
| ; RV64ZVE32F-NEXT: ret |
| %ptrs = getelementptr inbounds i64, i64* %base, <8 x i16> %idxs |
| call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> %val, <8 x i64*> %ptrs, i32 8, <8 x i1> %m) |
| ret void |
| } |
| |
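| ; Same access pattern as above, but with the sign extension of the indices |
| ; spelled out in the IR; the generated code is expected to be identical. |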
| define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, i64* %base, <8 x i16> %idxs, <8 x i1> %m) { |
| ; RV32V-LABEL: mscatter_baseidx_sext_v8i16_v8i64: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV32V-NEXT: vsext.vf4 v16, v12 |
| ; RV32V-NEXT: vsll.vi v12, v16, 3 |
| ; RV32V-NEXT: vsetvli zero, zero, e32, m2, ta, mu |
| ; RV32V-NEXT: vncvt.x.x.w v16, v12 |
| ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu |
| ; RV32V-NEXT: vsoxei32.v v8, (a0), v16, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_sext_v8i16_v8i64: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vsext.vf4 v16, v12 |
| ; RV64-NEXT: vsll.vi v12, v16, 3 |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8i64: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: addi sp, sp, -16 |
| ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 |
| ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: .cfi_offset s0, -4 |
| ; RV32ZVE32F-NEXT: .cfi_offset s1, -8 |
| ; RV32ZVE32F-NEXT: .cfi_offset s2, -12 |
| ; RV32ZVE32F-NEXT: lw a2, 60(a0) |
| ; RV32ZVE32F-NEXT: lw a3, 56(a0) |
| ; RV32ZVE32F-NEXT: lw a4, 52(a0) |
| ; RV32ZVE32F-NEXT: lw a5, 48(a0) |
| ; RV32ZVE32F-NEXT: lw a6, 44(a0) |
| ; RV32ZVE32F-NEXT: lw a7, 40(a0) |
| ; RV32ZVE32F-NEXT: lw t0, 36(a0) |
| ; RV32ZVE32F-NEXT: lw t1, 32(a0) |
| ; RV32ZVE32F-NEXT: lw t2, 28(a0) |
| ; RV32ZVE32F-NEXT: lw t3, 24(a0) |
| ; RV32ZVE32F-NEXT: lw t4, 20(a0) |
| ; RV32ZVE32F-NEXT: lw t5, 16(a0) |
| ; RV32ZVE32F-NEXT: lw s0, 12(a0) |
| ; RV32ZVE32F-NEXT: lw t6, 8(a0) |
| ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8 |
| ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 |
| ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v0 |
| ; RV32ZVE32F-NEXT: andi s1, a1, 1 |
| ; RV32ZVE32F-NEXT: bnez s1, .LBB46_10 |
| ; RV32ZVE32F-NEXT: # %bb.1: # %else |
| ; RV32ZVE32F-NEXT: andi a0, a1, 2 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB46_11 |
| ; RV32ZVE32F-NEXT: .LBB46_2: # %else2 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 4 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB46_12 |
| ; RV32ZVE32F-NEXT: .LBB46_3: # %else4 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 8 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB46_13 |
| ; RV32ZVE32F-NEXT: .LBB46_4: # %else6 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 16 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB46_14 |
| ; RV32ZVE32F-NEXT: .LBB46_5: # %else8 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 32 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB46_15 |
| ; RV32ZVE32F-NEXT: .LBB46_6: # %else10 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 64 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB46_16 |
| ; RV32ZVE32F-NEXT: .LBB46_7: # %else12 |
| ; RV32ZVE32F-NEXT: andi a0, a1, -128 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB46_9 |
| ; RV32ZVE32F-NEXT: .LBB46_8: # %cond.store13 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV32ZVE32F-NEXT: sw a3, 0(a0) |
| ; RV32ZVE32F-NEXT: sw a2, 4(a0) |
| ; RV32ZVE32F-NEXT: .LBB46_9: # %else14 |
| ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: addi sp, sp, 16 |
| ; RV32ZVE32F-NEXT: ret |
| ; RV32ZVE32F-NEXT: .LBB46_10: # %cond.store |
| ; RV32ZVE32F-NEXT: lw s1, 4(a0) |
| ; RV32ZVE32F-NEXT: lw a0, 0(a0) |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 |
| ; RV32ZVE32F-NEXT: sw s1, 4(s2) |
| ; RV32ZVE32F-NEXT: sw a0, 0(s2) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 2 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB46_2 |
| ; RV32ZVE32F-NEXT: .LBB46_11: # %cond.store1 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw s0, 4(a0) |
| ; RV32ZVE32F-NEXT: sw t6, 0(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 4 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB46_3 |
| ; RV32ZVE32F-NEXT: .LBB46_12: # %cond.store3 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw t5, 0(a0) |
| ; RV32ZVE32F-NEXT: sw t4, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 8 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB46_4 |
| ; RV32ZVE32F-NEXT: .LBB46_13: # %cond.store5 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw t3, 0(a0) |
| ; RV32ZVE32F-NEXT: sw t2, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 16 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB46_5 |
| ; RV32ZVE32F-NEXT: .LBB46_14: # %cond.store7 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw t1, 0(a0) |
| ; RV32ZVE32F-NEXT: sw t0, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 32 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB46_6 |
| ; RV32ZVE32F-NEXT: .LBB46_15: # %cond.store9 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw a7, 0(a0) |
| ; RV32ZVE32F-NEXT: sw a6, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 64 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB46_7 |
| ; RV32ZVE32F-NEXT: .LBB46_16: # %cond.store11 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw a5, 0(a0) |
| ; RV32ZVE32F-NEXT: sw a4, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, -128 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB46_8 |
| ; RV32ZVE32F-NEXT: j .LBB46_9 |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8i64: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: ld a2, 56(a0) |
| ; RV64ZVE32F-NEXT: ld a3, 48(a0) |
| ; RV64ZVE32F-NEXT: ld a5, 40(a0) |
| ; RV64ZVE32F-NEXT: ld a6, 32(a0) |
| ; RV64ZVE32F-NEXT: ld a7, 24(a0) |
| ; RV64ZVE32F-NEXT: ld t0, 16(a0) |
| ; RV64ZVE32F-NEXT: ld t1, 8(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a4, v0 |
| ; RV64ZVE32F-NEXT: andi t2, a4, 1 |
| ; RV64ZVE32F-NEXT: beqz t2, .LBB46_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: ld a0, 0(a0) |
| ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s t2, v8 |
| ; RV64ZVE32F-NEXT: slli t2, t2, 3 |
| ; RV64ZVE32F-NEXT: add t2, a1, t2 |
| ; RV64ZVE32F-NEXT: sd a0, 0(t2) |
| ; RV64ZVE32F-NEXT: .LBB46_2: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a4, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB46_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd t1, 0(a0) |
| ; RV64ZVE32F-NEXT: .LBB46_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a0, a4, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB46_6 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd t0, 0(a0) |
| ; RV64ZVE32F-NEXT: .LBB46_6: # %else4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: andi a0, a4, 8 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB46_13 |
| ; RV64ZVE32F-NEXT: # %bb.7: # %else6 |
| ; RV64ZVE32F-NEXT: andi a0, a4, 16 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB46_14 |
| ; RV64ZVE32F-NEXT: .LBB46_8: # %else8 |
| ; RV64ZVE32F-NEXT: andi a0, a4, 32 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB46_10 |
| ; RV64ZVE32F-NEXT: .LBB46_9: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a5, 0(a0) |
| ; RV64ZVE32F-NEXT: .LBB46_10: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a0, a4, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB46_15 |
| ; RV64ZVE32F-NEXT: # %bb.11: # %else12 |
| ; RV64ZVE32F-NEXT: andi a0, a4, -128 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB46_16 |
| ; RV64ZVE32F-NEXT: .LBB46_12: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB46_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a7, 0(a0) |
| ; RV64ZVE32F-NEXT: andi a0, a4, 16 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB46_8 |
| ; RV64ZVE32F-NEXT: .LBB46_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a6, 0(a0) |
| ; RV64ZVE32F-NEXT: andi a0, a4, 32 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB46_9 |
| ; RV64ZVE32F-NEXT: j .LBB46_10 |
| ; RV64ZVE32F-NEXT: .LBB46_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a3, 0(a0) |
| ; RV64ZVE32F-NEXT: andi a0, a4, -128 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB46_12 |
| ; RV64ZVE32F-NEXT: .LBB46_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a2, 0(a0) |
| ; RV64ZVE32F-NEXT: ret |
| %eidxs = sext <8 x i16> %idxs to <8 x i64> |
| %ptrs = getelementptr inbounds i64, i64* %base, <8 x i64> %eidxs |
| call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> %val, <8 x i64*> %ptrs, i32 8, <8 x i1> %m) |
| ret void |
| } |
| |
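| ; Zero-extended i16 indices: RV64 widens with vzext.vf4, while RV64ZVE32F |
| ; materializes the 0xffff mask (lui+addiw) and clears the upper bits of |
| ; each extracted index before scaling it by 8. |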
| define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, i64* %base, <8 x i16> %idxs, <8 x i1> %m) { |
| ; RV32V-LABEL: mscatter_baseidx_zext_v8i16_v8i64: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV32V-NEXT: vzext.vf4 v16, v12 |
| ; RV32V-NEXT: vsll.vi v12, v16, 3 |
| ; RV32V-NEXT: vsetvli zero, zero, e32, m2, ta, mu |
| ; RV32V-NEXT: vncvt.x.x.w v16, v12 |
| ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu |
| ; RV32V-NEXT: vsoxei32.v v8, (a0), v16, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_zext_v8i16_v8i64: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vzext.vf4 v16, v12 |
| ; RV64-NEXT: vsll.vi v12, v16, 3 |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8i64: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: addi sp, sp, -16 |
| ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 |
| ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: .cfi_offset s0, -4 |
| ; RV32ZVE32F-NEXT: .cfi_offset s1, -8 |
| ; RV32ZVE32F-NEXT: .cfi_offset s2, -12 |
| ; RV32ZVE32F-NEXT: lw a2, 60(a0) |
| ; RV32ZVE32F-NEXT: lw a3, 56(a0) |
| ; RV32ZVE32F-NEXT: lw a4, 52(a0) |
| ; RV32ZVE32F-NEXT: lw a5, 48(a0) |
| ; RV32ZVE32F-NEXT: lw a6, 44(a0) |
| ; RV32ZVE32F-NEXT: lw a7, 40(a0) |
| ; RV32ZVE32F-NEXT: lw t0, 36(a0) |
| ; RV32ZVE32F-NEXT: lw t1, 32(a0) |
| ; RV32ZVE32F-NEXT: lw t2, 28(a0) |
| ; RV32ZVE32F-NEXT: lw t3, 24(a0) |
| ; RV32ZVE32F-NEXT: lw t4, 20(a0) |
| ; RV32ZVE32F-NEXT: lw t5, 16(a0) |
| ; RV32ZVE32F-NEXT: lw s0, 12(a0) |
| ; RV32ZVE32F-NEXT: lw t6, 8(a0) |
| ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vzext.vf2 v10, v8 |
| ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 |
| ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v0 |
| ; RV32ZVE32F-NEXT: andi s1, a1, 1 |
| ; RV32ZVE32F-NEXT: bnez s1, .LBB47_10 |
| ; RV32ZVE32F-NEXT: # %bb.1: # %else |
| ; RV32ZVE32F-NEXT: andi a0, a1, 2 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB47_11 |
| ; RV32ZVE32F-NEXT: .LBB47_2: # %else2 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 4 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB47_12 |
| ; RV32ZVE32F-NEXT: .LBB47_3: # %else4 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 8 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB47_13 |
| ; RV32ZVE32F-NEXT: .LBB47_4: # %else6 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 16 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB47_14 |
| ; RV32ZVE32F-NEXT: .LBB47_5: # %else8 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 32 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB47_15 |
| ; RV32ZVE32F-NEXT: .LBB47_6: # %else10 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 64 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB47_16 |
| ; RV32ZVE32F-NEXT: .LBB47_7: # %else12 |
| ; RV32ZVE32F-NEXT: andi a0, a1, -128 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB47_9 |
| ; RV32ZVE32F-NEXT: .LBB47_8: # %cond.store13 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV32ZVE32F-NEXT: sw a3, 0(a0) |
| ; RV32ZVE32F-NEXT: sw a2, 4(a0) |
| ; RV32ZVE32F-NEXT: .LBB47_9: # %else14 |
| ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: addi sp, sp, 16 |
| ; RV32ZVE32F-NEXT: ret |
| ; RV32ZVE32F-NEXT: .LBB47_10: # %cond.store |
| ; RV32ZVE32F-NEXT: lw s1, 4(a0) |
| ; RV32ZVE32F-NEXT: lw a0, 0(a0) |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 |
| ; RV32ZVE32F-NEXT: sw s1, 4(s2) |
| ; RV32ZVE32F-NEXT: sw a0, 0(s2) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 2 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB47_2 |
| ; RV32ZVE32F-NEXT: .LBB47_11: # %cond.store1 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw s0, 4(a0) |
| ; RV32ZVE32F-NEXT: sw t6, 0(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 4 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB47_3 |
| ; RV32ZVE32F-NEXT: .LBB47_12: # %cond.store3 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw t5, 0(a0) |
| ; RV32ZVE32F-NEXT: sw t4, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 8 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB47_4 |
| ; RV32ZVE32F-NEXT: .LBB47_13: # %cond.store5 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw t3, 0(a0) |
| ; RV32ZVE32F-NEXT: sw t2, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 16 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB47_5 |
| ; RV32ZVE32F-NEXT: .LBB47_14: # %cond.store7 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw t1, 0(a0) |
| ; RV32ZVE32F-NEXT: sw t0, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 32 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB47_6 |
| ; RV32ZVE32F-NEXT: .LBB47_15: # %cond.store9 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw a7, 0(a0) |
| ; RV32ZVE32F-NEXT: sw a6, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 64 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB47_7 |
| ; RV32ZVE32F-NEXT: .LBB47_16: # %cond.store11 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw a5, 0(a0) |
| ; RV32ZVE32F-NEXT: sw a4, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, -128 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB47_8 |
| ; RV32ZVE32F-NEXT: j .LBB47_9 |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8i64: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: ld a2, 56(a0) |
| ; RV64ZVE32F-NEXT: ld a3, 48(a0) |
| ; RV64ZVE32F-NEXT: ld a6, 40(a0) |
| ; RV64ZVE32F-NEXT: ld a7, 32(a0) |
| ; RV64ZVE32F-NEXT: ld t0, 24(a0) |
| ; RV64ZVE32F-NEXT: ld t1, 16(a0) |
| ; RV64ZVE32F-NEXT: ld t2, 8(a0) |
| ; RV64ZVE32F-NEXT: lui a4, 16 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a5, v0 |
| ; RV64ZVE32F-NEXT: andi t3, a5, 1 |
| ; RV64ZVE32F-NEXT: addiw a4, a4, -1 |
| ; RV64ZVE32F-NEXT: beqz t3, .LBB47_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: ld a0, 0(a0) |
| ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s t3, v8 |
| ; RV64ZVE32F-NEXT: and t3, t3, a4 |
| ; RV64ZVE32F-NEXT: slli t3, t3, 3 |
| ; RV64ZVE32F-NEXT: add t3, a1, t3 |
| ; RV64ZVE32F-NEXT: sd a0, 0(t3) |
| ; RV64ZVE32F-NEXT: .LBB47_2: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a5, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB47_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 |
| ; RV64ZVE32F-NEXT: and a0, a0, a4 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd t2, 0(a0) |
| ; RV64ZVE32F-NEXT: .LBB47_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a0, a5, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB47_6 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 |
| ; RV64ZVE32F-NEXT: and a0, a0, a4 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd t1, 0(a0) |
| ; RV64ZVE32F-NEXT: .LBB47_6: # %else4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: andi a0, a5, 8 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB47_13 |
| ; RV64ZVE32F-NEXT: # %bb.7: # %else6 |
| ; RV64ZVE32F-NEXT: andi a0, a5, 16 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB47_14 |
| ; RV64ZVE32F-NEXT: .LBB47_8: # %else8 |
| ; RV64ZVE32F-NEXT: andi a0, a5, 32 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB47_10 |
| ; RV64ZVE32F-NEXT: .LBB47_9: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 |
| ; RV64ZVE32F-NEXT: and a0, a0, a4 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a6, 0(a0) |
| ; RV64ZVE32F-NEXT: .LBB47_10: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a0, a5, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB47_15 |
| ; RV64ZVE32F-NEXT: # %bb.11: # %else12 |
| ; RV64ZVE32F-NEXT: andi a0, a5, -128 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB47_16 |
| ; RV64ZVE32F-NEXT: .LBB47_12: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB47_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 |
| ; RV64ZVE32F-NEXT: and a0, a0, a4 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd t0, 0(a0) |
| ; RV64ZVE32F-NEXT: andi a0, a5, 16 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB47_8 |
| ; RV64ZVE32F-NEXT: .LBB47_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV64ZVE32F-NEXT: and a0, a0, a4 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a7, 0(a0) |
| ; RV64ZVE32F-NEXT: andi a0, a5, 32 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB47_9 |
| ; RV64ZVE32F-NEXT: j .LBB47_10 |
| ; RV64ZVE32F-NEXT: .LBB47_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV64ZVE32F-NEXT: and a0, a0, a4 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a3, 0(a0) |
| ; RV64ZVE32F-NEXT: andi a0, a5, -128 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB47_12 |
| ; RV64ZVE32F-NEXT: .LBB47_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV64ZVE32F-NEXT: and a0, a0, a4 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a2, 0(a0) |
| ; RV64ZVE32F-NEXT: ret |
| %eidxs = zext <8 x i16> %idxs to <8 x i64> |
| %ptrs = getelementptr inbounds i64, i64* %base, <8 x i64> %eidxs |
| call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> %val, <8 x i64*> %ptrs, i32 8, <8 x i1> %m) |
| ret void |
| } |
| |
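| ; For native i32 indices, RV32V shifts the index vector in place and uses |
| ; it directly as EEW=32 offsets for vsoxei32; RV64 still sign-extends the |
| ; indices to i64 first. |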
| define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, i64* %base, <8 x i32> %idxs, <8 x i1> %m) { |
| ; RV32V-LABEL: mscatter_baseidx_v8i32_v8i64: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32V-NEXT: vsll.vi v12, v12, 3 |
| ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu |
| ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_v8i32_v8i64: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vsext.vf2 v16, v12 |
| ; RV64-NEXT: vsll.vi v12, v16, 3 |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_baseidx_v8i32_v8i64: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: addi sp, sp, -16 |
| ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 |
| ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: .cfi_offset s0, -4 |
| ; RV32ZVE32F-NEXT: .cfi_offset s1, -8 |
| ; RV32ZVE32F-NEXT: .cfi_offset s2, -12 |
| ; RV32ZVE32F-NEXT: lw a2, 60(a0) |
| ; RV32ZVE32F-NEXT: lw a3, 56(a0) |
| ; RV32ZVE32F-NEXT: lw a4, 52(a0) |
| ; RV32ZVE32F-NEXT: lw a5, 48(a0) |
| ; RV32ZVE32F-NEXT: lw a6, 44(a0) |
| ; RV32ZVE32F-NEXT: lw a7, 40(a0) |
| ; RV32ZVE32F-NEXT: lw t0, 36(a0) |
| ; RV32ZVE32F-NEXT: lw t1, 32(a0) |
| ; RV32ZVE32F-NEXT: lw t2, 28(a0) |
| ; RV32ZVE32F-NEXT: lw t3, 24(a0) |
| ; RV32ZVE32F-NEXT: lw t4, 20(a0) |
| ; RV32ZVE32F-NEXT: lw t5, 16(a0) |
| ; RV32ZVE32F-NEXT: lw s0, 12(a0) |
| ; RV32ZVE32F-NEXT: lw t6, 8(a0) |
| ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 |
| ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v0 |
| ; RV32ZVE32F-NEXT: andi s1, a1, 1 |
| ; RV32ZVE32F-NEXT: bnez s1, .LBB48_10 |
| ; RV32ZVE32F-NEXT: # %bb.1: # %else |
| ; RV32ZVE32F-NEXT: andi a0, a1, 2 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB48_11 |
| ; RV32ZVE32F-NEXT: .LBB48_2: # %else2 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 4 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB48_12 |
| ; RV32ZVE32F-NEXT: .LBB48_3: # %else4 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 8 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB48_13 |
| ; RV32ZVE32F-NEXT: .LBB48_4: # %else6 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 16 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB48_14 |
| ; RV32ZVE32F-NEXT: .LBB48_5: # %else8 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 32 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB48_15 |
| ; RV32ZVE32F-NEXT: .LBB48_6: # %else10 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 64 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB48_16 |
| ; RV32ZVE32F-NEXT: .LBB48_7: # %else12 |
| ; RV32ZVE32F-NEXT: andi a0, a1, -128 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB48_9 |
| ; RV32ZVE32F-NEXT: .LBB48_8: # %cond.store13 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV32ZVE32F-NEXT: sw a3, 0(a0) |
| ; RV32ZVE32F-NEXT: sw a2, 4(a0) |
| ; RV32ZVE32F-NEXT: .LBB48_9: # %else14 |
| ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: addi sp, sp, 16 |
| ; RV32ZVE32F-NEXT: ret |
| ; RV32ZVE32F-NEXT: .LBB48_10: # %cond.store |
| ; RV32ZVE32F-NEXT: lw s1, 4(a0) |
| ; RV32ZVE32F-NEXT: lw a0, 0(a0) |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 |
| ; RV32ZVE32F-NEXT: sw s1, 4(s2) |
| ; RV32ZVE32F-NEXT: sw a0, 0(s2) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 2 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB48_2 |
| ; RV32ZVE32F-NEXT: .LBB48_11: # %cond.store1 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw s0, 4(a0) |
| ; RV32ZVE32F-NEXT: sw t6, 0(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 4 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB48_3 |
| ; RV32ZVE32F-NEXT: .LBB48_12: # %cond.store3 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw t5, 0(a0) |
| ; RV32ZVE32F-NEXT: sw t4, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 8 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB48_4 |
| ; RV32ZVE32F-NEXT: .LBB48_13: # %cond.store5 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw t3, 0(a0) |
| ; RV32ZVE32F-NEXT: sw t2, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 16 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB48_5 |
| ; RV32ZVE32F-NEXT: .LBB48_14: # %cond.store7 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw t1, 0(a0) |
| ; RV32ZVE32F-NEXT: sw t0, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 32 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB48_6 |
| ; RV32ZVE32F-NEXT: .LBB48_15: # %cond.store9 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw a7, 0(a0) |
| ; RV32ZVE32F-NEXT: sw a6, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 64 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB48_7 |
| ; RV32ZVE32F-NEXT: .LBB48_16: # %cond.store11 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw a5, 0(a0) |
| ; RV32ZVE32F-NEXT: sw a4, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, -128 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB48_8 |
| ; RV32ZVE32F-NEXT: j .LBB48_9 |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i32_v8i64: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: ld a2, 56(a0) |
| ; RV64ZVE32F-NEXT: ld a3, 48(a0) |
| ; RV64ZVE32F-NEXT: ld a5, 40(a0) |
| ; RV64ZVE32F-NEXT: ld a6, 32(a0) |
| ; RV64ZVE32F-NEXT: ld a7, 24(a0) |
| ; RV64ZVE32F-NEXT: ld t0, 16(a0) |
| ; RV64ZVE32F-NEXT: ld t1, 8(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a4, v0 |
| ; RV64ZVE32F-NEXT: andi t2, a4, 1 |
| ; RV64ZVE32F-NEXT: beqz t2, .LBB48_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: ld a0, 0(a0) |
| ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s t2, v8 |
| ; RV64ZVE32F-NEXT: slli t2, t2, 3 |
| ; RV64ZVE32F-NEXT: add t2, a1, t2 |
| ; RV64ZVE32F-NEXT: sd a0, 0(t2) |
| ; RV64ZVE32F-NEXT: .LBB48_2: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a4, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB48_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd t1, 0(a0) |
| ; RV64ZVE32F-NEXT: .LBB48_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: andi a0, a4, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB48_12 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %else4 |
| ; RV64ZVE32F-NEXT: andi a0, a4, 8 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB48_13 |
| ; RV64ZVE32F-NEXT: .LBB48_6: # %else6 |
| ; RV64ZVE32F-NEXT: andi a0, a4, 16 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB48_14 |
| ; RV64ZVE32F-NEXT: .LBB48_7: # %else8 |
| ; RV64ZVE32F-NEXT: andi a0, a4, 32 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB48_9 |
| ; RV64ZVE32F-NEXT: .LBB48_8: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a5, 0(a0) |
| ; RV64ZVE32F-NEXT: .LBB48_9: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: andi a0, a4, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB48_15 |
| ; RV64ZVE32F-NEXT: # %bb.10: # %else12 |
| ; RV64ZVE32F-NEXT: andi a0, a4, -128 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB48_16 |
| ; RV64ZVE32F-NEXT: .LBB48_11: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB48_12: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd t0, 0(a0) |
| ; RV64ZVE32F-NEXT: andi a0, a4, 8 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB48_6 |
| ; RV64ZVE32F-NEXT: .LBB48_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a7, 0(a0) |
| ; RV64ZVE32F-NEXT: andi a0, a4, 16 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB48_7 |
| ; RV64ZVE32F-NEXT: .LBB48_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a6, 0(a0) |
| ; RV64ZVE32F-NEXT: andi a0, a4, 32 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB48_8 |
| ; RV64ZVE32F-NEXT: j .LBB48_9 |
| ; RV64ZVE32F-NEXT: .LBB48_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a3, 0(a0) |
| ; RV64ZVE32F-NEXT: andi a0, a4, -128 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB48_11 |
| ; RV64ZVE32F-NEXT: .LBB48_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a2, 0(a0) |
| ; RV64ZVE32F-NEXT: ret |
| %ptrs = getelementptr inbounds i64, i64* %base, <8 x i32> %idxs |
| call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> %val, <8 x i64*> %ptrs, i32 8, <8 x i1> %m) |
| ret void |
| } |
| |
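| ; Explicitly sign-extended i32 indices: RV32V widens to i64 for the shift |
| ; and then narrows the result back (vncvt.x.x.w) so the indexed store can |
| ; keep using 32-bit offsets. |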
| define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, i64* %base, <8 x i32> %idxs, <8 x i1> %m) { |
| ; RV32V-LABEL: mscatter_baseidx_sext_v8i32_v8i64: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV32V-NEXT: vsext.vf2 v16, v12 |
| ; RV32V-NEXT: vsll.vi v12, v16, 3 |
| ; RV32V-NEXT: vsetvli zero, zero, e32, m2, ta, mu |
| ; RV32V-NEXT: vncvt.x.x.w v16, v12 |
| ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu |
| ; RV32V-NEXT: vsoxei32.v v8, (a0), v16, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_sext_v8i32_v8i64: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vsext.vf2 v16, v12 |
| ; RV64-NEXT: vsll.vi v12, v16, 3 |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i32_v8i64: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: addi sp, sp, -16 |
| ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 |
| ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: .cfi_offset s0, -4 |
| ; RV32ZVE32F-NEXT: .cfi_offset s1, -8 |
| ; RV32ZVE32F-NEXT: .cfi_offset s2, -12 |
| ; RV32ZVE32F-NEXT: lw a2, 60(a0) |
| ; RV32ZVE32F-NEXT: lw a3, 56(a0) |
| ; RV32ZVE32F-NEXT: lw a4, 52(a0) |
| ; RV32ZVE32F-NEXT: lw a5, 48(a0) |
| ; RV32ZVE32F-NEXT: lw a6, 44(a0) |
| ; RV32ZVE32F-NEXT: lw a7, 40(a0) |
| ; RV32ZVE32F-NEXT: lw t0, 36(a0) |
| ; RV32ZVE32F-NEXT: lw t1, 32(a0) |
| ; RV32ZVE32F-NEXT: lw t2, 28(a0) |
| ; RV32ZVE32F-NEXT: lw t3, 24(a0) |
| ; RV32ZVE32F-NEXT: lw t4, 20(a0) |
| ; RV32ZVE32F-NEXT: lw t5, 16(a0) |
| ; RV32ZVE32F-NEXT: lw s0, 12(a0) |
| ; RV32ZVE32F-NEXT: lw t6, 8(a0) |
| ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 |
| ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v0 |
| ; RV32ZVE32F-NEXT: andi s1, a1, 1 |
| ; RV32ZVE32F-NEXT: bnez s1, .LBB49_10 |
| ; RV32ZVE32F-NEXT: # %bb.1: # %else |
| ; RV32ZVE32F-NEXT: andi a0, a1, 2 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB49_11 |
| ; RV32ZVE32F-NEXT: .LBB49_2: # %else2 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 4 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB49_12 |
| ; RV32ZVE32F-NEXT: .LBB49_3: # %else4 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 8 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB49_13 |
| ; RV32ZVE32F-NEXT: .LBB49_4: # %else6 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 16 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB49_14 |
| ; RV32ZVE32F-NEXT: .LBB49_5: # %else8 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 32 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB49_15 |
| ; RV32ZVE32F-NEXT: .LBB49_6: # %else10 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 64 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB49_16 |
| ; RV32ZVE32F-NEXT: .LBB49_7: # %else12 |
| ; RV32ZVE32F-NEXT: andi a0, a1, -128 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB49_9 |
| ; RV32ZVE32F-NEXT: .LBB49_8: # %cond.store13 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV32ZVE32F-NEXT: sw a3, 0(a0) |
| ; RV32ZVE32F-NEXT: sw a2, 4(a0) |
| ; RV32ZVE32F-NEXT: .LBB49_9: # %else14 |
| ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: addi sp, sp, 16 |
| ; RV32ZVE32F-NEXT: ret |
| ; RV32ZVE32F-NEXT: .LBB49_10: # %cond.store |
| ; RV32ZVE32F-NEXT: lw s1, 4(a0) |
| ; RV32ZVE32F-NEXT: lw a0, 0(a0) |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 |
| ; RV32ZVE32F-NEXT: sw s1, 4(s2) |
| ; RV32ZVE32F-NEXT: sw a0, 0(s2) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 2 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB49_2 |
| ; RV32ZVE32F-NEXT: .LBB49_11: # %cond.store1 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw s0, 4(a0) |
| ; RV32ZVE32F-NEXT: sw t6, 0(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 4 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB49_3 |
| ; RV32ZVE32F-NEXT: .LBB49_12: # %cond.store3 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw t5, 0(a0) |
| ; RV32ZVE32F-NEXT: sw t4, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 8 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB49_4 |
| ; RV32ZVE32F-NEXT: .LBB49_13: # %cond.store5 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw t3, 0(a0) |
| ; RV32ZVE32F-NEXT: sw t2, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 16 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB49_5 |
| ; RV32ZVE32F-NEXT: .LBB49_14: # %cond.store7 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw t1, 0(a0) |
| ; RV32ZVE32F-NEXT: sw t0, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 32 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB49_6 |
| ; RV32ZVE32F-NEXT: .LBB49_15: # %cond.store9 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw a7, 0(a0) |
| ; RV32ZVE32F-NEXT: sw a6, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 64 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB49_7 |
| ; RV32ZVE32F-NEXT: .LBB49_16: # %cond.store11 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw a5, 0(a0) |
| ; RV32ZVE32F-NEXT: sw a4, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, -128 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB49_8 |
| ; RV32ZVE32F-NEXT: j .LBB49_9 |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i32_v8i64: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: ld a2, 56(a0) |
| ; RV64ZVE32F-NEXT: ld a3, 48(a0) |
| ; RV64ZVE32F-NEXT: ld a5, 40(a0) |
| ; RV64ZVE32F-NEXT: ld a6, 32(a0) |
| ; RV64ZVE32F-NEXT: ld a7, 24(a0) |
| ; RV64ZVE32F-NEXT: ld t0, 16(a0) |
| ; RV64ZVE32F-NEXT: ld t1, 8(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a4, v0 |
| ; RV64ZVE32F-NEXT: andi t2, a4, 1 |
| ; RV64ZVE32F-NEXT: beqz t2, .LBB49_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: ld a0, 0(a0) |
| ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s t2, v8 |
| ; RV64ZVE32F-NEXT: slli t2, t2, 3 |
| ; RV64ZVE32F-NEXT: add t2, a1, t2 |
| ; RV64ZVE32F-NEXT: sd a0, 0(t2) |
| ; RV64ZVE32F-NEXT: .LBB49_2: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a4, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB49_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd t1, 0(a0) |
| ; RV64ZVE32F-NEXT: .LBB49_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: andi a0, a4, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB49_12 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %else4 |
| ; RV64ZVE32F-NEXT: andi a0, a4, 8 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB49_13 |
| ; RV64ZVE32F-NEXT: .LBB49_6: # %else6 |
| ; RV64ZVE32F-NEXT: andi a0, a4, 16 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB49_14 |
| ; RV64ZVE32F-NEXT: .LBB49_7: # %else8 |
| ; RV64ZVE32F-NEXT: andi a0, a4, 32 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB49_9 |
| ; RV64ZVE32F-NEXT: .LBB49_8: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a5, 0(a0) |
| ; RV64ZVE32F-NEXT: .LBB49_9: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: andi a0, a4, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB49_15 |
| ; RV64ZVE32F-NEXT: # %bb.10: # %else12 |
| ; RV64ZVE32F-NEXT: andi a0, a4, -128 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB49_16 |
| ; RV64ZVE32F-NEXT: .LBB49_11: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB49_12: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd t0, 0(a0) |
| ; RV64ZVE32F-NEXT: andi a0, a4, 8 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB49_6 |
| ; RV64ZVE32F-NEXT: .LBB49_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a7, 0(a0) |
| ; RV64ZVE32F-NEXT: andi a0, a4, 16 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB49_7 |
| ; RV64ZVE32F-NEXT: .LBB49_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a6, 0(a0) |
| ; RV64ZVE32F-NEXT: andi a0, a4, 32 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB49_8 |
| ; RV64ZVE32F-NEXT: j .LBB49_9 |
| ; RV64ZVE32F-NEXT: .LBB49_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a3, 0(a0) |
| ; RV64ZVE32F-NEXT: andi a0, a4, -128 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB49_11 |
| ; RV64ZVE32F-NEXT: .LBB49_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a2, 0(a0) |
| ; RV64ZVE32F-NEXT: ret |
| %eidxs = sext <8 x i32> %idxs to <8 x i64> |
| %ptrs = getelementptr inbounds i64, i64* %base, <8 x i64> %eidxs |
| call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> %val, <8 x i64*> %ptrs, i32 8, <8 x i1> %m) |
| ret void |
| } |
| |
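| ; A masked scatter of <8 x i64> where the i32 indices are zero-extended |
| ; before the GEP. Unlike the sext case above, where vmv.x.s sign-extends |
| ; for free and a single slli by 3 scales the index, the zext and the |
| ; multiply-by-8 scaling combine into an slli-by-32 / srli-by-29 pair per |
| ; extracted index. |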
| define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, i64* %base, <8 x i32> %idxs, <8 x i1> %m) { |
| ; RV32V-LABEL: mscatter_baseidx_zext_v8i32_v8i64: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV32V-NEXT: vzext.vf2 v16, v12 |
| ; RV32V-NEXT: vsll.vi v12, v16, 3 |
| ; RV32V-NEXT: vsetvli zero, zero, e32, m2, ta, mu |
| ; RV32V-NEXT: vncvt.x.x.w v16, v12 |
| ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu |
| ; RV32V-NEXT: vsoxei32.v v8, (a0), v16, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_zext_v8i32_v8i64: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vzext.vf2 v16, v12 |
| ; RV64-NEXT: vsll.vi v12, v16, 3 |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i32_v8i64: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: addi sp, sp, -16 |
| ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 |
| ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: .cfi_offset s0, -4 |
| ; RV32ZVE32F-NEXT: .cfi_offset s1, -8 |
| ; RV32ZVE32F-NEXT: .cfi_offset s2, -12 |
| ; RV32ZVE32F-NEXT: lw a2, 60(a0) |
| ; RV32ZVE32F-NEXT: lw a3, 56(a0) |
| ; RV32ZVE32F-NEXT: lw a4, 52(a0) |
| ; RV32ZVE32F-NEXT: lw a5, 48(a0) |
| ; RV32ZVE32F-NEXT: lw a6, 44(a0) |
| ; RV32ZVE32F-NEXT: lw a7, 40(a0) |
| ; RV32ZVE32F-NEXT: lw t0, 36(a0) |
| ; RV32ZVE32F-NEXT: lw t1, 32(a0) |
| ; RV32ZVE32F-NEXT: lw t2, 28(a0) |
| ; RV32ZVE32F-NEXT: lw t3, 24(a0) |
| ; RV32ZVE32F-NEXT: lw t4, 20(a0) |
| ; RV32ZVE32F-NEXT: lw t5, 16(a0) |
| ; RV32ZVE32F-NEXT: lw s0, 12(a0) |
| ; RV32ZVE32F-NEXT: lw t6, 8(a0) |
| ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 |
| ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v0 |
| ; RV32ZVE32F-NEXT: andi s1, a1, 1 |
| ; RV32ZVE32F-NEXT: bnez s1, .LBB50_10 |
| ; RV32ZVE32F-NEXT: # %bb.1: # %else |
| ; RV32ZVE32F-NEXT: andi a0, a1, 2 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB50_11 |
| ; RV32ZVE32F-NEXT: .LBB50_2: # %else2 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 4 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB50_12 |
| ; RV32ZVE32F-NEXT: .LBB50_3: # %else4 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 8 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB50_13 |
| ; RV32ZVE32F-NEXT: .LBB50_4: # %else6 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 16 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB50_14 |
| ; RV32ZVE32F-NEXT: .LBB50_5: # %else8 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 32 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB50_15 |
| ; RV32ZVE32F-NEXT: .LBB50_6: # %else10 |
| ; RV32ZVE32F-NEXT: andi a0, a1, 64 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB50_16 |
| ; RV32ZVE32F-NEXT: .LBB50_7: # %else12 |
| ; RV32ZVE32F-NEXT: andi a0, a1, -128 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB50_9 |
| ; RV32ZVE32F-NEXT: .LBB50_8: # %cond.store13 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV32ZVE32F-NEXT: sw a3, 0(a0) |
| ; RV32ZVE32F-NEXT: sw a2, 4(a0) |
| ; RV32ZVE32F-NEXT: .LBB50_9: # %else14 |
| ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: addi sp, sp, 16 |
| ; RV32ZVE32F-NEXT: ret |
| ; RV32ZVE32F-NEXT: .LBB50_10: # %cond.store |
| ; RV32ZVE32F-NEXT: lw s1, 4(a0) |
| ; RV32ZVE32F-NEXT: lw a0, 0(a0) |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 |
| ; RV32ZVE32F-NEXT: sw s1, 4(s2) |
| ; RV32ZVE32F-NEXT: sw a0, 0(s2) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 2 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB50_2 |
| ; RV32ZVE32F-NEXT: .LBB50_11: # %cond.store1 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw s0, 4(a0) |
| ; RV32ZVE32F-NEXT: sw t6, 0(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 4 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB50_3 |
| ; RV32ZVE32F-NEXT: .LBB50_12: # %cond.store3 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw t5, 0(a0) |
| ; RV32ZVE32F-NEXT: sw t4, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 8 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB50_4 |
| ; RV32ZVE32F-NEXT: .LBB50_13: # %cond.store5 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw t3, 0(a0) |
| ; RV32ZVE32F-NEXT: sw t2, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 16 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB50_5 |
| ; RV32ZVE32F-NEXT: .LBB50_14: # %cond.store7 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw t1, 0(a0) |
| ; RV32ZVE32F-NEXT: sw t0, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 32 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB50_6 |
| ; RV32ZVE32F-NEXT: .LBB50_15: # %cond.store9 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw a7, 0(a0) |
| ; RV32ZVE32F-NEXT: sw a6, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, 64 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB50_7 |
| ; RV32ZVE32F-NEXT: .LBB50_16: # %cond.store11 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV32ZVE32F-NEXT: sw a5, 0(a0) |
| ; RV32ZVE32F-NEXT: sw a4, 4(a0) |
| ; RV32ZVE32F-NEXT: andi a0, a1, -128 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB50_8 |
| ; RV32ZVE32F-NEXT: j .LBB50_9 |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i32_v8i64: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: ld a2, 56(a0) |
| ; RV64ZVE32F-NEXT: ld a3, 48(a0) |
| ; RV64ZVE32F-NEXT: ld a5, 40(a0) |
| ; RV64ZVE32F-NEXT: ld a6, 32(a0) |
| ; RV64ZVE32F-NEXT: ld a7, 24(a0) |
| ; RV64ZVE32F-NEXT: ld t0, 16(a0) |
| ; RV64ZVE32F-NEXT: ld t1, 8(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a4, v0 |
| ; RV64ZVE32F-NEXT: andi t2, a4, 1 |
| ; RV64ZVE32F-NEXT: beqz t2, .LBB50_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: ld a0, 0(a0) |
| ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s t2, v8 |
| ; RV64ZVE32F-NEXT: slli t2, t2, 32 |
| ; RV64ZVE32F-NEXT: srli t2, t2, 29 |
| ; RV64ZVE32F-NEXT: add t2, a1, t2 |
| ; RV64ZVE32F-NEXT: sd a0, 0(t2) |
| ; RV64ZVE32F-NEXT: .LBB50_2: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a4, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB50_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 32 |
| ; RV64ZVE32F-NEXT: srli a0, a0, 29 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd t1, 0(a0) |
| ; RV64ZVE32F-NEXT: .LBB50_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: andi a0, a4, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB50_12 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %else4 |
| ; RV64ZVE32F-NEXT: andi a0, a4, 8 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB50_13 |
| ; RV64ZVE32F-NEXT: .LBB50_6: # %else6 |
| ; RV64ZVE32F-NEXT: andi a0, a4, 16 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB50_14 |
| ; RV64ZVE32F-NEXT: .LBB50_7: # %else8 |
| ; RV64ZVE32F-NEXT: andi a0, a4, 32 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB50_9 |
| ; RV64ZVE32F-NEXT: .LBB50_8: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 32 |
| ; RV64ZVE32F-NEXT: srli a0, a0, 29 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a5, 0(a0) |
| ; RV64ZVE32F-NEXT: .LBB50_9: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: andi a0, a4, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB50_15 |
| ; RV64ZVE32F-NEXT: # %bb.10: # %else12 |
| ; RV64ZVE32F-NEXT: andi a0, a4, -128 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB50_16 |
| ; RV64ZVE32F-NEXT: .LBB50_11: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB50_12: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 32 |
| ; RV64ZVE32F-NEXT: srli a0, a0, 29 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd t0, 0(a0) |
| ; RV64ZVE32F-NEXT: andi a0, a4, 8 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB50_6 |
| ; RV64ZVE32F-NEXT: .LBB50_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 32 |
| ; RV64ZVE32F-NEXT: srli a0, a0, 29 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a7, 0(a0) |
| ; RV64ZVE32F-NEXT: andi a0, a4, 16 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB50_7 |
| ; RV64ZVE32F-NEXT: .LBB50_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v10 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 32 |
| ; RV64ZVE32F-NEXT: srli a0, a0, 29 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a6, 0(a0) |
| ; RV64ZVE32F-NEXT: andi a0, a4, 32 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB50_8 |
| ; RV64ZVE32F-NEXT: j .LBB50_9 |
| ; RV64ZVE32F-NEXT: .LBB50_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 32 |
| ; RV64ZVE32F-NEXT: srli a0, a0, 29 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a3, 0(a0) |
| ; RV64ZVE32F-NEXT: andi a0, a4, -128 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB50_11 |
| ; RV64ZVE32F-NEXT: .LBB50_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV64ZVE32F-NEXT: slli a0, a0, 32 |
| ; RV64ZVE32F-NEXT: srli a0, a0, 29 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a2, 0(a0) |
| ; RV64ZVE32F-NEXT: ret |
| %eidxs = zext <8 x i32> %idxs to <8 x i64> |
| %ptrs = getelementptr inbounds i64, i64* %base, <8 x i64> %eidxs |
| call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> %val, <8 x i64*> %ptrs, i32 8, <8 x i1> %m) |
| ret void |
| } |
| |
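| ; A masked scatter of <8 x i64> with native i64 indices. 64-bit vector |
| ; elements are not legal under Zve32f, so RV32ZVE32F stages the low halves |
| ; of the indices through the stack and reloads them with vle32.v, while |
| ; RV64ZVE32F keeps the indices entirely in scalar registers. |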
| define void @mscatter_baseidx_v8i64(<8 x i64> %val, i64* %base, <8 x i64> %idxs, <8 x i1> %m) { |
| ; RV32V-LABEL: mscatter_baseidx_v8i64: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV32V-NEXT: vsll.vi v12, v12, 3 |
| ; RV32V-NEXT: vsetvli zero, zero, e32, m2, ta, mu |
| ; RV32V-NEXT: vncvt.x.x.w v16, v12 |
| ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu |
| ; RV32V-NEXT: vsoxei32.v v8, (a0), v16, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_v8i64: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vsll.vi v12, v12, 3 |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_baseidx_v8i64: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: addi sp, sp, -96 |
| ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 96 |
| ; RV32ZVE32F-NEXT: sw ra, 92(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: sw s0, 88(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: sw s2, 84(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: sw s3, 80(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: sw s4, 76(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: sw s5, 72(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: sw s6, 68(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: sw s7, 64(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: sw s8, 60(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: sw s9, 56(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: sw s10, 52(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: sw s11, 48(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: .cfi_offset ra, -4 |
| ; RV32ZVE32F-NEXT: .cfi_offset s0, -8 |
| ; RV32ZVE32F-NEXT: .cfi_offset s2, -12 |
| ; RV32ZVE32F-NEXT: .cfi_offset s3, -16 |
| ; RV32ZVE32F-NEXT: .cfi_offset s4, -20 |
| ; RV32ZVE32F-NEXT: .cfi_offset s5, -24 |
| ; RV32ZVE32F-NEXT: .cfi_offset s6, -28 |
| ; RV32ZVE32F-NEXT: .cfi_offset s7, -32 |
| ; RV32ZVE32F-NEXT: .cfi_offset s8, -36 |
| ; RV32ZVE32F-NEXT: .cfi_offset s9, -40 |
| ; RV32ZVE32F-NEXT: .cfi_offset s10, -44 |
| ; RV32ZVE32F-NEXT: .cfi_offset s11, -48 |
| ; RV32ZVE32F-NEXT: addi s0, sp, 96 |
| ; RV32ZVE32F-NEXT: .cfi_def_cfa s0, 0 |
| ; RV32ZVE32F-NEXT: andi sp, sp, -32 |
| ; RV32ZVE32F-NEXT: lw a3, 60(a0) |
| ; RV32ZVE32F-NEXT: lw a4, 56(a0) |
| ; RV32ZVE32F-NEXT: lw a5, 52(a0) |
| ; RV32ZVE32F-NEXT: lw a6, 48(a0) |
| ; RV32ZVE32F-NEXT: lw a7, 44(a0) |
| ; RV32ZVE32F-NEXT: lw t0, 40(a0) |
| ; RV32ZVE32F-NEXT: lw t1, 36(a0) |
| ; RV32ZVE32F-NEXT: lw t2, 32(a0) |
| ; RV32ZVE32F-NEXT: lw t3, 28(a0) |
| ; RV32ZVE32F-NEXT: lw t4, 24(a0) |
| ; RV32ZVE32F-NEXT: lw t5, 20(a0) |
| ; RV32ZVE32F-NEXT: lw t6, 16(a0) |
| ; RV32ZVE32F-NEXT: lw s3, 12(a0) |
| ; RV32ZVE32F-NEXT: lw s2, 8(a0) |
| ; RV32ZVE32F-NEXT: lw s5, 4(a0) |
| ; RV32ZVE32F-NEXT: lw s4, 0(a0) |
| ; RV32ZVE32F-NEXT: lw a0, 0(a2) |
| ; RV32ZVE32F-NEXT: lw s6, 8(a2) |
| ; RV32ZVE32F-NEXT: lw s7, 16(a2) |
| ; RV32ZVE32F-NEXT: lw s8, 24(a2) |
| ; RV32ZVE32F-NEXT: lw s9, 56(a2) |
| ; RV32ZVE32F-NEXT: lw s10, 48(a2) |
| ; RV32ZVE32F-NEXT: lw s11, 40(a2) |
| ; RV32ZVE32F-NEXT: lw a2, 32(a2) |
| ; RV32ZVE32F-NEXT: sw s9, 28(sp) |
| ; RV32ZVE32F-NEXT: sw s10, 24(sp) |
| ; RV32ZVE32F-NEXT: sw s11, 20(sp) |
| ; RV32ZVE32F-NEXT: sw a2, 16(sp) |
| ; RV32ZVE32F-NEXT: sw s8, 12(sp) |
| ; RV32ZVE32F-NEXT: sw s7, 8(sp) |
| ; RV32ZVE32F-NEXT: sw s6, 4(sp) |
| ; RV32ZVE32F-NEXT: sw a0, 0(sp) |
| ; RV32ZVE32F-NEXT: mv a0, sp |
| ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vle32.v v8, (a0) |
| ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 |
| ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v0 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 1 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB51_10 |
| ; RV32ZVE32F-NEXT: # %bb.1: # %else |
| ; RV32ZVE32F-NEXT: andi a1, a0, 2 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB51_11 |
| ; RV32ZVE32F-NEXT: .LBB51_2: # %else2 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 4 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB51_12 |
| ; RV32ZVE32F-NEXT: .LBB51_3: # %else4 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 8 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB51_13 |
| ; RV32ZVE32F-NEXT: .LBB51_4: # %else6 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 16 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB51_14 |
| ; RV32ZVE32F-NEXT: .LBB51_5: # %else8 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 32 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB51_15 |
| ; RV32ZVE32F-NEXT: .LBB51_6: # %else10 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 64 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB51_16 |
| ; RV32ZVE32F-NEXT: .LBB51_7: # %else12 |
| ; RV32ZVE32F-NEXT: andi a0, a0, -128 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB51_9 |
| ; RV32ZVE32F-NEXT: .LBB51_8: # %cond.store13 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV32ZVE32F-NEXT: sw a4, 0(a0) |
| ; RV32ZVE32F-NEXT: sw a3, 4(a0) |
| ; RV32ZVE32F-NEXT: .LBB51_9: # %else14 |
| ; RV32ZVE32F-NEXT: addi sp, s0, -96 |
| ; RV32ZVE32F-NEXT: lw ra, 92(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: lw s0, 88(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: lw s2, 84(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: lw s3, 80(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: lw s4, 76(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: lw s5, 72(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: lw s6, 68(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: lw s7, 64(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: lw s8, 60(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: lw s9, 56(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: lw s10, 52(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: lw s11, 48(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: addi sp, sp, 96 |
| ; RV32ZVE32F-NEXT: ret |
| ; RV32ZVE32F-NEXT: .LBB51_10: # %cond.store |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 |
| ; RV32ZVE32F-NEXT: sw s5, 4(a1) |
| ; RV32ZVE32F-NEXT: sw s4, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 2 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB51_2 |
| ; RV32ZVE32F-NEXT: .LBB51_11: # %cond.store1 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: sw s3, 4(a1) |
| ; RV32ZVE32F-NEXT: sw s2, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 4 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB51_3 |
| ; RV32ZVE32F-NEXT: .LBB51_12: # %cond.store3 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: sw t6, 0(a1) |
| ; RV32ZVE32F-NEXT: sw t5, 4(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 8 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB51_4 |
| ; RV32ZVE32F-NEXT: .LBB51_13: # %cond.store5 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: sw t4, 0(a1) |
| ; RV32ZVE32F-NEXT: sw t3, 4(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 16 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB51_5 |
| ; RV32ZVE32F-NEXT: .LBB51_14: # %cond.store7 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: sw t2, 0(a1) |
| ; RV32ZVE32F-NEXT: sw t1, 4(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 32 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB51_6 |
| ; RV32ZVE32F-NEXT: .LBB51_15: # %cond.store9 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: sw t0, 0(a1) |
| ; RV32ZVE32F-NEXT: sw a7, 4(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 64 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB51_7 |
| ; RV32ZVE32F-NEXT: .LBB51_16: # %cond.store11 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: sw a6, 0(a1) |
| ; RV32ZVE32F-NEXT: sw a5, 4(a1) |
| ; RV32ZVE32F-NEXT: andi a0, a0, -128 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB51_8 |
| ; RV32ZVE32F-NEXT: j .LBB51_9 |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i64: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: addi sp, sp, -32 |
| ; RV64ZVE32F-NEXT: .cfi_def_cfa_offset 32 |
| ; RV64ZVE32F-NEXT: sd s0, 24(sp) # 8-byte Folded Spill |
| ; RV64ZVE32F-NEXT: sd s1, 16(sp) # 8-byte Folded Spill |
| ; RV64ZVE32F-NEXT: sd s2, 8(sp) # 8-byte Folded Spill |
| ; RV64ZVE32F-NEXT: sd s3, 0(sp) # 8-byte Folded Spill |
| ; RV64ZVE32F-NEXT: .cfi_offset s0, -8 |
| ; RV64ZVE32F-NEXT: .cfi_offset s1, -16 |
| ; RV64ZVE32F-NEXT: .cfi_offset s2, -24 |
| ; RV64ZVE32F-NEXT: .cfi_offset s3, -32 |
| ; RV64ZVE32F-NEXT: ld a3, 56(a0) |
| ; RV64ZVE32F-NEXT: ld a4, 48(a0) |
| ; RV64ZVE32F-NEXT: ld a6, 40(a0) |
| ; RV64ZVE32F-NEXT: ld t1, 32(a0) |
| ; RV64ZVE32F-NEXT: ld t3, 24(a0) |
| ; RV64ZVE32F-NEXT: ld t6, 16(a0) |
| ; RV64ZVE32F-NEXT: ld s1, 8(a0) |
| ; RV64ZVE32F-NEXT: ld s2, 8(a2) |
| ; RV64ZVE32F-NEXT: ld s0, 16(a2) |
| ; RV64ZVE32F-NEXT: ld t5, 24(a2) |
| ; RV64ZVE32F-NEXT: ld t4, 32(a2) |
| ; RV64ZVE32F-NEXT: ld t2, 40(a2) |
| ; RV64ZVE32F-NEXT: ld t0, 48(a2) |
| ; RV64ZVE32F-NEXT: ld a5, 56(a2) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a7, v0 |
| ; RV64ZVE32F-NEXT: andi s3, a7, 1 |
| ; RV64ZVE32F-NEXT: bnez s3, .LBB51_10 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a7, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB51_11 |
| ; RV64ZVE32F-NEXT: .LBB51_2: # %else2 |
| ; RV64ZVE32F-NEXT: andi a0, a7, 4 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB51_12 |
| ; RV64ZVE32F-NEXT: .LBB51_3: # %else4 |
| ; RV64ZVE32F-NEXT: andi a0, a7, 8 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB51_13 |
| ; RV64ZVE32F-NEXT: .LBB51_4: # %else6 |
| ; RV64ZVE32F-NEXT: andi a0, a7, 16 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB51_14 |
| ; RV64ZVE32F-NEXT: .LBB51_5: # %else8 |
| ; RV64ZVE32F-NEXT: andi a0, a7, 32 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB51_15 |
| ; RV64ZVE32F-NEXT: .LBB51_6: # %else10 |
| ; RV64ZVE32F-NEXT: andi a0, a7, 64 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB51_16 |
| ; RV64ZVE32F-NEXT: .LBB51_7: # %else12 |
| ; RV64ZVE32F-NEXT: andi a0, a7, -128 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB51_9 |
| ; RV64ZVE32F-NEXT: .LBB51_8: # %cond.store13 |
| ; RV64ZVE32F-NEXT: slli a0, a5, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a3, 0(a0) |
| ; RV64ZVE32F-NEXT: .LBB51_9: # %else14 |
| ; RV64ZVE32F-NEXT: ld s0, 24(sp) # 8-byte Folded Reload |
| ; RV64ZVE32F-NEXT: ld s1, 16(sp) # 8-byte Folded Reload |
| ; RV64ZVE32F-NEXT: ld s2, 8(sp) # 8-byte Folded Reload |
| ; RV64ZVE32F-NEXT: ld s3, 0(sp) # 8-byte Folded Reload |
| ; RV64ZVE32F-NEXT: addi sp, sp, 32 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB51_10: # %cond.store |
| ; RV64ZVE32F-NEXT: ld a2, 0(a2) |
| ; RV64ZVE32F-NEXT: ld a0, 0(a0) |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a1, a2 |
| ; RV64ZVE32F-NEXT: sd a0, 0(a2) |
| ; RV64ZVE32F-NEXT: andi a0, a7, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB51_2 |
| ; RV64ZVE32F-NEXT: .LBB51_11: # %cond.store1 |
| ; RV64ZVE32F-NEXT: slli a0, s2, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd s1, 0(a0) |
| ; RV64ZVE32F-NEXT: andi a0, a7, 4 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB51_3 |
| ; RV64ZVE32F-NEXT: .LBB51_12: # %cond.store3 |
| ; RV64ZVE32F-NEXT: slli a0, s0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd t6, 0(a0) |
| ; RV64ZVE32F-NEXT: andi a0, a7, 8 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB51_4 |
| ; RV64ZVE32F-NEXT: .LBB51_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: slli a0, t5, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd t3, 0(a0) |
| ; RV64ZVE32F-NEXT: andi a0, a7, 16 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB51_5 |
| ; RV64ZVE32F-NEXT: .LBB51_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: slli a0, t4, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd t1, 0(a0) |
| ; RV64ZVE32F-NEXT: andi a0, a7, 32 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB51_6 |
| ; RV64ZVE32F-NEXT: .LBB51_15: # %cond.store9 |
| ; RV64ZVE32F-NEXT: slli a0, t2, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a6, 0(a0) |
| ; RV64ZVE32F-NEXT: andi a0, a7, 64 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB51_7 |
| ; RV64ZVE32F-NEXT: .LBB51_16: # %cond.store11 |
| ; RV64ZVE32F-NEXT: slli a0, t0, 3 |
| ; RV64ZVE32F-NEXT: add a0, a1, a0 |
| ; RV64ZVE32F-NEXT: sd a4, 0(a0) |
| ; RV64ZVE32F-NEXT: andi a0, a7, -128 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB51_8 |
| ; RV64ZVE32F-NEXT: j .LBB51_9 |
| %ptrs = getelementptr inbounds i64, i64* %base, <8 x i64> %idxs |
| call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> %val, <8 x i64*> %ptrs, i32 8, <8 x i1> %m) |
| ret void |
| } |
| |
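| ; f16 scatters follow the same shape as the integer cases: targets with V |
| ; use a single indexed store, while RV64ZVE32F cannot form a vector of |
| ; 64-bit pointers and scalarizes on the mask bits instead. |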
| declare void @llvm.masked.scatter.v1f16.v1p0f16(<1 x half>, <1 x half*>, i32, <1 x i1>) |
| |
| define void @mscatter_v1f16(<1 x half> %val, <1 x half*> %ptrs, <1 x i1> %m) { |
| ; RV32V-LABEL: mscatter_v1f16: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 1, e16, mf4, ta, mu |
| ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_v1f16: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_v1f16: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t |
| ; RV32ZVE32F-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_v1f16: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.v.i v9, 0 |
| ; RV64ZVE32F-NEXT: vmerge.vim v9, v9, 1, v0 |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v9 |
| ; RV64ZVE32F-NEXT: andi a1, a1, 1 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB52_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vse16.v v8, (a0) |
| ; RV64ZVE32F-NEXT: .LBB52_2: # %else |
| ; RV64ZVE32F-NEXT: ret |
| call void @llvm.masked.scatter.v1f16.v1p0f16(<1 x half> %val, <1 x half*> %ptrs, i32 2, <1 x i1> %m) |
| ret void |
| } |
| |
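| ; For v2f16 the scalarized path reads the mask with vmv.x.s, branches on |
| ; each of the two bits, and stores the live lanes with vse16.v. |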
| declare void @llvm.masked.scatter.v2f16.v2p0f16(<2 x half>, <2 x half*>, i32, <2 x i1>) |
| |
| define void @mscatter_v2f16(<2 x half> %val, <2 x half*> %ptrs, <2 x i1> %m) { |
| ; RV32V-LABEL: mscatter_v2f16: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu |
| ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_v2f16: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_v2f16: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu |
| ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t |
| ; RV32ZVE32F-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_v2f16: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v0 |
| ; RV64ZVE32F-NEXT: andi a3, a2, 1 |
| ; RV64ZVE32F-NEXT: bnez a3, .LBB53_3 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a2, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB53_4 |
| ; RV64ZVE32F-NEXT: .LBB53_2: # %else2 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB53_3: # %cond.store |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vse16.v v8, (a0) |
| ; RV64ZVE32F-NEXT: andi a0, a2, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB53_2 |
| ; RV64ZVE32F-NEXT: .LBB53_4: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 |
| ; RV64ZVE32F-NEXT: vse16.v v8, (a1) |
| ; RV64ZVE32F-NEXT: ret |
| call void @llvm.masked.scatter.v2f16.v2p0f16(<2 x half> %val, <2 x half*> %ptrs, i32 2, <2 x i1> %m) |
| ret void |
| } |
| |
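| ; The <4 x half*> pointer operand appears to be passed indirectly on the |
| ; scalar configuration, so RV64ZVE32F loads the four pointers from memory |
| ; via a0 before the per-lane guarded stores. |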
| declare void @llvm.masked.scatter.v4f16.v4p0f16(<4 x half>, <4 x half*>, i32, <4 x i1>) |
| |
| define void @mscatter_v4f16(<4 x half> %val, <4 x half*> %ptrs, <4 x i1> %m) { |
| ; RV32-LABEL: mscatter_v4f16: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, mu |
| ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_v4f16: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_v4f16: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: ld a1, 24(a0) |
| ; RV64ZVE32F-NEXT: ld a2, 16(a0) |
| ; RV64ZVE32F-NEXT: ld a4, 8(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a3, v0 |
| ; RV64ZVE32F-NEXT: andi a5, a3, 1 |
| ; RV64ZVE32F-NEXT: bnez a5, .LBB54_5 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a3, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB54_6 |
| ; RV64ZVE32F-NEXT: .LBB54_2: # %else2 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 4 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB54_7 |
| ; RV64ZVE32F-NEXT: .LBB54_3: # %else4 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 8 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB54_8 |
| ; RV64ZVE32F-NEXT: .LBB54_4: # %else6 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB54_5: # %cond.store |
| ; RV64ZVE32F-NEXT: ld a0, 0(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vse16.v v8, (a0) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB54_2 |
| ; RV64ZVE32F-NEXT: .LBB54_6: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 |
| ; RV64ZVE32F-NEXT: vse16.v v9, (a4) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 4 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB54_3 |
| ; RV64ZVE32F-NEXT: .LBB54_7: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 |
| ; RV64ZVE32F-NEXT: vse16.v v9, (a2) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 8 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB54_4 |
| ; RV64ZVE32F-NEXT: .LBB54_8: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 |
| ; RV64ZVE32F-NEXT: vse16.v v8, (a1) |
| ; RV64ZVE32F-NEXT: ret |
| call void @llvm.masked.scatter.v4f16.v4p0f16(<4 x half> %val, <4 x half*> %ptrs, i32 2, <4 x i1> %m) |
| ret void |
| } |
| |
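| ; With a known all-ones mask the V targets drop the mask operand from the |
| ; indexed store. The RV64ZVE32F output still materializes the mask with |
| ; vmset.m and branches on it (note the always-taken beqz zero), so the |
| ; all-true mask does not appear to be folded on the scalarized path yet. |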
| define void @mscatter_truemask_v4f16(<4 x half> %val, <4 x half*> %ptrs) { |
| ; RV32-LABEL: mscatter_truemask_v4f16: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, mu |
| ; RV32-NEXT: vsoxei32.v v8, (zero), v9 |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_truemask_v4f16: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (zero), v10 |
| ; RV64-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_truemask_v4f16: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: ld a1, 24(a0) |
| ; RV64ZVE32F-NEXT: ld a2, 16(a0) |
| ; RV64ZVE32F-NEXT: ld a4, 8(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vmset.m v9 |
| ; RV64ZVE32F-NEXT: vmv.x.s a3, v9 |
| ; RV64ZVE32F-NEXT: beqz zero, .LBB55_5 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a3, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB55_6 |
| ; RV64ZVE32F-NEXT: .LBB55_2: # %else2 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 4 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB55_7 |
| ; RV64ZVE32F-NEXT: .LBB55_3: # %else4 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 8 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB55_8 |
| ; RV64ZVE32F-NEXT: .LBB55_4: # %else6 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB55_5: # %cond.store |
| ; RV64ZVE32F-NEXT: ld a0, 0(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vse16.v v8, (a0) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB55_2 |
| ; RV64ZVE32F-NEXT: .LBB55_6: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 |
| ; RV64ZVE32F-NEXT: vse16.v v9, (a4) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 4 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB55_3 |
| ; RV64ZVE32F-NEXT: .LBB55_7: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 |
| ; RV64ZVE32F-NEXT: vse16.v v9, (a2) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 8 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB55_4 |
| ; RV64ZVE32F-NEXT: .LBB55_8: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 |
| ; RV64ZVE32F-NEXT: vse16.v v8, (a1) |
| ; RV64ZVE32F-NEXT: ret |
| %mhead = insertelement <4 x i1> poison, i1 1, i32 0 |
| %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer |
| call void @llvm.masked.scatter.v4f16.v4p0f16(<4 x half> %val, <4 x half*> %ptrs, i32 2, <4 x i1> %mtrue) |
| ret void |
| } |
| |
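| ; A known all-zeros mask folds the whole scatter away to a plain ret on |
| ; every configuration. |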
| define void @mscatter_falsemask_v4f16(<4 x half> %val, <4 x half*> %ptrs) { |
| ; CHECK-LABEL: mscatter_falsemask_v4f16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: ret |
| call void @llvm.masked.scatter.v4f16.v4p0f16(<4 x half> %val, <4 x half*> %ptrs, i32 2, <4 x i1> zeroinitializer) |
| ret void |
| } |
| |
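| ; v8f16 consumes a full mask byte: the scalarized path tests bits 0-6 plus |
| ; the sign bit (andi with -128) and stores each live lane extracted with |
| ; vslidedown.vi. |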
| declare void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half>, <8 x half*>, i32, <8 x i1>) |
| |
| define void @mscatter_v8f16(<8 x half> %val, <8 x half*> %ptrs, <8 x i1> %m) { |
| ; RV32-LABEL: mscatter_v8f16: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu |
| ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_v8f16: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_v8f16: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: ld a1, 56(a0) |
| ; RV64ZVE32F-NEXT: ld a2, 48(a0) |
| ; RV64ZVE32F-NEXT: ld a4, 40(a0) |
| ; RV64ZVE32F-NEXT: ld a5, 32(a0) |
| ; RV64ZVE32F-NEXT: ld a6, 24(a0) |
| ; RV64ZVE32F-NEXT: ld a7, 16(a0) |
| ; RV64ZVE32F-NEXT: ld t0, 8(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a3, v0 |
| ; RV64ZVE32F-NEXT: andi t1, a3, 1 |
| ; RV64ZVE32F-NEXT: bnez t1, .LBB57_9 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a3, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB57_10 |
| ; RV64ZVE32F-NEXT: .LBB57_2: # %else2 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 4 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB57_11 |
| ; RV64ZVE32F-NEXT: .LBB57_3: # %else4 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 8 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB57_12 |
| ; RV64ZVE32F-NEXT: .LBB57_4: # %else6 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 16 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB57_13 |
| ; RV64ZVE32F-NEXT: .LBB57_5: # %else8 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 32 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB57_14 |
| ; RV64ZVE32F-NEXT: .LBB57_6: # %else10 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 64 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB57_15 |
| ; RV64ZVE32F-NEXT: .LBB57_7: # %else12 |
| ; RV64ZVE32F-NEXT: andi a0, a3, -128 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB57_16 |
| ; RV64ZVE32F-NEXT: .LBB57_8: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB57_9: # %cond.store |
| ; RV64ZVE32F-NEXT: ld a0, 0(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vse16.v v8, (a0) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB57_2 |
| ; RV64ZVE32F-NEXT: .LBB57_10: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 |
| ; RV64ZVE32F-NEXT: vse16.v v9, (t0) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 4 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB57_3 |
| ; RV64ZVE32F-NEXT: .LBB57_11: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 |
| ; RV64ZVE32F-NEXT: vse16.v v9, (a7) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 8 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB57_4 |
| ; RV64ZVE32F-NEXT: .LBB57_12: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3 |
| ; RV64ZVE32F-NEXT: vse16.v v9, (a6) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 16 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB57_5 |
| ; RV64ZVE32F-NEXT: .LBB57_13: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 |
| ; RV64ZVE32F-NEXT: vse16.v v9, (a5) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 32 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB57_6 |
| ; RV64ZVE32F-NEXT: .LBB57_14: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5 |
| ; RV64ZVE32F-NEXT: vse16.v v9, (a4) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 64 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB57_7 |
| ; RV64ZVE32F-NEXT: .LBB57_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 6 |
| ; RV64ZVE32F-NEXT: vse16.v v9, (a2) |
| ; RV64ZVE32F-NEXT: andi a0, a3, -128 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB57_8 |
| ; RV64ZVE32F-NEXT: .LBB57_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV64ZVE32F-NEXT: vse16.v v8, (a1) |
| ; RV64ZVE32F-NEXT: ret |
| call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> %val, <8 x half*> %ptrs, i32 2, <8 x i1> %m) |
| ret void |
| } |
| |
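| ; Indexed f16 scatters with i8 indices: the V targets sign-extend with |
| ; vsext and scale by the 2-byte element size with vadd.vv (a doubling), |
| ; then issue one indexed store. RV64ZVE32F instead walks the index vector |
| ; with vslidedown.vi and scales each extracted index with slli by 1. |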
| define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, half* %base, <8 x i8> %idxs, <8 x i1> %m) { |
| ; RV32-LABEL: mscatter_baseidx_v8i8_v8f16: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32-NEXT: vsext.vf4 v10, v9 |
| ; RV32-NEXT: vadd.vv v10, v10, v10 |
| ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu |
| ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_v8i8_v8f16: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vsext.vf8 v12, v9 |
| ; RV64-NEXT: vadd.vv v12, v12, v12 |
| ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8f16: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 1 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB58_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vse16.v v8, (a2) |
| ; RV64ZVE32F-NEXT: .LBB58_2: # %else |
| ; RV64ZVE32F-NEXT: andi a2, a1, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB58_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 |
| ; RV64ZVE32F-NEXT: vse16.v v10, (a2) |
| ; RV64ZVE32F-NEXT: .LBB58_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB58_6 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2 |
| ; RV64ZVE32F-NEXT: vse16.v v11, (a2) |
| ; RV64ZVE32F-NEXT: .LBB58_6: # %else4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 8 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 4 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB58_13 |
| ; RV64ZVE32F-NEXT: # %bb.7: # %else6 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB58_14 |
| ; RV64ZVE32F-NEXT: .LBB58_8: # %else8 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB58_10 |
| ; RV64ZVE32F-NEXT: .LBB58_9: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5 |
| ; RV64ZVE32F-NEXT: vse16.v v10, (a2) |
| ; RV64ZVE32F-NEXT: .LBB58_10: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB58_15 |
| ; RV64ZVE32F-NEXT: # %bb.11: # %else12 |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: bnez a1, .LBB58_16 |
| ; RV64ZVE32F-NEXT: .LBB58_12: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB58_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 |
| ; RV64ZVE32F-NEXT: vse16.v v10, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB58_8 |
| ; RV64ZVE32F-NEXT: .LBB58_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 |
| ; RV64ZVE32F-NEXT: vse16.v v10, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB58_9 |
| ; RV64ZVE32F-NEXT: j .LBB58_10 |
| ; RV64ZVE32F-NEXT: .LBB58_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6 |
| ; RV64ZVE32F-NEXT: vse16.v v10, (a2) |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB58_12 |
| ; RV64ZVE32F-NEXT: .LBB58_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v9 |
| ; RV64ZVE32F-NEXT: slli a1, a1, 1 |
| ; RV64ZVE32F-NEXT: add a0, a0, a1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV64ZVE32F-NEXT: vse16.v v8, (a0) |
| ; RV64ZVE32F-NEXT: ret |
| %ptrs = getelementptr inbounds half, half* %base, <8 x i8> %idxs |
| call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> %val, <8 x half*> %ptrs, i32 2, <8 x i1> %m) |
| ret void |
| } |
| |
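| ; An explicit sext of the i8 indices to i16 produces the same code as the |
| ; plain i8-index case above: the wider sign extension folds into the one |
| ; the lowering already performs. |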
| define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, half* %base, <8 x i8> %idxs, <8 x i1> %m) { |
| ; RV32-LABEL: mscatter_baseidx_sext_v8i8_v8f16: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32-NEXT: vsext.vf4 v10, v9 |
| ; RV32-NEXT: vadd.vv v10, v10, v10 |
| ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu |
| ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_sext_v8i8_v8f16: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vsext.vf8 v12, v9 |
| ; RV64-NEXT: vadd.vv v12, v12, v12 |
| ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8f16: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 1 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB59_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vse16.v v8, (a2) |
| ; RV64ZVE32F-NEXT: .LBB59_2: # %else |
| ; RV64ZVE32F-NEXT: andi a2, a1, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB59_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 |
| ; RV64ZVE32F-NEXT: vse16.v v10, (a2) |
| ; RV64ZVE32F-NEXT: .LBB59_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB59_6 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2 |
| ; RV64ZVE32F-NEXT: vse16.v v11, (a2) |
| ; RV64ZVE32F-NEXT: .LBB59_6: # %else4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 8 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 4 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB59_13 |
| ; RV64ZVE32F-NEXT: # %bb.7: # %else6 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB59_14 |
| ; RV64ZVE32F-NEXT: .LBB59_8: # %else8 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB59_10 |
| ; RV64ZVE32F-NEXT: .LBB59_9: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5 |
| ; RV64ZVE32F-NEXT: vse16.v v10, (a2) |
| ; RV64ZVE32F-NEXT: .LBB59_10: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB59_15 |
| ; RV64ZVE32F-NEXT: # %bb.11: # %else12 |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: bnez a1, .LBB59_16 |
| ; RV64ZVE32F-NEXT: .LBB59_12: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB59_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 |
| ; RV64ZVE32F-NEXT: vse16.v v10, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB59_8 |
| ; RV64ZVE32F-NEXT: .LBB59_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 |
| ; RV64ZVE32F-NEXT: vse16.v v10, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB59_9 |
| ; RV64ZVE32F-NEXT: j .LBB59_10 |
| ; RV64ZVE32F-NEXT: .LBB59_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6 |
| ; RV64ZVE32F-NEXT: vse16.v v10, (a2) |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB59_12 |
| ; RV64ZVE32F-NEXT: .LBB59_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v9 |
| ; RV64ZVE32F-NEXT: slli a1, a1, 1 |
| ; RV64ZVE32F-NEXT: add a0, a0, a1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV64ZVE32F-NEXT: vse16.v v8, (a0) |
| ; RV64ZVE32F-NEXT: ret |
| %eidxs = sext <8 x i8> %idxs to <8 x i16> |
| %ptrs = getelementptr inbounds half, half* %base, <8 x i16> %eidxs |
| call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> %val, <8 x half*> %ptrs, i32 2, <8 x i1> %m) |
| ret void |
| } |
| |
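| ; Zero-extended i8 indices: the V targets switch to vzext, and the |
| ; scalarized path masks each extracted index with andi 255 before scaling. |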
| define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, half* %base, <8 x i8> %idxs, <8 x i1> %m) { |
| ; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8f16: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32-NEXT: vzext.vf4 v10, v9 |
| ; RV32-NEXT: vadd.vv v10, v10, v10 |
| ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu |
| ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8f16: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vzext.vf8 v12, v9 |
| ; RV64-NEXT: vadd.vv v12, v12, v12 |
| ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8f16: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 1 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB60_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: andi a2, a2, 255 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vse16.v v8, (a2) |
| ; RV64ZVE32F-NEXT: .LBB60_2: # %else |
| ; RV64ZVE32F-NEXT: andi a2, a1, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB60_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: andi a2, a2, 255 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 |
| ; RV64ZVE32F-NEXT: vse16.v v10, (a2) |
| ; RV64ZVE32F-NEXT: .LBB60_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB60_6 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: andi a2, a2, 255 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2 |
| ; RV64ZVE32F-NEXT: vse16.v v11, (a2) |
| ; RV64ZVE32F-NEXT: .LBB60_6: # %else4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 8 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 4 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB60_13 |
| ; RV64ZVE32F-NEXT: # %bb.7: # %else6 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB60_14 |
| ; RV64ZVE32F-NEXT: .LBB60_8: # %else8 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB60_10 |
| ; RV64ZVE32F-NEXT: .LBB60_9: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: andi a2, a2, 255 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5 |
| ; RV64ZVE32F-NEXT: vse16.v v10, (a2) |
| ; RV64ZVE32F-NEXT: .LBB60_10: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB60_15 |
| ; RV64ZVE32F-NEXT: # %bb.11: # %else12 |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: bnez a1, .LBB60_16 |
| ; RV64ZVE32F-NEXT: .LBB60_12: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB60_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: andi a2, a2, 255 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 |
| ; RV64ZVE32F-NEXT: vse16.v v10, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB60_8 |
| ; RV64ZVE32F-NEXT: .LBB60_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: andi a2, a2, 255 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 |
| ; RV64ZVE32F-NEXT: vse16.v v10, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB60_9 |
| ; RV64ZVE32F-NEXT: j .LBB60_10 |
| ; RV64ZVE32F-NEXT: .LBB60_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: andi a2, a2, 255 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6 |
| ; RV64ZVE32F-NEXT: vse16.v v10, (a2) |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB60_12 |
| ; RV64ZVE32F-NEXT: .LBB60_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v9 |
| ; RV64ZVE32F-NEXT: andi a1, a1, 255 |
| ; RV64ZVE32F-NEXT: slli a1, a1, 1 |
| ; RV64ZVE32F-NEXT: add a0, a0, a1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV64ZVE32F-NEXT: vse16.v v8, (a0) |
| ; RV64ZVE32F-NEXT: ret |
| %eidxs = zext <8 x i8> %idxs to <8 x i16> |
| %ptrs = getelementptr inbounds half, half* %base, <8 x i16> %eidxs |
| call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> %val, <8 x half*> %ptrs, i32 2, <8 x i1> %m) |
| ret void |
| } |
| |
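| ; Native i16 indices need only a vsext.vf2 (RV32) or vsext.vf4 (RV64) |
| ; before scaling; the scalarized path extracts the indices at e16 rather |
| ; than e8. |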
| define void @mscatter_baseidx_v8f16(<8 x half> %val, half* %base, <8 x i16> %idxs, <8 x i1> %m) { |
| ; RV32-LABEL: mscatter_baseidx_v8f16: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32-NEXT: vsext.vf2 v10, v9 |
| ; RV32-NEXT: vadd.vv v10, v10, v10 |
| ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu |
| ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_v8f16: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vsext.vf4 v12, v9 |
| ; RV64-NEXT: vadd.vv v12, v12, v12 |
| ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_v8f16: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 1 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB61_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vse16.v v8, (a2) |
| ; RV64ZVE32F-NEXT: .LBB61_2: # %else |
| ; RV64ZVE32F-NEXT: andi a2, a1, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB61_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 |
| ; RV64ZVE32F-NEXT: vse16.v v10, (a2) |
| ; RV64ZVE32F-NEXT: .LBB61_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB61_6 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2 |
| ; RV64ZVE32F-NEXT: vse16.v v11, (a2) |
| ; RV64ZVE32F-NEXT: .LBB61_6: # %else4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 8 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 4 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB61_13 |
| ; RV64ZVE32F-NEXT: # %bb.7: # %else6 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB61_14 |
| ; RV64ZVE32F-NEXT: .LBB61_8: # %else8 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB61_10 |
| ; RV64ZVE32F-NEXT: .LBB61_9: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5 |
| ; RV64ZVE32F-NEXT: vse16.v v10, (a2) |
| ; RV64ZVE32F-NEXT: .LBB61_10: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB61_15 |
| ; RV64ZVE32F-NEXT: # %bb.11: # %else12 |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: bnez a1, .LBB61_16 |
| ; RV64ZVE32F-NEXT: .LBB61_12: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB61_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 |
| ; RV64ZVE32F-NEXT: vse16.v v10, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB61_8 |
| ; RV64ZVE32F-NEXT: .LBB61_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 |
| ; RV64ZVE32F-NEXT: vse16.v v10, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB61_9 |
| ; RV64ZVE32F-NEXT: j .LBB61_10 |
| ; RV64ZVE32F-NEXT: .LBB61_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 1 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6 |
| ; RV64ZVE32F-NEXT: vse16.v v10, (a2) |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB61_12 |
| ; RV64ZVE32F-NEXT: .LBB61_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v9 |
| ; RV64ZVE32F-NEXT: slli a1, a1, 1 |
| ; RV64ZVE32F-NEXT: add a0, a0, a1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV64ZVE32F-NEXT: vse16.v v8, (a0) |
| ; RV64ZVE32F-NEXT: ret |
| %ptrs = getelementptr inbounds half, half* %base, <8 x i16> %idxs |
| call void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half> %val, <8 x half*> %ptrs, i32 2, <8 x i1> %m) |
| ret void |
| } |
| |
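| ; f32 tests: ZVE32F supports e32 elements, so the value stays in a vector |
| ; register and only the mask test and addressing are scalarized. |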
| declare void @llvm.masked.scatter.v1f32.v1p0f32(<1 x float>, <1 x float*>, i32, <1 x i1>) |
| |
| define void @mscatter_v1f32(<1 x float> %val, <1 x float*> %ptrs, <1 x i1> %m) { |
| ; RV32V-LABEL: mscatter_v1f32: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 1, e32, mf2, ta, mu |
| ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_v1f32: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_v1f32: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t |
| ; RV32ZVE32F-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_v1f32: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.v.i v9, 0 |
| ; RV64ZVE32F-NEXT: vmerge.vim v9, v9, 1, v0 |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v9 |
| ; RV64ZVE32F-NEXT: andi a1, a1, 1 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB62_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a0) |
| ; RV64ZVE32F-NEXT: .LBB62_2: # %else |
| ; RV64ZVE32F-NEXT: ret |
| call void @llvm.masked.scatter.v1f32.v1p0f32(<1 x float> %val, <1 x float*> %ptrs, i32 4, <1 x i1> %m) |
| ret void |
| } |
| |
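| ; With two lanes the RV64ZVE32F pointers arrive in a0/a1 and each store is |
| ; guarded by its own mask bit. |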
| declare void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float>, <2 x float*>, i32, <2 x i1>) |
| |
| define void @mscatter_v2f32(<2 x float> %val, <2 x float*> %ptrs, <2 x i1> %m) { |
| ; RV32V-LABEL: mscatter_v2f32: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu |
| ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_v2f32: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_v2f32: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu |
| ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t |
| ; RV32ZVE32F-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_v2f32: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v0 |
| ; RV64ZVE32F-NEXT: andi a3, a2, 1 |
| ; RV64ZVE32F-NEXT: bnez a3, .LBB63_3 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a2, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB63_4 |
| ; RV64ZVE32F-NEXT: .LBB63_2: # %else2 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB63_3: # %cond.store |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a0) |
| ; RV64ZVE32F-NEXT: andi a0, a2, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB63_2 |
| ; RV64ZVE32F-NEXT: .LBB63_4: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a1) |
| ; RV64ZVE32F-NEXT: ret |
| call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> %val, <2 x float*> %ptrs, i32 4, <2 x i1> %m) |
| ret void |
| } |
| |
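| ; On RV64ZVE32F the <4 x i64> pointer vector is passed indirectly, so the |
| ; four pointers are first reloaded from memory at (a0). |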
| declare void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float>, <4 x float*>, i32, <4 x i1>) |
| |
| define void @mscatter_v4f32(<4 x float> %val, <4 x float*> %ptrs, <4 x i1> %m) { |
| ; RV32-LABEL: mscatter_v4f32: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu |
| ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_v4f32: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_v4f32: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: ld a1, 24(a0) |
| ; RV64ZVE32F-NEXT: ld a2, 16(a0) |
| ; RV64ZVE32F-NEXT: ld a4, 8(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a3, v0 |
| ; RV64ZVE32F-NEXT: andi a5, a3, 1 |
| ; RV64ZVE32F-NEXT: bnez a5, .LBB64_5 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a3, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB64_6 |
| ; RV64ZVE32F-NEXT: .LBB64_2: # %else2 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 4 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB64_7 |
| ; RV64ZVE32F-NEXT: .LBB64_3: # %else4 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 8 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB64_8 |
| ; RV64ZVE32F-NEXT: .LBB64_4: # %else6 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB64_5: # %cond.store |
| ; RV64ZVE32F-NEXT: ld a0, 0(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a0) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB64_2 |
| ; RV64ZVE32F-NEXT: .LBB64_6: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 |
| ; RV64ZVE32F-NEXT: vse32.v v9, (a4) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 4 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB64_3 |
| ; RV64ZVE32F-NEXT: .LBB64_7: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 |
| ; RV64ZVE32F-NEXT: vse32.v v9, (a2) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 8 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB64_4 |
| ; RV64ZVE32F-NEXT: .LBB64_8: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a1) |
| ; RV64ZVE32F-NEXT: ret |
| call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> %val, <4 x float*> %ptrs, i32 4, <4 x i1> %m) |
| ret void |
| } |
| |
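| ; All-ones mask: the RVV targets drop v0.t entirely; the scalarized path still |
| ; materializes the mask (vmset.m) but its first branch (beqz zero) is always taken. |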
| define void @mscatter_truemask_v4f32(<4 x float> %val, <4 x float*> %ptrs) { |
| ; RV32-LABEL: mscatter_truemask_v4f32: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu |
| ; RV32-NEXT: vsoxei32.v v8, (zero), v9 |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_truemask_v4f32: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (zero), v10 |
| ; RV64-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_truemask_v4f32: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: ld a1, 24(a0) |
| ; RV64ZVE32F-NEXT: ld a2, 16(a0) |
| ; RV64ZVE32F-NEXT: ld a4, 8(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vmset.m v9 |
| ; RV64ZVE32F-NEXT: vmv.x.s a3, v9 |
| ; RV64ZVE32F-NEXT: beqz zero, .LBB65_5 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a3, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB65_6 |
| ; RV64ZVE32F-NEXT: .LBB65_2: # %else2 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 4 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB65_7 |
| ; RV64ZVE32F-NEXT: .LBB65_3: # %else4 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 8 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB65_8 |
| ; RV64ZVE32F-NEXT: .LBB65_4: # %else6 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB65_5: # %cond.store |
| ; RV64ZVE32F-NEXT: ld a0, 0(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a0) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB65_2 |
| ; RV64ZVE32F-NEXT: .LBB65_6: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 |
| ; RV64ZVE32F-NEXT: vse32.v v9, (a4) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 4 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB65_3 |
| ; RV64ZVE32F-NEXT: .LBB65_7: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 |
| ; RV64ZVE32F-NEXT: vse32.v v9, (a2) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 8 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB65_4 |
| ; RV64ZVE32F-NEXT: .LBB65_8: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a1) |
| ; RV64ZVE32F-NEXT: ret |
| %mhead = insertelement <4 x i1> poison, i1 1, i32 0 |
| %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer |
| call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> %val, <4 x float*> %ptrs, i32 4, <4 x i1> %mtrue) |
| ret void |
| } |
| |
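| ; All-zeros mask: the scatter folds to a plain ret on every target. |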
| define void @mscatter_falsemask_v4f32(<4 x float> %val, <4 x float*> %ptrs) { |
| ; CHECK-LABEL: mscatter_falsemask_v4f32: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: ret |
| call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> %val, <4 x float*> %ptrs, i32 4, <4 x i1> zeroinitializer) |
| ret void |
| } |
| |
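| ; Eight f32 lanes: RV64ZVE32F tests one mask bit per lane, with seven pointers |
| ; preloaded into a1-a7/t0 and the first one loaded lazily in cond.store. |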
| declare void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float>, <8 x float*>, i32, <8 x i1>) |
| |
| define void @mscatter_v8f32(<8 x float> %val, <8 x float*> %ptrs, <8 x i1> %m) { |
| ; RV32-LABEL: mscatter_v8f32: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_v8f32: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_v8f32: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: ld a1, 56(a0) |
| ; RV64ZVE32F-NEXT: ld a2, 48(a0) |
| ; RV64ZVE32F-NEXT: ld a4, 40(a0) |
| ; RV64ZVE32F-NEXT: ld a5, 32(a0) |
| ; RV64ZVE32F-NEXT: ld a6, 24(a0) |
| ; RV64ZVE32F-NEXT: ld a7, 16(a0) |
| ; RV64ZVE32F-NEXT: ld t0, 8(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a3, v0 |
| ; RV64ZVE32F-NEXT: andi t1, a3, 1 |
| ; RV64ZVE32F-NEXT: bnez t1, .LBB67_9 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a3, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB67_10 |
| ; RV64ZVE32F-NEXT: .LBB67_2: # %else2 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 4 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB67_11 |
| ; RV64ZVE32F-NEXT: .LBB67_3: # %else4 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 8 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB67_12 |
| ; RV64ZVE32F-NEXT: .LBB67_4: # %else6 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 16 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB67_13 |
| ; RV64ZVE32F-NEXT: .LBB67_5: # %else8 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 32 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB67_14 |
| ; RV64ZVE32F-NEXT: .LBB67_6: # %else10 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 64 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB67_15 |
| ; RV64ZVE32F-NEXT: .LBB67_7: # %else12 |
| ; RV64ZVE32F-NEXT: andi a0, a3, -128 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB67_16 |
| ; RV64ZVE32F-NEXT: .LBB67_8: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB67_9: # %cond.store |
| ; RV64ZVE32F-NEXT: ld a0, 0(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a0) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB67_2 |
| ; RV64ZVE32F-NEXT: .LBB67_10: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 |
| ; RV64ZVE32F-NEXT: vse32.v v10, (t0) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 4 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB67_3 |
| ; RV64ZVE32F-NEXT: .LBB67_11: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2 |
| ; RV64ZVE32F-NEXT: vse32.v v10, (a7) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 8 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB67_4 |
| ; RV64ZVE32F-NEXT: .LBB67_12: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 |
| ; RV64ZVE32F-NEXT: vse32.v v10, (a6) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 16 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB67_5 |
| ; RV64ZVE32F-NEXT: .LBB67_13: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 |
| ; RV64ZVE32F-NEXT: vse32.v v10, (a5) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 32 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB67_6 |
| ; RV64ZVE32F-NEXT: .LBB67_14: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5 |
| ; RV64ZVE32F-NEXT: vse32.v v10, (a4) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 64 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB67_7 |
| ; RV64ZVE32F-NEXT: .LBB67_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6 |
| ; RV64ZVE32F-NEXT: vse32.v v10, (a2) |
| ; RV64ZVE32F-NEXT: andi a0, a3, -128 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB67_8 |
| ; RV64ZVE32F-NEXT: .LBB67_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a1) |
| ; RV64ZVE32F-NEXT: ret |
| call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> %val, <8 x float*> %ptrs, i32 4, <8 x i1> %m) |
| ret void |
| } |
| |
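| ; i8 indices: vsext.vf4/vf8 to the index EEW, then vsll.vi 2 to scale by the |
| ; 4-byte f32 element size. |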
| define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, float* %base, <8 x i8> %idxs, <8 x i1> %m) { |
| ; RV32-LABEL: mscatter_baseidx_v8i8_v8f32: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32-NEXT: vsext.vf4 v12, v10 |
| ; RV32-NEXT: vsll.vi v10, v12, 2 |
| ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_v8i8_v8f32: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vsext.vf8 v12, v10 |
| ; RV64-NEXT: vsll.vi v12, v12, 2 |
| ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8f32: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 1 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB68_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a2) |
| ; RV64ZVE32F-NEXT: .LBB68_2: # %else |
| ; RV64ZVE32F-NEXT: andi a2, a1, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB68_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: .LBB68_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB68_6 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: .LBB68_6: # %else4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 8 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 4 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB68_13 |
| ; RV64ZVE32F-NEXT: # %bb.7: # %else6 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB68_14 |
| ; RV64ZVE32F-NEXT: .LBB68_8: # %else8 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB68_10 |
| ; RV64ZVE32F-NEXT: .LBB68_9: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: .LBB68_10: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB68_15 |
| ; RV64ZVE32F-NEXT: # %bb.11: # %else12 |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: bnez a1, .LBB68_16 |
| ; RV64ZVE32F-NEXT: .LBB68_12: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB68_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 3 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB68_8 |
| ; RV64ZVE32F-NEXT: .LBB68_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB68_9 |
| ; RV64ZVE32F-NEXT: j .LBB68_10 |
| ; RV64ZVE32F-NEXT: .LBB68_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB68_12 |
| ; RV64ZVE32F-NEXT: .LBB68_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV64ZVE32F-NEXT: slli a1, a1, 2 |
| ; RV64ZVE32F-NEXT: add a0, a0, a1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a0) |
| ; RV64ZVE32F-NEXT: ret |
| %ptrs = getelementptr inbounds float, float* %base, <8 x i8> %idxs |
| call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> %val, <8 x float*> %ptrs, i32 4, <8 x i1> %m) |
| ret void |
| } |
| |
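| ; An explicit sext of the i8 indices lowers the same way as the unextended form. |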
| define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, float* %base, <8 x i8> %idxs, <8 x i1> %m) { |
| ; RV32-LABEL: mscatter_baseidx_sext_v8i8_v8f32: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32-NEXT: vsext.vf4 v12, v10 |
| ; RV32-NEXT: vsll.vi v10, v12, 2 |
| ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_sext_v8i8_v8f32: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vsext.vf8 v12, v10 |
| ; RV64-NEXT: vsll.vi v12, v12, 2 |
| ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8f32: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 1 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB69_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a2) |
| ; RV64ZVE32F-NEXT: .LBB69_2: # %else |
| ; RV64ZVE32F-NEXT: andi a2, a1, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB69_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: .LBB69_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB69_6 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: .LBB69_6: # %else4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 8 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 4 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB69_13 |
| ; RV64ZVE32F-NEXT: # %bb.7: # %else6 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB69_14 |
| ; RV64ZVE32F-NEXT: .LBB69_8: # %else8 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB69_10 |
| ; RV64ZVE32F-NEXT: .LBB69_9: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: .LBB69_10: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB69_15 |
| ; RV64ZVE32F-NEXT: # %bb.11: # %else12 |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: bnez a1, .LBB69_16 |
| ; RV64ZVE32F-NEXT: .LBB69_12: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB69_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 3 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB69_8 |
| ; RV64ZVE32F-NEXT: .LBB69_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB69_9 |
| ; RV64ZVE32F-NEXT: j .LBB69_10 |
| ; RV64ZVE32F-NEXT: .LBB69_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB69_12 |
| ; RV64ZVE32F-NEXT: .LBB69_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV64ZVE32F-NEXT: slli a1, a1, 2 |
| ; RV64ZVE32F-NEXT: add a0, a0, a1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a0) |
| ; RV64ZVE32F-NEXT: ret |
| %eidxs = sext <8 x i8> %idxs to <8 x i32> |
| %ptrs = getelementptr inbounds float, float* %base, <8 x i32> %eidxs |
| call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> %val, <8 x float*> %ptrs, i32 4, <8 x i1> %m) |
| ret void |
| } |
| |
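| ; Zero extension switches to vzext and makes the scalar path mask each index |
| ; with andi 255 before scaling. |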
| define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, float* %base, <8 x i8> %idxs, <8 x i1> %m) { |
| ; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8f32: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32-NEXT: vzext.vf4 v12, v10 |
| ; RV32-NEXT: vsll.vi v10, v12, 2 |
| ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8f32: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vzext.vf8 v12, v10 |
| ; RV64-NEXT: vsll.vi v12, v12, 2 |
| ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8f32: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 1 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB70_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: andi a2, a2, 255 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a2) |
| ; RV64ZVE32F-NEXT: .LBB70_2: # %else |
| ; RV64ZVE32F-NEXT: andi a2, a1, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB70_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: andi a2, a2, 255 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: .LBB70_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB70_6 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: andi a2, a2, 255 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: .LBB70_6: # %else4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 8 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 4 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB70_13 |
| ; RV64ZVE32F-NEXT: # %bb.7: # %else6 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB70_14 |
| ; RV64ZVE32F-NEXT: .LBB70_8: # %else8 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB70_10 |
| ; RV64ZVE32F-NEXT: .LBB70_9: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: andi a2, a2, 255 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: .LBB70_10: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB70_15 |
| ; RV64ZVE32F-NEXT: # %bb.11: # %else12 |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: bnez a1, .LBB70_16 |
| ; RV64ZVE32F-NEXT: .LBB70_12: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB70_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: andi a2, a2, 255 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 3 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB70_8 |
| ; RV64ZVE32F-NEXT: .LBB70_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: andi a2, a2, 255 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB70_9 |
| ; RV64ZVE32F-NEXT: j .LBB70_10 |
| ; RV64ZVE32F-NEXT: .LBB70_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: andi a2, a2, 255 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB70_12 |
| ; RV64ZVE32F-NEXT: .LBB70_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV64ZVE32F-NEXT: andi a1, a1, 255 |
| ; RV64ZVE32F-NEXT: slli a1, a1, 2 |
| ; RV64ZVE32F-NEXT: add a0, a0, a1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a0) |
| ; RV64ZVE32F-NEXT: ret |
| %eidxs = zext <8 x i8> %idxs to <8 x i32> |
| %ptrs = getelementptr inbounds float, float* %base, <8 x i32> %eidxs |
| call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> %val, <8 x float*> %ptrs, i32 4, <8 x i1> %m) |
| ret void |
| } |
| |
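| ; i16 indices for an f32 scatter: sign-extend to the index EEW and shift left by 2. |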
| define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, float* %base, <8 x i16> %idxs, <8 x i1> %m) { |
| ; RV32-LABEL: mscatter_baseidx_v8i16_v8f32: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32-NEXT: vsext.vf2 v12, v10 |
| ; RV32-NEXT: vsll.vi v10, v12, 2 |
| ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_v8i16_v8f32: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vsext.vf4 v12, v10 |
| ; RV64-NEXT: vsll.vi v12, v12, 2 |
| ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i16_v8f32: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 1 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB71_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a2) |
| ; RV64ZVE32F-NEXT: .LBB71_2: # %else |
| ; RV64ZVE32F-NEXT: andi a2, a1, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB71_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: .LBB71_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB71_6 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: .LBB71_6: # %else4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 8 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 4 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB71_13 |
| ; RV64ZVE32F-NEXT: # %bb.7: # %else6 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB71_14 |
| ; RV64ZVE32F-NEXT: .LBB71_8: # %else8 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB71_10 |
| ; RV64ZVE32F-NEXT: .LBB71_9: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: .LBB71_10: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB71_15 |
| ; RV64ZVE32F-NEXT: # %bb.11: # %else12 |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: bnez a1, .LBB71_16 |
| ; RV64ZVE32F-NEXT: .LBB71_12: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB71_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 3 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB71_8 |
| ; RV64ZVE32F-NEXT: .LBB71_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB71_9 |
| ; RV64ZVE32F-NEXT: j .LBB71_10 |
| ; RV64ZVE32F-NEXT: .LBB71_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB71_12 |
| ; RV64ZVE32F-NEXT: .LBB71_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV64ZVE32F-NEXT: slli a1, a1, 2 |
| ; RV64ZVE32F-NEXT: add a0, a0, a1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a0) |
| ; RV64ZVE32F-NEXT: ret |
| %ptrs = getelementptr inbounds float, float* %base, <8 x i16> %idxs |
| call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> %val, <8 x float*> %ptrs, i32 4, <8 x i1> %m) |
| ret void |
| } |
| |
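| ; An explicit sext of the i16 indices matches the unextended i16 lowering. |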
| define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, float* %base, <8 x i16> %idxs, <8 x i1> %m) { |
| ; RV32-LABEL: mscatter_baseidx_sext_v8i16_v8f32: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32-NEXT: vsext.vf2 v12, v10 |
| ; RV32-NEXT: vsll.vi v10, v12, 2 |
| ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_sext_v8i16_v8f32: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vsext.vf4 v12, v10 |
| ; RV64-NEXT: vsll.vi v12, v12, 2 |
| ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8f32: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 1 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB72_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a2) |
| ; RV64ZVE32F-NEXT: .LBB72_2: # %else |
| ; RV64ZVE32F-NEXT: andi a2, a1, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB72_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: .LBB72_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB72_6 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: .LBB72_6: # %else4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 8 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 4 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB72_13 |
| ; RV64ZVE32F-NEXT: # %bb.7: # %else6 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB72_14 |
| ; RV64ZVE32F-NEXT: .LBB72_8: # %else8 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB72_10 |
| ; RV64ZVE32F-NEXT: .LBB72_9: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: .LBB72_10: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB72_15 |
| ; RV64ZVE32F-NEXT: # %bb.11: # %else12 |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: bnez a1, .LBB72_16 |
| ; RV64ZVE32F-NEXT: .LBB72_12: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB72_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 3 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB72_8 |
| ; RV64ZVE32F-NEXT: .LBB72_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB72_9 |
| ; RV64ZVE32F-NEXT: j .LBB72_10 |
| ; RV64ZVE32F-NEXT: .LBB72_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB72_12 |
| ; RV64ZVE32F-NEXT: .LBB72_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV64ZVE32F-NEXT: slli a1, a1, 2 |
| ; RV64ZVE32F-NEXT: add a0, a0, a1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a0) |
| ; RV64ZVE32F-NEXT: ret |
| %eidxs = sext <8 x i16> %idxs to <8 x i32> |
| %ptrs = getelementptr inbounds float, float* %base, <8 x i32> %eidxs |
| call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> %val, <8 x float*> %ptrs, i32 4, <8 x i1> %m) |
| ret void |
| } |
| |
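| ; Zero-extended i16 indices: the scalar path builds the 0xffff mask once |
| ; (lui a1, 16; addiw a1, a1, -1) and ANDs it into every extracted index. |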
| define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, float* %base, <8 x i16> %idxs, <8 x i1> %m) { |
| ; RV32-LABEL: mscatter_baseidx_zext_v8i16_v8f32: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32-NEXT: vzext.vf2 v12, v10 |
| ; RV32-NEXT: vsll.vi v10, v12, 2 |
| ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_zext_v8i16_v8f32: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vzext.vf4 v12, v10 |
| ; RV64-NEXT: vsll.vi v12, v12, 2 |
| ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8f32: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: lui a1, 16 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v0 |
| ; RV64ZVE32F-NEXT: andi a3, a2, 1 |
| ; RV64ZVE32F-NEXT: addiw a1, a1, -1 |
| ; RV64ZVE32F-NEXT: beqz a3, .LBB73_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a3, v10 |
| ; RV64ZVE32F-NEXT: and a3, a3, a1 |
| ; RV64ZVE32F-NEXT: slli a3, a3, 2 |
| ; RV64ZVE32F-NEXT: add a3, a0, a3 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a3) |
| ; RV64ZVE32F-NEXT: .LBB73_2: # %else |
| ; RV64ZVE32F-NEXT: andi a3, a2, 2 |
| ; RV64ZVE32F-NEXT: beqz a3, .LBB73_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a3, v11 |
| ; RV64ZVE32F-NEXT: and a3, a3, a1 |
| ; RV64ZVE32F-NEXT: slli a3, a3, 2 |
| ; RV64ZVE32F-NEXT: add a3, a0, a3 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a3) |
| ; RV64ZVE32F-NEXT: .LBB73_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a3, a2, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 2 |
| ; RV64ZVE32F-NEXT: beqz a3, .LBB73_6 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a3, v11 |
| ; RV64ZVE32F-NEXT: and a3, a3, a1 |
| ; RV64ZVE32F-NEXT: slli a3, a3, 2 |
| ; RV64ZVE32F-NEXT: add a3, a0, a3 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a3) |
| ; RV64ZVE32F-NEXT: .LBB73_6: # %else4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: andi a3, a2, 8 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 4 |
| ; RV64ZVE32F-NEXT: bnez a3, .LBB73_13 |
| ; RV64ZVE32F-NEXT: # %bb.7: # %else6 |
| ; RV64ZVE32F-NEXT: andi a3, a2, 16 |
| ; RV64ZVE32F-NEXT: bnez a3, .LBB73_14 |
| ; RV64ZVE32F-NEXT: .LBB73_8: # %else8 |
| ; RV64ZVE32F-NEXT: andi a3, a2, 32 |
| ; RV64ZVE32F-NEXT: beqz a3, .LBB73_10 |
| ; RV64ZVE32F-NEXT: .LBB73_9: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a3, v11 |
| ; RV64ZVE32F-NEXT: and a3, a3, a1 |
| ; RV64ZVE32F-NEXT: slli a3, a3, 2 |
| ; RV64ZVE32F-NEXT: add a3, a0, a3 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a3) |
| ; RV64ZVE32F-NEXT: .LBB73_10: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a3, a2, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 |
| ; RV64ZVE32F-NEXT: bnez a3, .LBB73_15 |
| ; RV64ZVE32F-NEXT: # %bb.11: # %else12 |
| ; RV64ZVE32F-NEXT: andi a2, a2, -128 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB73_16 |
| ; RV64ZVE32F-NEXT: .LBB73_12: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB73_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a3, v11 |
| ; RV64ZVE32F-NEXT: and a3, a3, a1 |
| ; RV64ZVE32F-NEXT: slli a3, a3, 2 |
| ; RV64ZVE32F-NEXT: add a3, a0, a3 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 3 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a3) |
| ; RV64ZVE32F-NEXT: andi a3, a2, 16 |
| ; RV64ZVE32F-NEXT: beqz a3, .LBB73_8 |
| ; RV64ZVE32F-NEXT: .LBB73_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a3, v10 |
| ; RV64ZVE32F-NEXT: and a3, a3, a1 |
| ; RV64ZVE32F-NEXT: slli a3, a3, 2 |
| ; RV64ZVE32F-NEXT: add a3, a0, a3 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a3) |
| ; RV64ZVE32F-NEXT: andi a3, a2, 32 |
| ; RV64ZVE32F-NEXT: bnez a3, .LBB73_9 |
| ; RV64ZVE32F-NEXT: j .LBB73_10 |
| ; RV64ZVE32F-NEXT: .LBB73_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a3, v10 |
| ; RV64ZVE32F-NEXT: and a3, a3, a1 |
| ; RV64ZVE32F-NEXT: slli a3, a3, 2 |
| ; RV64ZVE32F-NEXT: add a3, a0, a3 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a3) |
| ; RV64ZVE32F-NEXT: andi a2, a2, -128 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB73_12 |
| ; RV64ZVE32F-NEXT: .LBB73_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: and a1, a2, a1 |
| ; RV64ZVE32F-NEXT: slli a1, a1, 2 |
| ; RV64ZVE32F-NEXT: add a0, a0, a1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a0) |
| ; RV64ZVE32F-NEXT: ret |
| %eidxs = zext <8 x i16> %idxs to <8 x i32> |
| %ptrs = getelementptr inbounds float, float* %base, <8 x i32> %eidxs |
| call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> %val, <8 x float*> %ptrs, i32 4, <8 x i1> %m) |
| ret void |
| } |
| |
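| ; i32 indices are used directly on RV32 and sign-extended on RV64. Under |
| ; RV64ZVE32F the e32 index vector occupies an LMUL=2 group, so the upper half |
| ; is split out with an extra m2 vslidedown before the per-lane extracts. |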
| define void @mscatter_baseidx_v8f32(<8 x float> %val, float* %base, <8 x i32> %idxs, <8 x i1> %m) { |
| ; RV32-LABEL: mscatter_baseidx_v8f32: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32-NEXT: vsll.vi v10, v10, 2 |
| ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_v8f32: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vsext.vf2 v12, v10 |
| ; RV64-NEXT: vsll.vi v12, v12, 2 |
| ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_v8f32: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 1 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB74_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a2) |
| ; RV64ZVE32F-NEXT: .LBB74_2: # %else |
| ; RV64ZVE32F-NEXT: andi a2, a1, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB74_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: .LBB74_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB74_12 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %else4 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 8 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB74_13 |
| ; RV64ZVE32F-NEXT: .LBB74_6: # %else6 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB74_14 |
| ; RV64ZVE32F-NEXT: .LBB74_7: # %else8 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB74_9 |
| ; RV64ZVE32F-NEXT: .LBB74_8: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v12, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5 |
| ; RV64ZVE32F-NEXT: vse32.v v10, (a2) |
| ; RV64ZVE32F-NEXT: .LBB74_9: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v12, 2 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB74_15 |
| ; RV64ZVE32F-NEXT: # %bb.10: # %else12 |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: bnez a1, .LBB74_16 |
| ; RV64ZVE32F-NEXT: .LBB74_11: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB74_12: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 2 |
| ; RV64ZVE32F-NEXT: vse32.v v14, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 8 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB74_6 |
| ; RV64ZVE32F-NEXT: .LBB74_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 |
| ; RV64ZVE32F-NEXT: vse32.v v10, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB74_7 |
| ; RV64ZVE32F-NEXT: .LBB74_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 |
| ; RV64ZVE32F-NEXT: vse32.v v10, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB74_8 |
| ; RV64ZVE32F-NEXT: j .LBB74_9 |
| ; RV64ZVE32F-NEXT: .LBB74_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 2 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6 |
| ; RV64ZVE32F-NEXT: vse32.v v12, (a2) |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB74_11 |
| ; RV64ZVE32F-NEXT: .LBB74_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV64ZVE32F-NEXT: slli a1, a1, 2 |
| ; RV64ZVE32F-NEXT: add a0, a0, a1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV64ZVE32F-NEXT: vse32.v v8, (a0) |
| ; RV64ZVE32F-NEXT: ret |
| %ptrs = getelementptr inbounds float, float* %base, <8 x i32> %idxs |
| call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> %val, <8 x float*> %ptrs, i32 4, <8 x i1> %m) |
| ret void |
| } |
| |
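| ; The f64 scatters below stress the configurations differently: the +v |
| ; targets lower directly to a masked indexed store (vsoxei32.v on RV32, |
| ; vsoxei64.v on RV64), while the Zve32f targets cannot hold 64-bit |
| ; elements in vector registers and fall back to a branch-per-lane |
| ; sequence of scalar fsd stores. |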
| declare void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double>, <1 x double*>, i32, <1 x i1>) |
| |
| define void @mscatter_v1f64(<1 x double> %val, <1 x double*> %ptrs, <1 x i1> %m) { |
| ; RV32V-LABEL: mscatter_v1f64: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 1, e64, m1, ta, mu |
| ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_v1f64: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_v1f64: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: vsetvli a0, zero, e8, mf4, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.v.i v9, 0 |
| ; RV32ZVE32F-NEXT: vmerge.vim v9, v9, 1, v0 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v9 |
| ; RV32ZVE32F-NEXT: andi a0, a0, 1 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB75_2 |
| ; RV32ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV32ZVE32F-NEXT: fsd fa0, 0(a0) |
| ; RV32ZVE32F-NEXT: .LBB75_2: # %else |
| ; RV32ZVE32F-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_v1f64: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.v.i v8, 0 |
| ; RV64ZVE32F-NEXT: vmerge.vim v8, v8, 1, v0 |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v8 |
| ; RV64ZVE32F-NEXT: andi a1, a1, 1 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB75_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: fsd fa0, 0(a0) |
| ; RV64ZVE32F-NEXT: .LBB75_2: # %else |
| ; RV64ZVE32F-NEXT: ret |
| call void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double> %val, <1 x double*> %ptrs, i32 8, <1 x i1> %m) |
| ret void |
| } |
| |
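| ; With two i64 pointers, RV64ZVE32F receives %ptrs directly in a0/a1, so |
| ; each active lane is just a mask-bit test followed by an fsd. |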
| declare void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double>, <2 x double*>, i32, <2 x i1>) |
| |
| define void @mscatter_v2f64(<2 x double> %val, <2 x double*> %ptrs, <2 x i1> %m) { |
| ; RV32V-LABEL: mscatter_v2f64: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 2, e64, m1, ta, mu |
| ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_v2f64: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_v2f64: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v0 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 1 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB76_3 |
| ; RV32ZVE32F-NEXT: # %bb.1: # %else |
| ; RV32ZVE32F-NEXT: andi a0, a0, 2 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB76_4 |
| ; RV32ZVE32F-NEXT: .LBB76_2: # %else2 |
| ; RV32ZVE32F-NEXT: ret |
| ; RV32ZVE32F-NEXT: .LBB76_3: # %cond.store |
| ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 |
| ; RV32ZVE32F-NEXT: fsd fa0, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a0, a0, 2 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB76_2 |
| ; RV32ZVE32F-NEXT: .LBB76_4: # %cond.store1 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 1 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV32ZVE32F-NEXT: fsd fa1, 0(a0) |
| ; RV32ZVE32F-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_v2f64: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v0 |
| ; RV64ZVE32F-NEXT: andi a3, a2, 1 |
| ; RV64ZVE32F-NEXT: bnez a3, .LBB76_3 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a2, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB76_4 |
| ; RV64ZVE32F-NEXT: .LBB76_2: # %else2 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB76_3: # %cond.store |
| ; RV64ZVE32F-NEXT: fsd fa0, 0(a0) |
| ; RV64ZVE32F-NEXT: andi a0, a2, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB76_2 |
| ; RV64ZVE32F-NEXT: .LBB76_4: # %cond.store1 |
| ; RV64ZVE32F-NEXT: fsd fa1, 0(a1) |
| ; RV64ZVE32F-NEXT: ret |
| call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %val, <2 x double*> %ptrs, i32 8, <2 x i1> %m) |
| ret void |
| } |
| |
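| ; From four elements up, RV64ZVE32F takes %ptrs indirectly: the pointer |
| ; vector lives in memory at a0 and the lanes are loaded with ld before |
| ; the per-lane mask tests. |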
| declare void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double>, <4 x double*>, i32, <4 x i1>) |
| |
| define void @mscatter_v4f64(<4 x double> %val, <4 x double*> %ptrs, <4 x i1> %m) { |
| ; RV32V-LABEL: mscatter_v4f64: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, mu |
| ; RV32V-NEXT: vsoxei32.v v8, (zero), v10, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_v4f64: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_v4f64: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v0 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 1 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB77_5 |
| ; RV32ZVE32F-NEXT: # %bb.1: # %else |
| ; RV32ZVE32F-NEXT: andi a1, a0, 2 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB77_6 |
| ; RV32ZVE32F-NEXT: .LBB77_2: # %else2 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 4 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB77_7 |
| ; RV32ZVE32F-NEXT: .LBB77_3: # %else4 |
| ; RV32ZVE32F-NEXT: andi a0, a0, 8 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB77_8 |
| ; RV32ZVE32F-NEXT: .LBB77_4: # %else6 |
| ; RV32ZVE32F-NEXT: ret |
| ; RV32ZVE32F-NEXT: .LBB77_5: # %cond.store |
| ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 |
| ; RV32ZVE32F-NEXT: fsd fa0, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 2 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB77_2 |
| ; RV32ZVE32F-NEXT: .LBB77_6: # %cond.store1 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v9 |
| ; RV32ZVE32F-NEXT: fsd fa1, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 4 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB77_3 |
| ; RV32ZVE32F-NEXT: .LBB77_7: # %cond.store3 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v9 |
| ; RV32ZVE32F-NEXT: fsd fa2, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a0, a0, 8 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB77_4 |
| ; RV32ZVE32F-NEXT: .LBB77_8: # %cond.store5 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV32ZVE32F-NEXT: fsd fa3, 0(a0) |
| ; RV32ZVE32F-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_v4f64: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: ld a1, 24(a0) |
| ; RV64ZVE32F-NEXT: ld a2, 16(a0) |
| ; RV64ZVE32F-NEXT: ld a4, 8(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a3, v0 |
| ; RV64ZVE32F-NEXT: andi a5, a3, 1 |
| ; RV64ZVE32F-NEXT: bnez a5, .LBB77_5 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a3, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB77_6 |
| ; RV64ZVE32F-NEXT: .LBB77_2: # %else2 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 4 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB77_7 |
| ; RV64ZVE32F-NEXT: .LBB77_3: # %else4 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 8 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB77_8 |
| ; RV64ZVE32F-NEXT: .LBB77_4: # %else6 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB77_5: # %cond.store |
| ; RV64ZVE32F-NEXT: ld a0, 0(a0) |
| ; RV64ZVE32F-NEXT: fsd fa0, 0(a0) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB77_2 |
| ; RV64ZVE32F-NEXT: .LBB77_6: # %cond.store1 |
| ; RV64ZVE32F-NEXT: fsd fa1, 0(a4) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 4 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB77_3 |
| ; RV64ZVE32F-NEXT: .LBB77_7: # %cond.store3 |
| ; RV64ZVE32F-NEXT: fsd fa2, 0(a2) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 8 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB77_4 |
| ; RV64ZVE32F-NEXT: .LBB77_8: # %cond.store5 |
| ; RV64ZVE32F-NEXT: fsd fa3, 0(a1) |
| ; RV64ZVE32F-NEXT: ret |
| call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> %val, <4 x double*> %ptrs, i32 8, <4 x i1> %m) |
| ret void |
| } |
| |
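| ; All-true mask built from a splat. Note the Zve32f lowerings do not fold |
| ; this to an unmasked scatter: the mask is rematerialized with vmset.m and |
| ; the first lane's test degenerates to an always-taken 'beqz zero'. |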
| define void @mscatter_truemask_v4f64(<4 x double> %val, <4 x double*> %ptrs) { |
| ; RV32V-LABEL: mscatter_truemask_v4f64: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, mu |
| ; RV32V-NEXT: vsoxei32.v v8, (zero), v10 |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_truemask_v4f64: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (zero), v10 |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_truemask_v4f64: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, mu |
| ; RV32ZVE32F-NEXT: vmset.m v9 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v9 |
| ; RV32ZVE32F-NEXT: beqz zero, .LBB78_5 |
| ; RV32ZVE32F-NEXT: # %bb.1: # %else |
| ; RV32ZVE32F-NEXT: andi a1, a0, 2 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB78_6 |
| ; RV32ZVE32F-NEXT: .LBB78_2: # %else2 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 4 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB78_7 |
| ; RV32ZVE32F-NEXT: .LBB78_3: # %else4 |
| ; RV32ZVE32F-NEXT: andi a0, a0, 8 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB78_8 |
| ; RV32ZVE32F-NEXT: .LBB78_4: # %else6 |
| ; RV32ZVE32F-NEXT: ret |
| ; RV32ZVE32F-NEXT: .LBB78_5: # %cond.store |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m1, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 |
| ; RV32ZVE32F-NEXT: fsd fa0, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 2 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB78_2 |
| ; RV32ZVE32F-NEXT: .LBB78_6: # %cond.store1 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v9 |
| ; RV32ZVE32F-NEXT: fsd fa1, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 4 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB78_3 |
| ; RV32ZVE32F-NEXT: .LBB78_7: # %cond.store3 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v9 |
| ; RV32ZVE32F-NEXT: fsd fa2, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a0, a0, 8 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB78_4 |
| ; RV32ZVE32F-NEXT: .LBB78_8: # %cond.store5 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV32ZVE32F-NEXT: fsd fa3, 0(a0) |
| ; RV32ZVE32F-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_truemask_v4f64: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: ld a1, 24(a0) |
| ; RV64ZVE32F-NEXT: ld a2, 16(a0) |
| ; RV64ZVE32F-NEXT: ld a4, 8(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vmset.m v8 |
| ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 |
| ; RV64ZVE32F-NEXT: beqz zero, .LBB78_5 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a3, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB78_6 |
| ; RV64ZVE32F-NEXT: .LBB78_2: # %else2 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 4 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB78_7 |
| ; RV64ZVE32F-NEXT: .LBB78_3: # %else4 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 8 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB78_8 |
| ; RV64ZVE32F-NEXT: .LBB78_4: # %else6 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB78_5: # %cond.store |
| ; RV64ZVE32F-NEXT: ld a0, 0(a0) |
| ; RV64ZVE32F-NEXT: fsd fa0, 0(a0) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB78_2 |
| ; RV64ZVE32F-NEXT: .LBB78_6: # %cond.store1 |
| ; RV64ZVE32F-NEXT: fsd fa1, 0(a4) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 4 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB78_3 |
| ; RV64ZVE32F-NEXT: .LBB78_7: # %cond.store3 |
| ; RV64ZVE32F-NEXT: fsd fa2, 0(a2) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 8 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB78_4 |
| ; RV64ZVE32F-NEXT: .LBB78_8: # %cond.store5 |
| ; RV64ZVE32F-NEXT: fsd fa3, 0(a1) |
| ; RV64ZVE32F-NEXT: ret |
| %mhead = insertelement <4 x i1> poison, i1 1, i32 0 |
| %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer |
| call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> %val, <4 x double*> %ptrs, i32 8, <4 x i1> %mtrue) |
| ret void |
| } |
| |
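| ; An all-false mask folds the scatter away completely on every target. |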
| define void @mscatter_falsemask_v4f64(<4 x double> %val, <4 x double*> %ptrs) { |
| ; CHECK-LABEL: mscatter_falsemask_v4f64: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: ret |
| call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> %val, <4 x double*> %ptrs, i32 8, <4 x i1> zeroinitializer) |
| ret void |
| } |
| |
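| ; For eight lanes, RV64ZVE32F preloads pointer lanes 1-7 from the |
| ; indirect %ptrs argument into scalar registers before walking the mask |
| ; bits, leaving only lane 0 to be loaded in its cond.store block. |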
| declare void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double>, <8 x double*>, i32, <8 x i1>) |
| |
| define void @mscatter_v8f64(<8 x double> %val, <8 x double*> %ptrs, <8 x i1> %m) { |
| ; RV32V-LABEL: mscatter_v8f64: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV32V-NEXT: vsoxei32.v v8, (zero), v12, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_v8f64: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_v8f64: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v0 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 1 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB80_9 |
| ; RV32ZVE32F-NEXT: # %bb.1: # %else |
| ; RV32ZVE32F-NEXT: andi a1, a0, 2 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB80_10 |
| ; RV32ZVE32F-NEXT: .LBB80_2: # %else2 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 4 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB80_11 |
| ; RV32ZVE32F-NEXT: .LBB80_3: # %else4 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 8 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB80_12 |
| ; RV32ZVE32F-NEXT: .LBB80_4: # %else6 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 16 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB80_13 |
| ; RV32ZVE32F-NEXT: .LBB80_5: # %else8 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 32 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB80_14 |
| ; RV32ZVE32F-NEXT: .LBB80_6: # %else10 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 64 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB80_15 |
| ; RV32ZVE32F-NEXT: .LBB80_7: # %else12 |
| ; RV32ZVE32F-NEXT: andi a0, a0, -128 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB80_16 |
| ; RV32ZVE32F-NEXT: .LBB80_8: # %else14 |
| ; RV32ZVE32F-NEXT: ret |
| ; RV32ZVE32F-NEXT: .LBB80_9: # %cond.store |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 |
| ; RV32ZVE32F-NEXT: fsd fa0, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 2 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB80_2 |
| ; RV32ZVE32F-NEXT: .LBB80_10: # %cond.store1 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa1, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 4 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB80_3 |
| ; RV32ZVE32F-NEXT: .LBB80_11: # %cond.store3 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa2, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 8 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB80_4 |
| ; RV32ZVE32F-NEXT: .LBB80_12: # %cond.store5 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa3, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 16 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB80_5 |
| ; RV32ZVE32F-NEXT: .LBB80_13: # %cond.store7 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa4, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 32 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB80_6 |
| ; RV32ZVE32F-NEXT: .LBB80_14: # %cond.store9 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa5, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 64 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB80_7 |
| ; RV32ZVE32F-NEXT: .LBB80_15: # %cond.store11 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa6, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a0, a0, -128 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB80_8 |
| ; RV32ZVE32F-NEXT: .LBB80_16: # %cond.store13 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV32ZVE32F-NEXT: fsd fa7, 0(a0) |
| ; RV32ZVE32F-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_v8f64: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: ld a1, 56(a0) |
| ; RV64ZVE32F-NEXT: ld a2, 48(a0) |
| ; RV64ZVE32F-NEXT: ld a4, 40(a0) |
| ; RV64ZVE32F-NEXT: ld a5, 32(a0) |
| ; RV64ZVE32F-NEXT: ld a6, 24(a0) |
| ; RV64ZVE32F-NEXT: ld a7, 16(a0) |
| ; RV64ZVE32F-NEXT: ld t0, 8(a0) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a3, v0 |
| ; RV64ZVE32F-NEXT: andi t1, a3, 1 |
| ; RV64ZVE32F-NEXT: bnez t1, .LBB80_9 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %else |
| ; RV64ZVE32F-NEXT: andi a0, a3, 2 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB80_10 |
| ; RV64ZVE32F-NEXT: .LBB80_2: # %else2 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 4 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB80_11 |
| ; RV64ZVE32F-NEXT: .LBB80_3: # %else4 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 8 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB80_12 |
| ; RV64ZVE32F-NEXT: .LBB80_4: # %else6 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 16 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB80_13 |
| ; RV64ZVE32F-NEXT: .LBB80_5: # %else8 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 32 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB80_14 |
| ; RV64ZVE32F-NEXT: .LBB80_6: # %else10 |
| ; RV64ZVE32F-NEXT: andi a0, a3, 64 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB80_15 |
| ; RV64ZVE32F-NEXT: .LBB80_7: # %else12 |
| ; RV64ZVE32F-NEXT: andi a0, a3, -128 |
| ; RV64ZVE32F-NEXT: bnez a0, .LBB80_16 |
| ; RV64ZVE32F-NEXT: .LBB80_8: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB80_9: # %cond.store |
| ; RV64ZVE32F-NEXT: ld a0, 0(a0) |
| ; RV64ZVE32F-NEXT: fsd fa0, 0(a0) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 2 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB80_2 |
| ; RV64ZVE32F-NEXT: .LBB80_10: # %cond.store1 |
| ; RV64ZVE32F-NEXT: fsd fa1, 0(t0) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 4 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB80_3 |
| ; RV64ZVE32F-NEXT: .LBB80_11: # %cond.store3 |
| ; RV64ZVE32F-NEXT: fsd fa2, 0(a7) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 8 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB80_4 |
| ; RV64ZVE32F-NEXT: .LBB80_12: # %cond.store5 |
| ; RV64ZVE32F-NEXT: fsd fa3, 0(a6) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 16 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB80_5 |
| ; RV64ZVE32F-NEXT: .LBB80_13: # %cond.store7 |
| ; RV64ZVE32F-NEXT: fsd fa4, 0(a5) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 32 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB80_6 |
| ; RV64ZVE32F-NEXT: .LBB80_14: # %cond.store9 |
| ; RV64ZVE32F-NEXT: fsd fa5, 0(a4) |
| ; RV64ZVE32F-NEXT: andi a0, a3, 64 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB80_7 |
| ; RV64ZVE32F-NEXT: .LBB80_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: fsd fa6, 0(a2) |
| ; RV64ZVE32F-NEXT: andi a0, a3, -128 |
| ; RV64ZVE32F-NEXT: beqz a0, .LBB80_8 |
| ; RV64ZVE32F-NEXT: .LBB80_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: fsd fa7, 0(a1) |
| ; RV64ZVE32F-NEXT: ret |
| call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> %val, <8 x double*> %ptrs, i32 8, <8 x i1> %m) |
| ret void |
| } |
| |
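| ; Indexed forms: the i8 indices are extended and scaled by 8 in vector |
| ; registers on the +v targets (vsext.vf4/vf8 followed by vsll.vi 3), |
| ; while RV64ZVE32F extracts each index lane, scales it with slli, and |
| ; adds it to the base scalarly. |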
| define void @mscatter_baseidx_v8i8_v8f64(<8 x double> %val, double* %base, <8 x i8> %idxs, <8 x i1> %m) { |
| ; RV32V-LABEL: mscatter_baseidx_v8i8_v8f64: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32V-NEXT: vsext.vf4 v14, v12 |
| ; RV32V-NEXT: vsll.vi v12, v14, 3 |
| ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu |
| ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_v8i8_v8f64: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vsext.vf8 v16, v12 |
| ; RV64-NEXT: vsll.vi v12, v16, 3 |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_baseidx_v8i8_v8f64: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8 |
| ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 |
| ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v0 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 1 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB81_9 |
| ; RV32ZVE32F-NEXT: # %bb.1: # %else |
| ; RV32ZVE32F-NEXT: andi a1, a0, 2 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB81_10 |
| ; RV32ZVE32F-NEXT: .LBB81_2: # %else2 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 4 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB81_11 |
| ; RV32ZVE32F-NEXT: .LBB81_3: # %else4 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 8 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB81_12 |
| ; RV32ZVE32F-NEXT: .LBB81_4: # %else6 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 16 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB81_13 |
| ; RV32ZVE32F-NEXT: .LBB81_5: # %else8 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 32 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB81_14 |
| ; RV32ZVE32F-NEXT: .LBB81_6: # %else10 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 64 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB81_15 |
| ; RV32ZVE32F-NEXT: .LBB81_7: # %else12 |
| ; RV32ZVE32F-NEXT: andi a0, a0, -128 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB81_16 |
| ; RV32ZVE32F-NEXT: .LBB81_8: # %else14 |
| ; RV32ZVE32F-NEXT: ret |
| ; RV32ZVE32F-NEXT: .LBB81_9: # %cond.store |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 |
| ; RV32ZVE32F-NEXT: fsd fa0, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 2 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB81_2 |
| ; RV32ZVE32F-NEXT: .LBB81_10: # %cond.store1 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa1, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 4 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB81_3 |
| ; RV32ZVE32F-NEXT: .LBB81_11: # %cond.store3 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa2, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 8 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB81_4 |
| ; RV32ZVE32F-NEXT: .LBB81_12: # %cond.store5 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa3, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 16 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB81_5 |
| ; RV32ZVE32F-NEXT: .LBB81_13: # %cond.store7 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa4, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 32 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB81_6 |
| ; RV32ZVE32F-NEXT: .LBB81_14: # %cond.store9 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa5, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 64 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB81_7 |
| ; RV32ZVE32F-NEXT: .LBB81_15: # %cond.store11 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa6, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a0, a0, -128 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB81_8 |
| ; RV32ZVE32F-NEXT: .LBB81_16: # %cond.store13 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV32ZVE32F-NEXT: fsd fa7, 0(a0) |
| ; RV32ZVE32F-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8f64: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 1 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB81_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa0, 0(a2) |
| ; RV64ZVE32F-NEXT: .LBB81_2: # %else |
| ; RV64ZVE32F-NEXT: andi a2, a1, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB81_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa1, 0(a2) |
| ; RV64ZVE32F-NEXT: .LBB81_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB81_6 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa2, 0(a2) |
| ; RV64ZVE32F-NEXT: .LBB81_6: # %else4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 8 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB81_13 |
| ; RV64ZVE32F-NEXT: # %bb.7: # %else6 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB81_14 |
| ; RV64ZVE32F-NEXT: .LBB81_8: # %else8 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB81_10 |
| ; RV64ZVE32F-NEXT: .LBB81_9: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa5, 0(a2) |
| ; RV64ZVE32F-NEXT: .LBB81_10: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB81_15 |
| ; RV64ZVE32F-NEXT: # %bb.11: # %else12 |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: bnez a1, .LBB81_16 |
| ; RV64ZVE32F-NEXT: .LBB81_12: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB81_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa3, 0(a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB81_8 |
| ; RV64ZVE32F-NEXT: .LBB81_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa4, 0(a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB81_9 |
| ; RV64ZVE32F-NEXT: j .LBB81_10 |
| ; RV64ZVE32F-NEXT: .LBB81_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa6, 0(a2) |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB81_12 |
| ; RV64ZVE32F-NEXT: .LBB81_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v8 |
| ; RV64ZVE32F-NEXT: slli a1, a1, 3 |
| ; RV64ZVE32F-NEXT: add a0, a0, a1 |
| ; RV64ZVE32F-NEXT: fsd fa7, 0(a0) |
| ; RV64ZVE32F-NEXT: ret |
| %ptrs = getelementptr inbounds double, double* %base, <8 x i8> %idxs |
| call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> %val, <8 x double*> %ptrs, i32 8, <8 x i1> %m) |
| ret void |
| } |
| |
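| ; Explicit sext of the i8 indices to i64 before the GEP. RV32V does the |
| ; index math at e64 and narrows the result back to e32 with vncvt.x.x.w |
| ; so it can still use vsoxei32.v; RV32ZVE32F just sign-extends to e32, |
| ; which is equivalent for 32-bit pointers. |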
| define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, double* %base, <8 x i8> %idxs, <8 x i1> %m) { |
| ; RV32V-LABEL: mscatter_baseidx_sext_v8i8_v8f64: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV32V-NEXT: vsext.vf8 v16, v12 |
| ; RV32V-NEXT: vsll.vi v12, v16, 3 |
| ; RV32V-NEXT: vsetvli zero, zero, e32, m2, ta, mu |
| ; RV32V-NEXT: vncvt.x.x.w v16, v12 |
| ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu |
| ; RV32V-NEXT: vsoxei32.v v8, (a0), v16, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_sext_v8i8_v8f64: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vsext.vf8 v16, v12 |
| ; RV64-NEXT: vsll.vi v12, v16, 3 |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8f64: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8 |
| ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 |
| ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v0 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 1 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB82_9 |
| ; RV32ZVE32F-NEXT: # %bb.1: # %else |
| ; RV32ZVE32F-NEXT: andi a1, a0, 2 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB82_10 |
| ; RV32ZVE32F-NEXT: .LBB82_2: # %else2 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 4 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB82_11 |
| ; RV32ZVE32F-NEXT: .LBB82_3: # %else4 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 8 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB82_12 |
| ; RV32ZVE32F-NEXT: .LBB82_4: # %else6 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 16 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB82_13 |
| ; RV32ZVE32F-NEXT: .LBB82_5: # %else8 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 32 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB82_14 |
| ; RV32ZVE32F-NEXT: .LBB82_6: # %else10 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 64 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB82_15 |
| ; RV32ZVE32F-NEXT: .LBB82_7: # %else12 |
| ; RV32ZVE32F-NEXT: andi a0, a0, -128 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB82_16 |
| ; RV32ZVE32F-NEXT: .LBB82_8: # %else14 |
| ; RV32ZVE32F-NEXT: ret |
| ; RV32ZVE32F-NEXT: .LBB82_9: # %cond.store |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 |
| ; RV32ZVE32F-NEXT: fsd fa0, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 2 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB82_2 |
| ; RV32ZVE32F-NEXT: .LBB82_10: # %cond.store1 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa1, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 4 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB82_3 |
| ; RV32ZVE32F-NEXT: .LBB82_11: # %cond.store3 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa2, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 8 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB82_4 |
| ; RV32ZVE32F-NEXT: .LBB82_12: # %cond.store5 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa3, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 16 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB82_5 |
| ; RV32ZVE32F-NEXT: .LBB82_13: # %cond.store7 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa4, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 32 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB82_6 |
| ; RV32ZVE32F-NEXT: .LBB82_14: # %cond.store9 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa5, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 64 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB82_7 |
| ; RV32ZVE32F-NEXT: .LBB82_15: # %cond.store11 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa6, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a0, a0, -128 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB82_8 |
| ; RV32ZVE32F-NEXT: .LBB82_16: # %cond.store13 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV32ZVE32F-NEXT: fsd fa7, 0(a0) |
| ; RV32ZVE32F-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8f64: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 1 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB82_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa0, 0(a2) |
| ; RV64ZVE32F-NEXT: .LBB82_2: # %else |
| ; RV64ZVE32F-NEXT: andi a2, a1, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB82_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa1, 0(a2) |
| ; RV64ZVE32F-NEXT: .LBB82_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB82_6 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa2, 0(a2) |
| ; RV64ZVE32F-NEXT: .LBB82_6: # %else4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 8 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB82_13 |
| ; RV64ZVE32F-NEXT: # %bb.7: # %else6 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB82_14 |
| ; RV64ZVE32F-NEXT: .LBB82_8: # %else8 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB82_10 |
| ; RV64ZVE32F-NEXT: .LBB82_9: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa5, 0(a2) |
| ; RV64ZVE32F-NEXT: .LBB82_10: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB82_15 |
| ; RV64ZVE32F-NEXT: # %bb.11: # %else12 |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: bnez a1, .LBB82_16 |
| ; RV64ZVE32F-NEXT: .LBB82_12: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB82_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa3, 0(a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB82_8 |
| ; RV64ZVE32F-NEXT: .LBB82_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa4, 0(a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB82_9 |
| ; RV64ZVE32F-NEXT: j .LBB82_10 |
| ; RV64ZVE32F-NEXT: .LBB82_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa6, 0(a2) |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB82_12 |
| ; RV64ZVE32F-NEXT: .LBB82_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v8 |
| ; RV64ZVE32F-NEXT: slli a1, a1, 3 |
| ; RV64ZVE32F-NEXT: add a0, a0, a1 |
| ; RV64ZVE32F-NEXT: fsd fa7, 0(a0) |
| ; RV64ZVE32F-NEXT: ret |
| %eidxs = sext <8 x i8> %idxs to <8 x i64> |
| %ptrs = getelementptr inbounds double, double* %base, <8 x i64> %eidxs |
| call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> %val, <8 x double*> %ptrs, i32 8, <8 x i1> %m) |
| ret void |
| } |
| |
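| ; Same as above with zext. On RV64ZVE32F the zero-extension shows up as |
| ; an 'andi ..., 255' on every extracted index before scaling. |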
| define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, double* %base, <8 x i8> %idxs, <8 x i1> %m) { |
| ; RV32V-LABEL: mscatter_baseidx_zext_v8i8_v8f64: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV32V-NEXT: vzext.vf8 v16, v12 |
| ; RV32V-NEXT: vsll.vi v12, v16, 3 |
| ; RV32V-NEXT: vsetvli zero, zero, e32, m2, ta, mu |
| ; RV32V-NEXT: vncvt.x.x.w v16, v12 |
| ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu |
| ; RV32V-NEXT: vsoxei32.v v8, (a0), v16, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8f64: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vzext.vf8 v16, v12 |
| ; RV64-NEXT: vsll.vi v12, v16, 3 |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8f64: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vzext.vf4 v10, v8 |
| ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 |
| ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v0 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 1 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB83_9 |
| ; RV32ZVE32F-NEXT: # %bb.1: # %else |
| ; RV32ZVE32F-NEXT: andi a1, a0, 2 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB83_10 |
| ; RV32ZVE32F-NEXT: .LBB83_2: # %else2 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 4 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB83_11 |
| ; RV32ZVE32F-NEXT: .LBB83_3: # %else4 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 8 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB83_12 |
| ; RV32ZVE32F-NEXT: .LBB83_4: # %else6 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 16 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB83_13 |
| ; RV32ZVE32F-NEXT: .LBB83_5: # %else8 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 32 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB83_14 |
| ; RV32ZVE32F-NEXT: .LBB83_6: # %else10 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 64 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB83_15 |
| ; RV32ZVE32F-NEXT: .LBB83_7: # %else12 |
| ; RV32ZVE32F-NEXT: andi a0, a0, -128 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB83_16 |
| ; RV32ZVE32F-NEXT: .LBB83_8: # %else14 |
| ; RV32ZVE32F-NEXT: ret |
| ; RV32ZVE32F-NEXT: .LBB83_9: # %cond.store |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 |
| ; RV32ZVE32F-NEXT: fsd fa0, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 2 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB83_2 |
| ; RV32ZVE32F-NEXT: .LBB83_10: # %cond.store1 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa1, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 4 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB83_3 |
| ; RV32ZVE32F-NEXT: .LBB83_11: # %cond.store3 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa2, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 8 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB83_4 |
| ; RV32ZVE32F-NEXT: .LBB83_12: # %cond.store5 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa3, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 16 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB83_5 |
| ; RV32ZVE32F-NEXT: .LBB83_13: # %cond.store7 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa4, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 32 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB83_6 |
| ; RV32ZVE32F-NEXT: .LBB83_14: # %cond.store9 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa5, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 64 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB83_7 |
| ; RV32ZVE32F-NEXT: .LBB83_15: # %cond.store11 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa6, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a0, a0, -128 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB83_8 |
| ; RV32ZVE32F-NEXT: .LBB83_16: # %cond.store13 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV32ZVE32F-NEXT: fsd fa7, 0(a0) |
| ; RV32ZVE32F-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8f64: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 1 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB83_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 |
| ; RV64ZVE32F-NEXT: andi a2, a2, 255 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa0, 0(a2) |
| ; RV64ZVE32F-NEXT: .LBB83_2: # %else |
| ; RV64ZVE32F-NEXT: andi a2, a1, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB83_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: andi a2, a2, 255 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa1, 0(a2) |
| ; RV64ZVE32F-NEXT: .LBB83_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB83_6 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: andi a2, a2, 255 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa2, 0(a2) |
| ; RV64ZVE32F-NEXT: .LBB83_6: # %else4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 8 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB83_13 |
| ; RV64ZVE32F-NEXT: # %bb.7: # %else6 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB83_14 |
| ; RV64ZVE32F-NEXT: .LBB83_8: # %else8 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB83_10 |
| ; RV64ZVE32F-NEXT: .LBB83_9: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: andi a2, a2, 255 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa5, 0(a2) |
| ; RV64ZVE32F-NEXT: .LBB83_10: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB83_15 |
| ; RV64ZVE32F-NEXT: # %bb.11: # %else12 |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: bnez a1, .LBB83_16 |
| ; RV64ZVE32F-NEXT: .LBB83_12: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB83_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: andi a2, a2, 255 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa3, 0(a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB83_8 |
| ; RV64ZVE32F-NEXT: .LBB83_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 |
| ; RV64ZVE32F-NEXT: andi a2, a2, 255 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa4, 0(a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB83_9 |
| ; RV64ZVE32F-NEXT: j .LBB83_10 |
| ; RV64ZVE32F-NEXT: .LBB83_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 |
| ; RV64ZVE32F-NEXT: andi a2, a2, 255 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa6, 0(a2) |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB83_12 |
| ; RV64ZVE32F-NEXT: .LBB83_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v8 |
| ; RV64ZVE32F-NEXT: andi a1, a1, 255 |
| ; RV64ZVE32F-NEXT: slli a1, a1, 3 |
| ; RV64ZVE32F-NEXT: add a0, a0, a1 |
| ; RV64ZVE32F-NEXT: fsd fa7, 0(a0) |
| ; RV64ZVE32F-NEXT: ret |
| %eidxs = zext <8 x i8> %idxs to <8 x i64> |
| %ptrs = getelementptr inbounds double, double* %base, <8 x i64> %eidxs |
| call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> %val, <8 x double*> %ptrs, i32 8, <8 x i1> %m) |
| ret void |
| } |
| |
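| ; i16 indices: vsext.vf2 to e32 on the RV32 targets, vsext.vf4 to e64 on |
| ; RV64, and e16 lane extraction on RV64ZVE32F. |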
| define void @mscatter_baseidx_v8i16_v8f64(<8 x double> %val, double* %base, <8 x i16> %idxs, <8 x i1> %m) { |
| ; RV32V-LABEL: mscatter_baseidx_v8i16_v8f64: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32V-NEXT: vsext.vf2 v14, v12 |
| ; RV32V-NEXT: vsll.vi v12, v14, 3 |
| ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu |
| ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_v8i16_v8f64: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vsext.vf4 v16, v12 |
| ; RV64-NEXT: vsll.vi v12, v16, 3 |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_baseidx_v8i16_v8f64: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8 |
| ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 |
| ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v0 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 1 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB84_9 |
| ; RV32ZVE32F-NEXT: # %bb.1: # %else |
| ; RV32ZVE32F-NEXT: andi a1, a0, 2 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB84_10 |
| ; RV32ZVE32F-NEXT: .LBB84_2: # %else2 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 4 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB84_11 |
| ; RV32ZVE32F-NEXT: .LBB84_3: # %else4 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 8 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB84_12 |
| ; RV32ZVE32F-NEXT: .LBB84_4: # %else6 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 16 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB84_13 |
| ; RV32ZVE32F-NEXT: .LBB84_5: # %else8 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 32 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB84_14 |
| ; RV32ZVE32F-NEXT: .LBB84_6: # %else10 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 64 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB84_15 |
| ; RV32ZVE32F-NEXT: .LBB84_7: # %else12 |
| ; RV32ZVE32F-NEXT: andi a0, a0, -128 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB84_16 |
| ; RV32ZVE32F-NEXT: .LBB84_8: # %else14 |
| ; RV32ZVE32F-NEXT: ret |
| ; RV32ZVE32F-NEXT: .LBB84_9: # %cond.store |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 |
| ; RV32ZVE32F-NEXT: fsd fa0, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 2 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB84_2 |
| ; RV32ZVE32F-NEXT: .LBB84_10: # %cond.store1 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa1, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 4 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB84_3 |
| ; RV32ZVE32F-NEXT: .LBB84_11: # %cond.store3 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa2, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 8 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB84_4 |
| ; RV32ZVE32F-NEXT: .LBB84_12: # %cond.store5 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa3, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 16 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB84_5 |
| ; RV32ZVE32F-NEXT: .LBB84_13: # %cond.store7 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa4, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 32 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB84_6 |
| ; RV32ZVE32F-NEXT: .LBB84_14: # %cond.store9 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa5, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 64 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB84_7 |
| ; RV32ZVE32F-NEXT: .LBB84_15: # %cond.store11 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa6, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a0, a0, -128 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB84_8 |
| ; RV32ZVE32F-NEXT: .LBB84_16: # %cond.store13 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV32ZVE32F-NEXT: fsd fa7, 0(a0) |
| ; RV32ZVE32F-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i16_v8f64: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 1 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB84_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa0, 0(a2) |
| ; RV64ZVE32F-NEXT: .LBB84_2: # %else |
| ; RV64ZVE32F-NEXT: andi a2, a1, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB84_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa1, 0(a2) |
| ; RV64ZVE32F-NEXT: .LBB84_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB84_6 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa2, 0(a2) |
| ; RV64ZVE32F-NEXT: .LBB84_6: # %else4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 8 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB84_13 |
| ; RV64ZVE32F-NEXT: # %bb.7: # %else6 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB84_14 |
| ; RV64ZVE32F-NEXT: .LBB84_8: # %else8 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB84_10 |
| ; RV64ZVE32F-NEXT: .LBB84_9: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa5, 0(a2) |
| ; RV64ZVE32F-NEXT: .LBB84_10: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB84_15 |
| ; RV64ZVE32F-NEXT: # %bb.11: # %else12 |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: bnez a1, .LBB84_16 |
| ; RV64ZVE32F-NEXT: .LBB84_12: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB84_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa3, 0(a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB84_8 |
| ; RV64ZVE32F-NEXT: .LBB84_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa4, 0(a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB84_9 |
| ; RV64ZVE32F-NEXT: j .LBB84_10 |
| ; RV64ZVE32F-NEXT: .LBB84_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa6, 0(a2) |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB84_12 |
| ; RV64ZVE32F-NEXT: .LBB84_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v8 |
| ; RV64ZVE32F-NEXT: slli a1, a1, 3 |
| ; RV64ZVE32F-NEXT: add a0, a0, a1 |
| ; RV64ZVE32F-NEXT: fsd fa7, 0(a0) |
| ; RV64ZVE32F-NEXT: ret |
| %ptrs = getelementptr inbounds double, double* %base, <8 x i16> %idxs |
| call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> %val, <8 x double*> %ptrs, i32 8, <8 x i1> %m) |
| ret void |
| } |
| |
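| ; Explicit sext of the i16 indices to i64; as with the i8 case, RV32V |
| ; widens to e64 and narrows back with vncvt.x.x.w, while RV32ZVE32F keeps |
| ; the index computation at e32. |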
| define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, double* %base, <8 x i16> %idxs, <8 x i1> %m) { |
| ; RV32V-LABEL: mscatter_baseidx_sext_v8i16_v8f64: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV32V-NEXT: vsext.vf4 v16, v12 |
| ; RV32V-NEXT: vsll.vi v12, v16, 3 |
| ; RV32V-NEXT: vsetvli zero, zero, e32, m2, ta, mu |
| ; RV32V-NEXT: vncvt.x.x.w v16, v12 |
| ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu |
| ; RV32V-NEXT: vsoxei32.v v8, (a0), v16, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_sext_v8i16_v8f64: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vsext.vf4 v16, v12 |
| ; RV64-NEXT: vsll.vi v12, v16, 3 |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8f64: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8 |
| ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 |
| ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v0 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 1 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB85_9 |
| ; RV32ZVE32F-NEXT: # %bb.1: # %else |
| ; RV32ZVE32F-NEXT: andi a1, a0, 2 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB85_10 |
| ; RV32ZVE32F-NEXT: .LBB85_2: # %else2 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 4 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB85_11 |
| ; RV32ZVE32F-NEXT: .LBB85_3: # %else4 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 8 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB85_12 |
| ; RV32ZVE32F-NEXT: .LBB85_4: # %else6 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 16 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB85_13 |
| ; RV32ZVE32F-NEXT: .LBB85_5: # %else8 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 32 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB85_14 |
| ; RV32ZVE32F-NEXT: .LBB85_6: # %else10 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 64 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB85_15 |
| ; RV32ZVE32F-NEXT: .LBB85_7: # %else12 |
| ; RV32ZVE32F-NEXT: andi a0, a0, -128 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB85_16 |
| ; RV32ZVE32F-NEXT: .LBB85_8: # %else14 |
| ; RV32ZVE32F-NEXT: ret |
| ; RV32ZVE32F-NEXT: .LBB85_9: # %cond.store |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 |
| ; RV32ZVE32F-NEXT: fsd fa0, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 2 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB85_2 |
| ; RV32ZVE32F-NEXT: .LBB85_10: # %cond.store1 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa1, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 4 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB85_3 |
| ; RV32ZVE32F-NEXT: .LBB85_11: # %cond.store3 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa2, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 8 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB85_4 |
| ; RV32ZVE32F-NEXT: .LBB85_12: # %cond.store5 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa3, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 16 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB85_5 |
| ; RV32ZVE32F-NEXT: .LBB85_13: # %cond.store7 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa4, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 32 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB85_6 |
| ; RV32ZVE32F-NEXT: .LBB85_14: # %cond.store9 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa5, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 64 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB85_7 |
| ; RV32ZVE32F-NEXT: .LBB85_15: # %cond.store11 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa6, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a0, a0, -128 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB85_8 |
| ; RV32ZVE32F-NEXT: .LBB85_16: # %cond.store13 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV32ZVE32F-NEXT: fsd fa7, 0(a0) |
| ; RV32ZVE32F-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8f64: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 1 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB85_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa0, 0(a2) |
| ; RV64ZVE32F-NEXT: .LBB85_2: # %else |
| ; RV64ZVE32F-NEXT: andi a2, a1, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB85_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa1, 0(a2) |
| ; RV64ZVE32F-NEXT: .LBB85_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB85_6 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa2, 0(a2) |
| ; RV64ZVE32F-NEXT: .LBB85_6: # %else4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 8 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB85_13 |
| ; RV64ZVE32F-NEXT: # %bb.7: # %else6 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB85_14 |
| ; RV64ZVE32F-NEXT: .LBB85_8: # %else8 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB85_10 |
| ; RV64ZVE32F-NEXT: .LBB85_9: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa5, 0(a2) |
| ; RV64ZVE32F-NEXT: .LBB85_10: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB85_15 |
| ; RV64ZVE32F-NEXT: # %bb.11: # %else12 |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: bnez a1, .LBB85_16 |
| ; RV64ZVE32F-NEXT: .LBB85_12: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB85_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa3, 0(a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB85_8 |
| ; RV64ZVE32F-NEXT: .LBB85_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa4, 0(a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB85_9 |
| ; RV64ZVE32F-NEXT: j .LBB85_10 |
| ; RV64ZVE32F-NEXT: .LBB85_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa6, 0(a2) |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB85_12 |
| ; RV64ZVE32F-NEXT: .LBB85_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v8 |
| ; RV64ZVE32F-NEXT: slli a1, a1, 3 |
| ; RV64ZVE32F-NEXT: add a0, a0, a1 |
| ; RV64ZVE32F-NEXT: fsd fa7, 0(a0) |
| ; RV64ZVE32F-NEXT: ret |
| %eidxs = sext <8 x i16> %idxs to <8 x i64> |
| %ptrs = getelementptr inbounds double, double* %base, <8 x i64> %eidxs |
| call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> %val, <8 x double*> %ptrs, i32 8, <8 x i1> %m) |
| ret void |
| } |
| |
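| ; Same pattern with zero-extended i16 indices. Because vmv.x.s sign-extends, |
| ; RV64ZVE32F materializes an 0xffff mask (lui+addiw) and ands each extracted |
| ; index before scaling it. |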
| define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, double* %base, <8 x i16> %idxs, <8 x i1> %m) { |
| ; RV32V-LABEL: mscatter_baseidx_zext_v8i16_v8f64: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV32V-NEXT: vzext.vf4 v16, v12 |
| ; RV32V-NEXT: vsll.vi v12, v16, 3 |
| ; RV32V-NEXT: vsetvli zero, zero, e32, m2, ta, mu |
| ; RV32V-NEXT: vncvt.x.x.w v16, v12 |
| ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu |
| ; RV32V-NEXT: vsoxei32.v v8, (a0), v16, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_zext_v8i16_v8f64: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vzext.vf4 v16, v12 |
| ; RV64-NEXT: vsll.vi v12, v16, 3 |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8f64: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vzext.vf2 v10, v8 |
| ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 |
| ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v0 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 1 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB86_9 |
| ; RV32ZVE32F-NEXT: # %bb.1: # %else |
| ; RV32ZVE32F-NEXT: andi a1, a0, 2 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB86_10 |
| ; RV32ZVE32F-NEXT: .LBB86_2: # %else2 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 4 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB86_11 |
| ; RV32ZVE32F-NEXT: .LBB86_3: # %else4 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 8 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB86_12 |
| ; RV32ZVE32F-NEXT: .LBB86_4: # %else6 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 16 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB86_13 |
| ; RV32ZVE32F-NEXT: .LBB86_5: # %else8 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 32 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB86_14 |
| ; RV32ZVE32F-NEXT: .LBB86_6: # %else10 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 64 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB86_15 |
| ; RV32ZVE32F-NEXT: .LBB86_7: # %else12 |
| ; RV32ZVE32F-NEXT: andi a0, a0, -128 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB86_16 |
| ; RV32ZVE32F-NEXT: .LBB86_8: # %else14 |
| ; RV32ZVE32F-NEXT: ret |
| ; RV32ZVE32F-NEXT: .LBB86_9: # %cond.store |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 |
| ; RV32ZVE32F-NEXT: fsd fa0, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 2 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB86_2 |
| ; RV32ZVE32F-NEXT: .LBB86_10: # %cond.store1 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa1, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 4 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB86_3 |
| ; RV32ZVE32F-NEXT: .LBB86_11: # %cond.store3 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa2, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 8 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB86_4 |
| ; RV32ZVE32F-NEXT: .LBB86_12: # %cond.store5 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa3, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 16 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB86_5 |
| ; RV32ZVE32F-NEXT: .LBB86_13: # %cond.store7 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa4, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 32 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB86_6 |
| ; RV32ZVE32F-NEXT: .LBB86_14: # %cond.store9 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa5, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 64 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB86_7 |
| ; RV32ZVE32F-NEXT: .LBB86_15: # %cond.store11 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa6, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a0, a0, -128 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB86_8 |
| ; RV32ZVE32F-NEXT: .LBB86_16: # %cond.store13 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV32ZVE32F-NEXT: fsd fa7, 0(a0) |
| ; RV32ZVE32F-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8f64: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: lui a1, 16 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v0 |
| ; RV64ZVE32F-NEXT: andi a3, a2, 1 |
| ; RV64ZVE32F-NEXT: addiw a1, a1, -1 |
| ; RV64ZVE32F-NEXT: beqz a3, .LBB86_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 |
| ; RV64ZVE32F-NEXT: and a3, a3, a1 |
| ; RV64ZVE32F-NEXT: slli a3, a3, 3 |
| ; RV64ZVE32F-NEXT: add a3, a0, a3 |
| ; RV64ZVE32F-NEXT: fsd fa0, 0(a3) |
| ; RV64ZVE32F-NEXT: .LBB86_2: # %else |
| ; RV64ZVE32F-NEXT: andi a3, a2, 2 |
| ; RV64ZVE32F-NEXT: beqz a3, .LBB86_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a3, v9 |
| ; RV64ZVE32F-NEXT: and a3, a3, a1 |
| ; RV64ZVE32F-NEXT: slli a3, a3, 3 |
| ; RV64ZVE32F-NEXT: add a3, a0, a3 |
| ; RV64ZVE32F-NEXT: fsd fa1, 0(a3) |
| ; RV64ZVE32F-NEXT: .LBB86_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a3, a2, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 |
| ; RV64ZVE32F-NEXT: beqz a3, .LBB86_6 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a3, v9 |
| ; RV64ZVE32F-NEXT: and a3, a3, a1 |
| ; RV64ZVE32F-NEXT: slli a3, a3, 3 |
| ; RV64ZVE32F-NEXT: add a3, a0, a3 |
| ; RV64ZVE32F-NEXT: fsd fa2, 0(a3) |
| ; RV64ZVE32F-NEXT: .LBB86_6: # %else4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, mu |
| ; RV64ZVE32F-NEXT: andi a3, a2, 8 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4 |
| ; RV64ZVE32F-NEXT: bnez a3, .LBB86_13 |
| ; RV64ZVE32F-NEXT: # %bb.7: # %else6 |
| ; RV64ZVE32F-NEXT: andi a3, a2, 16 |
| ; RV64ZVE32F-NEXT: bnez a3, .LBB86_14 |
| ; RV64ZVE32F-NEXT: .LBB86_8: # %else8 |
| ; RV64ZVE32F-NEXT: andi a3, a2, 32 |
| ; RV64ZVE32F-NEXT: beqz a3, .LBB86_10 |
| ; RV64ZVE32F-NEXT: .LBB86_9: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a3, v9 |
| ; RV64ZVE32F-NEXT: and a3, a3, a1 |
| ; RV64ZVE32F-NEXT: slli a3, a3, 3 |
| ; RV64ZVE32F-NEXT: add a3, a0, a3 |
| ; RV64ZVE32F-NEXT: fsd fa5, 0(a3) |
| ; RV64ZVE32F-NEXT: .LBB86_10: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a3, a2, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 |
| ; RV64ZVE32F-NEXT: bnez a3, .LBB86_15 |
| ; RV64ZVE32F-NEXT: # %bb.11: # %else12 |
| ; RV64ZVE32F-NEXT: andi a2, a2, -128 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB86_16 |
| ; RV64ZVE32F-NEXT: .LBB86_12: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB86_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a3, v9 |
| ; RV64ZVE32F-NEXT: and a3, a3, a1 |
| ; RV64ZVE32F-NEXT: slli a3, a3, 3 |
| ; RV64ZVE32F-NEXT: add a3, a0, a3 |
| ; RV64ZVE32F-NEXT: fsd fa3, 0(a3) |
| ; RV64ZVE32F-NEXT: andi a3, a2, 16 |
| ; RV64ZVE32F-NEXT: beqz a3, .LBB86_8 |
| ; RV64ZVE32F-NEXT: .LBB86_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 |
| ; RV64ZVE32F-NEXT: and a3, a3, a1 |
| ; RV64ZVE32F-NEXT: slli a3, a3, 3 |
| ; RV64ZVE32F-NEXT: add a3, a0, a3 |
| ; RV64ZVE32F-NEXT: fsd fa4, 0(a3) |
| ; RV64ZVE32F-NEXT: andi a3, a2, 32 |
| ; RV64ZVE32F-NEXT: bnez a3, .LBB86_9 |
| ; RV64ZVE32F-NEXT: j .LBB86_10 |
| ; RV64ZVE32F-NEXT: .LBB86_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 |
| ; RV64ZVE32F-NEXT: and a3, a3, a1 |
| ; RV64ZVE32F-NEXT: slli a3, a3, 3 |
| ; RV64ZVE32F-NEXT: add a3, a0, a3 |
| ; RV64ZVE32F-NEXT: fsd fa6, 0(a3) |
| ; RV64ZVE32F-NEXT: andi a2, a2, -128 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB86_12 |
| ; RV64ZVE32F-NEXT: .LBB86_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 |
| ; RV64ZVE32F-NEXT: and a1, a2, a1 |
| ; RV64ZVE32F-NEXT: slli a1, a1, 3 |
| ; RV64ZVE32F-NEXT: add a0, a0, a1 |
| ; RV64ZVE32F-NEXT: fsd fa7, 0(a0) |
| ; RV64ZVE32F-NEXT: ret |
| %eidxs = zext <8 x i16> %idxs to <8 x i64> |
| %ptrs = getelementptr inbounds double, double* %base, <8 x i64> %eidxs |
| call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> %val, <8 x double*> %ptrs, i32 8, <8 x i1> %m) |
| ret void |
| } |
| |
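| ; Unextended i32 indices. On RV32 the shifted indices are usable directly as |
| ; vsoxei32 offsets; RV64 must first sign-extend them to i64 with vsext.vf2. |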
| define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, double* %base, <8 x i32> %idxs, <8 x i1> %m) { |
| ; RV32V-LABEL: mscatter_baseidx_v8i32_v8f64: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32V-NEXT: vsll.vi v12, v12, 3 |
| ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu |
| ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_v8i32_v8f64: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vsext.vf2 v16, v12 |
| ; RV64-NEXT: vsll.vi v12, v16, 3 |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_baseidx_v8i32_v8f64: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 |
| ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v0 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 1 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB87_9 |
| ; RV32ZVE32F-NEXT: # %bb.1: # %else |
| ; RV32ZVE32F-NEXT: andi a1, a0, 2 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB87_10 |
| ; RV32ZVE32F-NEXT: .LBB87_2: # %else2 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 4 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB87_11 |
| ; RV32ZVE32F-NEXT: .LBB87_3: # %else4 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 8 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB87_12 |
| ; RV32ZVE32F-NEXT: .LBB87_4: # %else6 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 16 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB87_13 |
| ; RV32ZVE32F-NEXT: .LBB87_5: # %else8 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 32 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB87_14 |
| ; RV32ZVE32F-NEXT: .LBB87_6: # %else10 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 64 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB87_15 |
| ; RV32ZVE32F-NEXT: .LBB87_7: # %else12 |
| ; RV32ZVE32F-NEXT: andi a0, a0, -128 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB87_16 |
| ; RV32ZVE32F-NEXT: .LBB87_8: # %else14 |
| ; RV32ZVE32F-NEXT: ret |
| ; RV32ZVE32F-NEXT: .LBB87_9: # %cond.store |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 |
| ; RV32ZVE32F-NEXT: fsd fa0, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 2 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB87_2 |
| ; RV32ZVE32F-NEXT: .LBB87_10: # %cond.store1 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa1, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 4 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB87_3 |
| ; RV32ZVE32F-NEXT: .LBB87_11: # %cond.store3 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa2, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 8 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB87_4 |
| ; RV32ZVE32F-NEXT: .LBB87_12: # %cond.store5 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa3, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 16 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB87_5 |
| ; RV32ZVE32F-NEXT: .LBB87_13: # %cond.store7 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa4, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 32 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB87_6 |
| ; RV32ZVE32F-NEXT: .LBB87_14: # %cond.store9 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa5, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 64 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB87_7 |
| ; RV32ZVE32F-NEXT: .LBB87_15: # %cond.store11 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa6, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a0, a0, -128 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB87_8 |
| ; RV32ZVE32F-NEXT: .LBB87_16: # %cond.store13 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV32ZVE32F-NEXT: fsd fa7, 0(a0) |
| ; RV32ZVE32F-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i32_v8f64: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 1 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB87_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa0, 0(a2) |
| ; RV64ZVE32F-NEXT: .LBB87_2: # %else |
| ; RV64ZVE32F-NEXT: andi a2, a1, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB87_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa1, 0(a2) |
| ; RV64ZVE32F-NEXT: .LBB87_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB87_12 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %else4 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 8 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB87_13 |
| ; RV64ZVE32F-NEXT: .LBB87_6: # %else6 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB87_14 |
| ; RV64ZVE32F-NEXT: .LBB87_7: # %else8 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB87_9 |
| ; RV64ZVE32F-NEXT: .LBB87_8: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa5, 0(a2) |
| ; RV64ZVE32F-NEXT: .LBB87_9: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB87_15 |
| ; RV64ZVE32F-NEXT: # %bb.10: # %else12 |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: bnez a1, .LBB87_16 |
| ; RV64ZVE32F-NEXT: .LBB87_11: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB87_12: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa2, 0(a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 8 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB87_6 |
| ; RV64ZVE32F-NEXT: .LBB87_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa3, 0(a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB87_7 |
| ; RV64ZVE32F-NEXT: .LBB87_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa4, 0(a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB87_8 |
| ; RV64ZVE32F-NEXT: j .LBB87_9 |
| ; RV64ZVE32F-NEXT: .LBB87_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa6, 0(a2) |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB87_11 |
| ; RV64ZVE32F-NEXT: .LBB87_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v8 |
| ; RV64ZVE32F-NEXT: slli a1, a1, 3 |
| ; RV64ZVE32F-NEXT: add a0, a0, a1 |
| ; RV64ZVE32F-NEXT: fsd fa7, 0(a0) |
| ; RV64ZVE32F-NEXT: ret |
| %ptrs = getelementptr inbounds double, double* %base, <8 x i32> %idxs |
| call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> %val, <8 x double*> %ptrs, i32 8, <8 x i1> %m) |
| ret void |
| } |
| |
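| ; Explicitly sign-extended i32 indices. The RV64ZVE32F scalarized code again |
| ; relies on the sign-extending vmv.x.s, so it matches the unextended case |
| ; above. |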
| define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, double* %base, <8 x i32> %idxs, <8 x i1> %m) { |
| ; RV32V-LABEL: mscatter_baseidx_sext_v8i32_v8f64: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV32V-NEXT: vsext.vf2 v16, v12 |
| ; RV32V-NEXT: vsll.vi v12, v16, 3 |
| ; RV32V-NEXT: vsetvli zero, zero, e32, m2, ta, mu |
| ; RV32V-NEXT: vncvt.x.x.w v16, v12 |
| ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu |
| ; RV32V-NEXT: vsoxei32.v v8, (a0), v16, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_sext_v8i32_v8f64: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vsext.vf2 v16, v12 |
| ; RV64-NEXT: vsll.vi v12, v16, 3 |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i32_v8f64: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 |
| ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v0 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 1 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB88_9 |
| ; RV32ZVE32F-NEXT: # %bb.1: # %else |
| ; RV32ZVE32F-NEXT: andi a1, a0, 2 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB88_10 |
| ; RV32ZVE32F-NEXT: .LBB88_2: # %else2 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 4 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB88_11 |
| ; RV32ZVE32F-NEXT: .LBB88_3: # %else4 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 8 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB88_12 |
| ; RV32ZVE32F-NEXT: .LBB88_4: # %else6 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 16 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB88_13 |
| ; RV32ZVE32F-NEXT: .LBB88_5: # %else8 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 32 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB88_14 |
| ; RV32ZVE32F-NEXT: .LBB88_6: # %else10 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 64 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB88_15 |
| ; RV32ZVE32F-NEXT: .LBB88_7: # %else12 |
| ; RV32ZVE32F-NEXT: andi a0, a0, -128 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB88_16 |
| ; RV32ZVE32F-NEXT: .LBB88_8: # %else14 |
| ; RV32ZVE32F-NEXT: ret |
| ; RV32ZVE32F-NEXT: .LBB88_9: # %cond.store |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 |
| ; RV32ZVE32F-NEXT: fsd fa0, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 2 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB88_2 |
| ; RV32ZVE32F-NEXT: .LBB88_10: # %cond.store1 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa1, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 4 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB88_3 |
| ; RV32ZVE32F-NEXT: .LBB88_11: # %cond.store3 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa2, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 8 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB88_4 |
| ; RV32ZVE32F-NEXT: .LBB88_12: # %cond.store5 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa3, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 16 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB88_5 |
| ; RV32ZVE32F-NEXT: .LBB88_13: # %cond.store7 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa4, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 32 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB88_6 |
| ; RV32ZVE32F-NEXT: .LBB88_14: # %cond.store9 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa5, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 64 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB88_7 |
| ; RV32ZVE32F-NEXT: .LBB88_15: # %cond.store11 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa6, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a0, a0, -128 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB88_8 |
| ; RV32ZVE32F-NEXT: .LBB88_16: # %cond.store13 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV32ZVE32F-NEXT: fsd fa7, 0(a0) |
| ; RV32ZVE32F-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i32_v8f64: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 1 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB88_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa0, 0(a2) |
| ; RV64ZVE32F-NEXT: .LBB88_2: # %else |
| ; RV64ZVE32F-NEXT: andi a2, a1, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB88_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa1, 0(a2) |
| ; RV64ZVE32F-NEXT: .LBB88_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB88_12 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %else4 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 8 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB88_13 |
| ; RV64ZVE32F-NEXT: .LBB88_6: # %else6 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB88_14 |
| ; RV64ZVE32F-NEXT: .LBB88_7: # %else8 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB88_9 |
| ; RV64ZVE32F-NEXT: .LBB88_8: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa5, 0(a2) |
| ; RV64ZVE32F-NEXT: .LBB88_9: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB88_15 |
| ; RV64ZVE32F-NEXT: # %bb.10: # %else12 |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: bnez a1, .LBB88_16 |
| ; RV64ZVE32F-NEXT: .LBB88_11: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB88_12: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa2, 0(a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 8 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB88_6 |
| ; RV64ZVE32F-NEXT: .LBB88_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa3, 0(a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB88_7 |
| ; RV64ZVE32F-NEXT: .LBB88_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa4, 0(a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB88_8 |
| ; RV64ZVE32F-NEXT: j .LBB88_9 |
| ; RV64ZVE32F-NEXT: .LBB88_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 3 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa6, 0(a2) |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB88_11 |
| ; RV64ZVE32F-NEXT: .LBB88_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v8 |
| ; RV64ZVE32F-NEXT: slli a1, a1, 3 |
| ; RV64ZVE32F-NEXT: add a0, a0, a1 |
| ; RV64ZVE32F-NEXT: fsd fa7, 0(a0) |
| ; RV64ZVE32F-NEXT: ret |
| %eidxs = sext <8 x i32> %idxs to <8 x i64> |
| %ptrs = getelementptr inbounds double, double* %base, <8 x i64> %eidxs |
| call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> %val, <8 x double*> %ptrs, i32 8, <8 x i1> %m) |
| ret void |
| } |
| |
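| ; Zero-extended i32 indices. RV64ZVE32F folds the zext and the *8 scaling of |
| ; each extracted index into a single shift pair (slli 32 / srli 29). |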
| define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, double* %base, <8 x i32> %idxs, <8 x i1> %m) { |
| ; RV32V-LABEL: mscatter_baseidx_zext_v8i32_v8f64: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV32V-NEXT: vzext.vf2 v16, v12 |
| ; RV32V-NEXT: vsll.vi v12, v16, 3 |
| ; RV32V-NEXT: vsetvli zero, zero, e32, m2, ta, mu |
| ; RV32V-NEXT: vncvt.x.x.w v16, v12 |
| ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu |
| ; RV32V-NEXT: vsoxei32.v v8, (a0), v16, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_zext_v8i32_v8f64: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vzext.vf2 v16, v12 |
| ; RV64-NEXT: vsll.vi v12, v16, 3 |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i32_v8f64: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 |
| ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v0 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 1 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB89_9 |
| ; RV32ZVE32F-NEXT: # %bb.1: # %else |
| ; RV32ZVE32F-NEXT: andi a1, a0, 2 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB89_10 |
| ; RV32ZVE32F-NEXT: .LBB89_2: # %else2 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 4 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB89_11 |
| ; RV32ZVE32F-NEXT: .LBB89_3: # %else4 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 8 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB89_12 |
| ; RV32ZVE32F-NEXT: .LBB89_4: # %else6 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 16 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB89_13 |
| ; RV32ZVE32F-NEXT: .LBB89_5: # %else8 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 32 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB89_14 |
| ; RV32ZVE32F-NEXT: .LBB89_6: # %else10 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 64 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB89_15 |
| ; RV32ZVE32F-NEXT: .LBB89_7: # %else12 |
| ; RV32ZVE32F-NEXT: andi a0, a0, -128 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB89_16 |
| ; RV32ZVE32F-NEXT: .LBB89_8: # %else14 |
| ; RV32ZVE32F-NEXT: ret |
| ; RV32ZVE32F-NEXT: .LBB89_9: # %cond.store |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 |
| ; RV32ZVE32F-NEXT: fsd fa0, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 2 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB89_2 |
| ; RV32ZVE32F-NEXT: .LBB89_10: # %cond.store1 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa1, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 4 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB89_3 |
| ; RV32ZVE32F-NEXT: .LBB89_11: # %cond.store3 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa2, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 8 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB89_4 |
| ; RV32ZVE32F-NEXT: .LBB89_12: # %cond.store5 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa3, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 16 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB89_5 |
| ; RV32ZVE32F-NEXT: .LBB89_13: # %cond.store7 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa4, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 32 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB89_6 |
| ; RV32ZVE32F-NEXT: .LBB89_14: # %cond.store9 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa5, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 64 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB89_7 |
| ; RV32ZVE32F-NEXT: .LBB89_15: # %cond.store11 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa6, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a0, a0, -128 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB89_8 |
| ; RV32ZVE32F-NEXT: .LBB89_16: # %cond.store13 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV32ZVE32F-NEXT: fsd fa7, 0(a0) |
| ; RV32ZVE32F-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i32_v8f64: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 1 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB89_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 32 |
| ; RV64ZVE32F-NEXT: srli a2, a2, 29 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa0, 0(a2) |
| ; RV64ZVE32F-NEXT: .LBB89_2: # %else |
| ; RV64ZVE32F-NEXT: andi a2, a1, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB89_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 32 |
| ; RV64ZVE32F-NEXT: srli a2, a2, 29 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa1, 0(a2) |
| ; RV64ZVE32F-NEXT: .LBB89_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB89_12 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %else4 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 8 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB89_13 |
| ; RV64ZVE32F-NEXT: .LBB89_6: # %else6 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB89_14 |
| ; RV64ZVE32F-NEXT: .LBB89_7: # %else8 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB89_9 |
| ; RV64ZVE32F-NEXT: .LBB89_8: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 32 |
| ; RV64ZVE32F-NEXT: srli a2, a2, 29 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa5, 0(a2) |
| ; RV64ZVE32F-NEXT: .LBB89_9: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB89_15 |
| ; RV64ZVE32F-NEXT: # %bb.10: # %else12 |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: bnez a1, .LBB89_16 |
| ; RV64ZVE32F-NEXT: .LBB89_11: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB89_12: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 32 |
| ; RV64ZVE32F-NEXT: srli a2, a2, 29 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa2, 0(a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 8 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB89_6 |
| ; RV64ZVE32F-NEXT: .LBB89_13: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 32 |
| ; RV64ZVE32F-NEXT: srli a2, a2, 29 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa3, 0(a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB89_7 |
| ; RV64ZVE32F-NEXT: .LBB89_14: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 32 |
| ; RV64ZVE32F-NEXT: srli a2, a2, 29 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa4, 0(a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB89_8 |
| ; RV64ZVE32F-NEXT: j .LBB89_9 |
| ; RV64ZVE32F-NEXT: .LBB89_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 |
| ; RV64ZVE32F-NEXT: slli a2, a2, 32 |
| ; RV64ZVE32F-NEXT: srli a2, a2, 29 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: fsd fa6, 0(a2) |
| ; RV64ZVE32F-NEXT: andi a1, a1, -128 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB89_11 |
| ; RV64ZVE32F-NEXT: .LBB89_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v8 |
| ; RV64ZVE32F-NEXT: slli a1, a1, 32 |
| ; RV64ZVE32F-NEXT: srli a1, a1, 29 |
| ; RV64ZVE32F-NEXT: add a0, a0, a1 |
| ; RV64ZVE32F-NEXT: fsd fa7, 0(a0) |
| ; RV64ZVE32F-NEXT: ret |
| %eidxs = zext <8 x i32> %idxs to <8 x i64> |
| %ptrs = getelementptr inbounds double, double* %base, <8 x i64> %eidxs |
| call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> %val, <8 x double*> %ptrs, i32 8, <8 x i1> %m) |
| ret void |
| } |
| |
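| ; i64 indices do not fit a Zve32f vector register. RV32ZVE32F spills the low |
| ; 32 bits of each index to the stack and reloads them as one <8 x i32> vector |
| ; (the high halves cannot affect 32-bit pointer arithmetic), while RV64ZVE32F |
| ; keeps all eight indices in scalar registers. |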
| define void @mscatter_baseidx_v8f64(<8 x double> %val, double* %base, <8 x i64> %idxs, <8 x i1> %m) { |
| ; RV32V-LABEL: mscatter_baseidx_v8f64: |
| ; RV32V: # %bb.0: |
| ; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV32V-NEXT: vsll.vi v12, v12, 3 |
| ; RV32V-NEXT: vsetvli zero, zero, e32, m2, ta, mu |
| ; RV32V-NEXT: vncvt.x.x.w v16, v12 |
| ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu |
| ; RV32V-NEXT: vsoxei32.v v8, (a0), v16, v0.t |
| ; RV32V-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_v8f64: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu |
| ; RV64-NEXT: vsll.vi v12, v12, 3 |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV32ZVE32F-LABEL: mscatter_baseidx_v8f64: |
| ; RV32ZVE32F: # %bb.0: |
| ; RV32ZVE32F-NEXT: addi sp, sp, -64 |
| ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 64 |
| ; RV32ZVE32F-NEXT: sw ra, 60(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: sw s0, 56(sp) # 4-byte Folded Spill |
| ; RV32ZVE32F-NEXT: .cfi_offset ra, -4 |
| ; RV32ZVE32F-NEXT: .cfi_offset s0, -8 |
| ; RV32ZVE32F-NEXT: addi s0, sp, 64 |
| ; RV32ZVE32F-NEXT: .cfi_def_cfa s0, 0 |
| ; RV32ZVE32F-NEXT: andi sp, sp, -32 |
| ; RV32ZVE32F-NEXT: lw a2, 0(a1) |
| ; RV32ZVE32F-NEXT: lw a3, 8(a1) |
| ; RV32ZVE32F-NEXT: lw a4, 16(a1) |
| ; RV32ZVE32F-NEXT: lw a5, 24(a1) |
| ; RV32ZVE32F-NEXT: lw a6, 56(a1) |
| ; RV32ZVE32F-NEXT: lw a7, 48(a1) |
| ; RV32ZVE32F-NEXT: lw t0, 40(a1) |
| ; RV32ZVE32F-NEXT: lw a1, 32(a1) |
| ; RV32ZVE32F-NEXT: sw a6, 28(sp) |
| ; RV32ZVE32F-NEXT: sw a7, 24(sp) |
| ; RV32ZVE32F-NEXT: sw t0, 20(sp) |
| ; RV32ZVE32F-NEXT: sw a1, 16(sp) |
| ; RV32ZVE32F-NEXT: sw a5, 12(sp) |
| ; RV32ZVE32F-NEXT: sw a4, 8(sp) |
| ; RV32ZVE32F-NEXT: sw a3, 4(sp) |
| ; RV32ZVE32F-NEXT: sw a2, 0(sp) |
| ; RV32ZVE32F-NEXT: mv a1, sp |
| ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vle32.v v8, (a1) |
| ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 |
| ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v0 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 1 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB90_10 |
| ; RV32ZVE32F-NEXT: # %bb.1: # %else |
| ; RV32ZVE32F-NEXT: andi a1, a0, 2 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB90_11 |
| ; RV32ZVE32F-NEXT: .LBB90_2: # %else2 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 4 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB90_12 |
| ; RV32ZVE32F-NEXT: .LBB90_3: # %else4 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 8 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB90_13 |
| ; RV32ZVE32F-NEXT: .LBB90_4: # %else6 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 16 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB90_14 |
| ; RV32ZVE32F-NEXT: .LBB90_5: # %else8 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 32 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB90_15 |
| ; RV32ZVE32F-NEXT: .LBB90_6: # %else10 |
| ; RV32ZVE32F-NEXT: andi a1, a0, 64 |
| ; RV32ZVE32F-NEXT: bnez a1, .LBB90_16 |
| ; RV32ZVE32F-NEXT: .LBB90_7: # %else12 |
| ; RV32ZVE32F-NEXT: andi a0, a0, -128 |
| ; RV32ZVE32F-NEXT: beqz a0, .LBB90_9 |
| ; RV32ZVE32F-NEXT: .LBB90_8: # %cond.store13 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 |
| ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 |
| ; RV32ZVE32F-NEXT: fsd fa7, 0(a0) |
| ; RV32ZVE32F-NEXT: .LBB90_9: # %else14 |
| ; RV32ZVE32F-NEXT: addi sp, s0, -64 |
| ; RV32ZVE32F-NEXT: lw ra, 60(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: lw s0, 56(sp) # 4-byte Folded Reload |
| ; RV32ZVE32F-NEXT: addi sp, sp, 64 |
| ; RV32ZVE32F-NEXT: ret |
| ; RV32ZVE32F-NEXT: .LBB90_10: # %cond.store |
| ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 |
| ; RV32ZVE32F-NEXT: fsd fa0, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 2 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB90_2 |
| ; RV32ZVE32F-NEXT: .LBB90_11: # %cond.store1 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa1, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 4 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB90_3 |
| ; RV32ZVE32F-NEXT: .LBB90_12: # %cond.store3 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa2, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 8 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB90_4 |
| ; RV32ZVE32F-NEXT: .LBB90_13: # %cond.store5 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa3, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 16 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB90_5 |
| ; RV32ZVE32F-NEXT: .LBB90_14: # %cond.store7 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa4, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 32 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB90_6 |
| ; RV32ZVE32F-NEXT: .LBB90_15: # %cond.store9 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa5, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a1, a0, 64 |
| ; RV32ZVE32F-NEXT: beqz a1, .LBB90_7 |
| ; RV32ZVE32F-NEXT: .LBB90_16: # %cond.store11 |
| ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, mu |
| ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 |
| ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV32ZVE32F-NEXT: fsd fa6, 0(a1) |
| ; RV32ZVE32F-NEXT: andi a0, a0, -128 |
| ; RV32ZVE32F-NEXT: bnez a0, .LBB90_8 |
| ; RV32ZVE32F-NEXT: j .LBB90_9 |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_v8f64: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: ld t1, 8(a1) |
| ; RV64ZVE32F-NEXT: ld t0, 16(a1) |
| ; RV64ZVE32F-NEXT: ld a7, 24(a1) |
| ; RV64ZVE32F-NEXT: ld a6, 32(a1) |
| ; RV64ZVE32F-NEXT: ld a5, 40(a1) |
| ; RV64ZVE32F-NEXT: ld a4, 48(a1) |
| ; RV64ZVE32F-NEXT: ld a2, 56(a1) |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a3, v0 |
| ; RV64ZVE32F-NEXT: andi t2, a3, 1 |
| ; RV64ZVE32F-NEXT: bnez t2, .LBB90_9 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %else |
| ; RV64ZVE32F-NEXT: andi a1, a3, 2 |
| ; RV64ZVE32F-NEXT: bnez a1, .LBB90_10 |
| ; RV64ZVE32F-NEXT: .LBB90_2: # %else2 |
| ; RV64ZVE32F-NEXT: andi a1, a3, 4 |
| ; RV64ZVE32F-NEXT: bnez a1, .LBB90_11 |
| ; RV64ZVE32F-NEXT: .LBB90_3: # %else4 |
| ; RV64ZVE32F-NEXT: andi a1, a3, 8 |
| ; RV64ZVE32F-NEXT: bnez a1, .LBB90_12 |
| ; RV64ZVE32F-NEXT: .LBB90_4: # %else6 |
| ; RV64ZVE32F-NEXT: andi a1, a3, 16 |
| ; RV64ZVE32F-NEXT: bnez a1, .LBB90_13 |
| ; RV64ZVE32F-NEXT: .LBB90_5: # %else8 |
| ; RV64ZVE32F-NEXT: andi a1, a3, 32 |
| ; RV64ZVE32F-NEXT: bnez a1, .LBB90_14 |
| ; RV64ZVE32F-NEXT: .LBB90_6: # %else10 |
| ; RV64ZVE32F-NEXT: andi a1, a3, 64 |
| ; RV64ZVE32F-NEXT: bnez a1, .LBB90_15 |
| ; RV64ZVE32F-NEXT: .LBB90_7: # %else12 |
| ; RV64ZVE32F-NEXT: andi a1, a3, -128 |
| ; RV64ZVE32F-NEXT: bnez a1, .LBB90_16 |
| ; RV64ZVE32F-NEXT: .LBB90_8: # %else14 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB90_9: # %cond.store |
| ; RV64ZVE32F-NEXT: ld a1, 0(a1) |
| ; RV64ZVE32F-NEXT: slli a1, a1, 3 |
| ; RV64ZVE32F-NEXT: add a1, a0, a1 |
| ; RV64ZVE32F-NEXT: fsd fa0, 0(a1) |
| ; RV64ZVE32F-NEXT: andi a1, a3, 2 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB90_2 |
| ; RV64ZVE32F-NEXT: .LBB90_10: # %cond.store1 |
| ; RV64ZVE32F-NEXT: slli a1, t1, 3 |
| ; RV64ZVE32F-NEXT: add a1, a0, a1 |
| ; RV64ZVE32F-NEXT: fsd fa1, 0(a1) |
| ; RV64ZVE32F-NEXT: andi a1, a3, 4 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB90_3 |
| ; RV64ZVE32F-NEXT: .LBB90_11: # %cond.store3 |
| ; RV64ZVE32F-NEXT: slli a1, t0, 3 |
| ; RV64ZVE32F-NEXT: add a1, a0, a1 |
| ; RV64ZVE32F-NEXT: fsd fa2, 0(a1) |
| ; RV64ZVE32F-NEXT: andi a1, a3, 8 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB90_4 |
| ; RV64ZVE32F-NEXT: .LBB90_12: # %cond.store5 |
| ; RV64ZVE32F-NEXT: slli a1, a7, 3 |
| ; RV64ZVE32F-NEXT: add a1, a0, a1 |
| ; RV64ZVE32F-NEXT: fsd fa3, 0(a1) |
| ; RV64ZVE32F-NEXT: andi a1, a3, 16 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB90_5 |
| ; RV64ZVE32F-NEXT: .LBB90_13: # %cond.store7 |
| ; RV64ZVE32F-NEXT: slli a1, a6, 3 |
| ; RV64ZVE32F-NEXT: add a1, a0, a1 |
| ; RV64ZVE32F-NEXT: fsd fa4, 0(a1) |
| ; RV64ZVE32F-NEXT: andi a1, a3, 32 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB90_6 |
| ; RV64ZVE32F-NEXT: .LBB90_14: # %cond.store9 |
| ; RV64ZVE32F-NEXT: slli a1, a5, 3 |
| ; RV64ZVE32F-NEXT: add a1, a0, a1 |
| ; RV64ZVE32F-NEXT: fsd fa5, 0(a1) |
| ; RV64ZVE32F-NEXT: andi a1, a3, 64 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB90_7 |
| ; RV64ZVE32F-NEXT: .LBB90_15: # %cond.store11 |
| ; RV64ZVE32F-NEXT: slli a1, a4, 3 |
| ; RV64ZVE32F-NEXT: add a1, a0, a1 |
| ; RV64ZVE32F-NEXT: fsd fa6, 0(a1) |
| ; RV64ZVE32F-NEXT: andi a1, a3, -128 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB90_8 |
| ; RV64ZVE32F-NEXT: .LBB90_16: # %cond.store13 |
| ; RV64ZVE32F-NEXT: slli a1, a2, 3 |
| ; RV64ZVE32F-NEXT: add a0, a0, a1 |
| ; RV64ZVE32F-NEXT: fsd fa7, 0(a0) |
| ; RV64ZVE32F-NEXT: ret |
| %ptrs = getelementptr inbounds double, double* %base, <8 x i64> %idxs |
| call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> %val, <8 x double*> %ptrs, i32 8, <8 x i1> %m) |
| ret void |
| } |
| |
| declare void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8>, <16 x i8*>, i32, <16 x i1>) |
| |
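| ; 16-lane i8 scatter. RV64ZVE32F tests the low mask bits with andi; bits |
| ; above 10 no longer fit andi's 12-bit immediate, so they are shifted into |
| ; the sign bit and tested with slli+bltz, and bit 15 is isolated with a |
| ; lui-built mask. |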
| define void @mscatter_baseidx_v16i8(<16 x i8> %val, i8* %base, <16 x i8> %idxs, <16 x i1> %m) { |
| ; RV32-LABEL: mscatter_baseidx_v16i8: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu |
| ; RV32-NEXT: vsext.vf4 v12, v9 |
| ; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, mu |
| ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_v16i8: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu |
| ; RV64-NEXT: vsext.vf8 v16, v9 |
| ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_v16i8: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 1 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB91_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vse8.v v8, (a2) |
| ; RV64ZVE32F-NEXT: .LBB91_2: # %else |
| ; RV64ZVE32F-NEXT: andi a2, a1, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB91_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 |
| ; RV64ZVE32F-NEXT: vse8.v v10, (a2) |
| ; RV64ZVE32F-NEXT: .LBB91_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB91_6 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2 |
| ; RV64ZVE32F-NEXT: vse8.v v10, (a2) |
| ; RV64ZVE32F-NEXT: .LBB91_6: # %else4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 8 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB91_26 |
| ; RV64ZVE32F-NEXT: # %bb.7: # %else6 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB91_27 |
| ; RV64ZVE32F-NEXT: .LBB91_8: # %else8 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB91_10 |
| ; RV64ZVE32F-NEXT: .LBB91_9: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 5 |
| ; RV64ZVE32F-NEXT: vse8.v v11, (a2) |
| ; RV64ZVE32F-NEXT: .LBB91_10: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 8 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB91_28 |
| ; RV64ZVE32F-NEXT: # %bb.11: # %else12 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 128 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB91_29 |
| ; RV64ZVE32F-NEXT: .LBB91_12: # %else14 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 256 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB91_30 |
| ; RV64ZVE32F-NEXT: .LBB91_13: # %else16 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 512 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB91_15 |
| ; RV64ZVE32F-NEXT: .LBB91_14: # %cond.store17 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 9 |
| ; RV64ZVE32F-NEXT: vse8.v v10, (a2) |
| ; RV64ZVE32F-NEXT: .LBB91_15: # %else18 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 1024 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB91_17 |
| ; RV64ZVE32F-NEXT: # %bb.16: # %cond.store19 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 10 |
| ; RV64ZVE32F-NEXT: vse8.v v11, (a2) |
| ; RV64ZVE32F-NEXT: .LBB91_17: # %else20 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: slli a2, a1, 52 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 4 |
| ; RV64ZVE32F-NEXT: bltz a2, .LBB91_31 |
| ; RV64ZVE32F-NEXT: # %bb.18: # %else22 |
| ; RV64ZVE32F-NEXT: slli a2, a1, 51 |
| ; RV64ZVE32F-NEXT: bltz a2, .LBB91_32 |
| ; RV64ZVE32F-NEXT: .LBB91_19: # %else24 |
| ; RV64ZVE32F-NEXT: slli a2, a1, 50 |
| ; RV64ZVE32F-NEXT: bgez a2, .LBB91_21 |
| ; RV64ZVE32F-NEXT: .LBB91_20: # %cond.store25 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 13 |
| ; RV64ZVE32F-NEXT: vse8.v v10, (a2) |
| ; RV64ZVE32F-NEXT: .LBB91_21: # %else26 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: slli a2, a1, 49 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 |
| ; RV64ZVE32F-NEXT: bgez a2, .LBB91_23 |
| ; RV64ZVE32F-NEXT: # %bb.22: # %cond.store27 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 14 |
| ; RV64ZVE32F-NEXT: vse8.v v10, (a2) |
| ; RV64ZVE32F-NEXT: .LBB91_23: # %else28 |
| ; RV64ZVE32F-NEXT: lui a2, 1048568 |
| ; RV64ZVE32F-NEXT: and a1, a1, a2 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB91_25 |
| ; RV64ZVE32F-NEXT: # %bb.24: # %cond.store29 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v9 |
| ; RV64ZVE32F-NEXT: add a0, a0, a1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 15 |
| ; RV64ZVE32F-NEXT: vse8.v v8, (a0) |
| ; RV64ZVE32F-NEXT: .LBB91_25: # %else30 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB91_26: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 3 |
| ; RV64ZVE32F-NEXT: vse8.v v11, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB91_8 |
| ; RV64ZVE32F-NEXT: .LBB91_27: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 4 |
| ; RV64ZVE32F-NEXT: vse8.v v11, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB91_9 |
| ; RV64ZVE32F-NEXT: j .LBB91_10 |
| ; RV64ZVE32F-NEXT: .LBB91_28: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 6 |
| ; RV64ZVE32F-NEXT: vse8.v v11, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 128 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB91_12 |
| ; RV64ZVE32F-NEXT: .LBB91_29: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 7 |
| ; RV64ZVE32F-NEXT: vse8.v v10, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 256 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB91_13 |
| ; RV64ZVE32F-NEXT: .LBB91_30: # %cond.store15 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 8 |
| ; RV64ZVE32F-NEXT: vse8.v v10, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 512 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB91_14 |
| ; RV64ZVE32F-NEXT: j .LBB91_15 |
| ; RV64ZVE32F-NEXT: .LBB91_31: # %cond.store21 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 11 |
| ; RV64ZVE32F-NEXT: vse8.v v10, (a2) |
| ; RV64ZVE32F-NEXT: slli a2, a1, 51 |
| ; RV64ZVE32F-NEXT: bgez a2, .LBB91_19 |
| ; RV64ZVE32F-NEXT: .LBB91_32: # %cond.store23 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 12 |
| ; RV64ZVE32F-NEXT: vse8.v v10, (a2) |
| ; RV64ZVE32F-NEXT: slli a2, a1, 50 |
| ; RV64ZVE32F-NEXT: bltz a2, .LBB91_20 |
| ; RV64ZVE32F-NEXT: j .LBB91_21 |
| %ptrs = getelementptr inbounds i8, i8* %base, <16 x i8> %idxs |
| call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> %val, <16 x i8*> %ptrs, i32 1, <16 x i1> %m) |
| ret void |
| } |
| |
| declare void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8>, <32 x i8*>, i32, <32 x i1>) |
| |
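| ; Note: the 32-element test below exercises two lowering strategies (as the |
| ; checks above and below record): with full V, RV64 splits the scatter into |
| ; two 16-element halves, since vsext.vf8 can only widen 16 x i8 to i64 |
| ; within the LMUL=8 register-group limit; RV64ZVE32F scalarizes instead, |
| ; testing the low mask bits with andi and the high bits by shifting each |
| ; into the sign bit (slli followed by bltz/bgez). |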
| define void @mscatter_baseidx_v32i8(<32 x i8> %val, i8* %base, <32 x i8> %idxs, <32 x i1> %m) { |
| ; RV32-LABEL: mscatter_baseidx_v32i8: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: li a1, 32 |
| ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, mu |
| ; RV32-NEXT: vsext.vf4 v16, v10 |
| ; RV32-NEXT: vsetvli zero, zero, e8, m2, ta, mu |
| ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: mscatter_baseidx_v32i8: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu |
| ; RV64-NEXT: vsext.vf8 v16, v10 |
| ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t |
| ; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, mu |
| ; RV64-NEXT: vslidedown.vi v8, v8, 16 |
| ; RV64-NEXT: vslidedown.vi v10, v10, 16 |
| ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu |
| ; RV64-NEXT: vsext.vf8 v16, v10 |
| ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64-NEXT: vslidedown.vi v0, v0, 2 |
| ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu |
| ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t |
| ; RV64-NEXT: ret |
| ; |
| ; RV64ZVE32F-LABEL: mscatter_baseidx_v32i8: |
| ; RV64ZVE32F: # %bb.0: |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e32, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v0 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 1 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB92_2 |
| ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store |
| ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vse8.v v8, (a2) |
| ; RV64ZVE32F-NEXT: .LBB92_2: # %else |
| ; RV64ZVE32F-NEXT: andi a2, a1, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB92_4 |
| ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1 |
| ; RV64ZVE32F-NEXT: vse8.v v12, (a2) |
| ; RV64ZVE32F-NEXT: .LBB92_4: # %else2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 4 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB92_6 |
| ; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 2 |
| ; RV64ZVE32F-NEXT: vse8.v v14, (a2) |
| ; RV64ZVE32F-NEXT: .LBB92_6: # %else4 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 8 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v13, v10, 4 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB92_50 |
| ; RV64ZVE32F-NEXT: # %bb.7: # %else6 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB92_51 |
| ; RV64ZVE32F-NEXT: .LBB92_8: # %else8 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB92_10 |
| ; RV64ZVE32F-NEXT: .LBB92_9: # %cond.store9 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v13, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 5 |
| ; RV64ZVE32F-NEXT: vse8.v v14, (a2) |
| ; RV64ZVE32F-NEXT: .LBB92_10: # %else10 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 8 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 64 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 2 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB92_52 |
| ; RV64ZVE32F-NEXT: # %bb.11: # %else12 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 128 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB92_53 |
| ; RV64ZVE32F-NEXT: .LBB92_12: # %else14 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 256 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB92_54 |
| ; RV64ZVE32F-NEXT: .LBB92_13: # %else16 |
| ; RV64ZVE32F-NEXT: andi a2, a1, 512 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB92_15 |
| ; RV64ZVE32F-NEXT: .LBB92_14: # %cond.store17 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v13 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 9 |
| ; RV64ZVE32F-NEXT: vse8.v v14, (a2) |
| ; RV64ZVE32F-NEXT: .LBB92_15: # %else18 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: andi a2, a1, 1024 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 2 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB92_17 |
| ; RV64ZVE32F-NEXT: # %bb.16: # %cond.store19 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v13 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 10 |
| ; RV64ZVE32F-NEXT: vse8.v v14, (a2) |
| ; RV64ZVE32F-NEXT: .LBB92_17: # %else20 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: slli a2, a1, 52 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 4 |
| ; RV64ZVE32F-NEXT: bgez a2, .LBB92_19 |
| ; RV64ZVE32F-NEXT: # %bb.18: # %cond.store21 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v13 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 11 |
| ; RV64ZVE32F-NEXT: vse8.v v14, (a2) |
| ; RV64ZVE32F-NEXT: .LBB92_19: # %else22 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, ta, mu |
| ; RV64ZVE32F-NEXT: slli a2, a1, 51 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 16 |
| ; RV64ZVE32F-NEXT: bgez a2, .LBB92_21 |
| ; RV64ZVE32F-NEXT: # %bb.20: # %cond.store23 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 12 |
| ; RV64ZVE32F-NEXT: vse8.v v14, (a2) |
| ; RV64ZVE32F-NEXT: .LBB92_21: # %else24 |
| ; RV64ZVE32F-NEXT: slli a2, a1, 50 |
| ; RV64ZVE32F-NEXT: bgez a2, .LBB92_23 |
| ; RV64ZVE32F-NEXT: # %bb.22: # %cond.store25 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v13 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 13 |
| ; RV64ZVE32F-NEXT: vse8.v v14, (a2) |
| ; RV64ZVE32F-NEXT: .LBB92_23: # %else26 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: slli a2, a1, 49 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 2 |
| ; RV64ZVE32F-NEXT: bltz a2, .LBB92_55 |
| ; RV64ZVE32F-NEXT: # %bb.24: # %else28 |
| ; RV64ZVE32F-NEXT: slli a2, a1, 48 |
| ; RV64ZVE32F-NEXT: bltz a2, .LBB92_56 |
| ; RV64ZVE32F-NEXT: .LBB92_25: # %else30 |
| ; RV64ZVE32F-NEXT: slli a2, a1, 47 |
| ; RV64ZVE32F-NEXT: bltz a2, .LBB92_57 |
| ; RV64ZVE32F-NEXT: .LBB92_26: # %else32 |
| ; RV64ZVE32F-NEXT: slli a2, a1, 46 |
| ; RV64ZVE32F-NEXT: bgez a2, .LBB92_28 |
| ; RV64ZVE32F-NEXT: .LBB92_27: # %cond.store33 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 17 |
| ; RV64ZVE32F-NEXT: vse8.v v12, (a2) |
| ; RV64ZVE32F-NEXT: .LBB92_28: # %else34 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: slli a2, a1, 45 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v13, v10, 2 |
| ; RV64ZVE32F-NEXT: bgez a2, .LBB92_30 |
| ; RV64ZVE32F-NEXT: # %bb.29: # %cond.store35 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v13 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 18 |
| ; RV64ZVE32F-NEXT: vse8.v v14, (a2) |
| ; RV64ZVE32F-NEXT: .LBB92_30: # %else36 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: slli a2, a1, 44 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 4 |
| ; RV64ZVE32F-NEXT: bltz a2, .LBB92_58 |
| ; RV64ZVE32F-NEXT: # %bb.31: # %else38 |
| ; RV64ZVE32F-NEXT: slli a2, a1, 43 |
| ; RV64ZVE32F-NEXT: bltz a2, .LBB92_59 |
| ; RV64ZVE32F-NEXT: .LBB92_32: # %else40 |
| ; RV64ZVE32F-NEXT: slli a2, a1, 42 |
| ; RV64ZVE32F-NEXT: bgez a2, .LBB92_34 |
| ; RV64ZVE32F-NEXT: .LBB92_33: # %cond.store41 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v13 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 21 |
| ; RV64ZVE32F-NEXT: vse8.v v14, (a2) |
| ; RV64ZVE32F-NEXT: .LBB92_34: # %else42 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 8 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: slli a2, a1, 41 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v12, 2 |
| ; RV64ZVE32F-NEXT: bltz a2, .LBB92_60 |
| ; RV64ZVE32F-NEXT: # %bb.35: # %else44 |
| ; RV64ZVE32F-NEXT: slli a2, a1, 40 |
| ; RV64ZVE32F-NEXT: bltz a2, .LBB92_61 |
| ; RV64ZVE32F-NEXT: .LBB92_36: # %else46 |
| ; RV64ZVE32F-NEXT: slli a2, a1, 39 |
| ; RV64ZVE32F-NEXT: bltz a2, .LBB92_62 |
| ; RV64ZVE32F-NEXT: .LBB92_37: # %else48 |
| ; RV64ZVE32F-NEXT: slli a2, a1, 38 |
| ; RV64ZVE32F-NEXT: bgez a2, .LBB92_39 |
| ; RV64ZVE32F-NEXT: .LBB92_38: # %cond.store49 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 25 |
| ; RV64ZVE32F-NEXT: vse8.v v12, (a2) |
| ; RV64ZVE32F-NEXT: .LBB92_39: # %else50 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: slli a2, a1, 37 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 2 |
| ; RV64ZVE32F-NEXT: bgez a2, .LBB92_41 |
| ; RV64ZVE32F-NEXT: # %bb.40: # %cond.store51 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 26 |
| ; RV64ZVE32F-NEXT: vse8.v v12, (a2) |
| ; RV64ZVE32F-NEXT: .LBB92_41: # %else52 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu |
| ; RV64ZVE32F-NEXT: slli a2, a1, 36 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 4 |
| ; RV64ZVE32F-NEXT: bltz a2, .LBB92_63 |
| ; RV64ZVE32F-NEXT: # %bb.42: # %else54 |
| ; RV64ZVE32F-NEXT: slli a2, a1, 35 |
| ; RV64ZVE32F-NEXT: bltz a2, .LBB92_64 |
| ; RV64ZVE32F-NEXT: .LBB92_43: # %else56 |
| ; RV64ZVE32F-NEXT: slli a2, a1, 34 |
| ; RV64ZVE32F-NEXT: bgez a2, .LBB92_45 |
| ; RV64ZVE32F-NEXT: .LBB92_44: # %cond.store57 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 29 |
| ; RV64ZVE32F-NEXT: vse8.v v12, (a2) |
| ; RV64ZVE32F-NEXT: .LBB92_45: # %else58 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: slli a2, a1, 33 |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 |
| ; RV64ZVE32F-NEXT: bgez a2, .LBB92_47 |
| ; RV64ZVE32F-NEXT: # %bb.46: # %cond.store59 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 30 |
| ; RV64ZVE32F-NEXT: vse8.v v12, (a2) |
| ; RV64ZVE32F-NEXT: .LBB92_47: # %else60 |
| ; RV64ZVE32F-NEXT: lui a2, 524288 |
| ; RV64ZVE32F-NEXT: and a1, a1, a2 |
| ; RV64ZVE32F-NEXT: beqz a1, .LBB92_49 |
| ; RV64ZVE32F-NEXT: # %bb.48: # %cond.store61 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a1, v10 |
| ; RV64ZVE32F-NEXT: add a0, a0, a1 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 31 |
| ; RV64ZVE32F-NEXT: vse8.v v8, (a0) |
| ; RV64ZVE32F-NEXT: .LBB92_49: # %else62 |
| ; RV64ZVE32F-NEXT: ret |
| ; RV64ZVE32F-NEXT: .LBB92_50: # %cond.store5 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 3 |
| ; RV64ZVE32F-NEXT: vse8.v v14, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 16 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB92_8 |
| ; RV64ZVE32F-NEXT: .LBB92_51: # %cond.store7 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v13 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 4 |
| ; RV64ZVE32F-NEXT: vse8.v v14, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 32 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB92_9 |
| ; RV64ZVE32F-NEXT: j .LBB92_10 |
| ; RV64ZVE32F-NEXT: .LBB92_52: # %cond.store11 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v13 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 6 |
| ; RV64ZVE32F-NEXT: vse8.v v14, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 128 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB92_12 |
| ; RV64ZVE32F-NEXT: .LBB92_53: # %cond.store13 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v13 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 7 |
| ; RV64ZVE32F-NEXT: vse8.v v14, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 256 |
| ; RV64ZVE32F-NEXT: beqz a2, .LBB92_13 |
| ; RV64ZVE32F-NEXT: .LBB92_54: # %cond.store15 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 8 |
| ; RV64ZVE32F-NEXT: vse8.v v14, (a2) |
| ; RV64ZVE32F-NEXT: andi a2, a1, 512 |
| ; RV64ZVE32F-NEXT: bnez a2, .LBB92_14 |
| ; RV64ZVE32F-NEXT: j .LBB92_15 |
| ; RV64ZVE32F-NEXT: .LBB92_55: # %cond.store27 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 14 |
| ; RV64ZVE32F-NEXT: vse8.v v14, (a2) |
| ; RV64ZVE32F-NEXT: slli a2, a1, 48 |
| ; RV64ZVE32F-NEXT: bgez a2, .LBB92_25 |
| ; RV64ZVE32F-NEXT: .LBB92_56: # %cond.store29 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 15 |
| ; RV64ZVE32F-NEXT: vse8.v v12, (a2) |
| ; RV64ZVE32F-NEXT: slli a2, a1, 47 |
| ; RV64ZVE32F-NEXT: bgez a2, .LBB92_26 |
| ; RV64ZVE32F-NEXT: .LBB92_57: # %cond.store31 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 16 |
| ; RV64ZVE32F-NEXT: vse8.v v12, (a2) |
| ; RV64ZVE32F-NEXT: slli a2, a1, 46 |
| ; RV64ZVE32F-NEXT: bltz a2, .LBB92_27 |
| ; RV64ZVE32F-NEXT: j .LBB92_28 |
| ; RV64ZVE32F-NEXT: .LBB92_58: # %cond.store37 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v13 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 19 |
| ; RV64ZVE32F-NEXT: vse8.v v14, (a2) |
| ; RV64ZVE32F-NEXT: slli a2, a1, 43 |
| ; RV64ZVE32F-NEXT: bgez a2, .LBB92_32 |
| ; RV64ZVE32F-NEXT: .LBB92_59: # %cond.store39 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 20 |
| ; RV64ZVE32F-NEXT: vse8.v v14, (a2) |
| ; RV64ZVE32F-NEXT: slli a2, a1, 42 |
| ; RV64ZVE32F-NEXT: bltz a2, .LBB92_33 |
| ; RV64ZVE32F-NEXT: j .LBB92_34 |
| ; RV64ZVE32F-NEXT: .LBB92_60: # %cond.store43 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 22 |
| ; RV64ZVE32F-NEXT: vse8.v v12, (a2) |
| ; RV64ZVE32F-NEXT: slli a2, a1, 40 |
| ; RV64ZVE32F-NEXT: bgez a2, .LBB92_36 |
| ; RV64ZVE32F-NEXT: .LBB92_61: # %cond.store45 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 23 |
| ; RV64ZVE32F-NEXT: vse8.v v12, (a2) |
| ; RV64ZVE32F-NEXT: slli a2, a1, 39 |
| ; RV64ZVE32F-NEXT: bgez a2, .LBB92_37 |
| ; RV64ZVE32F-NEXT: .LBB92_62: # %cond.store47 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 24 |
| ; RV64ZVE32F-NEXT: vse8.v v12, (a2) |
| ; RV64ZVE32F-NEXT: slli a2, a1, 38 |
| ; RV64ZVE32F-NEXT: bltz a2, .LBB92_38 |
| ; RV64ZVE32F-NEXT: j .LBB92_39 |
| ; RV64ZVE32F-NEXT: .LBB92_63: # %cond.store53 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1 |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 27 |
| ; RV64ZVE32F-NEXT: vse8.v v12, (a2) |
| ; RV64ZVE32F-NEXT: slli a2, a1, 35 |
| ; RV64ZVE32F-NEXT: bgez a2, .LBB92_43 |
| ; RV64ZVE32F-NEXT: .LBB92_64: # %cond.store55 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu |
| ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 |
| ; RV64ZVE32F-NEXT: add a2, a0, a2 |
| ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu |
| ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 28 |
| ; RV64ZVE32F-NEXT: vse8.v v12, (a2) |
| ; RV64ZVE32F-NEXT: slli a2, a1, 34 |
| ; RV64ZVE32F-NEXT: bltz a2, .LBB92_44 |
| ; RV64ZVE32F-NEXT: j .LBB92_45 |
| %ptrs = getelementptr inbounds i8, i8* %base, <32 x i8> %idxs |
| call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> %val, <32 x i8*> %ptrs, i32 1, <32 x i1> %m) |
| ret void |
| } |