[RISCV] Prefer vmv.s.x for build_vector a, undef, ..., undef (#136164)
If we have a build_vector whose only defined element is the first one, so
that it could be lowered either as a splat or as a scalar insert, prefer the
scalar insert. At high LMUL this reduces vector register pressure (locally at
least; the use will likely still require an aligned register group) and
reduces the amount of work performed for the splat.
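
For reference, the binop-splats tests below exercise IR of roughly the
following shape (a sketch only; the exact test bodies are not part of this
diff, and the function/value names here are illustrative). The backend
narrows the binop of two splats into a binop on lane 0 followed by a
vrgather.vi, which leaves a build_vector whose lanes 1..N-1 are all undef --
exactly the case the new path now lowers with vmv.s.x instead of vmv.v.x.

    ; Sketch, assuming the usual binop-splats test shape.
    define <16 x i8> @v16i8(i8 %x, i8 %y) {
      %hx = insertelement <16 x i8> poison, i8 %x, i32 0
      %sx = shufflevector <16 x i8> %hx, <16 x i8> poison, <16 x i32> zeroinitializer
      %hy = insertelement <16 x i8> poison, i8 %y, i32 0
      %sy = shufflevector <16 x i8> %hy, <16 x i8> poison, <16 x i32> zeroinitializer
      %v = add <16 x i8> %sx, %sy
      ret <16 x i8> %v
    }
    ; Before: vmv.v.x v8, a0   ; splats %x across the whole group
    ; After:  vmv.s.x v8, a0   ; only lane 0 is needed before the vrgather.vi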
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5d4df03..98c8bdb 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4208,8 +4208,22 @@
if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
return Gather;
- unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
- : RISCVISD::VMV_V_X_VL;
+
+ // Prefer vmv.s.x/vfmv.s.f if legal to reduce work and register
+ // pressure at high LMUL.
+ if (all_of(Op->ops().drop_front(),
+ [](const SDUse &U) { return U.get().isUndef(); })) {
+ unsigned Opc =
+ VT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
+ if (!VT.isFloatingPoint())
+ Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
+ Splat = DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
+ Splat, VL);
+ return convertFromScalableVector(VT, Splat, DAG, Subtarget);
+ }
+
+ unsigned Opc =
+ VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
if (!VT.isFloatingPoint())
Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
Splat =
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-binop-splats.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-binop-splats.ll
index bfc43db..6b5ca5f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-binop-splats.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-binop-splats.ll
@@ -187,7 +187,7 @@
; CHECK-LABEL: v2i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: vadd.vx v9, v8, a1
; CHECK-NEXT: vrgather.vi v8, v9, 0
; CHECK-NEXT: ret
@@ -203,7 +203,7 @@
; CHECK-LABEL: v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: vadd.vx v9, v8, a1
; CHECK-NEXT: vrgather.vi v8, v9, 0
; CHECK-NEXT: ret
@@ -219,7 +219,7 @@
; CHECK-LABEL: v8i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: vadd.vx v9, v8, a1
; CHECK-NEXT: vrgather.vi v8, v9, 0
; CHECK-NEXT: ret
@@ -235,7 +235,7 @@
; CHECK-LABEL: v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: vadd.vx v9, v8, a1
; CHECK-NEXT: vrgather.vi v8, v9, 0
; CHECK-NEXT: ret
@@ -252,7 +252,7 @@
; CHECK: # %bb.0:
; CHECK-NEXT: li a2, 32
; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma
-; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: vadd.vx v10, v8, a1
; CHECK-NEXT: vrgather.vi v8, v10, 0
; CHECK-NEXT: ret
@@ -269,7 +269,7 @@
; CHECK: # %bb.0:
; CHECK-NEXT: li a2, 64
; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma
-; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: vadd.vx v12, v8, a1
; CHECK-NEXT: vrgather.vi v8, v12, 0
; CHECK-NEXT: ret
@@ -300,7 +300,7 @@
; CHECK-LABEL: v2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: vadd.vx v9, v8, a1
; CHECK-NEXT: vrgather.vi v8, v9, 0
; CHECK-NEXT: ret
@@ -316,7 +316,7 @@
; CHECK-LABEL: v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: vadd.vx v9, v8, a1
; CHECK-NEXT: vrgather.vi v8, v9, 0
; CHECK-NEXT: ret
@@ -332,7 +332,7 @@
; CHECK-LABEL: v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: vadd.vx v9, v8, a1
; CHECK-NEXT: vrgather.vi v8, v9, 0
; CHECK-NEXT: ret
@@ -348,7 +348,7 @@
; CHECK-LABEL: v16i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: vadd.vx v10, v8, a1
; CHECK-NEXT: vrgather.vi v8, v10, 0
; CHECK-NEXT: ret
@@ -365,7 +365,7 @@
; CHECK: # %bb.0:
; CHECK-NEXT: li a2, 32
; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
-; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: vadd.vx v12, v8, a1
; CHECK-NEXT: vrgather.vi v8, v12, 0
; CHECK-NEXT: ret
@@ -396,7 +396,7 @@
; CHECK-LABEL: v2i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: vadd.vx v9, v8, a1
; CHECK-NEXT: vrgather.vi v8, v9, 0
; CHECK-NEXT: ret
@@ -412,7 +412,7 @@
; CHECK-LABEL: v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: vadd.vx v9, v8, a1
; CHECK-NEXT: vrgather.vi v8, v9, 0
; CHECK-NEXT: ret
@@ -428,7 +428,7 @@
; CHECK-LABEL: v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: vadd.vx v10, v8, a1
; CHECK-NEXT: vrgather.vi v8, v10, 0
; CHECK-NEXT: ret
@@ -444,7 +444,7 @@
; CHECK-LABEL: v16i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
-; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: vadd.vx v12, v8, a1
; CHECK-NEXT: vrgather.vi v8, v12, 0
; CHECK-NEXT: ret
@@ -509,7 +509,7 @@
; RV64-LABEL: v2i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV64-NEXT: vmv.v.x v8, a0
+; RV64-NEXT: vmv.s.x v8, a0
; RV64-NEXT: vadd.vx v9, v8, a1
; RV64-NEXT: vrgather.vi v8, v9, 0
; RV64-NEXT: ret
@@ -542,7 +542,7 @@
; RV64-LABEL: v4i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT: vmv.v.x v8, a0
+; RV64-NEXT: vmv.s.x v8, a0
; RV64-NEXT: vadd.vx v10, v8, a1
; RV64-NEXT: vrgather.vi v8, v10, 0
; RV64-NEXT: ret
@@ -575,7 +575,7 @@
; RV64-LABEL: v8i64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT: vmv.v.x v8, a0
+; RV64-NEXT: vmv.s.x v8, a0
; RV64-NEXT: vadd.vx v12, v8, a1
; RV64-NEXT: vrgather.vi v8, v12, 0
; RV64-NEXT: ret
@@ -591,7 +591,7 @@
; CHECK-LABEL: v4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: vfmv.s.f v8, fa0
; CHECK-NEXT: vfadd.vf v9, v8, fa1
; CHECK-NEXT: vrgather.vi v8, v9, 0
; CHECK-NEXT: ret
@@ -607,7 +607,7 @@
; CHECK-LABEL: v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: vfmv.s.f v8, fa0
; CHECK-NEXT: vfadd.vf v9, v8, fa1
; CHECK-NEXT: vrgather.vi v8, v9, 0
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll
index dbbb836..c975456 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll
@@ -439,7 +439,7 @@
; RV32-LABEL: buggy:
; RV32: # %bb.0: # %entry
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vmv.v.x v8, a0
+; RV32-NEXT: vmv.s.x v8, a0
; RV32-NEXT: vadd.vv v8, v8, v8
; RV32-NEXT: vor.vi v8, v8, 1
; RV32-NEXT: vrgather.vi v9, v8, 0
@@ -450,7 +450,7 @@
; RV64: # %bb.0: # %entry
; RV64-NEXT: slli a0, a0, 1
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vmv.v.x v8, a0
+; RV64-NEXT: vmv.s.x v8, a0
; RV64-NEXT: vor.vi v8, v8, 1
; RV64-NEXT: vrgather.vi v9, v8, 0
; RV64-NEXT: vse32.v v9, (zero)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll
index a171a7f..a29d53b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll
@@ -250,7 +250,7 @@
; CHECK-LABEL: vslide1up_4xf64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vfmv.v.f v10, fa0
+; CHECK-NEXT: vfmv.s.f v10, fa0
; CHECK-NEXT: vslideup.vi v10, v8, 3
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
index 29fbb8a..b6253c6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
@@ -500,7 +500,7 @@
; RV32-SLOW-NEXT: or a4, a6, a5
; RV32-SLOW-NEXT: or a3, a4, a3
; RV32-SLOW-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; RV32-SLOW-NEXT: vmv.v.x v8, a3
+; RV32-SLOW-NEXT: vmv.s.x v8, a3
; RV32-SLOW-NEXT: .LBB8_2: # %else
; RV32-SLOW-NEXT: andi a2, a2, 2
; RV32-SLOW-NEXT: beqz a2, .LBB8_4
@@ -544,7 +544,7 @@
; RV64-SLOW-NEXT: or a4, a6, a5
; RV64-SLOW-NEXT: or a3, a4, a3
; RV64-SLOW-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; RV64-SLOW-NEXT: vmv.v.x v8, a3
+; RV64-SLOW-NEXT: vmv.s.x v8, a3
; RV64-SLOW-NEXT: .LBB8_2: # %else
; RV64-SLOW-NEXT: andi a2, a2, 2
; RV64-SLOW-NEXT: beqz a2, .LBB8_4
diff --git a/llvm/test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll b/llvm/test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll
index 919c2fd..19ea7b7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll
@@ -9,8 +9,8 @@
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi a3, a2, 1
; RV32-NEXT: th.lbib a4, (a1), -1, 0
-; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT: vmv.v.x v8, a4
+; RV32-NEXT: vsetivli zero, 8, e8, m1, ta, ma
+; RV32-NEXT: vmv.s.x v8, a4
; RV32-NEXT: vmv.s.x v9, zero
; RV32-NEXT: vsetvli zero, a3, e8, mf2, tu, ma
; RV32-NEXT: vslideup.vx v8, v9, a2
@@ -35,8 +35,8 @@
; RV64: # %bb.0: # %entry
; RV64-NEXT: addi a3, a2, 1
; RV64-NEXT: th.lbib a4, (a1), -1, 0
-; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT: vmv.v.x v8, a4
+; RV64-NEXT: vsetivli zero, 8, e8, m1, ta, ma
+; RV64-NEXT: vmv.s.x v8, a4
; RV64-NEXT: vmv.s.x v9, zero
; RV64-NEXT: vsetvli zero, a3, e8, mf2, tu, ma
; RV64-NEXT: vslideup.vx v8, v9, a2
diff --git a/llvm/test/CodeGen/RISCV/rvv/pr125306.ll b/llvm/test/CodeGen/RISCV/rvv/pr125306.ll
index 111f87d..9400c38 100644
--- a/llvm/test/CodeGen/RISCV/rvv/pr125306.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/pr125306.ll
@@ -60,7 +60,7 @@
; CHECK-NEXT: vslide1down.vx v8, v8, zero
; CHECK-NEXT: vslide1down.vx v10, v10, zero
; CHECK-NEXT: vmin.vv v8, v10, v8
-; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vmv.s.x v10, a0
; CHECK-NEXT: vslide1down.vx v11, v11, zero
; CHECK-NEXT: vmin.vx v10, v10, a2
; CHECK-NEXT: vmin.vx v10, v10, a1