[RISCV] Recognize a zipeven/zipodd requiring larger SEW (#134923)

This is a follow-up to f8ee58a3c and improves code generation for the
XRivosVizip extension.

If we have a slide pair which would be a zipeven or zipodd if the
shuffle were widened, widen the shuffle and then match the zipeven or
zipodd.

This is basically working around an order-of-matching issue: we match
the slide pair variants before trying widening. I considered simply
widening slide pairs without any consideration of the zip idioms, but
the resulting codegen changes look mostly like churn and show no clear
evidence of profitability.
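
For example, the zipeven_v8i32_as_v4i64 test updated below contains a
slide pair with Factor=2:

  %out = shufflevector <8 x i32> %v1, <8 x i32> %v2,
                       <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 12, i32 13>

Pairs of i32 elements move together here, so reinterpreted as a
<4 x i64> shuffle the mask becomes <0, 4, 2, 6>, which is exactly a
zipeven (vd: a0 b0 a2 b2). After widening we can emit a single
ri.vzipeven.vv at e64 instead of a masked vslideup at e32.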
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c79b3f0..f7d1927 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4654,7 +4654,8 @@
 }
 
 static bool isAlternating(const std::array<std::pair<int, int>, 2> &SrcInfo,
-                          ArrayRef<int> Mask, bool RequiredPolarity) {
+                          ArrayRef<int> Mask, unsigned Factor,
+                          bool RequiredPolarity) {
   int NumElts = Mask.size();
   for (int i = 0; i != NumElts; ++i) {
     int M = Mask[i];
@@ -4665,7 +4666,7 @@
     bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second;
     assert(C != (Src == SrcInfo[0].first && Diff == SrcInfo[0].second) &&
            "Must match exactly one of the two slides");
-    if (RequiredPolarity != (C == i % 2))
+    if (RequiredPolarity != (C == (i / Factor) % 2))
       return false;
   }
   return true;
@@ -4677,9 +4678,11 @@
 /// vs1: b0 b1 b2 b3
 /// vd:  a0 b0 a2 b2
 static bool isZipEven(const std::array<std::pair<int, int>, 2> &SrcInfo,
-                      ArrayRef<int> Mask) {
-  return SrcInfo[0].second == 0 && SrcInfo[1].second == 1 &&
-         isAlternating(SrcInfo, Mask, true);
+                      ArrayRef<int> Mask, unsigned &Factor) {
+  Factor = SrcInfo[1].second;
+  return SrcInfo[0].second == 0 && isPowerOf2_32(Factor) &&
+         Mask.size() % Factor == 0 &&
+         isAlternating(SrcInfo, Mask, Factor, true);
 }
 
 /// Given a shuffle which can be represented as a pair of two slides,
@@ -4690,9 +4693,11 @@
 /// Note that the operand order is swapped due to the way we canonicalize
 /// the slides, so SrcInfo[0] is vs1, and SrcInfo[1] is vs2.
 static bool isZipOdd(const std::array<std::pair<int, int>, 2> &SrcInfo,
-                     ArrayRef<int> Mask) {
-  return SrcInfo[0].second == 0 && SrcInfo[1].second == -1 &&
-         isAlternating(SrcInfo, Mask, false);
+                     ArrayRef<int> Mask, unsigned &Factor) {
+  Factor = -SrcInfo[1].second;
+  return SrcInfo[0].second == 0 && isPowerOf2_32(Factor) &&
+         Mask.size() % Factor == 0 &&
+         isAlternating(SrcInfo, Mask, Factor, false);
 }
 
 // Lower a deinterleave shuffle to SRL and TRUNC.  Factor must be
@@ -5779,16 +5784,33 @@
       return convertFromScalableVector(VT, Res, DAG, Subtarget);
     }
 
-    if (Subtarget.hasVendorXRivosVizip() && isZipEven(SrcInfo, Mask)) {
-      SDValue Src1 = SrcInfo[0].first == 0 ? V1 : V2;
-      SDValue Src2 = SrcInfo[1].first == 0 ? V1 : V2;
-      return lowerVZIP(RISCVISD::RI_VZIPEVEN_VL, Src1, Src2, DL, DAG,
-                       Subtarget);
-    }
-    if (Subtarget.hasVendorXRivosVizip() && isZipOdd(SrcInfo, Mask)) {
-      SDValue Src1 = SrcInfo[1].first == 0 ? V1 : V2;
-      SDValue Src2 = SrcInfo[0].first == 0 ? V1 : V2;
-      return lowerVZIP(RISCVISD::RI_VZIPODD_VL, Src1, Src2, DL, DAG, Subtarget);
+    if (Subtarget.hasVendorXRivosVizip()) {
+      bool TryWiden = false;
+      unsigned Factor;
+      if (isZipEven(SrcInfo, Mask, Factor)) {
+        if (Factor == 1) {
+          SDValue Src1 = SrcInfo[0].first == 0 ? V1 : V2;
+          SDValue Src2 = SrcInfo[1].first == 0 ? V1 : V2;
+          return lowerVZIP(RISCVISD::RI_VZIPEVEN_VL, Src1, Src2, DL, DAG,
+                           Subtarget);
+        }
+        TryWiden = true;
+      }
+      if (isZipOdd(SrcInfo, Mask, Factor)) {
+        if (Factor == 1) {
+          SDValue Src1 = SrcInfo[1].first == 0 ? V1 : V2;
+          SDValue Src2 = SrcInfo[0].first == 0 ? V1 : V2;
+          return lowerVZIP(RISCVISD::RI_VZIPODD_VL, Src1, Src2, DL, DAG,
+                           Subtarget);
+        }
+        TryWiden = true;
+      }
+      // If we found a widening opportunity which would let us form a
+      // zipeven or zipodd, use the generic code to widen the shuffle
+      // and recurse through this logic.
+      if (TryWiden)
+        if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
+          return V;
     }
 
     // Build the mask.  Note that vslideup unconditionally preserves elements
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-zipeven-zipodd.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-zipeven-zipodd.ll
index 4e08112..14c17a6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-zipeven-zipodd.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-zipeven-zipodd.ll
@@ -365,10 +365,9 @@
 ;
 ; ZIP-LABEL: zipeven_v8i32_as_v4i64:
 ; ZIP:       # %bb.0:
-; ZIP-NEXT:    li a0, 204
-; ZIP-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; ZIP-NEXT:    vmv.s.x v0, a0
-; ZIP-NEXT:    vslideup.vi v8, v10, 2, v0.t
+; ZIP-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; ZIP-NEXT:    ri.vzipeven.vv v12, v8, v10
+; ZIP-NEXT:    vmv.v.v v8, v12
 ; ZIP-NEXT:    ret
   %out = shufflevector <8 x i32> %v1, <8 x i32> %v2, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 12, i32 13>
   ret <8 x i32> %out
@@ -386,11 +385,9 @@
 ;
 ; ZIP-LABEL: zipodd_v8i32_as_v4i64:
 ; ZIP:       # %bb.0:
-; ZIP-NEXT:    li a0, 51
-; ZIP-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; ZIP-NEXT:    vmv.s.x v0, a0
-; ZIP-NEXT:    vslidedown.vi v10, v8, 2, v0.t
-; ZIP-NEXT:    vmv.v.v v8, v10
+; ZIP-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; ZIP-NEXT:    ri.vzipodd.vv v12, v8, v10
+; ZIP-NEXT:    vmv.v.v v8, v12
 ; ZIP-NEXT:    ret
   %out = shufflevector <8 x i32> %v1, <8 x i32> %v2, <8 x i32> <i32 2, i32 3, i32 10, i32 11, i32 6, i32 7, i32 14, i32 15>
   ret <8 x i32> %out