[CodeGen] Fix failing assert in interleaved access pass (#156457)
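In the InterleavedAccessPass, the function getMask assumes that
shufflevector operations are always fixed-width. That isn't true,
because we also use them for splats of scalable vectors. This patch
fixes the failing assert by bailing out of the shuffle mask analysis
when the shuffle's operand is not a fixed-width vector. It also removes
the `opt -interleaved-access` RUN line from the test, leaving the
`-passes=interleaved-access` one.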
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index c5e9703..e3ded12 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -660,6 +660,10 @@
}
if (auto *SVI = dyn_cast<ShuffleVectorInst>(WideMask)) {
+ Type *Op1Ty = SVI->getOperand(1)->getType();
+ if (!isa<FixedVectorType>(Op1Ty))
+ return {nullptr, GapMask};
+
// Check that the shuffle mask is: a) an interleave, b) all of the same
// set of the elements, and c) contained by the first source. (c) could
// be relaxed if desired.
diff --git a/llvm/test/Transforms/InterleavedAccess/AArch64/scalable-deinterleave-intrinsics.ll b/llvm/test/Transforms/InterleavedAccess/AArch64/scalable-deinterleave-intrinsics.ll
index d764980..ed9fba3 100644
--- a/llvm/test/Transforms/InterleavedAccess/AArch64/scalable-deinterleave-intrinsics.ll
+++ b/llvm/test/Transforms/InterleavedAccess/AArch64/scalable-deinterleave-intrinsics.ll
@@ -1,5 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
-; RUN: opt < %s -interleaved-access -S | FileCheck %s
; RUN: opt < %s -passes=interleaved-access -S | FileCheck %s
target triple = "aarch64-linux-gnu"
@@ -186,6 +185,22 @@
ret void
}
+define void @interleave_nxi8_factor2_masked_store_splatmask(ptr %ptr, <vscale x 16 x i8> %l, <vscale x 16 x i8> %r, i1 %mask) #0 {
+; CHECK-LABEL: define void @interleave_nxi8_factor2_masked_store_splatmask
+; CHECK-SAME: (ptr [[PTR:%.*]], <vscale x 16 x i8> [[L:%.*]], <vscale x 16 x i8> [[R:%.*]], i1 [[MASK:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[INTERLEAVE:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.interleave2.nxv32i8(<vscale x 16 x i8> [[L]], <vscale x 16 x i8> [[R]])
+; CHECK-NEXT: [[MASK_INS:%.*]] = insertelement <vscale x 32 x i1> poison, i1 [[MASK]], i64 0
+; CHECK-NEXT: [[MASK_SPLAT:%.*]] = shufflevector <vscale x 32 x i1> [[MASK_INS]], <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+; CHECK-NEXT: tail call void @llvm.masked.store.nxv32i8.p0(<vscale x 32 x i8> [[INTERLEAVE]], ptr [[PTR]], i32 1, <vscale x 32 x i1> [[MASK_SPLAT]])
+; CHECK-NEXT: ret void
+;
+ %interleave = tail call <vscale x 32 x i8> @llvm.vector.interleave2.nxv32i8(<vscale x 16 x i8> %l, <vscale x 16 x i8> %r)
+ %mask.ins = insertelement <vscale x 32 x i1> poison, i1 %mask, i64 0
+ %mask.splat = shufflevector <vscale x 32 x i1> %mask.ins, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+ tail call void @llvm.masked.store.nxv32i8.p0(<vscale x 32 x i8> %interleave, ptr %ptr, i32 1, <vscale x 32 x i1> %mask.splat)
+ ret void
+}
+
;;; Check that we 'legalize' operations that are wider than the target supports.
define void @deinterleave_wide_nxi32_factor2(ptr %ptr) #0 {