[SLP] Relax assertion to avoid compiler crash

Relax the assertion to fix a compiler crash that occurs when the
reordered compress loads are more profitable than the ordered ones.

Fixes #140334
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 4485563..db4a571 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -13626,11 +13626,10 @@
         SmallVector<Value *> PointerOps(Scalars.size());
         for (auto [I, V] : enumerate(Scalars))
           PointerOps[I] = cast<LoadInst>(V)->getPointerOperand();
-        [[maybe_unused]] bool IsVectorized = isMaskedLoadCompress(
+        (void)isMaskedLoadCompress(
             Scalars, PointerOps, E->ReorderIndices, *TTI, *DL, *SE, *AC, *DT,
             *TLI, [](Value *) { return true; }, IsMasked, InterleaveFactor,
             CompressMask, LoadVecTy);
-        assert(IsVectorized && "Expected to be vectorized");
         CompressEntryToData.try_emplace(E, CompressMask, LoadVecTy,
                                         InterleaveFactor, IsMasked);
         Align CommonAlignment = LI0->getAlign();
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/masked-load-compress-reordered.ll b/llvm/test/Transforms/SLPVectorizer/X86/masked-load-compress-reordered.ll
new file mode 100644
index 0000000..b82e817
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/masked-load-compress-reordered.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=znver1 < %s | FileCheck %s
+
+@mbc_dbl = common global [16 x i8] zeroinitializer, align 8
+@foo_int = common global [16 x i8] zeroinitializer, align 8
+@foo_ptr = common local_unnamed_addr global [640 x i8] zeroinitializer, align 8
+
+define void @test() {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr getelementptr inbounds nuw (i8, ptr @foo_ptr, i64 64), align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i64, ptr @foo_int, i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i64, ptr getelementptr inbounds nuw (i8, ptr @foo_ptr, i64 80), align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i64, ptr @foo_int, i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = load i64, ptr getelementptr inbounds nuw (i8, ptr @foo_ptr, i64 192), align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr double, ptr @mbc_dbl, i64 [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = load i64, ptr getelementptr inbounds nuw (i8, ptr @foo_ptr, i64 128), align 8
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr double, ptr @mbc_dbl, i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = load i64, ptr getelementptr inbounds nuw (i8, ptr @foo_ptr, i64 144), align 8
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr double, ptr @mbc_dbl, i64 [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = load i64, ptr getelementptr inbounds nuw (i8, ptr @foo_ptr, i64 208), align 8
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr double, ptr @mbc_dbl, i64 [[TMP11]]
+; CHECK-NEXT:    ret void
+;
+  %165 = load i64, ptr getelementptr inbounds nuw (i8, ptr @foo_ptr, i64 64), align 8
+  %166 = getelementptr i64, ptr @foo_int, i64 %165
+  %168 = load i64, ptr getelementptr inbounds nuw (i8, ptr @foo_ptr, i64 80), align 8
+  %169 = getelementptr i64, ptr @foo_int, i64 %168
+  %171 = load i64, ptr getelementptr inbounds nuw (i8, ptr @foo_ptr, i64 192), align 8
+  %172 = getelementptr double, ptr @mbc_dbl, i64 %171
+  %174 = load i64, ptr getelementptr inbounds nuw (i8, ptr @foo_ptr, i64 128), align 8
+  %175 = getelementptr double, ptr @mbc_dbl, i64 %174
+  %177 = load i64, ptr getelementptr inbounds nuw (i8, ptr @foo_ptr, i64 144), align 8
+  %178 = getelementptr double, ptr @mbc_dbl, i64 %177
+  %180 = load i64, ptr getelementptr inbounds nuw (i8, ptr @foo_ptr, i64 208), align 8
+  %181 = getelementptr double, ptr @mbc_dbl, i64 %180
+  ret void
+}