[SLP] Relax assertion to avoid compiler crash. The assertion must be relaxed to fix a compiler crash that occurs when the reordered compress loads are more profitable than the ordered ones. Fixes #140334
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 4485563..db4a571 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -13626,11 +13626,10 @@ SmallVector<Value *> PointerOps(Scalars.size()); for (auto [I, V] : enumerate(Scalars)) PointerOps[I] = cast<LoadInst>(V)->getPointerOperand(); - [[maybe_unused]] bool IsVectorized = isMaskedLoadCompress( + (void)isMaskedLoadCompress( Scalars, PointerOps, E->ReorderIndices, *TTI, *DL, *SE, *AC, *DT, *TLI, [](Value *) { return true; }, IsMasked, InterleaveFactor, CompressMask, LoadVecTy); - assert(IsVectorized && "Expected to be vectorized"); CompressEntryToData.try_emplace(E, CompressMask, LoadVecTy, InterleaveFactor, IsMasked); Align CommonAlignment = LI0->getAlign();
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/masked-load-compress-reordered.ll b/llvm/test/Transforms/SLPVectorizer/X86/masked-load-compress-reordered.ll new file mode 100644 index 0000000..b82e817 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/masked-load-compress-reordered.ll
@@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=znver1 < %s | FileCheck %s + +@mbc_dbl = common global [16 x i8] zeroinitializer, align 8 +@foo_int = common global [16 x i8] zeroinitializer, align 8 +@foo_ptr = common local_unnamed_addr global [640 x i8] zeroinitializer, align 8 + +define void @test() { +; CHECK-LABEL: define void @test( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr getelementptr inbounds nuw (i8, ptr @foo_ptr, i64 64), align 8 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr @foo_int, i64 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr getelementptr inbounds nuw (i8, ptr @foo_ptr, i64 80), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr @foo_int, i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr getelementptr inbounds nuw (i8, ptr @foo_ptr, i64 192), align 8 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr double, ptr @mbc_dbl, i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr getelementptr inbounds nuw (i8, ptr @foo_ptr, i64 128), align 8 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr double, ptr @mbc_dbl, i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr getelementptr inbounds nuw (i8, ptr @foo_ptr, i64 144), align 8 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr double, ptr @mbc_dbl, i64 [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr getelementptr inbounds nuw (i8, ptr @foo_ptr, i64 208), align 8 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr double, ptr @mbc_dbl, i64 [[TMP11]] +; CHECK-NEXT: ret void +; + %165 = load i64, ptr getelementptr inbounds nuw (i8, ptr @foo_ptr, i64 64), align 8 + %166 = getelementptr i64, ptr @foo_int, i64 %165 + %168 = load i64, ptr getelementptr inbounds nuw (i8, ptr @foo_ptr, i64 80), align 8 + %169 = getelementptr i64, ptr @foo_int, i64 %168 + %171 = load i64, ptr getelementptr inbounds nuw (i8, ptr 
@foo_ptr, i64 192), align 8 + %172 = getelementptr double, ptr @mbc_dbl, i64 %171 + %174 = load i64, ptr getelementptr inbounds nuw (i8, ptr @foo_ptr, i64 128), align 8 + %175 = getelementptr double, ptr @mbc_dbl, i64 %174 + %177 = load i64, ptr getelementptr inbounds nuw (i8, ptr @foo_ptr, i64 144), align 8 + %178 = getelementptr double, ptr @mbc_dbl, i64 %177 + %180 = load i64, ptr getelementptr inbounds nuw (i8, ptr @foo_ptr, i64 208), align 8 + %181 = getelementptr double, ptr @mbc_dbl, i64 %180 + ret void +}