[SLP] Fix a crash when trying to reduce in revec after minbitwidth analysis. Need to use the original scalar type when building the reduction, and use the scalar type when performing casting, to avoid a compiler crash.

commit: a2d129b792cc49224f98c83d2279b2e19bfa700f [log] [tgz]
author: Alexey Bataev <a.bataev@outlook.com> Fri Apr 11 10:53:48 2025 -0700
committer: Alexey Bataev <a.bataev@outlook.com> Fri Apr 11 10:58:39 2025 -0700
tree: ceccede472ff5730a8da0c3f4fa97b2b281c7bdf
parent: abe3b90b3fa49829a165f2174191c33542597052 [diff]
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index c854821..b7ee5bf 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

@@ -22141,8 +22141,8 @@
         if (isa<FixedVectorType>(ScalarTy)) {
           assert(SLPReVec && "FixedVectorType is not expected.");
           unsigned ScalarTyNumElements = getNumElements(ScalarTy);
-          Value *ReducedSubTree = PoisonValue::get(getWidenedType(
-              VectorizedRoot->getType()->getScalarType(), ScalarTyNumElements));
+          Value *ReducedSubTree = PoisonValue::get(
+              getWidenedType(ScalarTy->getScalarType(), ScalarTyNumElements));
           for (unsigned I : seq<unsigned>(ScalarTyNumElements)) {
             // Do reduction for each lane.
             // e.g., do reduce add for
@@ -22359,7 +22359,7 @@
                         Type *DestTy) {
     Value *Rdx = emitReduction(Vec, Builder, &TTI, DestTy);
     if (Rdx->getType() != DestTy->getScalarType())
-      Rdx = Builder.CreateIntCast(Rdx, DestTy, IsSigned);
+      Rdx = Builder.CreateIntCast(Rdx, DestTy->getScalarType(), IsSigned);
     // Improved analysis for add/fadd/xor reductions with same scale
     // factor for all operands of reductions. We can emit scalar ops for
     // them instead.

diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll
index 5320c6b..b9f3545 100644
--- a/llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll
+++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll

@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -mtriple=s390x-unknown-linux-gnu -mcpu=z17 -passes=slp-vectorizer -S -slp-revec %s | FileCheck %s
+; RUN: opt -mtriple=s390x-unknown-linux-gnu -mcpu=z17 -passes=slp-vectorizer -S -slp-revec < %s | FileCheck %s
+; RUN: opt -mtriple=s390x-unknown-linux-gnu -mcpu=z17 -passes=slp-vectorizer -S -slp-revec -slp-threshold=-1000 < %s | FileCheck %s --check-prefix=THRESH
 
 define void @e(<4 x i16> %0) {
 ; CHECK-LABEL: @e(
@@ -22,6 +23,40 @@
 ; CHECK-NEXT:    [[TMP12]] = or <4 x i32> [[TMP9]], [[TMP11]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY]]
 ;
+; THRESH-LABEL: @e(
+; THRESH-NEXT:  entry:
+; THRESH-NEXT:    [[TMP1:%.*]] = call <8 x i16> @llvm.vector.insert.v8i16.v4i16(<8 x i16> poison, <4 x i16> zeroinitializer, i64 0)
+; THRESH-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.vector.insert.v8i16.v4i16(<8 x i16> [[TMP1]], <4 x i16> zeroinitializer, i64 4)
+; THRESH-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.vector.insert.v8i16.v4i16(<8 x i16> poison, <4 x i16> zeroinitializer, i64 4)
+; THRESH-NEXT:    [[TMP4:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> poison, <4 x i16> zeroinitializer, i64 0)
+; THRESH-NEXT:    [[TMP5:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> [[TMP4]], <4 x i16> zeroinitializer, i64 4)
+; THRESH-NEXT:    [[TMP6:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> [[TMP5]], <4 x i16> zeroinitializer, i64 8)
+; THRESH-NEXT:    [[TMP7:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> [[TMP6]], <4 x i16> zeroinitializer, i64 12)
+; THRESH-NEXT:    br label [[VECTOR_BODY:%.*]]
+; THRESH:       vector.body:
+; THRESH-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ zeroinitializer, [[ENTRY:%.*]] ], [ zeroinitializer, [[VECTOR_BODY]] ]
+; THRESH-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[ENTRY]] ], [ [[TMP26:%.*]], [[VECTOR_BODY]] ]
+; THRESH-NEXT:    [[TMP8:%.*]] = call <8 x i16> @llvm.vector.insert.v8i16.v4i16(<8 x i16> [[TMP3]], <4 x i16> [[VEC_IND]], i64 0)
+; THRESH-NEXT:    [[TMP9:%.*]] = add <8 x i16> [[TMP2]], [[TMP8]]
+; THRESH-NEXT:    [[TMP10:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; THRESH-NEXT:    [[TMP11:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> [[TMP10]], <4 x i16> [[TMP0:%.*]], i64 4)
+; THRESH-NEXT:    [[TMP12:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v8i16(<16 x i16> [[TMP11]], <8 x i16> [[TMP9]], i64 8)
+; THRESH-NEXT:    [[TMP13:%.*]] = icmp sgt <16 x i16> [[TMP12]], [[TMP7]]
+; THRESH-NEXT:    [[TMP14:%.*]] = shufflevector <16 x i1> [[TMP13]], <16 x i1> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+; THRESH-NEXT:    [[TMP15:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP14]])
+; THRESH-NEXT:    [[TMP16:%.*]] = shufflevector <16 x i1> [[TMP13]], <16 x i1> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
+; THRESH-NEXT:    [[TMP17:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP16]])
+; THRESH-NEXT:    [[TMP18:%.*]] = shufflevector <16 x i1> [[TMP13]], <16 x i1> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
+; THRESH-NEXT:    [[TMP19:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP18]])
+; THRESH-NEXT:    [[TMP20:%.*]] = shufflevector <16 x i1> [[TMP13]], <16 x i1> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
+; THRESH-NEXT:    [[TMP21:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP20]])
+; THRESH-NEXT:    [[TMP22:%.*]] = insertelement <4 x i1> poison, i1 [[TMP15]], i32 0
+; THRESH-NEXT:    [[TMP23:%.*]] = insertelement <4 x i1> [[TMP22]], i1 [[TMP17]], i32 1
+; THRESH-NEXT:    [[TMP24:%.*]] = insertelement <4 x i1> [[TMP23]], i1 [[TMP19]], i32 2
+; THRESH-NEXT:    [[TMP25:%.*]] = insertelement <4 x i1> [[TMP24]], i1 [[TMP21]], i32 3
+; THRESH-NEXT:    [[TMP26]] = zext <4 x i1> [[TMP25]] to <4 x i32>
+; THRESH-NEXT:    br label [[VECTOR_BODY]]
+;
 entry:
   br label %vector.body
commit	a2d129b792cc49224f98c83d2279b2e19bfa700f	[log] [tgz]
author	Alexey Bataev <a.bataev@outlook.com>	Fri Apr 11 10:53:48 2025 -0700
committer	Alexey Bataev <a.bataev@outlook.com>	Fri Apr 11 10:58:39 2025 -0700
tree	ceccede472ff5730a8da0c3f4fa97b2b281c7bdf
parent	abe3b90b3fa49829a165f2174191c33542597052 [diff]