[SLP]Add a check if the user itself is commutable
If a commutable instruction can be represented as a non-commutable
vector instruction (e.g. add 0, %v can be represented as part of a sub
node, as sub %v, 0), its operands might still be reordered, and this
should be accounted for when checking for copyables in operands.
Fixes #158293
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 7ca43ef..8aafe14 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5253,6 +5253,7 @@
// Same applies even for non-commutative cmps, because we can invert
// their predicate potentially and, thus, reorder the operands.
bool IsCommutativeUser =
+ ::isCommutative(User) ||
::isCommutative(TE->getMatchingMainOpOrAltOp(User), User);
EdgeInfo EI(TE, U.getOperandNo());
if (!IsCommutativeUser && !isa<CmpInst>(User)) {
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/commutable-member-in-non-commutable-node.ll b/llvm/test/Transforms/SLPVectorizer/X86/commutable-member-in-non-commutable-node.ll
new file mode 100644
index 0000000..adceef1
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/commutable-member-in-non-commutable-node.ll
@@ -0,0 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt --passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f < %s | FileCheck %s
+
+define i64 @test(i32 %arg) {
+; CHECK-LABEL: define i64 @test(
+; CHECK-SAME: i32 [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[BB:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 896), align 4
+; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[TMP0]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[ARG]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = sub <2 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: store <2 x i32> [[TMP3]], ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 896), align 4
+; CHECK-NEXT: ret i64 0
+;
+bb:
+ %load = load i32, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 900), align 4
+ %add = add i32 0, %load
+ store i32 %add, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 900), align 4
+ %load1 = load i32, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 896), align 4
+ %add2 = add i32 %load1, 0
+ %sub = sub i32 %add2, %arg
+ store i32 %sub, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) null, i64 896), align 4
+ ret i64 0
+}