[DAG] Fold (and X, (add (not Y), Z)) -> (and X, (not (sub Y, Z))). (#141476)
Fixes #140639
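
Both folds are justified by the two's-complement identities
  ~Y + Z == (-Y - 1) + Z == -(Y - Z) - 1 == ~(Y - Z)
  ~Y - Z == (-Y - 1) - Z == -(Y + Z) - 1 == ~(Y + Z)
so the add/sub can be sunk into an explicit NOT. Exposing the NOT lets
targets with an and-not instruction (AArch64 BIC, LoongArch ANDN) select
it, so the combine is only attempted when TLI.hasAndNot() is true for the
node. A sketch of the input pattern in IR (operand names are illustrative,
not taken from the tests):

  %not = xor i32 %y, -1
  %sum = add i32 %not, %z    ; ~y + z
  %and = and i32 %x, %sum    ; -> x & ~(y - z), an and-not of (y - z)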
---------
Co-authored-by: Simon Pilgrim <llvm-dev@redking.me.uk>
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5d62ded..f6d811d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -396,6 +396,8 @@
bool PromoteLoad(SDValue Op);
SDValue foldShiftToAvg(SDNode *N);
+ // Fold `a bitwise op (~b +/- c)` -> `a bitwise op ~(b -/+ c)`.
+ SDValue foldBitwiseOpWithNeg(SDNode *N, const SDLoc &DL, EVT VT);
SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
SDValue RHS, SDValue True, SDValue False,
@@ -7541,6 +7543,12 @@
return DAG.getNode(ISD::AND, DL, VT, X,
DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y, Z), VT));
+ // Fold (and X, (add (not Y), Z)) -> (and X, (not (sub Y, Z)))
+ // Fold (and X, (sub (not Y), Z)) -> (and X, (not (add Y, Z)))
+ if (TLI.hasAndNot(SDValue(N, 0)))
+ if (SDValue Folded = foldBitwiseOpWithNeg(N, DL, VT))
+ return Folded;
+
// Fold (and (srl X, C), 1) -> (srl X, BW-1) for signbit extraction
// If we are shifting down an extended sign bit, see if we can simplify
// this to shifting the MSB directly to expose further simplifications.
@@ -11652,6 +11660,22 @@
return DAG.getNode(FloorISD, SDLoc(N), N->getValueType(0), {A, B});
}
+SDValue DAGCombiner::foldBitwiseOpWithNeg(SDNode *N, const SDLoc &DL, EVT VT) {
+ unsigned Opc = N->getOpcode();
+ SDValue X, Y, Z;
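+ // In two's complement, ~Y + Z == ~(Y - Z), so sink the ADD into the NOT.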
+ if (sd_match(
+ N, m_BitwiseLogic(m_Value(X), m_Add(m_Not(m_Value(Y)), m_Value(Z)))))
+ return DAG.getNode(Opc, DL, VT, X,
+ DAG.getNOT(DL, DAG.getNode(ISD::SUB, DL, VT, Y, Z), VT));
+
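+ // Similarly, ~Y - Z == ~(Y + Z); the NOT must have a single use here.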
+ if (sd_match(N, m_BitwiseLogic(m_Value(X), m_Sub(m_OneUse(m_Not(m_Value(Y))),
+ m_Value(Z)))))
+ return DAG.getNode(Opc, DL, VT, X,
+ DAG.getNOT(DL, DAG.getNode(ISD::ADD, DL, VT, Y, Z), VT));
+
+ return SDValue();
+}
+
/// Generate Min/Max node
SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
SDValue RHS, SDValue True,
diff --git a/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll b/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll
new file mode 100644
index 0000000..5fbf38b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-bitwisenot-fold.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=aarch64-linux | FileCheck %s
+
+define i8 @andnot_add_with_neg_i8(i8 %a0, i8 %a1) {
+; CHECK-LABEL: andnot_add_with_neg_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub w8, w0, w1
+; CHECK-NEXT: bic w0, w0, w8
+; CHECK-NEXT: ret
+ %not = xor i8 %a0, -1
+ %sum = add i8 %not, %a1
+ %and = and i8 %sum, %a0
+ ret i8 %and
+}
+
+define i8 @andnot_sub_with_neg_i8(i8 %a0, i8 %a1) {
+; CHECK-LABEL: andnot_sub_with_neg_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: bic w0, w0, w8
+; CHECK-NEXT: ret
+ %not = xor i8 %a0, -1
+ %diff = sub i8 %not, %a1
+ %and = and i8 %diff, %a0
+ ret i8 %and
+}
+
+define i16 @andnot_add_with_neg_i16(i16 %a0, i16 %a1) {
+; CHECK-LABEL: andnot_add_with_neg_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub w8, w0, w1
+; CHECK-NEXT: bic w0, w0, w8
+; CHECK-NEXT: ret
+ %not = xor i16 %a0, -1
+ %sum = add i16 %not, %a1
+ %and = and i16 %sum, %a0
+ ret i16 %and
+}
+
+define i16 @andnot_sub_with_neg_i16(i16 %a0, i16 %a1) {
+; CHECK-LABEL: andnot_sub_with_neg_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: bic w0, w0, w8
+; CHECK-NEXT: ret
+ %not = xor i16 %a0, -1
+ %diff = sub i16 %not, %a1
+ %and = and i16 %diff, %a0
+ ret i16 %and
+}
+
+define i32 @andnot_add_with_neg_i32(i32 %a0, i32 %a1) {
+; CHECK-LABEL: andnot_add_with_neg_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub w8, w0, w1
+; CHECK-NEXT: bic w0, w0, w8
+; CHECK-NEXT: ret
+ %not = xor i32 %a0, -1
+ %sum = add i32 %not, %a1
+ %and = and i32 %sum, %a0
+ ret i32 %and
+}
+
+define i32 @andnot_sub_with_neg_i32(i32 %a0, i32 %a1) {
+; CHECK-LABEL: andnot_sub_with_neg_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: bic w0, w0, w8
+; CHECK-NEXT: ret
+ %not = xor i32 %a0, -1
+ %diff = sub i32 %not, %a1
+ %and = and i32 %diff, %a0
+ ret i32 %and
+}
+
+define i64 @andnot_add_with_neg_i64(i64 %a0, i64 %a1) {
+; CHECK-LABEL: andnot_add_with_neg_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub x8, x0, x1
+; CHECK-NEXT: bic x0, x0, x8
+; CHECK-NEXT: ret
+ %not = xor i64 %a0, -1
+ %sum = add i64 %not, %a1
+ %and = and i64 %sum, %a0
+ ret i64 %and
+}
+
+define i64 @andnot_sub_with_neg_i64(i64 %a0, i64 %a1) {
+; CHECK-LABEL: andnot_sub_with_neg_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add x8, x0, x1
+; CHECK-NEXT: bic x0, x0, x8
+; CHECK-NEXT: ret
+ %not = xor i64 %a0, -1
+ %diff = sub i64 %not, %a1
+ %and = and i64 %diff, %a0
+ ret i64 %and
+}
diff --git a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
index e564d7b..27be02c 100644
--- a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
@@ -885,9 +885,8 @@
define i8 @test_not_cttz_i8(i8 %a) nounwind {
; LA32R-LABEL: test_not_cttz_i8:
; LA32R: # %bb.0:
-; LA32R-NEXT: nor $a1, $a0, $zero
-; LA32R-NEXT: addi.w $a1, $a1, -1
-; LA32R-NEXT: and $a0, $a0, $a1
+; LA32R-NEXT: addi.w $a1, $a0, 1
+; LA32R-NEXT: andn $a0, $a0, $a1
; LA32R-NEXT: srli.w $a1, $a0, 1
; LA32R-NEXT: andi $a1, $a1, 85
; LA32R-NEXT: sub.w $a0, $a0, $a1
@@ -921,9 +920,8 @@
define i16 @test_not_cttz_i16(i16 %a) nounwind {
; LA32R-LABEL: test_not_cttz_i16:
; LA32R: # %bb.0:
-; LA32R-NEXT: nor $a1, $a0, $zero
-; LA32R-NEXT: addi.w $a1, $a1, -1
-; LA32R-NEXT: and $a0, $a0, $a1
+; LA32R-NEXT: addi.w $a1, $a0, 1
+; LA32R-NEXT: andn $a0, $a0, $a1
; LA32R-NEXT: srli.w $a1, $a0, 1
; LA32R-NEXT: lu12i.w $a2, 5
; LA32R-NEXT: ori $a2, $a2, 1365