[RISCV] Optimize (and (icmp x, 0, neq), (icmp y, 0, neq)) utilizing zicond extension
PR #166469
```
%1 = icmp x, 0, neq
%2 = icmp y, 0, neq
%3 = and %1, %2
```
Originally lowered to:
```
%1 = snez x
%2 = snez y
%3 = and %1, %2
```
With optimization:
```
%1 = snez x
%3 = czero.eqz %1, y
```diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c1d3841..1977d33 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16117,6 +16117,46 @@
return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
}
+// (and (i1) f, (setcc c, 0, ne)) -> (czero.eqz f, c)
+// (and (i1) f, (setcc c, 0, eq)) -> (czero.nez f, c)
+// (and (setcc c, 0, ne), (i1) g) -> (czero.eqz g, c)
+// (and (setcc c, 0, eq), (i1) g) -> (czero.nez g, c)
+static SDValue combineANDOfSETCCToCZERO(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ if (!Subtarget.hasCZEROLike())
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ auto IsEqualCompZero = [](SDValue &V) -> bool {
+ if (V.getOpcode() == ISD::SETCC && isNullConstant(V.getOperand(1))) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(V.getOperand(2))->get();
+ if (ISD::isIntEqualitySetCC(CC))
+ return true;
+ }
+ return false;
+ };
+
+ if (!IsEqualCompZero(N0) || !N0.hasOneUse())
+ std::swap(N0, N1);
+ if (!IsEqualCompZero(N0) || !N0.hasOneUse())
+ return SDValue();
+
+ KnownBits Known = DAG.computeKnownBits(N1);
+ if (Known.getMaxValue().ugt(1))
+ return SDValue();
+
+ unsigned CzeroOpcode =
+ (cast<CondCodeSDNode>(N0.getOperand(2))->get() == ISD::SETNE)
+ ? RISCVISD::CZERO_EQZ
+ : RISCVISD::CZERO_NEZ;
+
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+ return DAG.getNode(CzeroOpcode, DL, VT, N1, N0.getOperand(0));
+}
+
static SDValue reduceANDOfAtomicLoad(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
@@ -16180,7 +16220,9 @@
if (SDValue V = reverseZExtICmpCombine(N, DAG, Subtarget))
return V;
-
+ if (DCI.isAfterLegalizeDAG())
+ if (SDValue V = combineANDOfSETCCToCZERO(N, DAG, Subtarget))
+ return V;
if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
return V;
if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
diff --git a/llvm/test/CodeGen/RISCV/xaluo.ll b/llvm/test/CodeGen/RISCV/xaluo.ll
index bf6802d..93b68b0 100644
--- a/llvm/test/CodeGen/RISCV/xaluo.ll
+++ b/llvm/test/CodeGen/RISCV/xaluo.ll
@@ -1834,13 +1834,12 @@
; RV32ZICOND-NEXT: mul a5, a3, a0
; RV32ZICOND-NEXT: mul a6, a1, a2
; RV32ZICOND-NEXT: mulhu a7, a0, a2
-; RV32ZICOND-NEXT: snez t0, a3
-; RV32ZICOND-NEXT: mulhu a3, a3, a0
-; RV32ZICOND-NEXT: mul t1, a0, a2
-; RV32ZICOND-NEXT: mulhu a0, a1, a2
-; RV32ZICOND-NEXT: snez a1, a1
; RV32ZICOND-NEXT: add a5, a6, a5
-; RV32ZICOND-NEXT: and a1, a1, t0
+; RV32ZICOND-NEXT: snez a6, a3
+; RV32ZICOND-NEXT: mulhu a3, a3, a0
+; RV32ZICOND-NEXT: mul t0, a0, a2
+; RV32ZICOND-NEXT: mulhu a0, a1, a2
+; RV32ZICOND-NEXT: czero.eqz a1, a6, a1
; RV32ZICOND-NEXT: snez a0, a0
; RV32ZICOND-NEXT: snez a2, a3
; RV32ZICOND-NEXT: add a5, a7, a5
@@ -1848,7 +1847,7 @@
; RV32ZICOND-NEXT: sltu a1, a5, a7
; RV32ZICOND-NEXT: or a0, a0, a2
; RV32ZICOND-NEXT: or a0, a0, a1
-; RV32ZICOND-NEXT: sw t1, 0(a4)
+; RV32ZICOND-NEXT: sw t0, 0(a4)
; RV32ZICOND-NEXT: sw a5, 4(a4)
; RV32ZICOND-NEXT: ret
;
@@ -3690,11 +3689,10 @@
; RV32ZICOND-NEXT: mul a5, a1, a2
; RV32ZICOND-NEXT: snez a6, a3
; RV32ZICOND-NEXT: add a4, a5, a4
-; RV32ZICOND-NEXT: snez a5, a1
-; RV32ZICOND-NEXT: and a5, a5, a6
-; RV32ZICOND-NEXT: mulhu a6, a1, a2
-; RV32ZICOND-NEXT: snez a6, a6
-; RV32ZICOND-NEXT: or a5, a5, a6
+; RV32ZICOND-NEXT: mulhu a5, a1, a2
+; RV32ZICOND-NEXT: czero.eqz a6, a6, a1
+; RV32ZICOND-NEXT: snez a5, a5
+; RV32ZICOND-NEXT: or a5, a6, a5
; RV32ZICOND-NEXT: mulhu a6, a0, a2
; RV32ZICOND-NEXT: add a4, a6, a4
; RV32ZICOND-NEXT: sltu a4, a4, a6
@@ -3783,18 +3781,17 @@
; RV32ZICOND: # %bb.0: # %entry
; RV32ZICOND-NEXT: mul a4, a3, a0
; RV32ZICOND-NEXT: mul a5, a1, a2
-; RV32ZICOND-NEXT: mulhu a6, a0, a2
+; RV32ZICOND-NEXT: add a4, a5, a4
+; RV32ZICOND-NEXT: mulhu a5, a0, a2
; RV32ZICOND-NEXT: mulhu a0, a3, a0
; RV32ZICOND-NEXT: snez a3, a3
; RV32ZICOND-NEXT: mulhu a2, a1, a2
-; RV32ZICOND-NEXT: snez a1, a1
-; RV32ZICOND-NEXT: add a4, a5, a4
-; RV32ZICOND-NEXT: and a1, a1, a3
+; RV32ZICOND-NEXT: czero.eqz a1, a3, a1
; RV32ZICOND-NEXT: snez a2, a2
; RV32ZICOND-NEXT: snez a0, a0
-; RV32ZICOND-NEXT: add a4, a6, a4
+; RV32ZICOND-NEXT: add a4, a5, a4
; RV32ZICOND-NEXT: or a1, a1, a2
-; RV32ZICOND-NEXT: sltu a2, a4, a6
+; RV32ZICOND-NEXT: sltu a2, a4, a5
; RV32ZICOND-NEXT: or a0, a1, a0
; RV32ZICOND-NEXT: or a0, a0, a2
; RV32ZICOND-NEXT: xori a0, a0, 1
@@ -5156,18 +5153,17 @@
; RV32ZICOND: # %bb.0: # %entry
; RV32ZICOND-NEXT: mul a4, a3, a0
; RV32ZICOND-NEXT: mul a5, a1, a2
-; RV32ZICOND-NEXT: mulhu a6, a0, a2
+; RV32ZICOND-NEXT: add a4, a5, a4
+; RV32ZICOND-NEXT: mulhu a5, a0, a2
; RV32ZICOND-NEXT: mulhu a0, a3, a0
; RV32ZICOND-NEXT: snez a3, a3
; RV32ZICOND-NEXT: mulhu a2, a1, a2
-; RV32ZICOND-NEXT: snez a1, a1
-; RV32ZICOND-NEXT: add a4, a5, a4
-; RV32ZICOND-NEXT: and a1, a1, a3
+; RV32ZICOND-NEXT: czero.eqz a1, a3, a1
; RV32ZICOND-NEXT: snez a2, a2
; RV32ZICOND-NEXT: snez a0, a0
-; RV32ZICOND-NEXT: add a4, a6, a4
+; RV32ZICOND-NEXT: add a4, a5, a4
; RV32ZICOND-NEXT: or a1, a1, a2
-; RV32ZICOND-NEXT: sltu a2, a4, a6
+; RV32ZICOND-NEXT: sltu a2, a4, a5
; RV32ZICOND-NEXT: or a0, a1, a0
; RV32ZICOND-NEXT: or a0, a0, a2
; RV32ZICOND-NEXT: beqz a0, .LBB64_2
diff --git a/llvm/test/CodeGen/RISCV/zicond-opts.ll b/llvm/test/CodeGen/RISCV/zicond-opts.ll
index 305ab93..c6d7298 100644
--- a/llvm/test/CodeGen/RISCV/zicond-opts.ll
+++ b/llvm/test/CodeGen/RISCV/zicond-opts.ll
@@ -7,17 +7,15 @@
; RV32ZICOND-LABEL: icmp_and:
; RV32ZICOND: # %bb.0:
; RV32ZICOND-NEXT: or a2, a2, a3
+; RV32ZICOND-NEXT: snez a2, a2
; RV32ZICOND-NEXT: or a0, a0, a1
-; RV32ZICOND-NEXT: snez a1, a2
-; RV32ZICOND-NEXT: snez a0, a0
-; RV32ZICOND-NEXT: and a0, a0, a1
+; RV32ZICOND-NEXT: czero.eqz a0, a2, a0
; RV32ZICOND-NEXT: ret
;
; RV64ZICOND-LABEL: icmp_and:
; RV64ZICOND: # %bb.0:
; RV64ZICOND-NEXT: snez a1, a1
-; RV64ZICOND-NEXT: snez a0, a0
-; RV64ZICOND-NEXT: and a0, a0, a1
+; RV64ZICOND-NEXT: czero.eqz a0, a1, a0
; RV64ZICOND-NEXT: ret
%3 = icmp ne i64 %y, 0
%4 = icmp ne i64 %x, 0
@@ -26,27 +24,135 @@
ret i32 %6
}
+; Make sure we choose to replace the single use icmp
+define i32 @icmp_and_x_multiple_uses(i64 %x, i64 %y) {
+; RV32ZICOND-LABEL: icmp_and_x_multiple_uses:
+; RV32ZICOND: # %bb.0:
+; RV32ZICOND-NEXT: or a2, a2, a3
+; RV32ZICOND-NEXT: or a0, a0, a1
+; RV32ZICOND-NEXT: snez a0, a0
+; RV32ZICOND-NEXT: czero.eqz a1, a0, a2
+; RV32ZICOND-NEXT: add a0, a1, a0
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: icmp_and_x_multiple_uses:
+; RV64ZICOND: # %bb.0:
+; RV64ZICOND-NEXT: snez a0, a0
+; RV64ZICOND-NEXT: czero.eqz a1, a0, a1
+; RV64ZICOND-NEXT: add a0, a1, a0
+; RV64ZICOND-NEXT: ret
+ %3 = icmp ne i64 %y, 0
+ %4 = icmp ne i64 %x, 0
+ %5 = and i1 %4, %3
+ %6 = zext i1 %5 to i32
+ %7 = zext i1 %4 to i32
+ %8 = add i32 %6, %7
+ ret i32 %8
+}
+
+; Make sure we choose to replace the single use icmp
+define i32 @icmp_and_y_multiple_uses(i64 %x, i64 %y) {
+; RV32ZICOND-LABEL: icmp_and_y_multiple_uses:
+; RV32ZICOND: # %bb.0:
+; RV32ZICOND-NEXT: or a2, a2, a3
+; RV32ZICOND-NEXT: snez a2, a2
+; RV32ZICOND-NEXT: or a0, a0, a1
+; RV32ZICOND-NEXT: czero.eqz a0, a2, a0
+; RV32ZICOND-NEXT: add a0, a0, a2
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: icmp_and_y_multiple_uses:
+; RV64ZICOND: # %bb.0:
+; RV64ZICOND-NEXT: snez a1, a1
+; RV64ZICOND-NEXT: czero.eqz a0, a1, a0
+; RV64ZICOND-NEXT: add a0, a0, a1
+; RV64ZICOND-NEXT: ret
+ %3 = icmp ne i64 %y, 0
+ %4 = icmp ne i64 %x, 0
+ %5 = and i1 %4, %3
+ %6 = zext i1 %5 to i32
+ %7 = zext i1 %3 to i32
+ %8 = add i32 %6, %7
+ ret i32 %8
+}
+
+; Both icmp's have multiple uses, don't optimize
+define i32 @icmp_and_xy_multiple_uses(i64 %x, i64 %y) {
+; RV32ZICOND-LABEL: icmp_and_xy_multiple_uses:
+; RV32ZICOND: # %bb.0:
+; RV32ZICOND-NEXT: or a2, a2, a3
+; RV32ZICOND-NEXT: or a0, a0, a1
+; RV32ZICOND-NEXT: snez a1, a2
+; RV32ZICOND-NEXT: snez a0, a0
+; RV32ZICOND-NEXT: and a2, a0, a1
+; RV32ZICOND-NEXT: add a0, a1, a0
+; RV32ZICOND-NEXT: add a0, a2, a0
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: icmp_and_xy_multiple_uses:
+; RV64ZICOND: # %bb.0:
+; RV64ZICOND-NEXT: snez a1, a1
+; RV64ZICOND-NEXT: snez a0, a0
+; RV64ZICOND-NEXT: and a2, a0, a1
+; RV64ZICOND-NEXT: add a0, a1, a0
+; RV64ZICOND-NEXT: add a0, a2, a0
+; RV64ZICOND-NEXT: ret
+ %3 = icmp ne i64 %y, 0
+ %4 = icmp ne i64 %x, 0
+ %5 = and i1 %4, %3
+ %6 = zext i1 %5 to i32
+ %7 = zext i1 %3 to i32
+ %8 = zext i1 %4 to i32
+ %9 = add i32 %6, %7
+ %10 = add i32 %9, %8
+ ret i32 %10
+}
+
+
+; (and (icmp x, 0, ne), (icmp y, 0, ne)) -> (czero.eqz (icmp x, 0, ne), y)
+define i32 @icmp_and_select(i64 %x, i64 %y, i32 %z) {
+; RV32ZICOND-LABEL: icmp_and_select:
+; RV32ZICOND: # %bb.0:
+; RV32ZICOND-NEXT: sgtz a5, a3
+; RV32ZICOND-NEXT: snez a2, a2
+; RV32ZICOND-NEXT: czero.eqz a5, a5, a3
+; RV32ZICOND-NEXT: czero.nez a2, a2, a3
+; RV32ZICOND-NEXT: or a2, a2, a5
+; RV32ZICOND-NEXT: or a0, a0, a1
+; RV32ZICOND-NEXT: czero.eqz a0, a2, a0
+; RV32ZICOND-NEXT: czero.eqz a0, a4, a0
+; RV32ZICOND-NEXT: ret
+;
+; RV64ZICOND-LABEL: icmp_and_select:
+; RV64ZICOND: # %bb.0:
+; RV64ZICOND-NEXT: sgtz a1, a1
+; RV64ZICOND-NEXT: czero.eqz a0, a1, a0
+; RV64ZICOND-NEXT: czero.eqz a0, a2, a0
+; RV64ZICOND-NEXT: ret
+ %3 = icmp sgt i64 %y, 0
+ %4 = icmp ne i64 %x, 0
+ %5 = and i1 %4, %3
+ %6 = select i1 %5, i32 %z, i32 0
+ ret i32 %6
+}
+
; (and (and (icmp x, 0, ne), (icmp y, 0, ne)), (icmp z, 0, ne)) -> (czero.eqz (czero.eqz (icmp x, 0, ne), y), z)
define i32 @icmp_and_and(i64 %x, i64 %y, i64 %z) {
; RV32ZICOND-LABEL: icmp_and_and:
; RV32ZICOND: # %bb.0:
; RV32ZICOND-NEXT: or a2, a2, a3
; RV32ZICOND-NEXT: or a0, a0, a1
-; RV32ZICOND-NEXT: or a4, a4, a5
-; RV32ZICOND-NEXT: snez a1, a2
; RV32ZICOND-NEXT: snez a0, a0
-; RV32ZICOND-NEXT: and a0, a1, a0
-; RV32ZICOND-NEXT: snez a1, a4
-; RV32ZICOND-NEXT: and a0, a1, a0
+; RV32ZICOND-NEXT: czero.eqz a0, a0, a2
+; RV32ZICOND-NEXT: or a4, a4, a5
+; RV32ZICOND-NEXT: czero.eqz a0, a0, a4
; RV32ZICOND-NEXT: ret
;
; RV64ZICOND-LABEL: icmp_and_and:
; RV64ZICOND: # %bb.0:
-; RV64ZICOND-NEXT: snez a1, a1
; RV64ZICOND-NEXT: snez a0, a0
-; RV64ZICOND-NEXT: and a0, a1, a0
-; RV64ZICOND-NEXT: snez a1, a2
-; RV64ZICOND-NEXT: and a0, a1, a0
+; RV64ZICOND-NEXT: czero.eqz a0, a0, a1
+; RV64ZICOND-NEXT: czero.eqz a0, a0, a2
; RV64ZICOND-NEXT: ret
%4 = icmp ne i64 %y, 0
%5 = icmp ne i64 %x, 0