[X86] Teach PostprocessISelDAG to fold ANDrm+TESTrr when chain result is used.
The isOnlyUserOf check prevented the fold if the chain result had any
users. What we really care about is that the data result from the
AND is only used by the TEST, and that the flags results from the ANDs
aren't used at all. It's ok if the chain has users; we just need
to replace those users with the chain from the TESTmr.
Reviewed By: LuoYuanke
Differential Revision: https://reviews.llvm.org/D131117
GitOrigin-RevId: 91e8079cd535a736cb6aabd24fc310a8ec46a07f
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index f88037e..b8afe7e 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1487,12 +1487,13 @@
if ((Opc == X86::TEST8rr || Opc == X86::TEST16rr ||
Opc == X86::TEST32rr || Opc == X86::TEST64rr) &&
N->getOperand(0) == N->getOperand(1) &&
- N->isOnlyUserOf(N->getOperand(0).getNode()) &&
+ N->getOperand(0)->hasNUsesOfValue(2, N->getOperand(0).getResNo()) &&
N->getOperand(0).isMachineOpcode()) {
SDValue And = N->getOperand(0);
unsigned N0Opc = And.getMachineOpcode();
- if (N0Opc == X86::AND8rr || N0Opc == X86::AND16rr ||
- N0Opc == X86::AND32rr || N0Opc == X86::AND64rr) {
+ if ((N0Opc == X86::AND8rr || N0Opc == X86::AND16rr ||
+ N0Opc == X86::AND32rr || N0Opc == X86::AND64rr) &&
+ !And->hasAnyUseOfValue(1)) {
MachineSDNode *Test = CurDAG->getMachineNode(Opc, SDLoc(N),
MVT::i32,
And.getOperand(0),
@@ -1501,8 +1502,9 @@
MadeChange = true;
continue;
}
- if (N0Opc == X86::AND8rm || N0Opc == X86::AND16rm ||
- N0Opc == X86::AND32rm || N0Opc == X86::AND64rm) {
+ if ((N0Opc == X86::AND8rm || N0Opc == X86::AND16rm ||
+ N0Opc == X86::AND32rm || N0Opc == X86::AND64rm) &&
+ !And->hasAnyUseOfValue(1)) {
unsigned NewOpc;
switch (N0Opc) {
case X86::AND8rm: NewOpc = X86::TEST8mr; break;
@@ -1523,7 +1525,8 @@
MVT::i32, MVT::Other, Ops);
CurDAG->setNodeMemRefs(
Test, cast<MachineSDNode>(And.getNode())->memoperands());
- ReplaceUses(N, Test);
+ ReplaceUses(And.getValue(2), SDValue(Test, 1));
+ ReplaceUses(SDValue(N, 0), SDValue(Test, 0));
MadeChange = true;
continue;
}
diff --git a/test/CodeGen/X86/cmp.ll b/test/CodeGen/X86/cmp.ll
index c918e88..2b27d24 100644
--- a/test/CodeGen/X86/cmp.ll
+++ b/test/CodeGen/X86/cmp.ll
@@ -759,14 +759,13 @@
declare i32 @g()
declare i32 @f()
-; FIXME: We should use a test from memory here instead of a load+and.i
-; The store makes sure the chain result of the load is used which prevents the
-; post isel peephole from catching this.
+; Make sure we fold the load+and into a test from memory.
+; The store makes sure the chain result of the load is used which used to
+; prevent the post isel peephole from catching this.
define i1 @fold_test_and_with_chain(i32* %x, i32* %y, i32 %z) {
; CHECK-LABEL: fold_test_and_with_chain:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl (%rdi), %eax # encoding: [0x8b,0x07]
-; CHECK-NEXT: andl %edx, %eax # encoding: [0x21,0xd0]
+; CHECK-NEXT: testl %edx, (%rdi) # encoding: [0x85,0x17]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT: movl %edx, (%rsi) # encoding: [0x89,0x16]
; CHECK-NEXT: retq # encoding: [0xc3]