[X86] Remove truncation handling from EmitTest. Replace it with a DAG combine.
I'd like to try to move a lot of the flag matching out of EmitTest and push it to isel or isel preprocessing. This is a step towards that.
The test-shrink-bug.ll change is an improvement because we are no longer interfering with test shrink handling in isel.
The pr34137.ll change is a regression, but the IR came from -O0 and was not reduced by InstCombine. So it contains a lot of redundancies like duplicate loads that made it combine poorly.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@349315 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 704b62d..5e7431f 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -18581,27 +18581,7 @@
unsigned Opcode = 0;
unsigned NumOperands = 0;
- // Truncate operations may prevent the merge of the SETCC instruction
- // and the arithmetic instruction before it. Attempt to truncate the operands
- // of the arithmetic instruction and use a reduced bit-width instruction.
- bool NeedTruncation = false;
SDValue ArithOp = Op;
- if (Op->getOpcode() == ISD::TRUNCATE && Op->hasOneUse()) {
- SDValue Arith = Op->getOperand(0);
- // Both the trunc and the arithmetic op need to have one user each.
- if (Arith->hasOneUse())
- switch (Arith.getOpcode()) {
- default: break;
- case ISD::ADD:
- case ISD::SUB:
- case ISD::AND:
- case ISD::OR:
- case ISD::XOR: {
- NeedTruncation = true;
- ArithOp = Arith;
- }
- }
- }
// Sometimes flags can be set either with an AND or with an SRL/SHL
// instruction. SRL/SHL variant should be preferred for masks longer than this
@@ -18769,36 +18749,6 @@
break;
}
- // If we found that truncation is beneficial, perform the truncation and
- // update 'Op'.
- if (NeedTruncation) {
- EVT VT = Op.getValueType();
- SDValue WideVal = Op->getOperand(0);
- EVT WideVT = WideVal.getValueType();
- unsigned ConvertedOp = 0;
- // Use a target machine opcode to prevent further DAGCombine
- // optimizations that may separate the arithmetic operations
- // from the setcc node.
- switch (WideVal.getOpcode()) {
- default: break;
- case ISD::ADD: ConvertedOp = X86ISD::ADD; break;
- case ISD::SUB: ConvertedOp = X86ISD::SUB; break;
- case ISD::AND: ConvertedOp = X86ISD::AND; break;
- case ISD::OR: ConvertedOp = X86ISD::OR; break;
- case ISD::XOR: ConvertedOp = X86ISD::XOR; break;
- }
-
- if (ConvertedOp) {
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (TLI.isOperationLegal(WideVal.getOpcode(), WideVT)) {
- SDValue V0 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(0));
- SDValue V1 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(1));
- SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
- Op = DAG.getNode(ConvertedOp, dl, VTs, V0, V1);
- }
- }
- }
-
if (Opcode == 0) {
// Emit a CMP with 0, which is the TEST pattern.
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
@@ -39956,6 +39906,110 @@
return SDValue();
}
+static bool needCarryOrOverflowFlag(SDValue Flags) {
+ assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!");
+
+ for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+
+ X86::CondCode CC;
+ switch (User->getOpcode()) {
+ default:
+ // Be conservative.
+ return true;
+ case X86ISD::SETCC:
+ case X86ISD::SETCC_CARRY:
+ CC = (X86::CondCode)User->getConstantOperandVal(0);
+ break;
+ case X86ISD::BRCOND:
+ CC = (X86::CondCode)User->getConstantOperandVal(2);
+ break;
+ case X86ISD::CMOV:
+ CC = (X86::CondCode)User->getConstantOperandVal(3);
+ break;
+ }
+
+ switch (CC) {
+ default: break;
+ case X86::COND_A: case X86::COND_AE:
+ case X86::COND_B: case X86::COND_BE:
+ case X86::COND_O: case X86::COND_NO:
+ case X86::COND_G: case X86::COND_GE:
+ case X86::COND_L: case X86::COND_LE:
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static SDValue combineCMP(SDNode *N, SelectionDAG &DAG) {
+ // Only handle test patterns.
+ if (!isNullConstant(N->getOperand(1)))
+ return SDValue();
+
+ // If we have a CMP of a truncated binop, see if we can make a smaller binop
+ // and use its flags directly.
+ // TODO: Maybe we should try promoting compares that only use the zero flag
+ // first if we can prove the upper bits with computeKnownBits?
+ SDValue Op = N->getOperand(0);
+ EVT VT = Op.getValueType();
+
+ // Look for a truncate with a single use.
+ if (Op.getOpcode() != ISD::TRUNCATE || !Op.hasOneUse())
+ return SDValue();
+
+ Op = Op.getOperand(0);
+
+ // Arithmetic op can only have one use.
+ if (!Op.hasOneUse())
+ return SDValue();
+
+ unsigned NewOpc;
+ switch (Op.getOpcode()) {
+ default: return SDValue();
+ case ISD::AND:
+ // Skip and with constant. We have special handling for and with immediate
+ // during isel to generate test instructions.
+ if (isa<ConstantSDNode>(Op.getOperand(1)))
+ return SDValue();
+ NewOpc = X86ISD::AND;
+ break;
+ case ISD::OR: NewOpc = X86ISD::OR; break;
+ case ISD::XOR: NewOpc = X86ISD::XOR; break;
+ case ISD::ADD:
+ // If the carry or overflow flag is used, we can't truncate.
+ if (needCarryOrOverflowFlag(SDValue(N, 0)))
+ return SDValue();
+ NewOpc = X86ISD::ADD;
+ break;
+ case ISD::SUB:
+ // If the carry or overflow flag is used, we can't truncate.
+ if (needCarryOrOverflowFlag(SDValue(N, 0)))
+ return SDValue();
+ NewOpc = X86ISD::SUB;
+ break;
+ }
+
+ // We found an op we can narrow. Truncate its inputs.
+ SDLoc dl(N);
+ SDValue Op0 = DAG.getNode(ISD::TRUNCATE, dl, VT, Op.getOperand(0));
+ SDValue Op1 = DAG.getNode(ISD::TRUNCATE, dl, VT, Op.getOperand(1));
+
+  // Use an X86-specific opcode to avoid DAG combine messing with it.
+ SDVTList VTs = DAG.getVTList(VT, MVT::i32);
+ Op = DAG.getNode(NewOpc, dl, VTs, Op0, Op1);
+
+ // For AND, keep a CMP so that we can match the test pattern.
+ if (NewOpc == X86ISD::AND)
+ return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
+ DAG.getConstant(0, dl, VT));
+
+ // Return the flags.
+ return Op.getValue(1);
+}
+
static SDValue combineSBB(SDNode *N, SelectionDAG &DAG) {
if (SDValue Flags = combineCarryThroughADD(N->getOperand(2))) {
MVT VT = N->getSimpleValueType(0);
@@ -41069,6 +41123,7 @@
case X86ISD::SHRUNKBLEND: return combineSelect(N, DAG, DCI, Subtarget);
case ISD::BITCAST: return combineBitcast(N, DAG, DCI, Subtarget);
case X86ISD::CMOV: return combineCMov(N, DAG, DCI, Subtarget);
+ case X86ISD::CMP: return combineCMP(N, DAG);
case ISD::ADD: return combineAdd(N, DAG, Subtarget);
case ISD::SUB: return combineSub(N, DAG, Subtarget);
case X86ISD::SBB: return combineSBB(N, DAG);
diff --git a/test/CodeGen/X86/pr34137.ll b/test/CodeGen/X86/pr34137.ll
index 4e81cb8..1a85a66 100644
--- a/test/CodeGen/X86/pr34137.ll
+++ b/test/CodeGen/X86/pr34137.ll
@@ -11,11 +11,12 @@
; CHECK-NEXT: movzwl {{.*}}(%rip), %eax
; CHECK-NEXT: movzwl {{.*}}(%rip), %ecx
; CHECK-NEXT: andl %eax, %ecx
-; CHECK-NEXT: andl %eax, %ecx
-; CHECK-NEXT: movzwl %cx, %ecx
-; CHECK-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movl %eax, %edx
+; CHECK-NEXT: andl %ecx, %edx
+; CHECK-NEXT: movzwl %dx, %edx
+; CHECK-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: xorl %edx, %edx
-; CHECK-NEXT: testw %cx, %cx
+; CHECK-NEXT: testw %cx, %ax
; CHECK-NEXT: sete %dl
; CHECK-NEXT: andl %eax, %edx
; CHECK-NEXT: movq %rdx, {{.*}}(%rip)
diff --git a/test/CodeGen/X86/test-shrink-bug.ll b/test/CodeGen/X86/test-shrink-bug.ll
index 4fba792..ca2316c 100644
--- a/test/CodeGen/X86/test-shrink-bug.ll
+++ b/test/CodeGen/X86/test-shrink-bug.ll
@@ -68,7 +68,7 @@
; CHECK-X64: # %bb.0:
; CHECK-X64-NEXT: pushq %rax
; CHECK-X64-NEXT: .cfi_def_cfa_offset 16
-; CHECK-X64-NEXT: testw $263, %di # imm = 0x107
+; CHECK-X64-NEXT: testl $263, %edi # imm = 0x107
; CHECK-X64-NEXT: je .LBB1_2
; CHECK-X64-NEXT: # %bb.1:
; CHECK-X64-NEXT: pand {{.*}}(%rip), %xmm0