[X86] If the carry input to an addcarry/subborrow intrinsic is known to be 0, emit a flag-setting ADD/SUB instead of ADC/SBB.
Previously we had to take the carry-in and add -1 to it to set the carry flag so we could use it with ADC/SBB. But if we know it's 0 then we don't need to bother.
This should go a long way towards fixing PR24545.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@348727 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 4590980..a9bc93f 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -21932,10 +21932,19 @@
case ADX: {
SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
SDVTList VTs = DAG.getVTList(Op.getOperand(2).getValueType(), MVT::i32);
- SDValue GenCF = DAG.getNode(X86ISD::ADD, dl, CFVTs, Op.getOperand(1),
- DAG.getConstant(-1, dl, MVT::i8));
- SDValue Res = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(2),
- Op.getOperand(3), GenCF.getValue(1));
+
+ SDValue Res;
+ // If the carry in is zero, then we should just use ADD/SUB instead of
+ // ADC/SBB.
+ if (isNullConstant(Op.getOperand(1))) {
+ Res = DAG.getNode(IntrData->Opc1, dl, VTs, Op.getOperand(2),
+ Op.getOperand(3));
+ } else {
+ SDValue GenCF = DAG.getNode(X86ISD::ADD, dl, CFVTs, Op.getOperand(1),
+ DAG.getConstant(-1, dl, MVT::i8));
+ Res = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(2),
+ Op.getOperand(3), GenCF.getValue(1));
+ }
SDValue SetCC = getSETCC(X86::COND_B, Res.getValue(1), dl, DAG);
SDValue Results[] = { SetCC, Res };
return DAG.getMergeValues(Results, dl);
diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h
index 252d648..e3e2961 100644
--- a/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/lib/Target/X86/X86IntrinsicsInfo.h
@@ -286,10 +286,10 @@
* the alphabetical order.
*/
static const IntrinsicData IntrinsicsWithoutChain[] = {
- X86_INTRINSIC_DATA(addcarry_u32, ADX, X86ISD::ADC, 0),
- X86_INTRINSIC_DATA(addcarry_u64, ADX, X86ISD::ADC, 0),
- X86_INTRINSIC_DATA(addcarryx_u32, ADX, X86ISD::ADC, 0),
- X86_INTRINSIC_DATA(addcarryx_u64, ADX, X86ISD::ADC, 0),
+ X86_INTRINSIC_DATA(addcarry_u32, ADX, X86ISD::ADC, X86ISD::ADD),
+ X86_INTRINSIC_DATA(addcarry_u64, ADX, X86ISD::ADC, X86ISD::ADD),
+ X86_INTRINSIC_DATA(addcarryx_u32, ADX, X86ISD::ADC, X86ISD::ADD),
+ X86_INTRINSIC_DATA(addcarryx_u64, ADX, X86ISD::ADC, X86ISD::ADD),
X86_INTRINSIC_DATA(avx_addsub_pd_256, INTR_TYPE_2OP, X86ISD::ADDSUB, 0),
X86_INTRINSIC_DATA(avx_addsub_ps_256, INTR_TYPE_2OP, X86ISD::ADDSUB, 0),
X86_INTRINSIC_DATA(avx_cmp_pd_256, INTR_TYPE_3OP, X86ISD::CMPP, 0),
@@ -1223,8 +1223,8 @@
X86_INTRINSIC_DATA(ssse3_pmadd_ub_sw_128, INTR_TYPE_2OP, X86ISD::VPMADDUBSW, 0),
X86_INTRINSIC_DATA(ssse3_pmul_hr_sw_128, INTR_TYPE_2OP, X86ISD::MULHRS, 0),
X86_INTRINSIC_DATA(ssse3_pshuf_b_128, INTR_TYPE_2OP, X86ISD::PSHUFB, 0),
- X86_INTRINSIC_DATA(subborrow_u32, ADX, X86ISD::SBB, 0),
- X86_INTRINSIC_DATA(subborrow_u64, ADX, X86ISD::SBB, 0),
+ X86_INTRINSIC_DATA(subborrow_u32, ADX, X86ISD::SBB, X86ISD::SUB),
+ X86_INTRINSIC_DATA(subborrow_u64, ADX, X86ISD::SBB, X86ISD::SUB),
X86_INTRINSIC_DATA(tbm_bextri_u32, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
X86_INTRINSIC_DATA(tbm_bextri_u64, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
X86_INTRINSIC_DATA(vcvtph2ps_128, INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0),
diff --git a/test/CodeGen/X86/adx-intrinsics-upgrade.ll b/test/CodeGen/X86/adx-intrinsics-upgrade.ll
index fcb7165..34f8ff8 100644
--- a/test/CodeGen/X86/adx-intrinsics-upgrade.ll
+++ b/test/CodeGen/X86/adx-intrinsics-upgrade.ll
@@ -90,13 +90,11 @@
define i32 @load_crash(i64* nocapture readonly %a, i64* nocapture readonly %b, i64* %res) {
; CHECK-LABEL: load_crash:
; CHECK: ## %bb.0:
-; CHECK-NEXT: movq (%rdi), %rax ## encoding: [0x48,0x8b,0x07]
-; CHECK-NEXT: xorl %ecx, %ecx ## encoding: [0x31,0xc9]
-; CHECK-NEXT: addb $-1, %cl ## encoding: [0x80,0xc1,0xff]
-; CHECK-NEXT: adcq (%rsi), %rax ## encoding: [0x48,0x13,0x06]
-; CHECK-NEXT: setb %cl ## encoding: [0x0f,0x92,0xc1]
-; CHECK-NEXT: movq %rax, (%rdx) ## encoding: [0x48,0x89,0x02]
-; CHECK-NEXT: movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
+; CHECK-NEXT: movq (%rdi), %rcx ## encoding: [0x48,0x8b,0x0f]
+; CHECK-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
+; CHECK-NEXT: addq (%rsi), %rcx ## encoding: [0x48,0x03,0x0e]
+; CHECK-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0]
+; CHECK-NEXT: movq %rcx, (%rdx) ## encoding: [0x48,0x89,0x0a]
; CHECK-NEXT: retq ## encoding: [0xc3]
%1 = load i64, i64* %a, align 8
%2 = load i64, i64* %b, align 8
@@ -111,9 +109,7 @@
; CHECK-LABEL: allzeros:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
-; CHECK-NEXT: addb $-1, %al ## encoding: [0x04,0xff]
-; CHECK-NEXT: sbbq %rax, %rax ## encoding: [0x48,0x19,0xc0]
-; CHECK-NEXT: andl $1, %eax ## encoding: [0x83,0xe0,0x01]
+; CHECK-NEXT: addq $0, %rax ## encoding: [0x48,0x83,0xc0,0x00]
; CHECK-NEXT: movq %rax, 0 ## encoding: [0x48,0x89,0x04,0x25,0x00,0x00,0x00,0x00]
; CHECK-NEXT: retq ## encoding: [0xc3]
entry:
diff --git a/test/CodeGen/X86/adx-intrinsics.ll b/test/CodeGen/X86/adx-intrinsics.ll
index ba820d0..d6b75e1 100644
--- a/test/CodeGen/X86/adx-intrinsics.ll
+++ b/test/CodeGen/X86/adx-intrinsics.ll
@@ -148,13 +148,11 @@
define i32 @load_crash(i64* nocapture readonly %a, i64* nocapture readonly %b, i64* %res) {
; CHECK-LABEL: load_crash:
; CHECK: ## %bb.0:
-; CHECK-NEXT: movq (%rdi), %rax ## encoding: [0x48,0x8b,0x07]
-; CHECK-NEXT: xorl %ecx, %ecx ## encoding: [0x31,0xc9]
-; CHECK-NEXT: addb $-1, %cl ## encoding: [0x80,0xc1,0xff]
-; CHECK-NEXT: adcq (%rsi), %rax ## encoding: [0x48,0x13,0x06]
-; CHECK-NEXT: setb %cl ## encoding: [0x0f,0x92,0xc1]
-; CHECK-NEXT: movq %rax, (%rdx) ## encoding: [0x48,0x89,0x02]
-; CHECK-NEXT: movzbl %cl, %eax ## encoding: [0x0f,0xb6,0xc1]
+; CHECK-NEXT: movq (%rdi), %rcx ## encoding: [0x48,0x8b,0x0f]
+; CHECK-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
+; CHECK-NEXT: addq (%rsi), %rcx ## encoding: [0x48,0x03,0x0e]
+; CHECK-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0]
+; CHECK-NEXT: movq %rcx, (%rdx) ## encoding: [0x48,0x89,0x0a]
; CHECK-NEXT: retq ## encoding: [0xc3]
%1 = load i64, i64* %a, align 8
%2 = load i64, i64* %b, align 8
@@ -173,9 +171,7 @@
; CHECK-LABEL: allzeros:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
-; CHECK-NEXT: addb $-1, %al ## encoding: [0x04,0xff]
-; CHECK-NEXT: sbbq %rax, %rax ## encoding: [0x48,0x19,0xc0]
-; CHECK-NEXT: andl $1, %eax ## encoding: [0x83,0xe0,0x01]
+; CHECK-NEXT: addq $0, %rax ## encoding: [0x48,0x83,0xc0,0x00]
; CHECK-NEXT: movq %rax, 0 ## encoding: [0x48,0x89,0x04,0x25,0x00,0x00,0x00,0x00]
; CHECK-NEXT: retq ## encoding: [0xc3]
entry: