[CodeGenPrepare] limit formation of overflow intrinsics (PR41129)

This is probably a bigger limitation than necessary, but since we don't have any evidence yet
that this transform led to real-world perf improvements rather than regressions, I'm making a
quick, blunt fix.

In the motivating x86 example from:
https://bugs.llvm.org/show_bug.cgi?id=41129
...and shown in the regression test, we want to avoid an extra instruction in the dominating
block because that could be costly.

The x86 LSR test diff is reversing the changes from D57789. There's no evidence yet that one
version is any better than the other.

Differential Revision: https://reviews.llvm.org/D59602

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@356665 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index 85c1dc4..e570e98 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -1181,10 +1181,14 @@
     if (!MathDominates && !DT.dominates(Cmp, BO))
       return false;
 
-    // Check that the insertion doesn't create a value that is live across more
-    // than two blocks, so to minimise the increase in register pressure.
     BasicBlock *MathBB = BO->getParent(), *CmpBB = Cmp->getParent();
     if (MathBB != CmpBB) {
+      // Avoid hoisting an extra op into a dominating block and creating a
+      // potentially longer critical path.
+      if (!MathDominates)
+        return false;
+      // Check that the insertion doesn't create a value that is live across
+      // more than two blocks, so to minimise the increase in register pressure.
       BasicBlock *Dominator = MathDominates ? MathBB : CmpBB;
       BasicBlock *Dominated = MathDominates ? CmpBB : MathBB;
       auto Successors = successors(Dominator);
diff --git a/test/CodeGen/X86/cgp-usubo.ll b/test/CodeGen/X86/cgp-usubo.ll
index ac52622..2bdf7ce 100644
--- a/test/CodeGen/X86/cgp-usubo.ll
+++ b/test/CodeGen/X86/cgp-usubo.ll
@@ -211,16 +211,16 @@
 ; CHECK-LABEL: PR41129:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movq (%rdi), %rax
-; CHECK-NEXT:    movq %rax, %rcx
-; CHECK-NEXT:    subq $1, %rcx
-; CHECK-NEXT:    jae .LBB10_1
-; CHECK-NEXT:  # %bb.2: # %true
-; CHECK-NEXT:    movq %rcx, (%rdi)
-; CHECK-NEXT:    retq
-; CHECK-NEXT:  .LBB10_1: # %false
+; CHECK-NEXT:    testq %rax, %rax
+; CHECK-NEXT:    je .LBB10_2
+; CHECK-NEXT:  # %bb.1: # %false
 ; CHECK-NEXT:    andl $7, %eax
 ; CHECK-NEXT:    movq %rax, (%rdi)
 ; CHECK-NEXT:    retq
+; CHECK-NEXT:  .LBB10_2: # %true
+; CHECK-NEXT:    decq %rax
+; CHECK-NEXT:    movq %rax, (%rdi)
+; CHECK-NEXT:    retq
 entry:
   %key = load i64, i64* %p64, align 8
   %cond17 = icmp eq i64 %key, 0
diff --git a/test/CodeGen/X86/lsr-loop-exit-cond.ll b/test/CodeGen/X86/lsr-loop-exit-cond.ll
index 512a21c..7a26623 100644
--- a/test/CodeGen/X86/lsr-loop-exit-cond.ll
+++ b/test/CodeGen/X86/lsr-loop-exit-cond.ll
@@ -16,11 +16,11 @@
 ; GENERIC-NEXT:    movl (%rdx), %eax
 ; GENERIC-NEXT:    movl 4(%rdx), %ebx
 ; GENERIC-NEXT:    decl %ecx
-; GENERIC-NEXT:    leaq 20(%rdx), %r11
+; GENERIC-NEXT:    leaq 20(%rdx), %r14
 ; GENERIC-NEXT:    movq _Te0@{{.*}}(%rip), %r9
 ; GENERIC-NEXT:    movq _Te1@{{.*}}(%rip), %r8
 ; GENERIC-NEXT:    movq _Te3@{{.*}}(%rip), %r10
-; GENERIC-NEXT:    movq %rcx, %r14
+; GENERIC-NEXT:    movq %rcx, %r11
 ; GENERIC-NEXT:    jmp LBB0_1
 ; GENERIC-NEXT:    .p2align 4, 0x90
 ; GENERIC-NEXT:  LBB0_2: ## %bb1
@@ -29,13 +29,14 @@
 ; GENERIC-NEXT:    shrl $16, %ebx
 ; GENERIC-NEXT:    movzbl %bl, %ebx
 ; GENERIC-NEXT:    xorl (%r8,%rbx,4), %eax
-; GENERIC-NEXT:    xorl -4(%r11), %eax
+; GENERIC-NEXT:    xorl -4(%r14), %eax
 ; GENERIC-NEXT:    shrl $24, %edi
 ; GENERIC-NEXT:    movzbl %bpl, %ebx
 ; GENERIC-NEXT:    movl (%r10,%rbx,4), %ebx
 ; GENERIC-NEXT:    xorl (%r9,%rdi,4), %ebx
-; GENERIC-NEXT:    xorl (%r11), %ebx
-; GENERIC-NEXT:    addq $16, %r11
+; GENERIC-NEXT:    xorl (%r14), %ebx
+; GENERIC-NEXT:    decq %r11
+; GENERIC-NEXT:    addq $16, %r14
 ; GENERIC-NEXT:  LBB0_1: ## %bb
 ; GENERIC-NEXT:    ## =>This Inner Loop Header: Depth=1
 ; GENERIC-NEXT:    movzbl %al, %edi
@@ -46,16 +47,16 @@
 ; GENERIC-NEXT:    movzbl %bpl, %ebp
 ; GENERIC-NEXT:    movl (%r8,%rbp,4), %ebp
 ; GENERIC-NEXT:    xorl (%r9,%rax,4), %ebp
-; GENERIC-NEXT:    xorl -12(%r11), %ebp
+; GENERIC-NEXT:    xorl -12(%r14), %ebp
 ; GENERIC-NEXT:    shrl $24, %ebx
 ; GENERIC-NEXT:    movl (%r10,%rdi,4), %edi
 ; GENERIC-NEXT:    xorl (%r9,%rbx,4), %edi
-; GENERIC-NEXT:    xorl -8(%r11), %edi
+; GENERIC-NEXT:    xorl -8(%r14), %edi
 ; GENERIC-NEXT:    movl %ebp, %eax
 ; GENERIC-NEXT:    shrl $24, %eax
 ; GENERIC-NEXT:    movl (%r9,%rax,4), %eax
-; GENERIC-NEXT:    subq $1, %r14
-; GENERIC-NEXT:    jae LBB0_2
+; GENERIC-NEXT:    testq %r11, %r11
+; GENERIC-NEXT:    jne LBB0_2
 ; GENERIC-NEXT:  ## %bb.3: ## %bb2
 ; GENERIC-NEXT:    shlq $4, %rcx
 ; GENERIC-NEXT:    andl $-16777216, %eax ## imm = 0xFF000000
@@ -98,26 +99,27 @@
 ; ATOM-NEXT:    ## kill: def $ecx killed $ecx def $rcx
 ; ATOM-NEXT:    movl (%rdx), %r15d
 ; ATOM-NEXT:    movl 4(%rdx), %eax
-; ATOM-NEXT:    leaq 20(%rdx), %r11
+; ATOM-NEXT:    leaq 20(%rdx), %r14
 ; ATOM-NEXT:    movq _Te0@{{.*}}(%rip), %r9
 ; ATOM-NEXT:    movq _Te1@{{.*}}(%rip), %r8
 ; ATOM-NEXT:    movq _Te3@{{.*}}(%rip), %r10
 ; ATOM-NEXT:    decl %ecx
-; ATOM-NEXT:    movq %rcx, %r14
+; ATOM-NEXT:    movq %rcx, %r11
 ; ATOM-NEXT:    jmp LBB0_1
 ; ATOM-NEXT:    .p2align 4, 0x90
 ; ATOM-NEXT:  LBB0_2: ## %bb1
 ; ATOM-NEXT:    ## in Loop: Header=BB0_1 Depth=1
 ; ATOM-NEXT:    shrl $16, %eax
 ; ATOM-NEXT:    shrl $24, %edi
-; ATOM-NEXT:    movzbl %al, %eax
-; ATOM-NEXT:    xorl (%r8,%rax,4), %r15d
+; ATOM-NEXT:    decq %r11
+; ATOM-NEXT:    movzbl %al, %ebp
 ; ATOM-NEXT:    movzbl %bl, %eax
 ; ATOM-NEXT:    movl (%r10,%rax,4), %eax
-; ATOM-NEXT:    xorl -4(%r11), %r15d
+; ATOM-NEXT:    xorl (%r8,%rbp,4), %r15d
 ; ATOM-NEXT:    xorl (%r9,%rdi,4), %eax
-; ATOM-NEXT:    xorl (%r11), %eax
-; ATOM-NEXT:    addq $16, %r11
+; ATOM-NEXT:    xorl -4(%r14), %r15d
+; ATOM-NEXT:    xorl (%r14), %eax
+; ATOM-NEXT:    addq $16, %r14
 ; ATOM-NEXT:  LBB0_1: ## %bb
 ; ATOM-NEXT:    ## =>This Inner Loop Header: Depth=1
 ; ATOM-NEXT:    movl %eax, %edi
@@ -130,15 +132,15 @@
 ; ATOM-NEXT:    movzbl %r15b, %edi
 ; ATOM-NEXT:    xorl (%r9,%rbp,4), %ebx
 ; ATOM-NEXT:    movl (%r10,%rdi,4), %edi
-; ATOM-NEXT:    xorl -12(%r11), %ebx
+; ATOM-NEXT:    xorl -12(%r14), %ebx
 ; ATOM-NEXT:    xorl (%r9,%rax,4), %edi
 ; ATOM-NEXT:    movl %ebx, %eax
-; ATOM-NEXT:    xorl -8(%r11), %edi
+; ATOM-NEXT:    xorl -8(%r14), %edi
 ; ATOM-NEXT:    shrl $24, %eax
 ; ATOM-NEXT:    movl (%r9,%rax,4), %r15d
-; ATOM-NEXT:    subq $1, %r14
+; ATOM-NEXT:    testq %r11, %r11
 ; ATOM-NEXT:    movl %edi, %eax
-; ATOM-NEXT:    jae LBB0_2
+; ATOM-NEXT:    jne LBB0_2
 ; ATOM-NEXT:  ## %bb.3: ## %bb2
 ; ATOM-NEXT:    shrl $16, %eax
 ; ATOM-NEXT:    shrl $8, %edi
diff --git a/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll b/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll
index e668671..ab636c3 100644
--- a/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll
+++ b/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll
@@ -475,23 +475,22 @@
   ret i64 %sub
 }
 
-; When the compare operand has uses besides add/sub,
-; the transform may not be profitable.
+; Avoid hoisting a math op into a dominating block which would
+; increase the critical path.
 
 define void @PR41129(i64* %p64) {
 ; CHECK-LABEL: @PR41129(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[KEY:%.*]] = load i64, i64* [[P64:%.*]], align 8
-; CHECK-NEXT:    [[TMP0:%.*]] = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 [[KEY]], i64 1)
-; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP0]], 0
-; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
-; CHECK-NEXT:    br i1 [[OV]], label [[TRUE:%.*]], label [[FALSE:%.*]]
+; CHECK-NEXT:    [[COND17:%.*]] = icmp eq i64 [[KEY]], 0
+; CHECK-NEXT:    br i1 [[COND17]], label [[TRUE:%.*]], label [[FALSE:%.*]]
 ; CHECK:       false:
 ; CHECK-NEXT:    [[ANDVAL:%.*]] = and i64 [[KEY]], 7
 ; CHECK-NEXT:    store i64 [[ANDVAL]], i64* [[P64]]
 ; CHECK-NEXT:    br label [[EXIT:%.*]]
 ; CHECK:       true:
-; CHECK-NEXT:    store i64 [[MATH]], i64* [[P64]]
+; CHECK-NEXT:    [[SVALUE:%.*]] = add i64 [[KEY]], -1
+; CHECK-NEXT:    store i64 [[SVALUE]], i64* [[P64]]
 ; CHECK-NEXT:    br label [[EXIT]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void