Re-commit r357452: SimplifyCFG SinkCommonCodeFromPredecessors: Also sink function calls without used results (PR41259)

The original commit caused false positives from AddressSanitizer's
use-after-scope checks, which have now been fixed in r358478.

> The code was previously checking that candidates for sinking had exactly
> one use or were a store instruction (which can't have uses). This meant
> we could sink call instructions only if they had a use.
>
> That limitation seemed a bit arbitrary, so this patch changes it to
> "instruction has zero or one use" which seems more natural and removes
> the need to special-case stores.
>
> Differential revision: https://reviews.llvm.org/D59936

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@358483 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index c3e8203..c64ef35 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1438,9 +1438,10 @@
 static bool canSinkInstructions(
     ArrayRef<Instruction *> Insts,
     DenseMap<Instruction *, SmallVector<Value *, 4>> &PHIOperands) {
-  // Prune out obviously bad instructions to move. Any non-store instruction
-  // must have exactly one use, and we check later that use is by a single,
-  // common PHI instruction in the successor.
+  // Prune out obviously bad instructions to move. Each instruction must have
+  // exactly zero or one use, and we check later that use is by a single, common
+  // PHI instruction in the successor.
+  bool HasUse = !Insts.front()->user_empty();
   for (auto *I : Insts) {
     // These instructions may change or break semantics if moved.
     if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
@@ -1454,9 +1455,10 @@
       if (C->isInlineAsm())
         return false;
 
-    // Everything must have only one use too, apart from stores which
-    // have no uses.
-    if (!isa<StoreInst>(I) && !I->hasOneUse())
+    // Each instruction must have zero or one use.
+    if (HasUse && !I->hasOneUse())
+      return false;
+    if (!HasUse && !I->user_empty())
       return false;
   }
 
@@ -1465,11 +1467,11 @@
     if (!I->isSameOperationAs(I0))
       return false;
 
-  // All instructions in Insts are known to be the same opcode. If they aren't
-  // stores, check the only user of each is a PHI or in the same block as the
-  // instruction, because if a user is in the same block as an instruction
-  // we're contemplating sinking, it must already be determined to be sinkable.
-  if (!isa<StoreInst>(I0)) {
+  // All instructions in Insts are known to be the same opcode. If they have a
+  // use, check that the only user is a PHI or in the same block as the
+  // instruction, because if a user is in the same block as an instruction we're
+  // contemplating sinking, it must already be determined to be sinkable.
+  if (HasUse) {
     auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
     auto *Succ = I0->getParent()->getTerminator()->getSuccessor(0);
     if (!all_of(Insts, [&PNUse,&Succ](const Instruction *I) -> bool {
@@ -1547,7 +1549,7 @@
   // it is slightly over-aggressive - it gets confused by commutative instructions
   // so double-check it here.
   Instruction *I0 = Insts.front();
-  if (!isa<StoreInst>(I0)) {
+  if (!I0->user_empty()) {
     auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
     if (!all_of(Insts, [&PNUse](const Instruction *I) -> bool {
           auto *U = cast<Instruction>(*I->user_begin());
@@ -1605,11 +1607,10 @@
       I0->andIRFlags(I);
     }
 
-  if (!isa<StoreInst>(I0)) {
+  if (!I0->user_empty()) {
     // canSinkLastInstruction checked that all instructions were used by
     // one and only one PHI node. Find that now, RAUW it to our common
     // instruction and nuke it.
-    assert(I0->hasOneUse());
     auto *PN = cast<PHINode>(*I0->user_begin());
     PN->replaceAllUsesWith(I0);
     PN->eraseFromParent();
diff --git a/test/CodeGen/AArch64/max-jump-table.ll b/test/CodeGen/AArch64/max-jump-table.ll
index 44dde7b..f309efe 100644
--- a/test/CodeGen/AArch64/max-jump-table.ll
+++ b/test/CodeGen/AArch64/max-jump-table.ll
@@ -4,7 +4,7 @@
 ; RUN: llc %s -O2 -print-machineinstrs -mtriple=aarch64-linux-gnu -jump-table-density=40 -mcpu=exynos-m1        -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECKM1 < %t
 ; RUN: llc %s -O2 -print-machineinstrs -mtriple=aarch64-linux-gnu -jump-table-density=40 -mcpu=exynos-m3        -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECKM3 < %t
 
-declare void @ext(i32)
+declare void @ext(i32, i32)
 
 define i32 @jt1(i32 %a, i32 %b) {
 entry:
@@ -45,23 +45,23 @@
 ; CHECKM3-NEXT: %jump-table.0:
 ; CHECKM3-NOT: %jump-table.1:
 
-bb1: tail call void @ext(i32 0) br label %return
-bb2: tail call void @ext(i32 2) br label %return
-bb3: tail call void @ext(i32 4) br label %return
-bb4: tail call void @ext(i32 6) br label %return
-bb5: tail call void @ext(i32 8) br label %return
-bb6: tail call void @ext(i32 10) br label %return
-bb7: tail call void @ext(i32 12) br label %return
-bb8: tail call void @ext(i32 14) br label %return
-bb9: tail call void @ext(i32 16) br label %return
-bb10: tail call void @ext(i32 18) br label %return
-bb11: tail call void @ext(i32 20) br label %return
-bb12: tail call void @ext(i32 22) br label %return
-bb13: tail call void @ext(i32 24) br label %return
-bb14: tail call void @ext(i32 26) br label %return
-bb15: tail call void @ext(i32 28) br label %return
-bb16: tail call void @ext(i32 30) br label %return
-bb17: tail call void @ext(i32 32) br label %return
+bb1: tail call void  @ext(i32 1, i32 0) br label %return
+bb2: tail call void  @ext(i32 2, i32 2) br label %return
+bb3: tail call void  @ext(i32 3, i32 4) br label %return
+bb4: tail call void  @ext(i32 4, i32 6) br label %return
+bb5: tail call void  @ext(i32 5, i32 8) br label %return
+bb6: tail call void  @ext(i32 6, i32 10) br label %return
+bb7: tail call void  @ext(i32 7, i32 12) br label %return
+bb8: tail call void  @ext(i32 8, i32 14) br label %return
+bb9: tail call void  @ext(i32 9, i32 16) br label %return
+bb10: tail call void @ext(i32 1, i32 18) br label %return
+bb11: tail call void @ext(i32 2, i32 20) br label %return
+bb12: tail call void @ext(i32 3, i32 22) br label %return
+bb13: tail call void @ext(i32 4, i32 24) br label %return
+bb14: tail call void @ext(i32 5, i32 26) br label %return
+bb15: tail call void @ext(i32 6, i32 28) br label %return
+bb16: tail call void @ext(i32 7, i32 30) br label %return
+bb17: tail call void @ext(i32 8, i32 32) br label %return
 
 return: ret i32 %b
 }
@@ -91,11 +91,11 @@
 ; CHECKM3-NOT: %jump-table.1
 ; CHECK-DAG: End machine code for function jt2.
 
-bb1: tail call void @ext(i32 1) br label %return
-bb2: tail call void @ext(i32 2) br label %return
-bb3: tail call void @ext(i32 3) br label %return
-bb4: tail call void @ext(i32 4) br label %return
-bb5: tail call void @ext(i32 5) br label %return
-bb6: tail call void @ext(i32 6) br label %return
+bb1: tail call void @ext(i32 6, i32 1) br label %return
+bb2: tail call void @ext(i32 5, i32 2) br label %return
+bb3: tail call void @ext(i32 4, i32 3) br label %return
+bb4: tail call void @ext(i32 3, i32 4) br label %return
+bb5: tail call void @ext(i32 2, i32 5) br label %return
+bb6: tail call void @ext(i32 1, i32 6) br label %return
 return: ret void
 }
diff --git a/test/CodeGen/AArch64/min-jump-table.ll b/test/CodeGen/AArch64/min-jump-table.ll
index 7d6d262..8d16a4d 100644
--- a/test/CodeGen/AArch64/min-jump-table.ll
+++ b/test/CodeGen/AArch64/min-jump-table.ll
@@ -2,7 +2,7 @@
 ; RUN: llc %s -O2 -print-machineinstrs -mtriple=aarch64-linux-gnu -jump-table-density=40 -min-jump-table-entries=4 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK4  < %t
 ; RUN: llc %s -O2 -print-machineinstrs -mtriple=aarch64-linux-gnu -jump-table-density=40 -min-jump-table-entries=8 -o /dev/null 2> %t; FileCheck %s --check-prefixes=CHECK,CHECK8  < %t
 
-declare void @ext(i32)
+declare void @ext(i32, i32)
 
 define i32 @jt2(i32 %a, i32 %b) {
 entry:
@@ -17,8 +17,8 @@
 ; CHECK4-NOT: {{^}}Jump Tables:
 ; CHECK8-NOT: {{^}}Jump Tables:
 
-bb1: tail call void @ext(i32 0) br label %return
-bb2: tail call void @ext(i32 2) br label %return
+bb1: tail call void @ext(i32 1, i32 0) br label %return
+bb2: tail call void @ext(i32 2, i32 2) br label %return
 
 return: ret i32 %b
 }
@@ -40,10 +40,10 @@
 ; CHECK4-NOT: %jump-table.1:
 ; CHECK8-NOT: {{^}}Jump Tables:
 
-bb1: tail call void @ext(i32 0) br label %return
-bb2: tail call void @ext(i32 2) br label %return
-bb3: tail call void @ext(i32 4) br label %return
-bb4: tail call void @ext(i32 6) br label %return
+bb1: tail call void @ext(i32 1, i32 0) br label %return
+bb2: tail call void @ext(i32 3, i32 2) br label %return
+bb3: tail call void @ext(i32 4, i32 4) br label %return
+bb4: tail call void @ext(i32 5, i32 6) br label %return
 
 return: ret i32 %b
 }
@@ -65,14 +65,14 @@
 ; CHECK-NEXT: %jump-table.0:
 ; CHECK-NOT: %jump-table.1:
 
-bb1: tail call void @ext(i32 0) br label %return
-bb2: tail call void @ext(i32 2) br label %return
-bb3: tail call void @ext(i32 4) br label %return
-bb4: tail call void @ext(i32 6) br label %return
-bb5: tail call void @ext(i32 8) br label %return
-bb6: tail call void @ext(i32 10) br label %return
-bb7: tail call void @ext(i32 12) br label %return
-bb8: tail call void @ext(i32 14) br label %return
+bb1: tail call void @ext(i32 1, i32 0) br label %return
+bb2: tail call void @ext(i32 2, i32 2) br label %return
+bb3: tail call void @ext(i32 3, i32 4) br label %return
+bb4: tail call void @ext(i32 4, i32 6) br label %return
+bb5: tail call void @ext(i32 5, i32 8) br label %return
+bb6: tail call void @ext(i32 6, i32 10) br label %return
+bb7: tail call void @ext(i32 7, i32 12) br label %return
+bb8: tail call void @ext(i32 8, i32 14) br label %return
 
 return: ret i32 %b
 }
diff --git a/test/CodeGen/AArch64/win64-jumptable.ll b/test/CodeGen/AArch64/win64-jumptable.ll
index 8148a59..1caba45 100644
--- a/test/CodeGen/AArch64/win64-jumptable.ll
+++ b/test/CodeGen/AArch64/win64-jumptable.ll
@@ -9,40 +9,40 @@
     i32 3, label %sw.bb3
   ]
 
-sw.bb:                                            ; preds = %entry
-  tail call void @g(i32 0) #2
+sw.bb:
+  tail call void @g(i32 0, i32 4)
   br label %sw.epilog
 
-sw.bb1:                                           ; preds = %entry
-  tail call void @g(i32 1) #2
+sw.bb1:
+  tail call void @g(i32 1, i32 5)
   br label %sw.epilog
 
-sw.bb2:                                           ; preds = %entry
-  tail call void @g(i32 2) #2
+sw.bb2:
+  tail call void @g(i32 2, i32 6)
   br label %sw.epilog
 
-sw.bb3:                                           ; preds = %entry
-  tail call void @g(i32 3) #2
+sw.bb3:
+  tail call void @g(i32 3, i32 7)
   br label %sw.epilog
 
-sw.epilog:                                        ; preds = %entry, %sw.bb3, %sw.bb2, %sw.bb1, %sw.bb
-  tail call void @g(i32 10) #2
+sw.epilog:
+  tail call void @g(i32 10, i32 8)
   ret void
 }
 
-declare void @g(i32)
+declare void @g(i32, i32)
 
-; CHECK:		.text
-; CHECK:		f:
-; CHECK:		.seh_proc f
-; CHECK:		b	g
-; CHECK-NEXT:	.p2align	2
-; CHECK-NEXT:	.LJTI0_0:
-; CHECK:		.word	.LBB0_2-.LJTI0_0
-; CHECK:		.word	.LBB0_3-.LJTI0_0
-; CHECK:		.word	.LBB0_4-.LJTI0_0
-; CHECK:		.word	.LBB0_5-.LJTI0_0
-; CHECK:		.section	.xdata,"dr"
-; CHECK:		.seh_handlerdata
-; CHECK:		.text
-; CHECK:		.seh_endproc
+; CHECK:    .text
+; CHECK:    f:
+; CHECK:    .seh_proc f
+; CHECK:    b g
+; CHECK-NEXT: .p2align  2
+; CHECK-NEXT: .LJTI0_0:
+; CHECK:    .word .LBB0_2-.LJTI0_0
+; CHECK:    .word .LBB0_3-.LJTI0_0
+; CHECK:    .word .LBB0_4-.LJTI0_0
+; CHECK:    .word .LBB0_5-.LJTI0_0
+; CHECK:    .section  .xdata,"dr"
+; CHECK:    .seh_handlerdata
+; CHECK:    .text
+; CHECK:    .seh_endproc
diff --git a/test/CodeGen/ARM/cmpxchg-idioms.ll b/test/CodeGen/ARM/cmpxchg-idioms.ll
index 283202f..1af80e7 100644
--- a/test/CodeGen/ARM/cmpxchg-idioms.ll
+++ b/test/CodeGen/ARM/cmpxchg-idioms.ll
@@ -20,14 +20,14 @@
 ; CHECK: [[FAILED]]:
 ; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
 ; CHECK: clrex
-; CHECK: dmb ish
 ; CHECK: movs r0, #0
+; CHECK: dmb ish
 ; CHECK: bx lr
 
 ; CHECK: [[SUCCESS]]:
 ; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
-; CHECK: dmb ish
 ; CHECK: movs r0, #1
+; CHECK: dmb ish
 ; CHECK: bx lr
 
   %pair = cmpxchg i32* %p, i32 %oldval, i32 %newval seq_cst seq_cst
diff --git a/test/Transforms/SimplifyCFG/sink-common-code.ll b/test/Transforms/SimplifyCFG/sink-common-code.ll
index 02c29bd..12a3e59 100644
--- a/test/Transforms/SimplifyCFG/sink-common-code.ll
+++ b/test/Transforms/SimplifyCFG/sink-common-code.ll
@@ -843,6 +843,50 @@
 ; CHECK: insertvalue
 ; CHECK-NOT: insertvalue
 
+
+declare void @baz(i32)
+
+define void @test_sink_void_calls(i32 %x) {
+entry:
+  switch i32 %x, label %default [
+    i32 0, label %bb0
+    i32 1, label %bb1
+    i32 2, label %bb2
+    i32 3, label %bb3
+    i32 4, label %bb4
+  ]
+bb0:
+  call void @baz(i32 12)
+  br label %return
+bb1:
+  call void @baz(i32 34)
+  br label %return
+bb2:
+  call void @baz(i32 56)
+  br label %return
+bb3:
+  call void @baz(i32 78)
+  br label %return
+bb4:
+  call void @baz(i32 90)
+  br label %return
+default:
+  unreachable
+return:
+  ret void
+
+; Check that the calls get sunk to the return block.
+; We would previously not sink calls without uses, see PR41259.
+; CHECK-LABEL: @test_sink_void_calls
+; CHECK-NOT: call
+; CHECK-LABEL: return:
+; CHECK: phi
+; CHECK: call
+; CHECK-NOT: call
+; CHECK: ret
+}
+
+
 ; CHECK: ![[$TBAA]] = !{![[TYPE:[0-9]]], ![[TYPE]], i64 0}
 ; CHECK: ![[TYPE]] = !{!"float", ![[TEXT:[0-9]]]}
 ; CHECK: ![[TEXT]] = !{!"an example type tree"}