[BOLT] Fix references in ignored functions in CFG state (#140678)

When we call setIgnored() on functions that already have CFG built,
these functions are not going to get emitted and we risk missing
external function references being updated.

To mitigate the potential issues, run scanExternalRefs() on such
functions to create patches/relocations.

Since scanExternalRefs() relies on function relocations, we have to
preserve relocations until the function is emitted. As a result, the
memory overhead without debug info update could reach up to 2%.
diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index 6b22ff6..14957cb 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -2348,6 +2348,7 @@
       releaseCFG();
       CurrentState = State::Emitted;
     }
+    clearList(Relocations);
   }
 
   /// Process LSDA information for the function.
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index 36b789d..84f1853 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -1032,10 +1032,8 @@
 
     if (!hasValidCodePadding(BF)) {
       if (HasRelocations) {
-        if (opts::Verbosity >= 1) {
-          this->outs() << "BOLT-INFO: function " << BF
-                       << " has invalid padding. Ignoring the function.\n";
-        }
+        this->errs() << "BOLT-WARNING: function " << BF
+                     << " has invalid padding. Ignoring the function\n";
         BF.setIgnored();
       } else {
         BF.setMaxSize(BF.getSize());
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index ceb9390..6d1969f 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1526,8 +1526,6 @@
   if (uint64_t Offset = getFirstInstructionOffset())
     Labels[Offset] = BC.Ctx->createNamedTempSymbol();
 
-  clearList(Relocations);
-
   if (!IsSimple) {
     clearList(Instructions);
     return createNonFatalBOLTError("");
@@ -3244,16 +3242,26 @@
 }
 
 void BinaryFunction::setIgnored() {
+  IsIgnored = true;
+
   if (opts::processAllFunctions()) {
     // We can accept ignored functions before they've been disassembled.
-    // In that case, they would still get disassembled and emited, but not
+    // In that case, they would still get disassembled and emitted, but not
     // optimized.
-    assert(CurrentState == State::Empty &&
-           "cannot ignore non-empty functions in current mode");
-    IsIgnored = true;
+    if (CurrentState != State::Empty) {
+      BC.errs() << "BOLT-ERROR: cannot ignore non-empty function " << *this
+                << " in current mode\n";
+      exit(1);
+    }
     return;
   }
 
+  IsSimple = false;
+  LLVM_DEBUG(dbgs() << "Ignoring " << getPrintName() << '\n');
+
+  if (CurrentState == State::Empty)
+    return;
+
   clearDisasmState();
 
   // Clear CFG state too.
@@ -3273,9 +3281,11 @@
 
   CurrentState = State::Empty;
 
-  IsIgnored = true;
-  IsSimple = false;
-  LLVM_DEBUG(dbgs() << "Ignoring " << getPrintName() << '\n');
+  // Fix external references in the original function body.
+  if (BC.HasRelocations) {
+    LLVM_DEBUG(dbgs() << "Scanning refs in " << *this << '\n');
+    scanExternalRefs();
+  }
 }
 
 void BinaryFunction::duplicateConstantIslands() {
@@ -3764,7 +3774,6 @@
 
 MCSymbol *BinaryFunction::addEntryPointAtOffset(uint64_t Offset) {
   assert(Offset && "cannot add primary entry point");
-  assert(CurrentState == State::Empty || CurrentState == State::Disassembled);
 
   const uint64_t EntryPointAddress = getAddress() + Offset;
   MCSymbol *LocalSymbol = getOrCreateLocalLabel(EntryPointAddress);
@@ -3773,6 +3782,8 @@
   if (EntrySymbol)
     return EntrySymbol;
 
+  assert(CurrentState == State::Empty || CurrentState == State::Disassembled);
+
   if (BinaryData *EntryBD = BC.getBinaryDataAtAddress(EntryPointAddress)) {
     EntrySymbol = EntryBD->getSymbol();
   } else {
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 69c39cf..e1aa00a3d 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -3446,6 +3446,7 @@
         BC->outs() << "BOLT-INFO: could not disassemble function " << Function
                    << ". Will ignore.\n";
       // Forcefully ignore the function.
+      Function.scanExternalRefs();
       Function.setIgnored();
     });
 
diff --git a/bolt/test/AArch64/patch-ignored.s b/bolt/test/AArch64/patch-ignored.s
new file mode 100644
index 0000000..377d4da
--- /dev/null
+++ b/bolt/test/AArch64/patch-ignored.s
@@ -0,0 +1,33 @@
+## Check that llvm-bolt patches functions that are getting ignored after their
+## CFG was constructed.
+
+# RUN: %clang %cflags %s -o %t.exe -Wl,-q
+# RUN: llvm-bolt %t.exe -o %t.bolt --force-patch 2>&1 | FileCheck %s
+# RUN: llvm-objdump -d %t.bolt | FileCheck %s --check-prefix=CHECK-OBJDUMP
+
+  .text
+
+## The function is too small to be patched and BOLT is forced to ignore it under
+## --force-patch. Check that the reference to _start is updated.
+# CHECK: BOLT-WARNING: failed to patch entries in unpatchable
+	.globl unpatchable
+  .type unpatchable, %function
+unpatchable:
+  .cfi_startproc
+# CHECK-OBJDUMP:      <unpatchable>:
+# CHECK-OBJDUMP-NEXT: bl {{.*}} <_start>
+  bl _start
+  ret
+  .cfi_endproc
+  .size unpatchable, .-unpatchable
+
+  .globl _start
+  .type _start, %function
+_start:
+  .cfi_startproc
+  cmp  x0, 1
+  b.eq  .L0
+.L0:
+  ret  x30
+  .cfi_endproc
+  .size _start, .-_start
diff --git a/bolt/test/X86/patch-ignored.s b/bolt/test/X86/patch-ignored.s
new file mode 100644
index 0000000..4f98b51
--- /dev/null
+++ b/bolt/test/X86/patch-ignored.s
@@ -0,0 +1,44 @@
+## Check that llvm-bolt patches functions that are getting ignored after their
+## CFG was constructed.
+
+# RUN: %clang %cflags %s -o %t.exe -Wl,-q
+# RUN: llvm-bolt %t.exe -o %t.bolt 2>&1 | FileCheck %s
+# RUN: llvm-objdump -d %t.bolt | FileCheck %s --check-prefix=CHECK-OBJDUMP
+
+  .text
+
+## After the CFG is built, the following function will be marked as ignored
+## due to the presence of the internal call.
+# CHECK:      BOLT-WARNING: will skip the following function
+# CHECK-NEXT: internal_call
+	.globl internal_call
+  .type internal_call, %function
+internal_call:
+  .cfi_startproc
+# CHECK-OBJDUMP:      <internal_call>:
+  call .L1
+  jmp .L2
+# CHECK-OBJDUMP:      jmp
+.L1:
+  jmp _start
+# CHECK-OBJDUMP:      jmp
+# CHECK-OBJDUMP-SAME: <_start>
+  ret
+.L2:
+  jmp _start
+# CHECK-OBJDUMP:      jmp
+# CHECK-OBJDUMP-SAME: <_start>
+  .cfi_endproc
+  .size internal_call, .-internal_call
+
+  .globl _start
+  .type _start, %function
+_start:
+  .cfi_startproc
+  cmpq  %rdi, 1
+  jne  .L0
+  movq %rdi, %rax
+.L0:
+  ret
+  .cfi_endproc
+  .size _start, .-_start