[lld/MachO] Fix assert on unsorted data-in-code entries (#81758)

When the data-in-code entries are in separate sections, they are not
guaranteed to be sorted. In particular, 68b1cc36f3df marked some libc++
string functions as noinline, which leads to global ctors involving
strings now producing data-in-code sections in __TEXT,__StaticInit,
which is why this now happens in practice.

Since data-in-code entries are relatively rare and small, just sort
them. No observed performance impact.

See also crbug.com/41487860

GitOrigin-RevId: 624ea349d7fa3535760a21a05c9998c7978228a4
diff --git a/MachO/SyntheticSections.cpp b/MachO/SyntheticSections.cpp
index 544847d..a5d66bb 100644
--- a/MachO/SyntheticSections.cpp
+++ b/MachO/SyntheticSections.cpp
@@ -1050,10 +1050,13 @@
     if (entries.empty())
       continue;
 
-    assert(is_sorted(entries, [](const data_in_code_entry &lhs,
+    std::vector<MachO::data_in_code_entry> sortedEntries;
+    sortedEntries.assign(entries.begin(), entries.end());
+    llvm::sort(sortedEntries, [](const data_in_code_entry &lhs,
                                  const data_in_code_entry &rhs) {
       return lhs.offset < rhs.offset;
-    }));
+    });
+
     // For each code subsection find 'data in code' entries residing in it.
     // Compute the new offset values as
     // <offset within subsection> + <subsection address> - <__TEXT address>.
@@ -1066,12 +1069,12 @@
           continue;
         const uint64_t beginAddr = section->addr + subsec.offset;
         auto it = llvm::lower_bound(
-            entries, beginAddr,
+            sortedEntries, beginAddr,
             [](const MachO::data_in_code_entry &entry, uint64_t addr) {
               return entry.offset < addr;
             });
         const uint64_t endAddr = beginAddr + isec->getSize();
-        for (const auto end = entries.end();
+        for (const auto end = sortedEntries.end();
              it != end && it->offset + it->length <= endAddr; ++it)
           dataInCodeEntries.push_back(
               {static_cast<uint32_t>(isec->getVA(it->offset - beginAddr) -
diff --git a/test/MachO/data-in-code.s b/test/MachO/data-in-code.s
index 49aa765..1a09359 100644
--- a/test/MachO/data-in-code.s
+++ b/test/MachO/data-in-code.s
@@ -6,7 +6,7 @@
 # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/bar.s -o %t/bar.o
 # RUN: %lld -lSystem %t/foo.o %t/bar.o -o %t/main.exe
 # RUN: llvm-otool -l %t/main.exe > %t/objdump
-# RUN: llvm-objdump --macho --data-in-code %t/main.exe >> %t/objdump
+# RUN: llvm-otool -Gv %t/main.exe >> %t/objdump
 # RUN: FileCheck %s < %t/objdump
 
 # CHECK-LABEL:  sectname __text
@@ -18,12 +18,13 @@
 # CHECK-LABEL:  cmd LC_DATA_IN_CODE
 # CHECK-NEXT:   cmdsize 16
 # CHECK-NEXT:   dataoff
-# CHECK-NEXT:   datasize 16
+# CHECK-NEXT:   datasize 24
 
-# CHECK-LABEL:  Data in code table (2 entries)
+# CHECK-LABEL:  Data in code table (3 entries)
 # CHECK-NEXT:   offset length kind
 # CHECK-NEXT:   [[#%x,TEXT + 28]] 24 JUMP_TABLE32
-# CHECK-NEXT:   [[#%x,TEXT + 68]] 12 JUMP_TABLE32
+# CHECK-NEXT:   [[#%x,TEXT + 68]] 8 JUMP_TABLE32
+# CHECK-NEXT:   [[#%x,TEXT + 84]] 12 JUMP_TABLE32
 
 # RUN: %lld -lSystem %t/foo.o %t/bar.o -no_data_in_code_info -o %t/main.exe
 # RUN: llvm-otool -l %t/main.exe | FileCheck --check-prefix=OMIT %s
@@ -32,11 +33,22 @@
 
 # RUN: %lld -lSystem %t/foo.o %t/bar.o -no_data_in_code_info -data_in_code_info -o %t/main.exe
 # RUN: llvm-otool -l %t/main.exe > %t/objdump
-# RUN: llvm-objdump --macho --data-in-code %t/main.exe >> %t/objdump
+# RUN: llvm-otool -Gv %t/main.exe >> %t/objdump
 # RUN: FileCheck %s < %t/objdump
 
 #--- foo.s
 .text
+.section __TEXT,__StaticInit,regular,pure_instructions
+.p2align 4, 0x90
+_some_init_function:
+retq
+.p2align 2, 0x90
+.data_region jt32
+.long 0
+.long 0
+.end_data_region
+
+.section __TEXT,__text,regular,pure_instructions
 .globl _main
 .p2align 4, 0x90
 _main: