[LLD][MachO] Option to emit separate cstring sections (#158720)

Add the `--{no-}separate-cstring-literal-sections` option to emit
cstring literals into sections defined by their section name. This
allows for changes like https://github.com/swiftlang/swift/pull/84300
and https://github.com/swiftlang/swift/pull/84236 to actually have an
effect. The default behavior has not changed.

This is useful because strings in different sections might
have different access patterns at runtime. By splitting these strings
into separate sections, we may reduce the number of page faults during
startup. For example, the ObjC runtime accesses all strings in
`__objc_classname` before main.
diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h
index 19dba79..51b1363 100644
--- a/lld/MachO/Config.h
+++ b/lld/MachO/Config.h
@@ -222,6 +222,7 @@
   bool pgoWarnMismatch;
   bool warnThinArchiveMissingMembers;
   bool disableVerify;
+  bool separateCstringLiteralSections;
 
   bool callGraphProfileSort = false;
   llvm::StringRef printSymbolOrder;
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 5a9b5b6..7ce987e 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -1522,8 +1522,8 @@
   // We always create a cStringSection, regardless of whether dedupLiterals is
   // true. If it isn't, we simply create a non-deduplicating CStringSection.
   // Either way, we must unconditionally finalize it here.
-  in.cStringSection->finalizeContents();
-  in.objcMethnameSection->finalizeContents();
+  for (auto *sec : in.cStringSections)
+    sec->finalizeContents();
   in.wordLiteralSection->finalizeContents();
 }
 
@@ -1711,7 +1711,7 @@
 
     firstTLVDataSection = nullptr;
     tar = nullptr;
-    memset(&in, 0, sizeof(in));
+    in = InStruct();
 
     resetLoadedDylibs();
     resetOutputSegments();
@@ -1983,6 +1983,9 @@
                    OPT_no_warn_thin_archive_missing_members, true);
   config->generateUuid = !args.hasArg(OPT_no_uuid);
   config->disableVerify = args.hasArg(OPT_disable_verify);
+  config->separateCstringLiteralSections =
+      args.hasFlag(OPT_separate_cstring_literal_sections,
+                   OPT_no_separate_cstring_literal_sections, false);
 
   auto IncompatWithCGSort = [&](StringRef firstArgStr) {
     // Throw an error only if --call-graph-profile-sort is explicitly specified
diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
index 18b3ff9..b173e14 100644
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -63,15 +63,13 @@
     isec->parent = osec;
     inputSections.push_back(isec);
   } else if (auto *isec = dyn_cast<CStringInputSection>(inputSection)) {
-    if (isec->getName() == section_names::objcMethname) {
-      if (in.objcMethnameSection->inputOrder == UnspecifiedInputOrder)
-        in.objcMethnameSection->inputOrder = inputSectionsOrder++;
-      in.objcMethnameSection->addInput(isec);
-    } else {
-      if (in.cStringSection->inputOrder == UnspecifiedInputOrder)
-        in.cStringSection->inputOrder = inputSectionsOrder++;
-      in.cStringSection->addInput(isec);
-    }
+    bool useSectionName = config->separateCstringLiteralSections ||
+                          isec->getName() == section_names::objcMethname;
+    auto *osec = in.getOrCreateCStringSection(
+        useSectionName ? isec->getName() : section_names::cString);
+    if (osec->inputOrder == UnspecifiedInputOrder)
+      osec->inputOrder = inputSectionsOrder++;
+    osec->addInput(isec);
   } else if (auto *isec = dyn_cast<WordLiteralInputSection>(inputSection)) {
     if (in.wordLiteralSection->inputOrder == UnspecifiedInputOrder)
       in.wordLiteralSection->inputOrder = inputSectionsOrder++;
diff --git a/lld/MachO/MapFile.cpp b/lld/MachO/MapFile.cpp
index f3e221a..29ebcdc 100644
--- a/lld/MachO/MapFile.cpp
+++ b/lld/MachO/MapFile.cpp
@@ -239,7 +239,7 @@
         printIsecArrSyms(textOsec->inputs, textOsec->getThunks());
       } else if (auto *concatOsec = dyn_cast<ConcatOutputSection>(osec)) {
         printIsecArrSyms(concatOsec->inputs);
-      } else if (osec == in.cStringSection || osec == in.objcMethnameSection) {
+      } else if (is_contained(in.cStringSections, osec)) {
         const auto &liveCStrings = info.liveCStringsForSection.lookup(osec);
         uint64_t lastAddr = 0; // strings will never start at address 0, so this
                                // is a sentinel value
diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td
index 8ae50f3..4eeb8fb 100644
--- a/lld/MachO/Options.td
+++ b/lld/MachO/Options.td
@@ -1084,6 +1084,13 @@
 def ignore_auto_link : Flag<["-"], "ignore_auto_link">,
     HelpText<"Ignore LC_LINKER_OPTIONs">,
     Group<grp_rare>;
+defm separate_cstring_literal_sections
+    : BB<"separate-cstring-literal-sections",
+         "Emit all cstring literals into their respective sections defined by "
+         "their section names.",
+         "Emit all cstring literals into the __cstring section. As a special "
+         "case, the __objc_methname section will still be emitted. (default)">,
+      Group<grp_rare>;
 
 def grp_deprecated : OptionGroup<"deprecated">, HelpText<"DEPRECATED">;
 
diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
index 5796b07..1abf3c21 100644
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -843,6 +843,9 @@
 struct InStruct {
   const uint8_t *bufferStart = nullptr;
   MachHeaderSection *header = nullptr;
+  /// The list of cstring sections. Note that this includes \p cStringSection
+  /// and \p objcMethnameSection already.
+  llvm::SmallVector<CStringSection *> cStringSections;
   CStringSection *cStringSection = nullptr;
   DeduplicatedCStringSection *objcMethnameSection = nullptr;
   WordLiteralSection *wordLiteralSection = nullptr;
@@ -863,6 +866,26 @@
   InitOffsetsSection *initOffsets = nullptr;
   ObjCMethListSection *objcMethList = nullptr;
   ChainedFixupsSection *chainedFixups = nullptr;
+  /// Get the cstring section for \p name, creating and registering it if new.
+  CStringSection *getOrCreateCStringSection(StringRef name,
+                                            bool forceDedupStrings = false) {
+    auto [it, didEmplace] = // maps name -> index into cStringSections
+        cStringSectionMap.try_emplace(name, cStringSections.size());
+    if (!didEmplace) // existing section wins; forceDedupStrings is ignored
+      return cStringSections[it->getValue()];
+
+    std::string &nameData = *make<std::string>(name); // own a copy of name
+    CStringSection *sec;
+    if (config->dedupStrings || forceDedupStrings)
+      sec = make<DeduplicatedCStringSection>(nameData.c_str());
+    else
+      sec = make<CStringSection>(nameData.c_str());
+    cStringSections.push_back(sec); // lands at the index recorded in the map
+    return sec;
+  }
+
+private:
+  llvm::StringMap<unsigned> cStringSectionMap;
 };
 
 extern InStruct in;
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index f288fad..995792b 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -1377,13 +1377,11 @@
 
 void macho::createSyntheticSections() {
   in.header = make<MachHeaderSection>();
-  if (config->dedupStrings)
-    in.cStringSection =
-        make<DeduplicatedCStringSection>(section_names::cString);
-  else
-    in.cStringSection = make<CStringSection>(section_names::cString);
-  in.objcMethnameSection =
-      make<DeduplicatedCStringSection>(section_names::objcMethname);
+  // Materialize cstring and objcMethname sections
+  in.cStringSection = in.getOrCreateCStringSection(section_names::cString);
+  in.objcMethnameSection = cast<DeduplicatedCStringSection>(
+      in.getOrCreateCStringSection(section_names::objcMethname,
+                                   /*forceDedupStrings=*/true));
   in.wordLiteralSection = make<WordLiteralSection>();
   if (config->emitChainedFixups) {
     in.chainedFixups = make<ChainedFixupsSection>();
diff --git a/lld/test/MachO/cstring.ll b/lld/test/MachO/cstring.ll
new file mode 100644
index 0000000..4f82736
--- /dev/null
+++ b/lld/test/MachO/cstring.ll
@@ -0,0 +1,32 @@
+; REQUIRES: x86
+; RUN: llvm-as %s -o %t.o
+; NOTE: the module's triple is x86_64, so linking it needs the X86 backend.
+; RUN: %lld -dylib --separate-cstring-literal-sections %t.o -o - | llvm-objdump --macho --section-headers - | FileCheck %s
+; RUN: %lld -dylib --no-separate-cstring-literal-sections %t.o -o - | llvm-objdump --macho --section-headers - | FileCheck %s --check-prefix=CSTR
+; RUN: %lld -dylib %t.o -o - | llvm-objdump --macho --section-headers - | FileCheck %s --check-prefix=CSTR
+
+; CHECK-DAG: __cstring
+; CHECK-DAG: __new_sec
+; CHECK-DAG: __objc_classname
+; CHECK-DAG: __objc_methname
+; CHECK-DAG: __objc_methtype
+
+; CSTR-DAG: __cstring
+; CSTR-DAG: __objc_methname
+
+target triple = "x86_64-apple-darwin"
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32"
+
+@.str = private unnamed_addr constant [10 x i8] c"my string\00", align 1
+@.str1 = private unnamed_addr constant [16 x i8] c"my other string\00", section "__TEXT,__new_sec,cstring_literals", align 1
+@OBJC_CLASS_NAME_ = private unnamed_addr constant [4 x i8] c"foo\00", section "__TEXT,__objc_classname,cstring_literals", align 1
+@OBJC_METH_VAR_NAME_ = private unnamed_addr constant [4 x i8] c"bar\00", section "__TEXT,__objc_methname,cstring_literals", align 1
+@OBJC_METH_VAR_TYPE_ = private unnamed_addr constant [4 x i8] c"goo\00", section "__TEXT,__objc_methtype,cstring_literals", align 1
+
+@llvm.compiler.used = appending global [5 x ptr] [
+  ptr @.str,
+  ptr @.str1,
+  ptr @OBJC_METH_VAR_NAME_,
+  ptr @OBJC_CLASS_NAME_,
+  ptr @OBJC_METH_VAR_TYPE_
+]