[WebAssembly] Add segment RETAIN flag to support private retained data (#81539)

In WebAssembly, we have the `WASM_SYMBOL_NO_STRIP` symbol flag to mark the
referenced content as retained. However, the flag is not enough to
express retained data that is not referenced by any symbol. This patch
adds a new segment flag, `WASM_SEG_FLAG_RETAIN`, to support "private"
linkage data that is retained by llvm.used.

This kind of data, which is not referenced but must be retained, is usually
used with encapsulation symbols (__start/__stop). The Swift runtime uses
this technique and depends on the fact that "all metadata sections in live
objects are retained", which was not guaranteed with `--gc-sections`
before this patch.

This is a revised version of https://reviews.llvm.org/D126950 (which has
been reverted), based on @MaskRay's comments.

GitOrigin-RevId: ba3c1f9ce30cf4f8aee5f1961df74d65e11d53bc
diff --git a/test/wasm/no-strip-segment.s b/test/wasm/no-strip-segment.s
new file mode 100644
index 0000000..e70acae
--- /dev/null
+++ b/test/wasm/no-strip-segment.s
@@ -0,0 +1,62 @@
+# RUN: split-file %s %t
+# RUN: llvm-mc -filetype=obj --triple=wasm32-unknown-unknown -o %t/main.o %t/main.s
+# RUN: llvm-mc -filetype=obj --triple=wasm32-unknown-unknown -o %t/liba_x.o %t/liba_x.s
+# RUN: llvm-mc -filetype=obj --triple=wasm32-unknown-unknown -o %t/liba_y.o %t/liba_y.s
+# RUN: rm -f %t/liba.a
+# RUN: llvm-ar rcs %t/liba.a %t/liba_x.o %t/liba_y.o
+# RUN: wasm-ld %t/main.o %t/liba.a --gc-sections -o %t/main.wasm --print-gc-sections | FileCheck %s --check-prefix=GC
+# RUN: obj2yaml %t/main.wasm | FileCheck %s
+
+# --gc-sections should remove the non-retained, unused "weathers" section from live object liba_x.o,
+# but must not remove retained "greetings" sections from live objects main.o and liba_x.o.
+# GC-NOT: removing unused section {{.*}}:(greetings)
+# GC: removing unused section {{.*}}/liba.a(liba_x.o):(weathers)
+# GC-NOT: removing unused section {{.*}}:(greetings)
+
+# Note: All symbols are private so that they don't join the symbol table.
+
+#--- main.s
+  .functype grab_liba () -> ()
+  .globl  _start
+_start:
+  .functype _start () -> ()
+  call grab_liba
+  end_function
+
+  .section greetings,"R",@
+  .asciz  "hello"
+  .section weathers,"R",@
+  .asciz  "cloudy"
+
+#--- liba_x.s
+  .globl  grab_liba
+grab_liba:
+  .functype grab_liba () -> ()
+  end_function
+
+  .section greetings,"R",@
+  .asciz  "world"
+  .section weathers,"",@
+  .asciz  "rainy"
+
+#--- liba_y.s
+  .section        greetings,"R",@
+  .asciz  "bye"
+
+
+# "greetings" section
+# CHECK: - Type:            DATA
+# CHECK:   Segments:
+# CHECK:     - SectionOffset:   7
+# CHECK:       InitFlags:       0
+# CHECK:       Offset:
+# CHECK:         Opcode:          I32_CONST
+# CHECK:         Value:           1024
+# CHECK:       Content:         68656C6C6F00776F726C6400
+# "weathers" section.
+# CHECK: - SectionOffset:   25
+# CHECK:   InitFlags:       0
+# CHECK:   Offset:
+# CHECK:     Opcode:          I32_CONST
+# CHECK:     Value:           1036
+# CHECK:   Content:         636C6F75647900
diff --git a/wasm/InputChunks.h b/wasm/InputChunks.h
index ad1d45e..cf8a524 100644
--- a/wasm/InputChunks.h
+++ b/wasm/InputChunks.h
@@ -81,6 +81,7 @@
   void generateRelocationCode(raw_ostream &os) const;
 
   bool isTLS() const { return flags & llvm::wasm::WASM_SEG_FLAG_TLS; }
+  bool isRetained() const { return flags & llvm::wasm::WASM_SEG_FLAG_RETAIN; }
 
   ObjFile *file;
   OutputSection *outputSec = nullptr;
diff --git a/wasm/MarkLive.cpp b/wasm/MarkLive.cpp
index b8ab774..0f1c508 100644
--- a/wasm/MarkLive.cpp
+++ b/wasm/MarkLive.cpp
@@ -40,7 +40,9 @@
 
 private:
   void enqueue(Symbol *sym);
+  void enqueue(InputChunk *chunk);
   void enqueueInitFunctions(const ObjFile *sym);
+  void enqueueRetainedSegments(const ObjFile *file);
   void mark();
   bool isCallCtorsLive();
 
@@ -56,21 +58,30 @@
   LLVM_DEBUG(dbgs() << "markLive: " << sym->getName() << "\n");
 
   InputFile *file = sym->getFile();
-  bool needInitFunctions = file && !file->isLive() && sym->isDefined();
+  bool markImplicitDeps = file && !file->isLive() && sym->isDefined();
 
   sym->markLive();
 
-  // Mark ctor functions in the object that defines this symbol live.
-  // The ctor functions are all referenced by the synthetic callCtors
-  // function. However, this function does not contain relocations so we
-  // have to manually mark the ctors as live.
-  if (needInitFunctions)
+  if (markImplicitDeps) {
+    // Mark ctor functions in the object that defines this symbol live.
+    // The ctor functions are all referenced by the synthetic callCtors
+    // function. However, this function does not contain relocations so we
+    // have to manually mark the ctors as live.
     enqueueInitFunctions(cast<ObjFile>(file));
+    // Mark retained segments in the object that defines this symbol live.
+    enqueueRetainedSegments(cast<ObjFile>(file));
+  }
 
   if (InputChunk *chunk = sym->getChunk())
     queue.push_back(chunk);
 }
 
+void MarkLive::enqueue(InputChunk *chunk) {
+  LLVM_DEBUG(dbgs() << "markLive: " << toString(chunk) << "\n");
+  chunk->live = true; // Flag the chunk directly; no symbol is involved here.
+  queue.push_back(chunk); // Same work queue enqueue(Symbol *) appends to.
+}
+
 // The ctor functions are all referenced by the synthetic callCtors
 // function.  However, this function does not contain relocations so we
 // have to manually mark the ctors as live.
@@ -83,6 +94,14 @@
   }
 }
 
+// Mark live every segment of `file` flagged WASM_SEG_FLAG_RETAIN. The flag is
+// typically used to retain segments that have no symbol table entry.
+void MarkLive::enqueueRetainedSegments(const ObjFile *file) {
+  for (InputChunk *chunk : file->segments)
+    if (chunk->isRetained())
+      enqueue(chunk);
+}
+
 void MarkLive::run() {
   // Add GC root symbols.
   if (!config->entry.empty())
@@ -96,10 +115,14 @@
   if (WasmSym::callDtors)
     enqueue(WasmSym::callDtors);
 
-  // Enqueue constructors in objects explicitly live from the command-line.
   for (const ObjFile *obj : ctx.objectFiles)
-    if (obj->isLive())
+    if (obj->isLive()) {
+      // Enqueue constructors in objects explicitly live from the command-line.
       enqueueInitFunctions(obj);
+      // Enqueue retained segments in objects explicitly live from the
+      // command-line.
+      enqueueRetainedSegments(obj);
+    }
 
   mark();