[WebAssembly] Add segment RETAIN flag to support private retained data (#81539)
In WebAssembly, we have `WASM_SYMBOL_NO_STRIP` symbol flag to mark the
referenced content as retained. However, the flag is not enough to
express retained data that is not referenced by any symbol. This patch
adds a new segment flag `WASM_SEG_FLAG_RETAIN` to support "private"
linkage data that is retained by llvm.used.
Data that is not referenced but must be retained is usually
used with encapsulation symbols (__start/__stop). The Swift runtime uses
this technique and depends on the fact that "all metadata sections in live
objects are retained", which was not guaranteed with `--gc-sections`
before this patch.
This is a revised version of https://reviews.llvm.org/D126950 (which has
been reverted), based on @MaskRay's comments.
GitOrigin-RevId: ba3c1f9ce30cf4f8aee5f1961df74d65e11d53bc
diff --git a/test/wasm/no-strip-segment.s b/test/wasm/no-strip-segment.s
new file mode 100644
index 0000000..e70acae
--- /dev/null
+++ b/test/wasm/no-strip-segment.s
@@ -0,0 +1,62 @@
+# RUN: split-file %s %t
+# RUN: llvm-mc -filetype=obj --triple=wasm32-unknown-unknown -o %t/main.o %t/main.s
+# RUN: llvm-mc -filetype=obj --triple=wasm32-unknown-unknown -o %t/liba_x.o %t/liba_x.s
+# RUN: llvm-mc -filetype=obj --triple=wasm32-unknown-unknown -o %t/liba_y.o %t/liba_y.s
+# RUN: rm -f %t/liba.a
+# RUN: llvm-ar rcs %t/liba.a %t/liba_x.o %t/liba_y.o
+# RUN: wasm-ld %t/main.o %t/liba.a --gc-sections -o %t/main.wasm --print-gc-sections | FileCheck %s --check-prefix=GC
+# RUN: obj2yaml %t/main.wasm | FileCheck %s
+
+# --gc-sections should remove non-retained and unused "weathers" section from live object liba_x.o
+# GC: removing unused section {{.*}}/liba.a(liba_x.o):(weathers)
+# Should not remove retained "greetings" sections from live objects main.o and liba_x.o
+# GC-NOT: removing unused section {{.*}}/main.o:(greetings)
+# GC-NOT: removing unused section {{.*}}/liba.a(liba_x.o):(greetings)
+
+# Note: All symbols are private so that they don't join the symbol table.
+
+#--- main.s
+ .functype grab_liba () -> ()
+ .globl _start
+_start:
+ .functype _start () -> ()
+ call grab_liba
+ end_function
+
+ .section greetings,"R",@
+ .asciz "hello"
+ .section weathers,"R",@
+ .asciz "cloudy"
+
+#--- liba_x.s
+ .globl grab_liba
+grab_liba:
+ .functype grab_liba () -> ()
+ end_function
+
+ .section greetings,"R",@
+ .asciz "world"
+ .section weathers,"",@
+ .asciz "rainy"
+
+#--- liba_y.s
+ .section greetings,"R",@
+ .asciz "bye"
+
+
+# "greetings" section
+# CHECK: - Type: DATA
+# CHECK: Segments:
+# CHECK: - SectionOffset: 7
+# CHECK: InitFlags: 0
+# CHECK: Offset:
+# CHECK: Opcode: I32_CONST
+# CHECK: Value: 1024
+# CHECK: Content: 68656C6C6F00776F726C6400
+# "weathers" section.
+# CHECK: - SectionOffset: 25
+# CHECK: InitFlags: 0
+# CHECK: Offset:
+# CHECK: Opcode: I32_CONST
+# CHECK: Value: 1036
+# CHECK: Content: 636C6F75647900
diff --git a/wasm/InputChunks.h b/wasm/InputChunks.h
index ad1d45e..cf8a524 100644
--- a/wasm/InputChunks.h
+++ b/wasm/InputChunks.h
@@ -81,6 +81,7 @@
void generateRelocationCode(raw_ostream &os) const;
bool isTLS() const { return flags & llvm::wasm::WASM_SEG_FLAG_TLS; }
+ bool isRetained() const { return flags & llvm::wasm::WASM_SEG_FLAG_RETAIN; }
ObjFile *file;
OutputSection *outputSec = nullptr;
diff --git a/wasm/MarkLive.cpp b/wasm/MarkLive.cpp
index b8ab774..0f1c508 100644
--- a/wasm/MarkLive.cpp
+++ b/wasm/MarkLive.cpp
@@ -40,7 +40,9 @@
private:
void enqueue(Symbol *sym);
+ void enqueue(InputChunk *chunk);
void enqueueInitFunctions(const ObjFile *sym);
+ void enqueueRetainedSegments(const ObjFile *file);
void mark();
bool isCallCtorsLive();
@@ -56,21 +58,30 @@
LLVM_DEBUG(dbgs() << "markLive: " << sym->getName() << "\n");
InputFile *file = sym->getFile();
- bool needInitFunctions = file && !file->isLive() && sym->isDefined();
+ bool markImplicitDeps = file && !file->isLive() && sym->isDefined();
sym->markLive();
- // Mark ctor functions in the object that defines this symbol live.
- // The ctor functions are all referenced by the synthetic callCtors
- // function. However, this function does not contain relocations so we
- // have to manually mark the ctors as live.
- if (needInitFunctions)
+ if (markImplicitDeps) {
+ // Mark ctor functions in the object that defines this symbol live.
+ // The ctor functions are all referenced by the synthetic callCtors
+ // function. However, this function does not contain relocations so we
+ // have to manually mark the ctors as live.
enqueueInitFunctions(cast<ObjFile>(file));
+ // Mark retained segments in the object that defines this symbol live.
+ enqueueRetainedSegments(cast<ObjFile>(file));
+ }
if (InputChunk *chunk = sym->getChunk())
queue.push_back(chunk);
}
+void MarkLive::enqueue(InputChunk *chunk) {
+ LLVM_DEBUG(dbgs() << "markLive: " << toString(chunk) << "\n");
+ chunk->live = true;
+ queue.push_back(chunk);
+}
+
// The ctor functions are all referenced by the synthetic callCtors
// function. However, this function does not contain relocations so we
// have to manually mark the ctors as live.
@@ -83,6 +94,14 @@
}
}
+// Mark segments flagged by segment-level no-strip. Segment-level no-strip is
+// usually used to retain segments that have no symbol table entry.
+void MarkLive::enqueueRetainedSegments(const ObjFile *file) {
+ for (InputChunk *chunk : file->segments)
+ if (chunk->isRetained())
+ enqueue(chunk);
+}
+
void MarkLive::run() {
// Add GC root symbols.
if (!config->entry.empty())
@@ -96,10 +115,14 @@
if (WasmSym::callDtors)
enqueue(WasmSym::callDtors);
- // Enqueue constructors in objects explicitly live from the command-line.
for (const ObjFile *obj : ctx.objectFiles)
- if (obj->isLive())
+ if (obj->isLive()) {
+ // Enqueue constructors in objects explicitly live from the command-line.
enqueueInitFunctions(obj);
+ // Enqueue retained segments in objects explicitly live from the
+ // command-line.
+ enqueueRetainedSegments(obj);
+ }
mark();