[lld-macho] Avoid infinite recursion when parsing corrupted export tries (#152569)
If an export trie is encoded incorrectly, and one of the children
offsets points back to one of the nodes earlier in the serialization,
the current code will end up in an infinite recursion, and eventually
fail exhausting the available memory.
The failure can be avoided if, before recursing, one checks that the
offset is valid, that is, that the offset is beyond the current
position. This is similar to a check done by llvm-objdump which reports
the trie being corrupted.
diff --git a/lld/MachO/ExportTrie.cpp b/lld/MachO/ExportTrie.cpp
index 303eda4..34478f4 100644
--- a/lld/MachO/ExportTrie.cpp
+++ b/lld/MachO/ExportTrie.cpp
@@ -41,6 +41,7 @@
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Support/LEB128.h"
#include <optional>
+#include <unordered_set>
using namespace llvm;
using namespace lld;
@@ -296,13 +297,19 @@
// Parse a serialized trie and invoke a callback for each entry.
class TrieParser {
public:
- TrieParser(const uint8_t *buf, size_t size, const TrieEntryCallback &callback)
- : start(buf), end(start + size), callback(callback) {}
+ TrieParser(const std::string &fileName, const uint8_t *buf, size_t size,
+ const TrieEntryCallback &callback)
+ : fileName(fileName), start(buf), end(start + size), callback(callback) {}
- void parse(const uint8_t *buf, const Twine &cumulativeString);
+ void parse(const uint8_t *buf, const Twine &cumulativeString,
+ std::unordered_set<size_t> &visited);
- void parse() { parse(start, ""); }
+ void parse() {
+ std::unordered_set<size_t> visited;
+ parse(start, "", visited);
+ }
+ const std::string fileName;
const uint8_t *start;
const uint8_t *end;
const TrieEntryCallback &callback;
@@ -310,9 +317,13 @@
} // namespace
-void TrieParser::parse(const uint8_t *buf, const Twine &cumulativeString) {
+void TrieParser::parse(const uint8_t *buf, const Twine &cumulativeString,
+ std::unordered_set<size_t> &visited) {
if (buf >= end)
- fatal("Node offset points outside export section");
+ fatal(fileName + ": export trie node offset points outside export section");
+
+ size_t currentOffset = buf - start;
+ visited.insert(currentOffset);
unsigned ulebSize;
uint64_t terminalSize = decodeULEB128(buf, &ulebSize);
@@ -331,14 +342,18 @@
buf += substring.size() + 1;
offset = decodeULEB128(buf, &ulebSize);
buf += ulebSize;
- parse(start + offset, cumulativeString + substring);
+ if (visited.find(offset) != visited.end())
+ fatal(fileName + ": export trie child node infinite loop");
+ parse(start + offset, cumulativeString + substring, visited);
}
+
+ visited.erase(currentOffset);
}
-void macho::parseTrie(const uint8_t *buf, size_t size,
- const TrieEntryCallback &callback) {
+void macho::parseTrie(const std::string &fileName, const uint8_t *buf,
+ size_t size, const TrieEntryCallback &callback) {
if (size == 0)
return;
- TrieParser(buf, size, callback).parse();
+ TrieParser(fileName, buf, size, callback).parse();
}
diff --git a/lld/MachO/ExportTrie.h b/lld/MachO/ExportTrie.h
index aa7e3b0..fa73fc4 100644
--- a/lld/MachO/ExportTrie.h
+++ b/lld/MachO/ExportTrie.h
@@ -41,7 +41,8 @@
using TrieEntryCallback =
llvm::function_ref<void(const llvm::Twine & /*name*/, uint64_t /*flags*/)>;
-void parseTrie(const uint8_t *buf, size_t size, const TrieEntryCallback &);
+void parseTrie(const std::string &fileName, const uint8_t *buf, size_t size,
+ const TrieEntryCallback &);
} // namespace lld::macho
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 3b3023a..442fc60 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -1789,12 +1789,13 @@
auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
std::vector<TrieEntry> entries;
// Find all the $ld$* symbols to process first.
- parseTrie(buf + offset, size, [&](const Twine &name, uint64_t flags) {
- StringRef savedName = saver().save(name);
- if (handleLDSymbol(savedName))
- return;
- entries.push_back({savedName, flags});
- });
+ parseTrie(toString(this), buf + offset, size,
+ [&](const Twine &name, uint64_t flags) {
+ StringRef savedName = saver().save(name);
+ if (handleLDSymbol(savedName))
+ return;
+ entries.push_back({savedName, flags});
+ });
// Process the "normal" symbols.
for (TrieEntry &entry : entries) {
diff --git a/lld/test/MachO/invalid/Inputs/macho-trie-node-loop b/lld/test/MachO/invalid/Inputs/macho-trie-node-loop
new file mode 100755
index 0000000..b94dfa2
--- /dev/null
+++ b/lld/test/MachO/invalid/Inputs/macho-trie-node-loop
Binary files differ
diff --git a/lld/test/MachO/invalid/export-trie-node-loop.s b/lld/test/MachO/invalid/export-trie-node-loop.s
new file mode 100644
index 0000000..fe99159
--- /dev/null
+++ b/lld/test/MachO/invalid/export-trie-node-loop.s
@@ -0,0 +1,9 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o
+# RUN: not %lld -o %t %t.o %S/Inputs/macho-trie-node-loop 2>&1 | FileCheck %s
+# CHECK: error:
+# CHECK-SAME: /Inputs/macho-trie-node-loop: export trie child node infinite loop
+
+.globl _main
+_main:
+ ret