| //===----------------------------------------------------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "MachOTrie.h" |
| |
| #include "lldb/Utility/DataExtractor.h" |
| #include "lldb/Utility/Flags.h" |
| |
| #include "llvm/ADT/StringRef.h" |
| #include "llvm/BinaryFormat/MachO.h" |
| |
| #include <cstdio> |
| |
| using namespace lldb; |
| using namespace lldb_private; |
| using namespace llvm::MachO; |
| |
| /// Upper bound on the length of a symbol name assembled from export-trie edge |
| /// labels. A corrupt trie can encode an edge label whose terminator is far |
| /// away in the trie data, so a single label is many megabytes long; appending |
| /// it to the running name would otherwise request an unbounded allocation. No |
| /// legitimate symbol name comes close to this size. Also 1 MiB is the |
| /// symbol length limit in ld. |
| static constexpr size_t kMaxTrieSymbolNameLength = 1 << 20; // 1 MiB |
| |
| void TrieEntry::Dump() const { |
| printf("0x%16.16llx 0x%16.16llx 0x%16.16llx \"%s\"", |
| static_cast<unsigned long long>(address), |
| static_cast<unsigned long long>(flags), |
| static_cast<unsigned long long>(other), name.GetCString()); |
| if (import_name) |
| printf(" -> \"%s\"\n", import_name.GetCString()); |
| else |
| printf("\n"); |
| } |
| |
| void TrieEntryWithOffset::Dump(uint32_t idx) const { |
| printf("[%3u] 0x%16.16llx: ", idx, |
| static_cast<unsigned long long>(nodeOffset)); |
| entry.Dump(); |
| } |
| |
| namespace { |
| |
| bool ParseTrieEntriesImpl(DataExtractor &data, lldb::offset_t offset, |
| const bool is_arm, addr_t text_seg_base_addr, |
| std::string &prefix, |
| std::set<lldb::addr_t> &resolver_addresses, |
| std::vector<TrieEntryWithOffset> &reexports, |
| std::vector<TrieEntryWithOffset> &ext_symbols, |
| std::set<lldb::offset_t> &visited_nodes) { |
| if (!data.ValidOffset(offset)) |
| return true; |
| |
| // Every node in a well-formed trie is reached by exactly one path, so a node |
| // offset seen twice means the trie is corrupt. |
| if (!visited_nodes.insert(offset).second) |
| return false; |
| |
| // Terminal node -- end of a branch, possibly add this to |
| // the symbol table or resolver table. |
| const uint64_t terminalSize = data.GetULEB128(&offset); |
| lldb::offset_t children_offset = offset + terminalSize; |
| if (terminalSize != 0) { |
| TrieEntryWithOffset e(offset); |
| e.entry.flags = data.GetULEB128(&offset); |
| const char *import_name = nullptr; |
| if (e.entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) { |
| e.entry.address = 0; |
| e.entry.other = data.GetULEB128(&offset); // dylib ordinal |
| import_name = data.GetCStr(&offset); |
| } else { |
| e.entry.address = data.GetULEB128(&offset); |
| if (text_seg_base_addr != LLDB_INVALID_ADDRESS) |
| e.entry.address += text_seg_base_addr; |
| if (e.entry.flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) { |
| e.entry.other = data.GetULEB128(&offset); |
| uint64_t resolver_addr = e.entry.other; |
| if (text_seg_base_addr != LLDB_INVALID_ADDRESS) |
| resolver_addr += text_seg_base_addr; |
| if (is_arm) |
| resolver_addr &= THUMB_ADDRESS_BIT_MASK; |
| resolver_addresses.insert(resolver_addr); |
| } else |
| e.entry.other = 0; |
| } |
| bool add_this_entry = false; |
| if (Flags(e.entry.flags).Test(EXPORT_SYMBOL_FLAGS_REEXPORT) && |
| import_name && import_name[0]) { |
| // add symbols that are reexport symbols with a valid import name. |
| add_this_entry = true; |
| } else if (e.entry.flags == 0 && |
| (import_name == nullptr || import_name[0] == '\0')) { |
| // add externally visible symbols, in case the nlist record has |
| // been stripped/omitted. |
| add_this_entry = true; |
| } |
| if (add_this_entry) { |
| if (prefix.size() > 1) { |
| // Skip the leading '_' |
| e.entry.name.SetString(llvm::StringRef(prefix).drop_front()); |
| } |
| if (import_name) { |
| // Skip the leading '_' |
| e.entry.import_name.SetCString(import_name + 1); |
| } |
| if (Flags(e.entry.flags).Test(EXPORT_SYMBOL_FLAGS_REEXPORT)) { |
| reexports.push_back(e); |
| } else { |
| if (is_arm && (e.entry.address & 1)) { |
| e.entry.flags |= TRIE_SYMBOL_IS_THUMB; |
| e.entry.address &= THUMB_ADDRESS_BIT_MASK; |
| } |
| ext_symbols.push_back(e); |
| } |
| } |
| } |
| |
| const uint8_t childrenCount = data.GetU8(&children_offset); |
| for (uint8_t i = 0; i < childrenCount; ++i) { |
| const char *cstr = data.GetCStr(&children_offset); |
| if (!cstr) |
| return false; // Corrupt data |
| if (prefix.size() + llvm::StringRef(cstr).size() > kMaxTrieSymbolNameLength) |
| return false; // Corrupt data: implausibly long symbol name. |
| const size_t prevSize = prefix.size(); |
| prefix.append(cstr); |
| lldb::offset_t childNodeOffset = data.GetULEB128(&children_offset); |
| // A child offset of 0 points back at the root; like any other repeated |
| // offset it is a cycle, which ParseTrieEntriesImpl rejects as corrupt. |
| if (!ParseTrieEntriesImpl(data, childNodeOffset, is_arm, text_seg_base_addr, |
| prefix, resolver_addresses, reexports, |
| ext_symbols, visited_nodes)) |
| return false; |
| prefix.resize(prevSize); |
| } |
| return true; |
| } |
| |
| } // namespace |
| |
| bool lldb_private::ParseTrieEntries( |
| DataExtractor &data, const bool is_arm, lldb::addr_t text_seg_base_addr, |
| std::set<lldb::addr_t> &resolver_addresses, |
| std::vector<TrieEntryWithOffset> &reexports, |
| std::vector<TrieEntryWithOffset> &ext_symbols) { |
| lldb::offset_t offset = 0; |
| std::set<lldb::offset_t> visited_nodes; |
| std::string prefix; |
| return ParseTrieEntriesImpl(data, offset, is_arm, text_seg_base_addr, prefix, |
| resolver_addresses, reexports, ext_symbols, |
| visited_nodes); |
| } |