| //===--- DLangDemangle.cpp ------------------------------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| /// |
| /// \file |
| /// This file defines a demangler for the D programming language as specified |
| /// in the ABI specification, available at: |
| /// https://dlang.org/spec/abi.html#name_mangling |
| /// |
| //===----------------------------------------------------------------------===// |
| |
| #include "llvm/Demangle/Demangle.h" |
| #include "llvm/Demangle/StringViewExtras.h" |
| #include "llvm/Demangle/Utility.h" |
| |
| #include <cctype> |
| #include <cstring> |
| #include <limits> |
| #include <string_view> |
| |
| using namespace llvm; |
| using llvm::itanium_demangle::OutputBuffer; |
| using llvm::itanium_demangle::starts_with; |
| |
| namespace { |
| |
| /// Demangle information structure. |
| struct Demangler { |
| /// Initialize the information structure we use to pass around information. |
| /// |
| /// \param Mangled String to demangle. |
| Demangler(std::string_view Mangled); |
| |
| /// Extract and demangle the mangled symbol and append it to the output |
| /// string. |
| /// |
| /// \param Demangled Output buffer to write the demangled name. |
| /// |
| /// \return The remaining string on success or nullptr on failure. |
| /// |
| /// \see https://dlang.org/spec/abi.html#name_mangling . |
| /// \see https://dlang.org/spec/abi.html#MangledName . |
| const char *parseMangle(OutputBuffer *Demangled); |
| |
| private: |
| /// Extract and demangle a given mangled symbol and append it to the output |
| /// string. |
| /// |
| /// \param Demangled output buffer to write the demangled name. |
| /// \param Mangled mangled symbol to be demangled. |
| /// |
| /// \see https://dlang.org/spec/abi.html#name_mangling . |
| /// \see https://dlang.org/spec/abi.html#MangledName . |
| void parseMangle(OutputBuffer *Demangled, std::string_view &Mangled); |
| |
| /// Extract the number from a given string. |
| /// |
| /// \param Mangled string to extract the number. |
| /// \param Ret assigned result value. |
| /// |
| /// \note Ret larger than UINT_MAX is considered a failure. |
| /// |
| /// \see https://dlang.org/spec/abi.html#Number . |
| void decodeNumber(std::string_view &Mangled, unsigned long &Ret); |
| |
| /// Extract the back reference position from a given string. |
| /// |
| /// \param Mangled string to extract the back reference position. |
| /// \param Ret assigned result value. |
| /// |
| /// \return true on success, false on error. |
| /// |
| /// \note Ret is always >= 0 on success, and unspecified on failure |
| /// |
| /// \see https://dlang.org/spec/abi.html#back_ref . |
| /// \see https://dlang.org/spec/abi.html#NumberBackRef . |
| bool decodeBackrefPos(std::string_view &Mangled, long &Ret); |
| |
| /// Extract the symbol pointed by the back reference form a given string. |
| /// |
| /// \param Mangled string to extract the back reference position. |
| /// \param Ret assigned result value. |
| /// |
| /// \return true on success, false on error. |
| /// |
| /// \see https://dlang.org/spec/abi.html#back_ref . |
| bool decodeBackref(std::string_view &Mangled, std::string_view &Ret); |
| |
| /// Extract and demangle backreferenced symbol from a given mangled symbol |
| /// and append it to the output string. |
| /// |
| /// \param Demangled output buffer to write the demangled name. |
| /// \param Mangled mangled symbol to be demangled. |
| /// |
| /// \see https://dlang.org/spec/abi.html#back_ref . |
| /// \see https://dlang.org/spec/abi.html#IdentifierBackRef . |
| void parseSymbolBackref(OutputBuffer *Demangled, std::string_view &Mangled); |
| |
| /// Extract and demangle backreferenced type from a given mangled symbol |
| /// and append it to the output string. |
| /// |
| /// \param Mangled mangled symbol to be demangled. |
| /// |
| /// \see https://dlang.org/spec/abi.html#back_ref . |
| /// \see https://dlang.org/spec/abi.html#TypeBackRef . |
| void parseTypeBackref(std::string_view &Mangled); |
| |
| /// Check whether it is the beginning of a symbol name. |
| /// |
| /// \param Mangled string to extract the symbol name. |
| /// |
| /// \return true on success, false otherwise. |
| /// |
| /// \see https://dlang.org/spec/abi.html#SymbolName . |
| bool isSymbolName(std::string_view Mangled); |
| |
| /// Extract and demangle an identifier from a given mangled symbol append it |
| /// to the output string. |
| /// |
| /// \param Demangled Output buffer to write the demangled name. |
| /// \param Mangled Mangled symbol to be demangled. |
| /// |
| /// \see https://dlang.org/spec/abi.html#SymbolName . |
| void parseIdentifier(OutputBuffer *Demangled, std::string_view &Mangled); |
| |
| /// Extract and demangle the plain identifier from a given mangled symbol and |
| /// prepend/append it to the output string, with a special treatment for some |
| /// magic compiler generated symbols. |
| /// |
| /// \param Demangled Output buffer to write the demangled name. |
| /// \param Mangled Mangled symbol to be demangled. |
| /// \param Len Length of the mangled symbol name. |
| /// |
| /// \see https://dlang.org/spec/abi.html#LName . |
| void parseLName(OutputBuffer *Demangled, std::string_view &Mangled, |
| unsigned long Len); |
| |
| /// Extract and demangle the qualified symbol from a given mangled symbol |
| /// append it to the output string. |
| /// |
| /// \param Demangled Output buffer to write the demangled name. |
| /// \param Mangled Mangled symbol to be demangled. |
| /// |
| /// \see https://dlang.org/spec/abi.html#QualifiedName . |
| void parseQualified(OutputBuffer *Demangled, std::string_view &Mangled); |
| |
| /// Extract and demangle a type from a given mangled symbol append it to |
| /// the output string. |
| /// |
| /// \param Mangled mangled symbol to be demangled. |
| /// |
| /// \return true on success, false on error. |
| /// |
| /// \see https://dlang.org/spec/abi.html#Type . |
| bool parseType(std::string_view &Mangled); |
| |
| /// An immutable view of the string we are demangling. |
| const std::string_view Str; |
| /// The index of the last back reference. |
| int LastBackref; |
| }; |
| |
| } // namespace |
| |
| void Demangler::decodeNumber(std::string_view &Mangled, unsigned long &Ret) { |
| // Clear Mangled if trying to extract something that isn't a digit. |
| if (Mangled.empty()) { |
| Mangled = {}; |
| return; |
| } |
| |
| if (!std::isdigit(Mangled.front())) { |
| Mangled = {}; |
| return; |
| } |
| |
| unsigned long Val = 0; |
| |
| do { |
| unsigned long Digit = Mangled[0] - '0'; |
| |
| // Check for overflow. |
| if (Val > (std::numeric_limits<unsigned int>::max() - Digit) / 10) { |
| Mangled = {}; |
| return; |
| } |
| |
| Val = Val * 10 + Digit; |
| Mangled.remove_prefix(1); |
| } while (!Mangled.empty() && std::isdigit(Mangled.front())); |
| |
| if (Mangled.empty()) { |
| Mangled = {}; |
| return; |
| } |
| |
| Ret = Val; |
| } |
| |
| bool Demangler::decodeBackrefPos(std::string_view &Mangled, long &Ret) { |
| // Return nullptr if trying to extract something that isn't a digit |
| if (Mangled.empty()) { |
| Mangled = {}; |
| return false; |
| } |
| // Any identifier or non-basic type that has been emitted to the mangled |
| // symbol before will not be emitted again, but is referenced by a special |
| // sequence encoding the relative position of the original occurrence in the |
| // mangled symbol name. |
| // Numbers in back references are encoded with base 26 by upper case letters |
| // A-Z for higher digits but lower case letters a-z for the last digit. |
| // NumberBackRef: |
| // [a-z] |
| // [A-Z] NumberBackRef |
| // ^ |
| unsigned long Val = 0; |
| |
| while (!Mangled.empty() && std::isalpha(Mangled.front())) { |
| // Check for overflow |
| if (Val > (std::numeric_limits<unsigned long>::max() - 25) / 26) |
| break; |
| |
| Val *= 26; |
| |
| if (Mangled[0] >= 'a' && Mangled[0] <= 'z') { |
| Val += Mangled[0] - 'a'; |
| if ((long)Val <= 0) |
| break; |
| Ret = Val; |
| Mangled.remove_prefix(1); |
| return true; |
| } |
| |
| Val += Mangled[0] - 'A'; |
| Mangled.remove_prefix(1); |
| } |
| |
| Mangled = {}; |
| return false; |
| } |
| |
| bool Demangler::decodeBackref(std::string_view &Mangled, |
| std::string_view &Ret) { |
| assert(!Mangled.empty() && Mangled.front() == 'Q' && |
| "Invalid back reference!"); |
| Ret = {}; |
| |
| // Position of 'Q' |
| const char *Qpos = Mangled.data(); |
| long RefPos; |
| Mangled.remove_prefix(1); |
| |
| if (!decodeBackrefPos(Mangled, RefPos)) { |
| Mangled = {}; |
| return false; |
| } |
| |
| if (RefPos > Qpos - Str.data()) { |
| Mangled = {}; |
| return false; |
| } |
| |
| // Set the position of the back reference. |
| Ret = Qpos - RefPos; |
| |
| return true; |
| } |
| |
| void Demangler::parseSymbolBackref(OutputBuffer *Demangled, |
| std::string_view &Mangled) { |
| // An identifier back reference always points to a digit 0 to 9. |
| // IdentifierBackRef: |
| // Q NumberBackRef |
| // ^ |
| unsigned long Len; |
| |
| // Get position of the back reference |
| std::string_view Backref; |
| if (!decodeBackref(Mangled, Backref)) { |
| Mangled = {}; |
| return; |
| } |
| |
| // Must point to a simple identifier |
| decodeNumber(Backref, Len); |
| if (Backref.empty() || Backref.length() < Len) { |
| Mangled = {}; |
| return; |
| } |
| |
| parseLName(Demangled, Backref, Len); |
| if (Backref.empty()) |
| Mangled = {}; |
| } |
| |
| void Demangler::parseTypeBackref(std::string_view &Mangled) { |
| // A type back reference always points to a letter. |
| // TypeBackRef: |
| // Q NumberBackRef |
| // ^ |
| |
| // If we appear to be moving backwards through the mangle string, then |
| // bail as this may be a recursive back reference. |
| if (Mangled.data() - Str.data() >= LastBackref) { |
| Mangled = {}; |
| return; |
| } |
| |
| int SaveRefPos = LastBackref; |
| LastBackref = Mangled.data() - Str.data(); |
| |
| // Get position of the back reference. |
| std::string_view Backref; |
| if (!decodeBackref(Mangled, Backref)) { |
| Mangled = {}; |
| return; |
| } |
| |
| // Can't decode back reference. |
| if (Backref.empty()) { |
| Mangled = {}; |
| return; |
| } |
| |
| // TODO: Add support for function type back references. |
| if (!parseType(Backref)) |
| Mangled = {}; |
| |
| LastBackref = SaveRefPos; |
| |
| if (Backref.empty()) |
| Mangled = {}; |
| } |
| |
| bool Demangler::isSymbolName(std::string_view Mangled) { |
| long Ret; |
| const char *Qref = Mangled.data(); |
| |
| if (std::isdigit(Mangled.front())) |
| return true; |
| |
| // TODO: Handle template instances. |
| |
| if (Mangled.front() != 'Q') |
| return false; |
| |
| Mangled.remove_prefix(1); |
| bool Valid = decodeBackrefPos(Mangled, Ret); |
| if (!Valid || Ret > Qref - Str.data()) |
| return false; |
| |
| return std::isdigit(Qref[-Ret]); |
| } |
| |
| void Demangler::parseMangle(OutputBuffer *Demangled, |
| std::string_view &Mangled) { |
| // A D mangled symbol is comprised of both scope and type information. |
| // MangleName: |
| // _D QualifiedName Type |
| // _D QualifiedName Z |
| // ^ |
| // The caller should have guaranteed that the start pointer is at the |
| // above location. |
| // Note that type is never a function type, but only the return type of |
| // a function or the type of a variable. |
| Mangled.remove_prefix(2); |
| |
| parseQualified(Demangled, Mangled); |
| |
| if (Mangled.empty()) { |
| Mangled = {}; |
| return; |
| } |
| |
| // Artificial symbols end with 'Z' and have no type. |
| if (Mangled.front() == 'Z') { |
| Mangled.remove_prefix(1); |
| } else if (!parseType(Mangled)) |
| Mangled = {}; |
| } |
| |
| void Demangler::parseQualified(OutputBuffer *Demangled, |
| std::string_view &Mangled) { |
| // Qualified names are identifiers separated by their encoded length. |
| // Nested functions also encode their argument types without specifying |
| // what they return. |
| // QualifiedName: |
| // SymbolFunctionName |
| // SymbolFunctionName QualifiedName |
| // ^ |
| // SymbolFunctionName: |
| // SymbolName |
| // SymbolName TypeFunctionNoReturn |
| // SymbolName M TypeFunctionNoReturn |
| // SymbolName M TypeModifiers TypeFunctionNoReturn |
| // The start pointer should be at the above location. |
| |
| // Whether it has more than one symbol |
| size_t NotFirst = false; |
| do { |
| // Skip over anonymous symbols. |
| if (!Mangled.empty() && Mangled.front() == '0') { |
| do |
| Mangled.remove_prefix(1); |
| while (!Mangled.empty() && Mangled.front() == '0'); |
| |
| continue; |
| } |
| |
| if (NotFirst) |
| *Demangled << '.'; |
| NotFirst = true; |
| |
| parseIdentifier(Demangled, Mangled); |
| } while (!Mangled.empty() && isSymbolName(Mangled)); |
| } |
| |
| void Demangler::parseIdentifier(OutputBuffer *Demangled, |
| std::string_view &Mangled) { |
| if (Mangled.empty()) { |
| Mangled = {}; |
| return; |
| } |
| |
| if (Mangled.front() == 'Q') |
| return parseSymbolBackref(Demangled, Mangled); |
| |
| // TODO: Parse lengthless template instances. |
| |
| unsigned long Len; |
| decodeNumber(Mangled, Len); |
| |
| if (Mangled.empty()) { |
| Mangled = {}; |
| return; |
| } |
| if (!Len || Mangled.length() < Len) { |
| Mangled = {}; |
| return; |
| } |
| |
| // TODO: Parse template instances with a length prefix. |
| |
| // There can be multiple different declarations in the same function that |
| // have the same mangled name. To make the mangled names unique, a fake |
| // parent in the form `__Sddd' is added to the symbol. |
| if (Len >= 4 && starts_with(Mangled, "__S")) { |
| const size_t SuffixLen = Mangled.length() - Len; |
| std::string_view P = Mangled.substr(3); |
| while (P.length() > SuffixLen && std::isdigit(P.front())) |
| P.remove_prefix(1); |
| if (P.length() == SuffixLen) { |
| // Skip over the fake parent. |
| Mangled.remove_prefix(Len); |
| return parseIdentifier(Demangled, Mangled); |
| } |
| |
| // Else demangle it as a plain identifier. |
| } |
| |
| parseLName(Demangled, Mangled, Len); |
| } |
| |
| bool Demangler::parseType(std::string_view &Mangled) { |
| if (Mangled.empty()) { |
| Mangled = {}; |
| return false; |
| } |
| |
| switch (Mangled.front()) { |
| // TODO: Parse type qualifiers. |
| // TODO: Parse function types. |
| // TODO: Parse compound types. |
| // TODO: Parse delegate types. |
| // TODO: Parse tuple types. |
| |
| // Basic types. |
| case 'i': |
| Mangled.remove_prefix(1); |
| // TODO: Add type name dumping |
| return true; |
| |
| // TODO: Add support for the rest of the basic types. |
| |
| // Back referenced type. |
| case 'Q': { |
| parseTypeBackref(Mangled); |
| return true; |
| } |
| |
| default: // unhandled. |
| Mangled = {}; |
| return false; |
| } |
| } |
| |
| void Demangler::parseLName(OutputBuffer *Demangled, std::string_view &Mangled, |
| unsigned long Len) { |
| switch (Len) { |
| case 6: |
| if (starts_with(Mangled, "__initZ")) { |
| // The static initializer for a given symbol. |
| Demangled->prepend("initializer for "); |
| Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1); |
| Mangled.remove_prefix(Len); |
| return; |
| } |
| if (starts_with(Mangled, "__vtblZ")) { |
| // The vtable symbol for a given class. |
| Demangled->prepend("vtable for "); |
| Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1); |
| Mangled.remove_prefix(Len); |
| return; |
| } |
| break; |
| |
| case 7: |
| if (starts_with(Mangled, "__ClassZ")) { |
| // The classinfo symbol for a given class. |
| Demangled->prepend("ClassInfo for "); |
| Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1); |
| Mangled.remove_prefix(Len); |
| return; |
| } |
| break; |
| |
| case 11: |
| if (starts_with(Mangled, "__InterfaceZ")) { |
| // The interface symbol for a given class. |
| Demangled->prepend("Interface for "); |
| Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1); |
| Mangled.remove_prefix(Len); |
| return; |
| } |
| break; |
| |
| case 12: |
| if (starts_with(Mangled, "__ModuleInfoZ")) { |
| // The ModuleInfo symbol for a given module. |
| Demangled->prepend("ModuleInfo for "); |
| Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1); |
| Mangled.remove_prefix(Len); |
| return; |
| } |
| break; |
| } |
| |
| *Demangled << Mangled.substr(0, Len); |
| Mangled.remove_prefix(Len); |
| } |
| |
| Demangler::Demangler(std::string_view Mangled) |
| : Str(Mangled), LastBackref(Mangled.length()) {} |
| |
| const char *Demangler::parseMangle(OutputBuffer *Demangled) { |
| std::string_view M(this->Str); |
| parseMangle(Demangled, M); |
| return M.data(); |
| } |
| |
| char *llvm::dlangDemangle(std::string_view MangledName) { |
| if (MangledName.empty() || !starts_with(MangledName, "_D")) |
| return nullptr; |
| |
| OutputBuffer Demangled; |
| if (MangledName == "_Dmain") { |
| Demangled << "D main"; |
| } else { |
| |
| Demangler D(MangledName); |
| const char *M = D.parseMangle(&Demangled); |
| |
| // Check that the entire symbol was successfully demangled. |
| if (M == nullptr || *M != '\0') { |
| std::free(Demangled.getBuffer()); |
| return nullptr; |
| } |
| } |
| |
| // OutputBuffer's internal buffer is not null terminated and therefore we need |
| // to add it to comply with C null terminated strings. |
| if (Demangled.getCurrentPosition() > 0) { |
| Demangled << '\0'; |
| Demangled.setCurrentPosition(Demangled.getCurrentPosition() - 1); |
| return Demangled.getBuffer(); |
| } |
| |
| std::free(Demangled.getBuffer()); |
| return nullptr; |
| } |