| //===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file defines a demangler for Rust v0 mangled symbols as specified in |
| // https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "llvm/Demangle/Demangle.h" |
| #include "llvm/Demangle/StringView.h" |
| #include "llvm/Demangle/Utility.h" |
| |
| #include <algorithm> |
| #include <cassert> |
| #include <cstdint> |
| #include <cstring> |
| #include <limits> |
| |
| using namespace llvm; |
| |
| using llvm::itanium_demangle::OutputBuffer; |
| using llvm::itanium_demangle::StringView; |
| using llvm::itanium_demangle::SwapAndRestore; |
| |
| namespace { |
| |
| struct Identifier { |
| StringView Name; |
| bool Punycode; |
| |
| bool empty() const { return Name.empty(); } |
| }; |
| |
| enum class BasicType { |
| Bool, |
| Char, |
| I8, |
| I16, |
| I32, |
| I64, |
| I128, |
| ISize, |
| U8, |
| U16, |
| U32, |
| U64, |
| U128, |
| USize, |
| F32, |
| F64, |
| Str, |
| Placeholder, |
| Unit, |
| Variadic, |
| Never, |
| }; |
| |
| enum class IsInType { |
| No, |
| Yes, |
| }; |
| |
| enum class LeaveGenericsOpen { |
| No, |
| Yes, |
| }; |
| |
| class Demangler { |
| // Maximum recursion level. Used to avoid stack overflow. |
| size_t MaxRecursionLevel; |
| // Current recursion level. |
| size_t RecursionLevel; |
| size_t BoundLifetimes; |
| // Input string that is being demangled with "_R" prefix removed. |
| StringView Input; |
| // Position in the input string. |
| size_t Position; |
| // When true, print methods append the output to the stream. |
| // When false, the output is suppressed. |
| bool Print; |
| // True if an error occurred. |
| bool Error; |
| |
| public: |
| // Demangled output. |
| OutputBuffer Output; |
| |
| Demangler(size_t MaxRecursionLevel = 500); |
| |
| bool demangle(StringView MangledName); |
| |
| private: |
| bool demanglePath(IsInType Type, |
| LeaveGenericsOpen LeaveOpen = LeaveGenericsOpen::No); |
| void demangleImplPath(IsInType InType); |
| void demangleGenericArg(); |
| void demangleType(); |
| void demangleFnSig(); |
| void demangleDynBounds(); |
| void demangleDynTrait(); |
| void demangleOptionalBinder(); |
| void demangleConst(); |
| void demangleConstInt(); |
| void demangleConstBool(); |
| void demangleConstChar(); |
| |
| template <typename Callable> void demangleBackref(Callable Demangler) { |
| uint64_t Backref = parseBase62Number(); |
| if (Error || Backref >= Position) { |
| Error = true; |
| return; |
| } |
| |
| if (!Print) |
| return; |
| |
| SwapAndRestore<size_t> SavePosition(Position, Position); |
| Position = Backref; |
| Demangler(); |
| } |
| |
| Identifier parseIdentifier(); |
| uint64_t parseOptionalBase62Number(char Tag); |
| uint64_t parseBase62Number(); |
| uint64_t parseDecimalNumber(); |
| uint64_t parseHexNumber(StringView &HexDigits); |
| |
| void print(char C); |
| void print(StringView S); |
| void printDecimalNumber(uint64_t N); |
| void printBasicType(BasicType); |
| void printLifetime(uint64_t Index); |
| void printIdentifier(Identifier Ident); |
| |
| char look() const; |
| char consume(); |
| bool consumeIf(char Prefix); |
| |
| bool addAssign(uint64_t &A, uint64_t B); |
| bool mulAssign(uint64_t &A, uint64_t B); |
| }; |
| |
| } // namespace |
| |
| char *llvm::rustDemangle(const char *MangledName, char *Buf, size_t *N, |
| int *Status) { |
| if (MangledName == nullptr || (Buf != nullptr && N == nullptr)) { |
| if (Status != nullptr) |
| *Status = demangle_invalid_args; |
| return nullptr; |
| } |
| |
| // Return early if mangled name doesn't look like a Rust symbol. |
| StringView Mangled(MangledName); |
| if (!Mangled.startsWith("_R")) { |
| if (Status != nullptr) |
| *Status = demangle_invalid_mangled_name; |
| return nullptr; |
| } |
| |
| Demangler D; |
| if (!initializeOutputBuffer(nullptr, nullptr, D.Output, 1024)) { |
| if (Status != nullptr) |
| *Status = demangle_memory_alloc_failure; |
| return nullptr; |
| } |
| |
| if (!D.demangle(Mangled)) { |
| if (Status != nullptr) |
| *Status = demangle_invalid_mangled_name; |
| std::free(D.Output.getBuffer()); |
| return nullptr; |
| } |
| |
| D.Output += '\0'; |
| char *Demangled = D.Output.getBuffer(); |
| size_t DemangledLen = D.Output.getCurrentPosition(); |
| |
| if (Buf != nullptr) { |
| if (DemangledLen <= *N) { |
| std::memcpy(Buf, Demangled, DemangledLen); |
| std::free(Demangled); |
| Demangled = Buf; |
| } else { |
| std::free(Buf); |
| } |
| } |
| |
| if (N != nullptr) |
| *N = DemangledLen; |
| |
| if (Status != nullptr) |
| *Status = demangle_success; |
| |
| return Demangled; |
| } |
| |
| Demangler::Demangler(size_t MaxRecursionLevel) |
| : MaxRecursionLevel(MaxRecursionLevel) {} |
| |
| static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; } |
| |
| static inline bool isHexDigit(const char C) { |
| return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f'); |
| } |
| |
| static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; } |
| |
| static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; } |
| |
| /// Returns true if C is a valid mangled character: <0-9a-zA-Z_>. |
| static inline bool isValid(const char C) { |
| return isDigit(C) || isLower(C) || isUpper(C) || C == '_'; |
| } |
| |
| // Demangles Rust v0 mangled symbol. Returns true when successful, and false |
| // otherwise. The demangled symbol is stored in Output field. It is |
| // responsibility of the caller to free the memory behind the output stream. |
| // |
| // <symbol-name> = "_R" <path> [<instantiating-crate>] |
| bool Demangler::demangle(StringView Mangled) { |
| Position = 0; |
| Error = false; |
| Print = true; |
| RecursionLevel = 0; |
| BoundLifetimes = 0; |
| |
| if (!Mangled.consumeFront("_R")) { |
| Error = true; |
| return false; |
| } |
| size_t Dot = Mangled.find('.'); |
| Input = Mangled.substr(0, Dot); |
| StringView Suffix = Mangled.dropFront(Dot); |
| |
| demanglePath(IsInType::No); |
| |
| if (Position != Input.size()) { |
| SwapAndRestore<bool> SavePrint(Print, false); |
| demanglePath(IsInType::No); |
| } |
| |
| if (Position != Input.size()) |
| Error = true; |
| |
| if (!Suffix.empty()) { |
| print(" ("); |
| print(Suffix); |
| print(")"); |
| } |
| |
| return !Error; |
| } |
| |
| // Demangles a path. InType indicates whether a path is inside a type. When |
| // LeaveOpen is true, a closing `>` after generic arguments is omitted from the |
| // output. Return value indicates whether generics arguments have been left |
| // open. |
| // |
| // <path> = "C" <identifier> // crate root |
| // | "M" <impl-path> <type> // <T> (inherent impl) |
| // | "X" <impl-path> <type> <path> // <T as Trait> (trait impl) |
| // | "Y" <type> <path> // <T as Trait> (trait definition) |
| // | "N" <ns> <path> <identifier> // ...::ident (nested path) |
| // | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args) |
| // | <backref> |
| // <identifier> = [<disambiguator>] <undisambiguated-identifier> |
| // <ns> = "C" // closure |
| // | "S" // shim |
| // | <A-Z> // other special namespaces |
| // | <a-z> // internal namespaces |
| bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) { |
| if (Error || RecursionLevel >= MaxRecursionLevel) { |
| Error = true; |
| return false; |
| } |
| SwapAndRestore<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1); |
| |
| switch (consume()) { |
| case 'C': { |
| parseOptionalBase62Number('s'); |
| printIdentifier(parseIdentifier()); |
| break; |
| } |
| case 'M': { |
| demangleImplPath(InType); |
| print("<"); |
| demangleType(); |
| print(">"); |
| break; |
| } |
| case 'X': { |
| demangleImplPath(InType); |
| print("<"); |
| demangleType(); |
| print(" as "); |
| demanglePath(IsInType::Yes); |
| print(">"); |
| break; |
| } |
| case 'Y': { |
| print("<"); |
| demangleType(); |
| print(" as "); |
| demanglePath(IsInType::Yes); |
| print(">"); |
| break; |
| } |
| case 'N': { |
| char NS = consume(); |
| if (!isLower(NS) && !isUpper(NS)) { |
| Error = true; |
| break; |
| } |
| demanglePath(InType); |
| |
| uint64_t Disambiguator = parseOptionalBase62Number('s'); |
| Identifier Ident = parseIdentifier(); |
| |
| if (isUpper(NS)) { |
| // Special namespaces |
| print("::{"); |
| if (NS == 'C') |
| print("closure"); |
| else if (NS == 'S') |
| print("shim"); |
| else |
| print(NS); |
| if (!Ident.empty()) { |
| print(":"); |
| printIdentifier(Ident); |
| } |
| print('#'); |
| printDecimalNumber(Disambiguator); |
| print('}'); |
| } else { |
| // Implementation internal namespaces. |
| if (!Ident.empty()) { |
| print("::"); |
| printIdentifier(Ident); |
| } |
| } |
| break; |
| } |
| case 'I': { |
| demanglePath(InType); |
| // Omit "::" when in a type, where it is optional. |
| if (InType == IsInType::No) |
| print("::"); |
| print("<"); |
| for (size_t I = 0; !Error && !consumeIf('E'); ++I) { |
| if (I > 0) |
| print(", "); |
| demangleGenericArg(); |
| } |
| if (LeaveOpen == LeaveGenericsOpen::Yes) |
| return true; |
| else |
| print(">"); |
| break; |
| } |
| case 'B': { |
| bool IsOpen = false; |
| demangleBackref([&] { IsOpen = demanglePath(InType, LeaveOpen); }); |
| return IsOpen; |
| } |
| default: |
| Error = true; |
| break; |
| } |
| |
| return false; |
| } |
| |
| // <impl-path> = [<disambiguator>] <path> |
| // <disambiguator> = "s" <base-62-number> |
| void Demangler::demangleImplPath(IsInType InType) { |
| SwapAndRestore<bool> SavePrint(Print, false); |
| parseOptionalBase62Number('s'); |
| demanglePath(InType); |
| } |
| |
| // <generic-arg> = <lifetime> |
| // | <type> |
| // | "K" <const> |
| // <lifetime> = "L" <base-62-number> |
| void Demangler::demangleGenericArg() { |
| if (consumeIf('L')) |
| printLifetime(parseBase62Number()); |
| else if (consumeIf('K')) |
| demangleConst(); |
| else |
| demangleType(); |
| } |
| |
| // <basic-type> = "a" // i8 |
| // | "b" // bool |
| // | "c" // char |
| // | "d" // f64 |
| // | "e" // str |
| // | "f" // f32 |
| // | "h" // u8 |
| // | "i" // isize |
| // | "j" // usize |
| // | "l" // i32 |
| // | "m" // u32 |
| // | "n" // i128 |
| // | "o" // u128 |
| // | "s" // i16 |
| // | "t" // u16 |
| // | "u" // () |
| // | "v" // ... |
| // | "x" // i64 |
| // | "y" // u64 |
| // | "z" // ! |
| // | "p" // placeholder (e.g. for generic params), shown as _ |
| static bool parseBasicType(char C, BasicType &Type) { |
| switch (C) { |
| case 'a': |
| Type = BasicType::I8; |
| return true; |
| case 'b': |
| Type = BasicType::Bool; |
| return true; |
| case 'c': |
| Type = BasicType::Char; |
| return true; |
| case 'd': |
| Type = BasicType::F64; |
| return true; |
| case 'e': |
| Type = BasicType::Str; |
| return true; |
| case 'f': |
| Type = BasicType::F32; |
| return true; |
| case 'h': |
| Type = BasicType::U8; |
| return true; |
| case 'i': |
| Type = BasicType::ISize; |
| return true; |
| case 'j': |
| Type = BasicType::USize; |
| return true; |
| case 'l': |
| Type = BasicType::I32; |
| return true; |
| case 'm': |
| Type = BasicType::U32; |
| return true; |
| case 'n': |
| Type = BasicType::I128; |
| return true; |
| case 'o': |
| Type = BasicType::U128; |
| return true; |
| case 'p': |
| Type = BasicType::Placeholder; |
| return true; |
| case 's': |
| Type = BasicType::I16; |
| return true; |
| case 't': |
| Type = BasicType::U16; |
| return true; |
| case 'u': |
| Type = BasicType::Unit; |
| return true; |
| case 'v': |
| Type = BasicType::Variadic; |
| return true; |
| case 'x': |
| Type = BasicType::I64; |
| return true; |
| case 'y': |
| Type = BasicType::U64; |
| return true; |
| case 'z': |
| Type = BasicType::Never; |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| void Demangler::printBasicType(BasicType Type) { |
| switch (Type) { |
| case BasicType::Bool: |
| print("bool"); |
| break; |
| case BasicType::Char: |
| print("char"); |
| break; |
| case BasicType::I8: |
| print("i8"); |
| break; |
| case BasicType::I16: |
| print("i16"); |
| break; |
| case BasicType::I32: |
| print("i32"); |
| break; |
| case BasicType::I64: |
| print("i64"); |
| break; |
| case BasicType::I128: |
| print("i128"); |
| break; |
| case BasicType::ISize: |
| print("isize"); |
| break; |
| case BasicType::U8: |
| print("u8"); |
| break; |
| case BasicType::U16: |
| print("u16"); |
| break; |
| case BasicType::U32: |
| print("u32"); |
| break; |
| case BasicType::U64: |
| print("u64"); |
| break; |
| case BasicType::U128: |
| print("u128"); |
| break; |
| case BasicType::USize: |
| print("usize"); |
| break; |
| case BasicType::F32: |
| print("f32"); |
| break; |
| case BasicType::F64: |
| print("f64"); |
| break; |
| case BasicType::Str: |
| print("str"); |
| break; |
| case BasicType::Placeholder: |
| print("_"); |
| break; |
| case BasicType::Unit: |
| print("()"); |
| break; |
| case BasicType::Variadic: |
| print("..."); |
| break; |
| case BasicType::Never: |
| print("!"); |
| break; |
| } |
| } |
| |
| // <type> = | <basic-type> |
| // | <path> // named type |
| // | "A" <type> <const> // [T; N] |
| // | "S" <type> // [T] |
| // | "T" {<type>} "E" // (T1, T2, T3, ...) |
| // | "R" [<lifetime>] <type> // &T |
| // | "Q" [<lifetime>] <type> // &mut T |
| // | "P" <type> // *const T |
| // | "O" <type> // *mut T |
| // | "F" <fn-sig> // fn(...) -> ... |
| // | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a |
| // | <backref> // backref |
| void Demangler::demangleType() { |
| if (Error || RecursionLevel >= MaxRecursionLevel) { |
| Error = true; |
| return; |
| } |
| SwapAndRestore<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1); |
| |
| size_t Start = Position; |
| char C = consume(); |
| BasicType Type; |
| if (parseBasicType(C, Type)) |
| return printBasicType(Type); |
| |
| switch (C) { |
| case 'A': |
| print("["); |
| demangleType(); |
| print("; "); |
| demangleConst(); |
| print("]"); |
| break; |
| case 'S': |
| print("["); |
| demangleType(); |
| print("]"); |
| break; |
| case 'T': { |
| print("("); |
| size_t I = 0; |
| for (; !Error && !consumeIf('E'); ++I) { |
| if (I > 0) |
| print(", "); |
| demangleType(); |
| } |
| if (I == 1) |
| print(","); |
| print(")"); |
| break; |
| } |
| case 'R': |
| case 'Q': |
| print('&'); |
| if (consumeIf('L')) { |
| if (auto Lifetime = parseBase62Number()) { |
| printLifetime(Lifetime); |
| print(' '); |
| } |
| } |
| if (C == 'Q') |
| print("mut "); |
| demangleType(); |
| break; |
| case 'P': |
| print("*const "); |
| demangleType(); |
| break; |
| case 'O': |
| print("*mut "); |
| demangleType(); |
| break; |
| case 'F': |
| demangleFnSig(); |
| break; |
| case 'D': |
| demangleDynBounds(); |
| if (consumeIf('L')) { |
| if (auto Lifetime = parseBase62Number()) { |
| print(" + "); |
| printLifetime(Lifetime); |
| } |
| } else { |
| Error = true; |
| } |
| break; |
| case 'B': |
| demangleBackref([&] { demangleType(); }); |
| break; |
| default: |
| Position = Start; |
| demanglePath(IsInType::Yes); |
| break; |
| } |
| } |
| |
| // <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type> |
| // <abi> = "C" |
| // | <undisambiguated-identifier> |
| void Demangler::demangleFnSig() { |
| SwapAndRestore<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes); |
| demangleOptionalBinder(); |
| |
| if (consumeIf('U')) |
| print("unsafe "); |
| |
| if (consumeIf('K')) { |
| print("extern \""); |
| if (consumeIf('C')) { |
| print("C"); |
| } else { |
| Identifier Ident = parseIdentifier(); |
| if (Ident.Punycode) |
| Error = true; |
| for (char C : Ident.Name) { |
| // When mangling ABI string, the "-" is replaced with "_". |
| if (C == '_') |
| C = '-'; |
| print(C); |
| } |
| } |
| print("\" "); |
| } |
| |
| print("fn("); |
| for (size_t I = 0; !Error && !consumeIf('E'); ++I) { |
| if (I > 0) |
| print(", "); |
| demangleType(); |
| } |
| print(")"); |
| |
| if (consumeIf('u')) { |
| // Skip the unit type from the output. |
| } else { |
| print(" -> "); |
| demangleType(); |
| } |
| } |
| |
| // <dyn-bounds> = [<binder>] {<dyn-trait>} "E" |
| void Demangler::demangleDynBounds() { |
| SwapAndRestore<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes); |
| print("dyn "); |
| demangleOptionalBinder(); |
| for (size_t I = 0; !Error && !consumeIf('E'); ++I) { |
| if (I > 0) |
| print(" + "); |
| demangleDynTrait(); |
| } |
| } |
| |
| // <dyn-trait> = <path> {<dyn-trait-assoc-binding>} |
| // <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type> |
| void Demangler::demangleDynTrait() { |
| bool IsOpen = demanglePath(IsInType::Yes, LeaveGenericsOpen::Yes); |
| while (!Error && consumeIf('p')) { |
| if (!IsOpen) { |
| IsOpen = true; |
| print('<'); |
| } else { |
| print(", "); |
| } |
| print(parseIdentifier().Name); |
| print(" = "); |
| demangleType(); |
| } |
| if (IsOpen) |
| print(">"); |
| } |
| |
| // Demangles optional binder and updates the number of bound lifetimes. |
| // |
| // <binder> = "G" <base-62-number> |
| void Demangler::demangleOptionalBinder() { |
| uint64_t Binder = parseOptionalBase62Number('G'); |
| if (Error || Binder == 0) |
| return; |
| |
| // In valid inputs each bound lifetime is referenced later. Referencing a |
| // lifetime requires at least one byte of input. Reject inputs that are too |
| // short to reference all bound lifetimes. Otherwise demangling of invalid |
| // binders could generate excessive amounts of output. |
| if (Binder >= Input.size() - BoundLifetimes) { |
| Error = true; |
| return; |
| } |
| |
| print("for<"); |
| for (size_t I = 0; I != Binder; ++I) { |
| BoundLifetimes += 1; |
| if (I > 0) |
| print(", "); |
| printLifetime(1); |
| } |
| print("> "); |
| } |
| |
| // <const> = <basic-type> <const-data> |
| // | "p" // placeholder |
| // | <backref> |
| void Demangler::demangleConst() { |
| if (Error || RecursionLevel >= MaxRecursionLevel) { |
| Error = true; |
| return; |
| } |
| SwapAndRestore<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1); |
| |
| char C = consume(); |
| BasicType Type; |
| if (parseBasicType(C, Type)) { |
| switch (Type) { |
| case BasicType::I8: |
| case BasicType::I16: |
| case BasicType::I32: |
| case BasicType::I64: |
| case BasicType::I128: |
| case BasicType::ISize: |
| case BasicType::U8: |
| case BasicType::U16: |
| case BasicType::U32: |
| case BasicType::U64: |
| case BasicType::U128: |
| case BasicType::USize: |
| demangleConstInt(); |
| break; |
| case BasicType::Bool: |
| demangleConstBool(); |
| break; |
| case BasicType::Char: |
| demangleConstChar(); |
| break; |
| case BasicType::Placeholder: |
| print('_'); |
| break; |
| default: |
| Error = true; |
| break; |
| } |
| } else if (C == 'B') { |
| demangleBackref([&] { demangleConst(); }); |
| } else { |
| Error = true; |
| } |
| } |
| |
| // <const-data> = ["n"] <hex-number> |
| void Demangler::demangleConstInt() { |
| if (consumeIf('n')) |
| print('-'); |
| |
| StringView HexDigits; |
| uint64_t Value = parseHexNumber(HexDigits); |
| if (HexDigits.size() <= 16) { |
| printDecimalNumber(Value); |
| } else { |
| print("0x"); |
| print(HexDigits); |
| } |
| } |
| |
| // <const-data> = "0_" // false |
| // | "1_" // true |
| void Demangler::demangleConstBool() { |
| StringView HexDigits; |
| parseHexNumber(HexDigits); |
| if (HexDigits == "0") |
| print("false"); |
| else if (HexDigits == "1") |
| print("true"); |
| else |
| Error = true; |
| } |
| |
| /// Returns true if CodePoint represents a printable ASCII character. |
| static bool isAsciiPrintable(uint64_t CodePoint) { |
| return 0x20 <= CodePoint && CodePoint <= 0x7e; |
| } |
| |
| // <const-data> = <hex-number> |
| void Demangler::demangleConstChar() { |
| StringView HexDigits; |
| uint64_t CodePoint = parseHexNumber(HexDigits); |
| if (Error || HexDigits.size() > 6) { |
| Error = true; |
| return; |
| } |
| |
| print("'"); |
| switch (CodePoint) { |
| case '\t': |
| print(R"(\t)"); |
| break; |
| case '\r': |
| print(R"(\r)"); |
| break; |
| case '\n': |
| print(R"(\n)"); |
| break; |
| case '\\': |
| print(R"(\\)"); |
| break; |
| case '"': |
| print(R"(")"); |
| break; |
| case '\'': |
| print(R"(\')"); |
| break; |
| default: |
| if (isAsciiPrintable(CodePoint)) { |
| char C = CodePoint; |
| print(C); |
| } else { |
| print(R"(\u{)"); |
| print(HexDigits); |
| print('}'); |
| } |
| break; |
| } |
| print('\''); |
| } |
| |
| // <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes> |
| Identifier Demangler::parseIdentifier() { |
| bool Punycode = consumeIf('u'); |
| uint64_t Bytes = parseDecimalNumber(); |
| |
| // Underscore resolves the ambiguity when identifier starts with a decimal |
| // digit or another underscore. |
| consumeIf('_'); |
| |
| if (Error || Bytes > Input.size() - Position) { |
| Error = true; |
| return {}; |
| } |
| StringView S = Input.substr(Position, Bytes); |
| Position += Bytes; |
| |
| if (!std::all_of(S.begin(), S.end(), isValid)) { |
| Error = true; |
| return {}; |
| } |
| |
| return {S, Punycode}; |
| } |
| |
| // Parses optional base 62 number. The presence of a number is determined using |
| // Tag. Returns 0 when tag is absent and parsed value + 1 otherwise |
| // |
| // This function is indended for parsing disambiguators and binders which when |
| // not present have their value interpreted as 0, and otherwise as decoded |
| // value + 1. For example for binders, value for "G_" is 1, for "G0_" value is |
| // 2. When "G" is absent value is 0. |
| uint64_t Demangler::parseOptionalBase62Number(char Tag) { |
| if (!consumeIf(Tag)) |
| return 0; |
| |
| uint64_t N = parseBase62Number(); |
| if (Error || !addAssign(N, 1)) |
| return 0; |
| |
| return N; |
| } |
| |
| // Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by |
| // "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1, |
| // "1_" encodes 2, etc. |
| // |
| // <base-62-number> = {<0-9a-zA-Z>} "_" |
| uint64_t Demangler::parseBase62Number() { |
| if (consumeIf('_')) |
| return 0; |
| |
| uint64_t Value = 0; |
| |
| while (true) { |
| uint64_t Digit; |
| char C = consume(); |
| |
| if (C == '_') { |
| break; |
| } else if (isDigit(C)) { |
| Digit = C - '0'; |
| } else if (isLower(C)) { |
| Digit = 10 + (C - 'a'); |
| } else if (isUpper(C)) { |
| Digit = 10 + 26 + (C - 'A'); |
| } else { |
| Error = true; |
| return 0; |
| } |
| |
| if (!mulAssign(Value, 62)) |
| return 0; |
| |
| if (!addAssign(Value, Digit)) |
| return 0; |
| } |
| |
| if (!addAssign(Value, 1)) |
| return 0; |
| |
| return Value; |
| } |
| |
| // Parses a decimal number that had been encoded without any leading zeros. |
| // |
| // <decimal-number> = "0" |
| // | <1-9> {<0-9>} |
| uint64_t Demangler::parseDecimalNumber() { |
| char C = look(); |
| if (!isDigit(C)) { |
| Error = true; |
| return 0; |
| } |
| |
| if (C == '0') { |
| consume(); |
| return 0; |
| } |
| |
| uint64_t Value = 0; |
| |
| while (isDigit(look())) { |
| if (!mulAssign(Value, 10)) { |
| Error = true; |
| return 0; |
| } |
| |
| uint64_t D = consume() - '0'; |
| if (!addAssign(Value, D)) |
| return 0; |
| } |
| |
| return Value; |
| } |
| |
| // Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed |
| // value and stores hex digits in HexDigits. The return value is unspecified if |
| // HexDigits.size() > 16. |
| // |
| // <hex-number> = "0_" |
| // | <1-9a-f> {<0-9a-f>} "_" |
| uint64_t Demangler::parseHexNumber(StringView &HexDigits) { |
| size_t Start = Position; |
| uint64_t Value = 0; |
| |
| if (!isHexDigit(look())) |
| Error = true; |
| |
| if (consumeIf('0')) { |
| if (!consumeIf('_')) |
| Error = true; |
| } else { |
| while (!Error && !consumeIf('_')) { |
| char C = consume(); |
| Value *= 16; |
| if (isDigit(C)) |
| Value += C - '0'; |
| else if ('a' <= C && C <= 'f') |
| Value += 10 + (C - 'a'); |
| else |
| Error = true; |
| } |
| } |
| |
| if (Error) { |
| HexDigits = StringView(); |
| return 0; |
| } |
| |
| size_t End = Position - 1; |
| assert(Start < End); |
| HexDigits = Input.substr(Start, End - Start); |
| return Value; |
| } |
| |
| void Demangler::print(char C) { |
| if (Error || !Print) |
| return; |
| |
| Output += C; |
| } |
| |
| void Demangler::print(StringView S) { |
| if (Error || !Print) |
| return; |
| |
| Output += S; |
| } |
| |
| void Demangler::printDecimalNumber(uint64_t N) { |
| if (Error || !Print) |
| return; |
| |
| Output << N; |
| } |
| |
| // Prints a lifetime. An index 0 always represents an erased lifetime. Indices |
| // starting from 1, are De Bruijn indices, referring to higher-ranked lifetimes |
| // bound by one of the enclosing binders. |
| void Demangler::printLifetime(uint64_t Index) { |
| if (Index == 0) { |
| print("'_"); |
| return; |
| } |
| |
| if (Index - 1 >= BoundLifetimes) { |
| Error = true; |
| return; |
| } |
| |
| uint64_t Depth = BoundLifetimes - Index; |
| print('\''); |
| if (Depth < 26) { |
| char C = 'a' + Depth; |
| print(C); |
| } else { |
| print('z'); |
| printDecimalNumber(Depth - 26 + 1); |
| } |
| } |
| |
| static inline bool decodePunycodeDigit(char C, size_t &Value) { |
| if (isLower(C)) { |
| Value = C - 'a'; |
| return true; |
| } |
| |
| if (isDigit(C)) { |
| Value = 26 + (C - '0'); |
| return true; |
| } |
| |
| return false; |
| } |
| |
| static void removeNullBytes(OutputBuffer &Output, size_t StartIdx) { |
| char *Buffer = Output.getBuffer(); |
| char *Start = Buffer + StartIdx; |
| char *End = Buffer + Output.getCurrentPosition(); |
| Output.setCurrentPosition(std::remove(Start, End, '\0') - Buffer); |
| } |
| |
| // Encodes code point as UTF-8 and stores results in Output. Returns false if |
| // CodePoint is not a valid unicode scalar value. |
| static inline bool encodeUTF8(size_t CodePoint, char *Output) { |
| if (0xD800 <= CodePoint && CodePoint <= 0xDFFF) |
| return false; |
| |
| if (CodePoint <= 0x7F) { |
| Output[0] = CodePoint; |
| return true; |
| } |
| |
| if (CodePoint <= 0x7FF) { |
| Output[0] = 0xC0 | ((CodePoint >> 6) & 0x3F); |
| Output[1] = 0x80 | (CodePoint & 0x3F); |
| return true; |
| } |
| |
| if (CodePoint <= 0xFFFF) { |
| Output[0] = 0xE0 | (CodePoint >> 12); |
| Output[1] = 0x80 | ((CodePoint >> 6) & 0x3F); |
| Output[2] = 0x80 | (CodePoint & 0x3F); |
| return true; |
| } |
| |
| if (CodePoint <= 0x10FFFF) { |
| Output[0] = 0xF0 | (CodePoint >> 18); |
| Output[1] = 0x80 | ((CodePoint >> 12) & 0x3F); |
| Output[2] = 0x80 | ((CodePoint >> 6) & 0x3F); |
| Output[3] = 0x80 | (CodePoint & 0x3F); |
| return true; |
| } |
| |
| return false; |
| } |
| |
| // Decodes string encoded using punycode and appends results to Output. |
| // Returns true if decoding was successful. |
| static bool decodePunycode(StringView Input, OutputBuffer &Output) { |
| size_t OutputSize = Output.getCurrentPosition(); |
| size_t InputIdx = 0; |
| |
| // Rust uses an underscore as a delimiter. |
| size_t DelimiterPos = StringView::npos; |
| for (size_t I = 0; I != Input.size(); ++I) |
| if (Input[I] == '_') |
| DelimiterPos = I; |
| |
| if (DelimiterPos != StringView::npos) { |
| // Copy basic code points before the last delimiter to the output. |
| for (; InputIdx != DelimiterPos; ++InputIdx) { |
| char C = Input[InputIdx]; |
| if (!isValid(C)) |
| return false; |
| // Code points are padded with zeros while decoding is in progress. |
| char UTF8[4] = {C}; |
| Output += StringView(UTF8, UTF8 + 4); |
| } |
| // Skip over the delimiter. |
| ++InputIdx; |
| } |
| |
| size_t Base = 36; |
| size_t Skew = 38; |
| size_t Bias = 72; |
| size_t N = 0x80; |
| size_t TMin = 1; |
| size_t TMax = 26; |
| size_t Damp = 700; |
| |
| auto Adapt = [&](size_t Delta, size_t NumPoints) { |
| Delta /= Damp; |
| Delta += Delta / NumPoints; |
| Damp = 2; |
| |
| size_t K = 0; |
| while (Delta > (Base - TMin) * TMax / 2) { |
| Delta /= Base - TMin; |
| K += Base; |
| } |
| return K + (((Base - TMin + 1) * Delta) / (Delta + Skew)); |
| }; |
| |
| // Main decoding loop. |
| for (size_t I = 0; InputIdx != Input.size(); I += 1) { |
| size_t OldI = I; |
| size_t W = 1; |
| size_t Max = std::numeric_limits<size_t>::max(); |
| for (size_t K = Base; true; K += Base) { |
| if (InputIdx == Input.size()) |
| return false; |
| char C = Input[InputIdx++]; |
| size_t Digit = 0; |
| if (!decodePunycodeDigit(C, Digit)) |
| return false; |
| |
| if (Digit > (Max - I) / W) |
| return false; |
| I += Digit * W; |
| |
| size_t T; |
| if (K <= Bias) |
| T = TMin; |
| else if (K >= Bias + TMax) |
| T = TMax; |
| else |
| T = K - Bias; |
| |
| if (Digit < T) |
| break; |
| |
| if (W > Max / (Base - T)) |
| return false; |
| W *= (Base - T); |
| } |
| size_t NumPoints = (Output.getCurrentPosition() - OutputSize) / 4 + 1; |
| Bias = Adapt(I - OldI, NumPoints); |
| |
| if (I / NumPoints > Max - N) |
| return false; |
| N += I / NumPoints; |
| I = I % NumPoints; |
| |
| // Insert N at position I in the output. |
| char UTF8[4] = {}; |
| if (!encodeUTF8(N, UTF8)) |
| return false; |
| Output.insert(OutputSize + I * 4, UTF8, 4); |
| } |
| |
| removeNullBytes(Output, OutputSize); |
| return true; |
| } |
| |
| void Demangler::printIdentifier(Identifier Ident) { |
| if (Error || !Print) |
| return; |
| |
| if (Ident.Punycode) { |
| if (!decodePunycode(Ident.Name, Output)) |
| Error = true; |
| } else { |
| print(Ident.Name); |
| } |
| } |
| |
| char Demangler::look() const { |
| if (Error || Position >= Input.size()) |
| return 0; |
| |
| return Input[Position]; |
| } |
| |
| char Demangler::consume() { |
| if (Error || Position >= Input.size()) { |
| Error = true; |
| return 0; |
| } |
| |
| return Input[Position++]; |
| } |
| |
| bool Demangler::consumeIf(char Prefix) { |
| if (Error || Position >= Input.size() || Input[Position] != Prefix) |
| return false; |
| |
| Position += 1; |
| return true; |
| } |
| |
| /// Computes A + B. When computation wraps around sets the error and returns |
| /// false. Otherwise assigns the result to A and returns true. |
| bool Demangler::addAssign(uint64_t &A, uint64_t B) { |
| if (A > std::numeric_limits<uint64_t>::max() - B) { |
| Error = true; |
| return false; |
| } |
| |
| A += B; |
| return true; |
| } |
| |
| /// Computes A * B. When computation wraps around sets the error and returns |
| /// false. Otherwise assigns the result to A and returns true. |
| bool Demangler::mulAssign(uint64_t &A, uint64_t B) { |
| if (B != 0 && A > std::numeric_limits<uint64_t>::max() / B) { |
| Error = true; |
| return false; |
| } |
| |
| A *= B; |
| return true; |
| } |