| //===--- Symbol.h ------------------------------------------------*- C++-*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_H |
| #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_H |
| |
| #include "SymbolID.h" |
| #include "SymbolLocation.h" |
| #include "SymbolOrigin.h" |
| #include "clang/Index/IndexSymbol.h" |
| #include "llvm/ADT/StringRef.h" |
| #include "llvm/Support/StringSaver.h" |
| |
| namespace clang { |
| namespace clangd { |
| |
| /// The class presents a C++ symbol, e.g. class, function. |
| /// |
| /// WARNING: Symbols do not own much of their underlying data - typically |
| /// strings are owned by a SymbolSlab. They should be treated as non-owning |
| /// references. Copies are shallow. |
| /// |
| /// When adding new unowned data fields to Symbol, remember to update: |
| /// - SymbolSlab::Builder in Index.cpp, to copy them to the slab's storage. |
| /// - mergeSymbol in Merge.cpp, to properly combine two Symbols. |
| /// |
| /// A fully documented symbol can be split as: |
| /// size_type std::map<k, t>::count(const K& key) const |
| /// | Return | Scope |Name| Signature | |
| /// We split up these components to allow display flexibility later. |
| struct Symbol { |
| /// The ID of the symbol. |
| SymbolID ID; |
| /// The symbol information, like symbol kind. |
| index::SymbolInfo SymInfo = index::SymbolInfo(); |
| /// The unqualified name of the symbol, e.g. "bar" (for ns::bar). |
| llvm::StringRef Name; |
| /// The containing namespace. e.g. "" (global), "ns::" (top-level namespace). |
| llvm::StringRef Scope; |
| /// The location of the symbol's definition, if one was found. |
| /// This just covers the symbol name (e.g. without class/function body). |
| SymbolLocation Definition; |
| /// The location of the preferred declaration of the symbol. |
| /// This just covers the symbol name. |
| /// This may be the same as Definition. |
| /// |
| /// A C++ symbol may have multiple declarations, and we pick one to prefer. |
| /// * For classes, the canonical declaration should be the definition. |
| /// * For non-inline functions, the canonical declaration typically appears |
| /// in the ".h" file corresponding to the definition. |
| SymbolLocation CanonicalDeclaration; |
| /// The number of translation units that reference this symbol from their main |
| /// file. This number is only meaningful if aggregated in an index. |
| unsigned References = 0; |
| /// Where this symbol came from. Usually an index provides a constant value. |
| SymbolOrigin Origin = SymbolOrigin::Unknown; |
| /// A brief description of the symbol that can be appended in the completion |
| /// candidate list. For example, "(X x, Y y) const" is a function signature. |
| /// Only set when the symbol is indexed for completion. |
| llvm::StringRef Signature; |
| /// Argument list in human-readable format, will be displayed to help |
| /// disambiguate between different specializations of a template. Empty for |
| /// non-specializations. Example: "<int, bool, 3>" |
| llvm::StringRef TemplateSpecializationArgs; |
| /// What to insert when completing this symbol, after the symbol name. |
| /// This is in LSP snippet syntax (e.g. "({$0})" for a no-args function). |
| /// (When snippets are disabled, the symbol name alone is used). |
| /// Only set when the symbol is indexed for completion. |
| llvm::StringRef CompletionSnippetSuffix; |
| /// Documentation including comment for the symbol declaration. |
| llvm::StringRef Documentation; |
| /// Type when this symbol is used in an expression. (Short display form). |
| /// e.g. return type of a function, or type of a variable. |
| /// Only set when the symbol is indexed for completion. |
| llvm::StringRef ReturnType; |
| |
| /// Raw representation of the OpaqueType of the symbol, used for scoring |
| /// purposes. |
| /// Only set when the symbol is indexed for completion. |
| llvm::StringRef Type; |
| |
| struct IncludeHeaderWithReferences { |
| IncludeHeaderWithReferences() = default; |
| |
| IncludeHeaderWithReferences(llvm::StringRef IncludeHeader, |
| unsigned References) |
| : IncludeHeader(IncludeHeader), References(References) {} |
| |
| /// This can be either a URI of the header to be #include'd |
| /// for this symbol, or a literal header quoted with <> or "" that is |
| /// suitable to be included directly. When it is a URI, the exact #include |
| /// path needs to be calculated according to the URI scheme. |
| /// |
| /// Note that the include header is a canonical include for the symbol and |
| /// can be different from FileURI in the CanonicalDeclaration. |
| llvm::StringRef IncludeHeader = ""; |
| /// The number of translation units that reference this symbol and include |
| /// this header. This number is only meaningful if aggregated in an index. |
| unsigned References = 0; |
| }; |
| /// One Symbol can potentially be included via different headers. |
| /// - If we haven't seen a definition, this covers all declarations. |
| /// - If we have seen a definition, this covers declarations visible from |
| /// any definition. |
| /// Only set when the symbol is indexed for completion. |
| llvm::SmallVector<IncludeHeaderWithReferences, 1> IncludeHeaders; |
| |
| enum SymbolFlag : uint8_t { |
| None = 0, |
| /// Whether or not this symbol is meant to be used for the code completion. |
| /// See also isIndexedForCodeCompletion(). |
| /// Note that we don't store completion information (signature, snippet, |
| /// type, includes) if the symbol is not indexed for code completion. |
| IndexedForCodeCompletion = 1 << 0, |
| /// Indicates if the symbol is deprecated. |
| Deprecated = 1 << 1, |
| /// Symbol is an implementation detail. |
| ImplementationDetail = 1 << 2, |
| /// Symbol is visible to other files (not e.g. a static helper function). |
| VisibleOutsideFile = 1 << 3, |
| }; |
| |
| SymbolFlag Flags = SymbolFlag::None; |
| /// FIXME: also add deprecation message and fixit? |
| }; |
| |
| inline Symbol::SymbolFlag operator|(Symbol::SymbolFlag A, |
| Symbol::SymbolFlag B) { |
| return static_cast<Symbol::SymbolFlag>(static_cast<uint8_t>(A) | |
| static_cast<uint8_t>(B)); |
| } |
| inline Symbol::SymbolFlag &operator|=(Symbol::SymbolFlag &A, |
| Symbol::SymbolFlag B) { |
| return A = A | B; |
| } |
| |
| llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Symbol &S); |
| llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, Symbol::SymbolFlag); |
| |
| /// Invokes Callback with each StringRef& contained in the Symbol. |
| /// Useful for deduplicating backing strings. |
| template <typename Callback> void visitStrings(Symbol &S, const Callback &CB) { |
| CB(S.Name); |
| CB(S.Scope); |
| CB(S.TemplateSpecializationArgs); |
| CB(S.Signature); |
| CB(S.CompletionSnippetSuffix); |
| CB(S.Documentation); |
| CB(S.ReturnType); |
| CB(S.Type); |
| auto RawCharPointerCB = [&CB](const char *&P) { |
| llvm::StringRef S(P); |
| CB(S); |
| assert(!S.data()[S.size()] && "Visited StringRef must be null-terminated"); |
| P = S.data(); |
| }; |
| RawCharPointerCB(S.CanonicalDeclaration.FileURI); |
| RawCharPointerCB(S.Definition.FileURI); |
| |
| for (auto &Include : S.IncludeHeaders) |
| CB(Include.IncludeHeader); |
| } |
| |
| /// Computes query-independent quality score for a Symbol. |
| /// This currently falls in the range [1, ln(#indexed documents)]. |
| /// FIXME: this should probably be split into symbol -> signals |
| /// and signals -> score, so it can be reused for Sema completions. |
| float quality(const Symbol &S); |
| |
| /// An immutable symbol container that stores a set of symbols. |
| /// The container will maintain the lifetime of the symbols. |
| class SymbolSlab { |
| public: |
| using const_iterator = std::vector<Symbol>::const_iterator; |
| using iterator = const_iterator; |
| using value_type = Symbol; |
| |
| SymbolSlab() = default; |
| |
| const_iterator begin() const { return Symbols.begin(); } |
| const_iterator end() const { return Symbols.end(); } |
| const_iterator find(const SymbolID &SymID) const; |
| |
| using size_type = size_t; |
| size_type size() const { return Symbols.size(); } |
| bool empty() const { return Symbols.empty(); } |
| // Estimates the total memory usage. |
| size_t bytes() const { |
| return sizeof(*this) + Arena.getTotalMemory() + |
| Symbols.capacity() * sizeof(Symbol); |
| } |
| |
| /// SymbolSlab::Builder is a mutable container that can 'freeze' to |
| /// SymbolSlab. The frozen SymbolSlab will use less memory. |
| class Builder { |
| public: |
| Builder() : UniqueStrings(Arena) {} |
| |
| /// Adds a symbol, overwriting any existing one with the same ID. |
| /// This is a deep copy: underlying strings will be owned by the slab. |
| void insert(const Symbol &S); |
| |
| /// Removes the symbol with an ID, if it exists. |
| void erase(const SymbolID &ID) { Symbols.erase(ID); } |
| |
| /// Returns the symbol with an ID, if it exists. Valid until insert/remove. |
| const Symbol *find(const SymbolID &ID) { |
| auto I = Symbols.find(ID); |
| return I == Symbols.end() ? nullptr : &I->second; |
| } |
| |
| /// Consumes the builder to finalize the slab. |
| SymbolSlab build() &&; |
| |
| private: |
| llvm::BumpPtrAllocator Arena; |
| /// Intern table for strings. Contents are on the arena. |
| llvm::UniqueStringSaver UniqueStrings; |
| /// Values are indices into Symbols vector. |
| llvm::DenseMap<SymbolID, Symbol> Symbols; |
| }; |
| |
| private: |
| SymbolSlab(llvm::BumpPtrAllocator Arena, std::vector<Symbol> Symbols) |
| : Arena(std::move(Arena)), Symbols(std::move(Symbols)) {} |
| |
| llvm::BumpPtrAllocator Arena; // Owns Symbol data that the Symbols do not. |
| std::vector<Symbol> Symbols; // Sorted by SymbolID to allow lookup. |
| }; |
| |
| llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const SymbolSlab &Slab); |
| |
| } // namespace clangd |
| } // namespace clang |
| |
| #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_SYMBOL_H |