blob: 822f3cb9b956304f4bb5c9eb158e8f7e4d168672 [file] [log] [blame]
//===--- SymbolCollector.cpp -------------------------------------*- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "SymbolCollector.h"
#include "AST.h"
#include "CodeComplete.h"
#include "CodeCompletionStrings.h"
#include "ExpectedTypes.h"
#include "SourceCode.h"
#include "URI.h"
#include "index/CanonicalIncludes.h"
#include "index/Relation.h"
#include "index/SymbolID.h"
#include "index/SymbolLocation.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclBase.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/DeclarationName.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Index/IndexSymbol.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/Token.h"
#include "clang/Tooling/Inclusions/HeaderAnalysis.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include <optional>
namespace clang {
namespace clangd {
namespace {
/// If \p ND is a template specialization, returns the described template.
/// Otherwise, returns \p ND.
const NamedDecl &getTemplateOrThis(const NamedDecl &ND) {
if (auto *T = ND.getDescribedTemplate())
return *T;
return ND;
}
// Checks whether the decl is a private symbol in a header generated by
// protobuf compiler.
// FIXME: make filtering extensible when there are more use cases for symbol
// filters.
bool isPrivateProtoDecl(const NamedDecl &ND) {
const auto &SM = ND.getASTContext().getSourceManager();
if (!isProtoFile(nameLocation(ND, SM), SM))
return false;
// ND without identifier can be operators.
if (ND.getIdentifier() == nullptr)
return false;
auto Name = ND.getIdentifier()->getName();
if (!Name.contains('_'))
return false;
// Nested proto entities (e.g. Message::Nested) have top-level decls
// that shouldn't be used (Message_Nested). Ignore them completely.
// The nested entities are dangling type aliases, we may want to reconsider
// including them in the future.
// For enum constants, SOME_ENUM_CONSTANT is not private and should be
// indexed. Outer_INNER is private. This heuristic relies on naming style, it
// will include OUTER_INNER and exclude some_enum_constant.
// FIXME: the heuristic relies on naming style (i.e. no underscore in
// user-defined names) and can be improved.
return (ND.getKind() != Decl::EnumConstant) || llvm::any_of(Name, islower);
}
// We only collect #include paths for symbols that are suitable for global code
// completion, except for namespaces since #include path for a namespace is hard
// to define.
Symbol::IncludeDirective shouldCollectIncludePath(index::SymbolKind Kind) {
using SK = index::SymbolKind;
switch (Kind) {
case SK::Macro:
case SK::Enum:
case SK::Struct:
case SK::Class:
case SK::Union:
case SK::TypeAlias:
case SK::Using:
case SK::Function:
case SK::Variable:
case SK::EnumConstant:
case SK::Concept:
return Symbol::Include | Symbol::Import;
case SK::Protocol:
return Symbol::Import;
default:
return Symbol::Invalid;
}
}
// Return the symbol range of the token at \p TokLoc.
std::pair<SymbolLocation::Position, SymbolLocation::Position>
getTokenRange(SourceLocation TokLoc, const SourceManager &SM,
const LangOptions &LangOpts) {
auto CreatePosition = [&SM](SourceLocation Loc) {
auto LSPLoc = sourceLocToPosition(SM, Loc);
SymbolLocation::Position Pos;
Pos.setLine(LSPLoc.line);
Pos.setColumn(LSPLoc.character);
return Pos;
};
auto TokenLength = clang::Lexer::MeasureTokenLength(TokLoc, SM, LangOpts);
return {CreatePosition(TokLoc),
CreatePosition(TokLoc.getLocWithOffset(TokenLength))};
}
// Checks whether \p ND is a good candidate to be the *canonical* declaration of
// its symbol (e.g. a go-to-declaration target). This overrides the default of
// using Clang's canonical declaration, which is the first in the TU.
//
// Example: preferring a class declaration over its forward declaration.
bool isPreferredDeclaration(const NamedDecl &ND, index::SymbolRoleSet Roles) {
const auto &SM = ND.getASTContext().getSourceManager();
if (isa<TagDecl>(ND))
return (Roles & static_cast<unsigned>(index::SymbolRole::Definition)) &&
!isInsideMainFile(ND.getLocation(), SM);
if (const auto *ID = dyn_cast<ObjCInterfaceDecl>(&ND))
return ID->isThisDeclarationADefinition();
if (const auto *PD = dyn_cast<ObjCProtocolDecl>(&ND))
return PD->isThisDeclarationADefinition();
return false;
}
RefKind toRefKind(index::SymbolRoleSet Roles, bool Spelled = false) {
RefKind Result = RefKind::Unknown;
if (Roles & static_cast<unsigned>(index::SymbolRole::Declaration))
Result |= RefKind::Declaration;
if (Roles & static_cast<unsigned>(index::SymbolRole::Definition))
Result |= RefKind::Definition;
if (Roles & static_cast<unsigned>(index::SymbolRole::Reference))
Result |= RefKind::Reference;
if (Spelled)
Result |= RefKind::Spelled;
return Result;
}
std::optional<RelationKind> indexableRelation(const index::SymbolRelation &R) {
if (R.Roles & static_cast<unsigned>(index::SymbolRole::RelationBaseOf))
return RelationKind::BaseOf;
if (R.Roles & static_cast<unsigned>(index::SymbolRole::RelationOverrideOf))
return RelationKind::OverriddenBy;
return std::nullopt;
}
// Check if there is an exact spelling of \p ND at \p Loc.
bool isSpelled(SourceLocation Loc, const NamedDecl &ND) {
auto Name = ND.getDeclName();
const auto NameKind = Name.getNameKind();
if (NameKind != DeclarationName::Identifier &&
NameKind != DeclarationName::CXXConstructorName)
return false;
const auto &AST = ND.getASTContext();
const auto &SM = AST.getSourceManager();
const auto &LO = AST.getLangOpts();
clang::Token Tok;
if (clang::Lexer::getRawToken(Loc, Tok, SM, LO))
return false;
auto StrName = Name.getAsString();
return clang::Lexer::getSpelling(Tok, SM, LO) == StrName;
}
} // namespace
// Encapsulates decisions about how to record header paths in the index,
// including filename normalization, URI conversion etc.
// Expensive checks are cached internally.
class SymbolCollector::HeaderFileURICache {
struct FrameworkUmbrellaSpelling {
// Spelling for the public umbrella header, e.g. <Foundation/Foundation.h>
std::optional<std::string> PublicHeader;
// Spelling for the private umbrella header, e.g.
// <Foundation/Foundation_Private.h>
std::optional<std::string> PrivateHeader;
};
// Weird double-indirect access to PP, which might not be ready yet when
// HeaderFiles is created but will be by the time it's used.
// (IndexDataConsumer::setPreprocessor can happen before or after initialize)
Preprocessor *&PP;
const SourceManager &SM;
const CanonicalIncludes *Includes;
llvm::StringRef FallbackDir;
llvm::DenseMap<const FileEntry *, const std::string *> CacheFEToURI;
llvm::StringMap<std::string> CachePathToURI;
llvm::DenseMap<FileID, llvm::StringRef> CacheFIDToInclude;
llvm::StringMap<std::string> CachePathToFrameworkSpelling;
llvm::StringMap<FrameworkUmbrellaSpelling>
CacheFrameworkToUmbrellaHeaderSpelling;
public:
HeaderFileURICache(Preprocessor *&PP, const SourceManager &SM,
const SymbolCollector::Options &Opts)
: PP(PP), SM(SM), Includes(Opts.Includes), FallbackDir(Opts.FallbackDir) {
}
// Returns a canonical URI for the file \p FE.
// We attempt to make the path absolute first.
const std::string &toURI(const FileEntry *FE) {
auto R = CacheFEToURI.try_emplace(FE);
if (R.second) {
auto CanonPath = getCanonicalPath(FE, SM);
R.first->second = &toURIInternal(CanonPath ? *CanonPath : FE->getName());
}
return *R.first->second;
}
// Returns a canonical URI for \p Path.
// If the file is in the FileManager, use that to canonicalize the path.
// We attempt to make the path absolute in any case.
const std::string &toURI(llvm::StringRef Path) {
if (auto File = SM.getFileManager().getFile(Path))
return toURI(*File);
return toURIInternal(Path);
}
// Gets a canonical include (URI of the header or <header> or "header") for
// header of \p FID (which should usually be the *expansion* file).
// This does not account for any per-symbol overrides!
// Returns "" if includes should not be inserted for this file.
llvm::StringRef getIncludeHeader(FileID FID) {
auto R = CacheFIDToInclude.try_emplace(FID);
if (R.second)
R.first->second = getIncludeHeaderUncached(FID);
return R.first->second;
}
private:
// This takes care of making paths absolute and path->URI caching, but no
// FileManager-based canonicalization.
const std::string &toURIInternal(llvm::StringRef Path) {
auto R = CachePathToURI.try_emplace(Path);
if (R.second) {
llvm::SmallString<256> AbsPath = Path;
if (!llvm::sys::path::is_absolute(AbsPath) && !FallbackDir.empty())
llvm::sys::fs::make_absolute(FallbackDir, AbsPath);
assert(llvm::sys::path::is_absolute(AbsPath) &&
"If the VFS can't make paths absolute, a FallbackDir must be "
"provided");
llvm::sys::path::remove_dots(AbsPath, /*remove_dot_dot=*/true);
R.first->second = URI::create(AbsPath).toString();
}
return R.first->second;
}
struct FrameworkHeaderPath {
// Path to the framework directory containing the Headers/PrivateHeaders
// directories e.g. /Frameworks/Foundation.framework/
llvm::StringRef HeadersParentDir;
// Subpath relative to the Headers or PrivateHeaders dir, e.g. NSObject.h
// Note: This is NOT relative to the `HeadersParentDir`.
llvm::StringRef HeaderSubpath;
// Whether this header is under the PrivateHeaders dir
bool IsPrivateHeader;
};
std::optional<FrameworkHeaderPath>
splitFrameworkHeaderPath(llvm::StringRef Path) {
using namespace llvm::sys;
path::reverse_iterator I = path::rbegin(Path);
path::reverse_iterator Prev = I;
path::reverse_iterator E = path::rend(Path);
while (I != E) {
if (*I == "Headers") {
FrameworkHeaderPath HeaderPath;
HeaderPath.HeadersParentDir = Path.substr(0, I - E);
HeaderPath.HeaderSubpath = Path.substr(Prev - E);
HeaderPath.IsPrivateHeader = false;
return HeaderPath;
}
if (*I == "PrivateHeaders") {
FrameworkHeaderPath HeaderPath;
HeaderPath.HeadersParentDir = Path.substr(0, I - E);
HeaderPath.HeaderSubpath = Path.substr(Prev - E);
HeaderPath.IsPrivateHeader = true;
return HeaderPath;
}
Prev = I;
++I;
}
// Unexpected, must not be a framework header.
return std::nullopt;
}
// Frameworks typically have an umbrella header of the same name, e.g.
// <Foundation/Foundation.h> instead of <Foundation/NSObject.h> or
// <Foundation/Foundation_Private.h> instead of
// <Foundation/NSObject_Private.h> which should be used instead of directly
// importing the header.
std::optional<std::string> getFrameworkUmbrellaSpelling(
llvm::StringRef Framework, SrcMgr::CharacteristicKind HeadersDirKind,
HeaderSearch &HS, FrameworkHeaderPath &HeaderPath) {
auto Res = CacheFrameworkToUmbrellaHeaderSpelling.try_emplace(Framework);
auto *CachedSpelling = &Res.first->second;
if (!Res.second) {
return HeaderPath.IsPrivateHeader ? CachedSpelling->PrivateHeader
: CachedSpelling->PublicHeader;
}
bool IsSystem = isSystem(HeadersDirKind);
SmallString<256> UmbrellaPath(HeaderPath.HeadersParentDir);
llvm::sys::path::append(UmbrellaPath, "Headers", Framework + ".h");
llvm::vfs::Status Status;
auto StatErr = HS.getFileMgr().getNoncachedStatValue(UmbrellaPath, Status);
if (!StatErr) {
if (IsSystem)
CachedSpelling->PublicHeader = llvm::formatv("<{0}/{0}.h>", Framework);
else
CachedSpelling->PublicHeader =
llvm::formatv("\"{0}/{0}.h\"", Framework);
}
UmbrellaPath = HeaderPath.HeadersParentDir;
llvm::sys::path::append(UmbrellaPath, "PrivateHeaders",
Framework + "_Private.h");
StatErr = HS.getFileMgr().getNoncachedStatValue(UmbrellaPath, Status);
if (!StatErr) {
if (IsSystem)
CachedSpelling->PrivateHeader =
llvm::formatv("<{0}/{0}_Private.h>", Framework);
else
CachedSpelling->PrivateHeader =
llvm::formatv("\"{0}/{0}_Private.h\"", Framework);
}
return HeaderPath.IsPrivateHeader ? CachedSpelling->PrivateHeader
: CachedSpelling->PublicHeader;
}
// Compute the framework include spelling for `FE` which is in a framework
// named `Framework`, e.g. `NSObject.h` in framework `Foundation` would
// give <Foundation/Foundation.h> if the umbrella header exists, otherwise
// <Foundation/NSObject.h>.
std::optional<llvm::StringRef> getFrameworkHeaderIncludeSpelling(
const FileEntry *FE, llvm::StringRef Framework, HeaderSearch &HS) {
auto Res = CachePathToFrameworkSpelling.try_emplace(FE->getName());
auto *CachedHeaderSpelling = &Res.first->second;
if (!Res.second)
return llvm::StringRef(*CachedHeaderSpelling);
auto HeaderPath = splitFrameworkHeaderPath(FE->getName());
if (!HeaderPath) {
// Unexpected: must not be a proper framework header, don't cache the
// failure.
CachePathToFrameworkSpelling.erase(Res.first);
return std::nullopt;
}
auto DirKind = HS.getFileDirFlavor(FE);
if (auto UmbrellaSpelling =
getFrameworkUmbrellaSpelling(Framework, DirKind, HS, *HeaderPath)) {
*CachedHeaderSpelling = *UmbrellaSpelling;
return llvm::StringRef(*CachedHeaderSpelling);
}
if (isSystem(DirKind))
*CachedHeaderSpelling =
llvm::formatv("<{0}/{1}>", Framework, HeaderPath->HeaderSubpath)
.str();
else
*CachedHeaderSpelling =
llvm::formatv("\"{0}/{1}\"", Framework, HeaderPath->HeaderSubpath)
.str();
return llvm::StringRef(*CachedHeaderSpelling);
}
llvm::StringRef getIncludeHeaderUncached(FileID FID) {
const FileEntry *FE = SM.getFileEntryForID(FID);
if (!FE || FE->getName().empty())
return "";
llvm::StringRef Filename = FE->getName();
// If a file is mapped by canonical headers, use that mapping, regardless
// of whether it's an otherwise-good header (header guards etc).
if (Includes) {
llvm::StringRef Canonical =
Includes->mapHeader(*SM.getFileEntryRefForID(FID));
if (!Canonical.empty()) {
// If we had a mapping, always use it.
if (Canonical.startswith("<") || Canonical.startswith("\""))
return Canonical;
return toURI(Canonical);
}
}
// Framework headers are spelled as <FrameworkName/Foo.h>, not
// "path/FrameworkName.framework/Headers/Foo.h".
auto &HS = PP->getHeaderSearchInfo();
if (const auto *HFI = HS.getExistingFileInfo(FE, /*WantExternal*/ false))
if (!HFI->Framework.empty())
if (auto Spelling =
getFrameworkHeaderIncludeSpelling(FE, HFI->Framework, HS))
return *Spelling;
if (!tooling::isSelfContainedHeader(FE, PP->getSourceManager(),
PP->getHeaderSearchInfo())) {
// A .inc or .def file is often included into a real header to define
// symbols (e.g. LLVM tablegen files).
if (Filename.endswith(".inc") || Filename.endswith(".def"))
// Don't use cache reentrantly due to iterator invalidation.
return getIncludeHeaderUncached(SM.getFileID(SM.getIncludeLoc(FID)));
// Conservatively refuse to insert #includes to files without guards.
return "";
}
// Standard case: just insert the file itself.
return toURI(FE);
}
};
// Return the symbol location of the token at \p TokLoc.
std::optional<SymbolLocation>
SymbolCollector::getTokenLocation(SourceLocation TokLoc) {
const auto &SM = ASTCtx->getSourceManager();
auto *FE = SM.getFileEntryForID(SM.getFileID(TokLoc));
if (!FE)
return std::nullopt;
SymbolLocation Result;
Result.FileURI = HeaderFileURIs->toURI(FE).c_str();
auto Range = getTokenRange(TokLoc, SM, ASTCtx->getLangOpts());
Result.Start = Range.first;
Result.End = Range.second;
return Result;
}
SymbolCollector::SymbolCollector(Options Opts) : Opts(std::move(Opts)) {}
SymbolCollector::~SymbolCollector() = default;
void SymbolCollector::initialize(ASTContext &Ctx) {
ASTCtx = &Ctx;
HeaderFileURIs = std::make_unique<HeaderFileURICache>(
this->PP, ASTCtx->getSourceManager(), Opts);
CompletionAllocator = std::make_shared<GlobalCodeCompletionAllocator>();
CompletionTUInfo =
std::make_unique<CodeCompletionTUInfo>(CompletionAllocator);
}
bool SymbolCollector::shouldCollectSymbol(const NamedDecl &ND,
const ASTContext &ASTCtx,
const Options &Opts,
bool IsMainFileOnly) {
// Skip anonymous declarations, e.g (anonymous enum/class/struct).
if (ND.getDeclName().isEmpty())
return false;
// Skip main-file symbols if we are not collecting them.
if (IsMainFileOnly && !Opts.CollectMainFileSymbols)
return false;
// Skip symbols in anonymous namespaces in header files.
if (!IsMainFileOnly && ND.isInAnonymousNamespace())
return false;
// For function local symbols, index only classes and its member functions.
if (index::isFunctionLocalSymbol(&ND))
return isa<RecordDecl>(ND) ||
(ND.isCXXInstanceMember() && ND.isFunctionOrFunctionTemplate());
// We want most things but not "local" symbols such as symbols inside
// FunctionDecl, BlockDecl, ObjCMethodDecl and OMPDeclareReductionDecl.
// FIXME: Need a matcher for ExportDecl in order to include symbols declared
// within an export.
const auto *DeclCtx = ND.getDeclContext();
switch (DeclCtx->getDeclKind()) {
case Decl::TranslationUnit:
case Decl::Namespace:
case Decl::LinkageSpec:
case Decl::Enum:
case Decl::ObjCProtocol:
case Decl::ObjCInterface:
case Decl::ObjCCategory:
case Decl::ObjCCategoryImpl:
case Decl::ObjCImplementation:
break;
default:
// Record has a few derivations (e.g. CXXRecord, Class specialization), it's
// easier to cast.
if (!isa<RecordDecl>(DeclCtx))
return false;
}
// Avoid indexing internal symbols in protobuf generated headers.
if (isPrivateProtoDecl(ND))
return false;
if (!Opts.CollectReserved &&
(hasReservedName(ND) || hasReservedScope(*ND.getDeclContext())))
return false;
return true;
}
const Decl *
SymbolCollector::getRefContainer(const Decl *Enclosing,
const SymbolCollector::Options &Opts) {
while (Enclosing) {
const auto *ND = dyn_cast<NamedDecl>(Enclosing);
if (ND && shouldCollectSymbol(*ND, ND->getASTContext(), Opts, true)) {
break;
}
Enclosing = dyn_cast_or_null<Decl>(Enclosing->getDeclContext());
}
return Enclosing;
}
// Always return true to continue indexing.
bool SymbolCollector::handleDeclOccurrence(
const Decl *D, index::SymbolRoleSet Roles,
llvm::ArrayRef<index::SymbolRelation> Relations, SourceLocation Loc,
index::IndexDataConsumer::ASTNodeInfo ASTNode) {
assert(ASTCtx && PP && HeaderFileURIs);
assert(CompletionAllocator && CompletionTUInfo);
assert(ASTNode.OrigD);
// Indexing API puts canonical decl into D, which might not have a valid
// source location for implicit/built-in decls. Fallback to original decl in
// such cases.
if (D->getLocation().isInvalid())
D = ASTNode.OrigD;
// If OrigD is an declaration associated with a friend declaration and it's
// not a definition, skip it. Note that OrigD is the occurrence that the
// collector is currently visiting.
if ((ASTNode.OrigD->getFriendObjectKind() !=
Decl::FriendObjectKind::FOK_None) &&
!(Roles & static_cast<unsigned>(index::SymbolRole::Definition)))
return true;
// A declaration created for a friend declaration should not be used as the
// canonical declaration in the index. Use OrigD instead, unless we've already
// picked a replacement for D
if (D->getFriendObjectKind() != Decl::FriendObjectKind::FOK_None)
D = CanonicalDecls.try_emplace(D, ASTNode.OrigD).first->second;
// Flag to mark that D should be considered canonical meaning its declaration
// will override any previous declaration for the Symbol.
bool DeclIsCanonical = false;
// Avoid treating ObjCImplementationDecl as a canonical declaration if it has
// a corresponding non-implicit and non-forward declared ObjcInterfaceDecl.
if (const auto *IID = dyn_cast<ObjCImplementationDecl>(D)) {
DeclIsCanonical = true;
if (const auto *CID = IID->getClassInterface())
if (const auto *DD = CID->getDefinition())
if (!DD->isImplicitInterfaceDecl())
D = DD;
}
// Avoid treating ObjCCategoryImplDecl as a canonical declaration in favor of
// its ObjCCategoryDecl if it has one.
if (const auto *CID = dyn_cast<ObjCCategoryImplDecl>(D)) {
DeclIsCanonical = true;
if (const auto *CD = CID->getCategoryDecl())
D = CD;
}
const NamedDecl *ND = dyn_cast<NamedDecl>(D);
if (!ND)
return true;
auto ID = getSymbolIDCached(ND);
if (!ID)
return true;
// Mark D as referenced if this is a reference coming from the main file.
// D may not be an interesting symbol, but it's cheaper to check at the end.
auto &SM = ASTCtx->getSourceManager();
if (Opts.CountReferences &&
(Roles & static_cast<unsigned>(index::SymbolRole::Reference)) &&
SM.getFileID(SM.getSpellingLoc(Loc)) == SM.getMainFileID())
ReferencedSymbols.insert(ID);
// ND is the canonical (i.e. first) declaration. If it's in the main file
// (which is not a header), then no public declaration was visible, so assume
// it's main-file only.
bool IsMainFileOnly =
SM.isWrittenInMainFile(SM.getExpansionLoc(ND->getBeginLoc())) &&
!isHeaderFile(SM.getFileEntryRefForID(SM.getMainFileID())->getName(),
ASTCtx->getLangOpts());
// In C, printf is a redecl of an implicit builtin! So check OrigD instead.
if (ASTNode.OrigD->isImplicit() ||
!shouldCollectSymbol(*ND, *ASTCtx, Opts, IsMainFileOnly))
return true;
// Note: we need to process relations for all decl occurrences, including
// refs, because the indexing code only populates relations for specific
// occurrences. For example, RelationBaseOf is only populated for the
// occurrence inside the base-specifier.
processRelations(*ND, ID, Relations);
bool CollectRef = static_cast<bool>(Opts.RefFilter & toRefKind(Roles));
// Unlike other fields, e.g. Symbols (which use spelling locations), we use
// file locations for references (as it aligns the behavior of clangd's
// AST-based xref).
// FIXME: we should try to use the file locations for other fields.
if (CollectRef &&
(!IsMainFileOnly || Opts.CollectMainFileRefs ||
ND->isExternallyVisible()) &&
!isa<NamespaceDecl>(ND)) {
auto FileLoc = SM.getFileLoc(Loc);
auto FID = SM.getFileID(FileLoc);
if (Opts.RefsInHeaders || FID == SM.getMainFileID()) {
addRef(ID, SymbolRef{FileLoc, FID, Roles,
getRefContainer(ASTNode.Parent, Opts),
isSpelled(FileLoc, *ND)});
}
}
// Don't continue indexing if this is a mere reference.
if (!(Roles & (static_cast<unsigned>(index::SymbolRole::Declaration) |
static_cast<unsigned>(index::SymbolRole::Definition))))
return true;
// FIXME: ObjCPropertyDecl are not properly indexed here:
// - ObjCPropertyDecl may have an OrigD of ObjCPropertyImplDecl, which is
// not a NamedDecl.
auto *OriginalDecl = dyn_cast<NamedDecl>(ASTNode.OrigD);
if (!OriginalDecl)
return true;
const Symbol *BasicSymbol = Symbols.find(ID);
if (isPreferredDeclaration(*OriginalDecl, Roles))
// If OriginalDecl is preferred, replace/create the existing canonical
// declaration (e.g. a class forward declaration). There should be at most
// one duplicate as we expect to see only one preferred declaration per
// TU, because in practice they are definitions.
BasicSymbol = addDeclaration(*OriginalDecl, std::move(ID), IsMainFileOnly);
else if (!BasicSymbol || DeclIsCanonical)
BasicSymbol = addDeclaration(*ND, std::move(ID), IsMainFileOnly);
if (Roles & static_cast<unsigned>(index::SymbolRole::Definition))
addDefinition(*OriginalDecl, *BasicSymbol);
return true;
}
void SymbolCollector::handleMacros(const MainFileMacros &MacroRefsToIndex) {
assert(HeaderFileURIs && PP);
const auto &SM = PP->getSourceManager();
const auto *MainFileEntry = SM.getFileEntryForID(SM.getMainFileID());
assert(MainFileEntry);
const std::string &MainFileURI = HeaderFileURIs->toURI(MainFileEntry);
// Add macro references.
for (const auto &IDToRefs : MacroRefsToIndex.MacroRefs) {
for (const auto &MacroRef : IDToRefs.second) {
const auto &Range = MacroRef.Rng;
bool IsDefinition = MacroRef.IsDefinition;
Ref R;
R.Location.Start.setLine(Range.start.line);
R.Location.Start.setColumn(Range.start.character);
R.Location.End.setLine(Range.end.line);
R.Location.End.setColumn(Range.end.character);
R.Location.FileURI = MainFileURI.c_str();
R.Kind = IsDefinition ? RefKind::Definition : RefKind::Reference;
Refs.insert(IDToRefs.first, R);
if (IsDefinition) {
Symbol S;
S.ID = IDToRefs.first;
auto StartLoc = cantFail(sourceLocationInMainFile(SM, Range.start));
auto EndLoc = cantFail(sourceLocationInMainFile(SM, Range.end));
S.Name = toSourceCode(SM, SourceRange(StartLoc, EndLoc));
S.SymInfo.Kind = index::SymbolKind::Macro;
S.SymInfo.SubKind = index::SymbolSubKind::None;
S.SymInfo.Properties = index::SymbolPropertySet();
S.SymInfo.Lang = index::SymbolLanguage::C;
S.Origin = Opts.Origin;
S.CanonicalDeclaration = R.Location;
// Make the macro visible for code completion if main file is an
// include-able header.
if (!HeaderFileURIs->getIncludeHeader(SM.getMainFileID()).empty()) {
S.Flags |= Symbol::IndexedForCodeCompletion;
S.Flags |= Symbol::VisibleOutsideFile;
}
Symbols.insert(S);
}
}
}
}
bool SymbolCollector::handleMacroOccurrence(const IdentifierInfo *Name,
const MacroInfo *MI,
index::SymbolRoleSet Roles,
SourceLocation Loc) {
assert(PP);
// Builtin macros don't have useful locations and aren't needed in completion.
if (MI->isBuiltinMacro())
return true;
const auto &SM = PP->getSourceManager();
auto DefLoc = MI->getDefinitionLoc();
// Also avoid storing predefined macros like __DBL_MIN__.
if (SM.isWrittenInBuiltinFile(DefLoc) ||
Name->getName() == "__GCC_HAVE_DWARF2_CFI_ASM")
return true;
auto ID = getSymbolIDCached(Name->getName(), MI, SM);
if (!ID)
return true;
auto SpellingLoc = SM.getSpellingLoc(Loc);
bool IsMainFileOnly =
SM.isInMainFile(SM.getExpansionLoc(DefLoc)) &&
!isHeaderFile(SM.getFileEntryRefForID(SM.getMainFileID())->getName(),
ASTCtx->getLangOpts());
// Do not store references to main-file macros.
if ((static_cast<unsigned>(Opts.RefFilter) & Roles) && !IsMainFileOnly &&
(Opts.RefsInHeaders || SM.getFileID(SpellingLoc) == SM.getMainFileID())) {
// FIXME: Populate container information for macro references.
// FIXME: All MacroRefs are marked as Spelled now, but this should be
// checked.
addRef(ID, SymbolRef{Loc, SM.getFileID(Loc), Roles, /*Container=*/nullptr,
/*Spelled=*/true});
}
// Collect symbols.
if (!Opts.CollectMacro)
return true;
// Skip main-file macros if we are not collecting them.
if (IsMainFileOnly && !Opts.CollectMainFileSymbols)
return false;
// Mark the macro as referenced if this is a reference coming from the main
// file. The macro may not be an interesting symbol, but it's cheaper to check
// at the end.
if (Opts.CountReferences &&
(Roles & static_cast<unsigned>(index::SymbolRole::Reference)) &&
SM.getFileID(SpellingLoc) == SM.getMainFileID())
ReferencedSymbols.insert(ID);
// Don't continue indexing if this is a mere reference.
// FIXME: remove macro with ID if it is undefined.
if (!(Roles & static_cast<unsigned>(index::SymbolRole::Declaration) ||
Roles & static_cast<unsigned>(index::SymbolRole::Definition)))
return true;
// Only collect one instance in case there are multiple.
if (Symbols.find(ID) != nullptr)
return true;
Symbol S;
S.ID = std::move(ID);
S.Name = Name->getName();
if (!IsMainFileOnly) {
S.Flags |= Symbol::IndexedForCodeCompletion;
S.Flags |= Symbol::VisibleOutsideFile;
}
S.SymInfo = index::getSymbolInfoForMacro(*MI);
S.Origin = Opts.Origin;
// FIXME: use the result to filter out symbols.
shouldIndexFile(SM.getFileID(Loc));
if (auto DeclLoc = getTokenLocation(DefLoc))
S.CanonicalDeclaration = *DeclLoc;
CodeCompletionResult SymbolCompletion(Name);
const auto *CCS = SymbolCompletion.CreateCodeCompletionStringForMacro(
*PP, *CompletionAllocator, *CompletionTUInfo);
std::string Signature;
std::string SnippetSuffix;
getSignature(*CCS, &Signature, &SnippetSuffix);
S.Signature = Signature;
S.CompletionSnippetSuffix = SnippetSuffix;
IndexedMacros.insert(Name);
setIncludeLocation(S, DefLoc);
Symbols.insert(S);
return true;
}
void SymbolCollector::processRelations(
const NamedDecl &ND, const SymbolID &ID,
ArrayRef<index::SymbolRelation> Relations) {
for (const auto &R : Relations) {
auto RKind = indexableRelation(R);
if (!RKind)
continue;
const Decl *Object = R.RelatedSymbol;
auto ObjectID = getSymbolIDCached(Object);
if (!ObjectID)
continue;
// Record the relation.
// TODO: There may be cases where the object decl is not indexed for some
// reason. Those cases should probably be removed in due course, but for
// now there are two possible ways to handle it:
// (A) Avoid storing the relation in such cases.
// (B) Store it anyways. Clients will likely lookup() the SymbolID
// in the index and find nothing, but that's a situation they
// probably need to handle for other reasons anyways.
// We currently do (B) because it's simpler.
if (*RKind == RelationKind::BaseOf)
this->Relations.insert({ID, *RKind, ObjectID});
else if (*RKind == RelationKind::OverriddenBy)
this->Relations.insert({ObjectID, *RKind, ID});
}
}
void SymbolCollector::setIncludeLocation(const Symbol &S, SourceLocation Loc) {
if (Opts.CollectIncludePath &&
shouldCollectIncludePath(S.SymInfo.Kind) != Symbol::Invalid)
// Use the expansion location to get the #include header since this is
// where the symbol is exposed.
IncludeFiles[S.ID] =
PP->getSourceManager().getDecomposedExpansionLoc(Loc).first;
}
void SymbolCollector::finish() {
// At the end of the TU, add 1 to the refcount of all referenced symbols.
for (const auto &ID : ReferencedSymbols) {
if (const auto *S = Symbols.find(ID)) {
// SymbolSlab::Builder returns const symbols because strings are interned
// and modifying returned symbols without inserting again wouldn't go
// well. const_cast is safe here as we're modifying a data owned by the
// Symbol. This reduces time spent in SymbolCollector by ~1%.
++const_cast<Symbol *>(S)->References;
}
}
if (Opts.CollectMacro) {
assert(PP);
// First, drop header guards. We can't identify these until EOF.
for (const IdentifierInfo *II : IndexedMacros) {
if (const auto *MI = PP->getMacroDefinition(II).getMacroInfo())
if (auto ID =
getSymbolIDCached(II->getName(), MI, PP->getSourceManager()))
if (MI->isUsedForHeaderGuard())
Symbols.erase(ID);
}
}
llvm::DenseMap<FileID, bool> FileToContainsImportsOrObjC;
// Fill in IncludeHeaders.
// We delay this until end of TU so header guards are all resolved.
llvm::SmallString<128> QName;
for (const auto &[SID, FID] : IncludeFiles) {
if (const Symbol *S = Symbols.find(SID)) {
llvm::StringRef IncludeHeader;
// Look for an overridden include header for this symbol specifically.
if (Opts.Includes) {
QName = S->Scope;
QName.append(S->Name);
IncludeHeader = Opts.Includes->mapSymbol(QName);
if (!IncludeHeader.empty()) {
if (IncludeHeader.front() != '"' && IncludeHeader.front() != '<')
IncludeHeader = HeaderFileURIs->toURI(IncludeHeader);
else if (IncludeHeader == "<utility>" && QName == "std::move" &&
S->Signature.contains(','))
IncludeHeader = "<algorithm>";
}
}
// Otherwise find the approprate include header for the defining file.
if (IncludeHeader.empty())
IncludeHeader = HeaderFileURIs->getIncludeHeader(FID);
// Symbols in slabs aren't mutable, insert() has to walk all the strings
if (!IncludeHeader.empty()) {
Symbol::IncludeDirective Directives = Symbol::Invalid;
auto CollectDirectives = shouldCollectIncludePath(S->SymInfo.Kind);
if ((CollectDirectives & Symbol::Include) != 0)
Directives |= Symbol::Include;
// Only allow #import for symbols from ObjC-like files.
if ((CollectDirectives & Symbol::Import) != 0) {
auto [It, Inserted] = FileToContainsImportsOrObjC.try_emplace(FID);
if (Inserted)
It->second = FilesWithObjCConstructs.contains(FID) ||
tooling::codeContainsImports(
ASTCtx->getSourceManager().getBufferData(FID));
if (It->second)
Directives |= Symbol::Import;
}
if (Directives != Symbol::Invalid) {
Symbol NewSym = *S;
NewSym.IncludeHeaders.push_back({IncludeHeader, 1, Directives});
Symbols.insert(NewSym);
}
}
}
}
ReferencedSymbols.clear();
IncludeFiles.clear();
FilesWithObjCConstructs.clear();
}
const Symbol *SymbolCollector::addDeclaration(const NamedDecl &ND, SymbolID ID,
bool IsMainFileOnly) {
auto &Ctx = ND.getASTContext();
auto &SM = Ctx.getSourceManager();
Symbol S;
S.ID = std::move(ID);
std::string QName = printQualifiedName(ND);
// FIXME: this returns foo:bar: for objective-C methods, we prefer only foo:
// for consistency with CodeCompletionString and a clean name/signature split.
std::tie(S.Scope, S.Name) = splitQualifiedName(QName);
std::string TemplateSpecializationArgs = printTemplateSpecializationArgs(ND);
S.TemplateSpecializationArgs = TemplateSpecializationArgs;
// We collect main-file symbols, but do not use them for code completion.
if (!IsMainFileOnly && isIndexedForCodeCompletion(ND, Ctx))
S.Flags |= Symbol::IndexedForCodeCompletion;
if (isImplementationDetail(&ND))
S.Flags |= Symbol::ImplementationDetail;
if (!IsMainFileOnly)
S.Flags |= Symbol::VisibleOutsideFile;
S.SymInfo = index::getSymbolInfo(&ND);
auto Loc = nameLocation(ND, SM);
assert(Loc.isValid() && "Invalid source location for NamedDecl");
// FIXME: use the result to filter out symbols.
auto FID = SM.getFileID(Loc);
shouldIndexFile(FID);
if (auto DeclLoc = getTokenLocation(Loc))
S.CanonicalDeclaration = *DeclLoc;
S.Origin = Opts.Origin;
if (ND.getAvailability() == AR_Deprecated)
S.Flags |= Symbol::Deprecated;
// Add completion info.
// FIXME: we may want to choose a different redecl, or combine from several.
assert(ASTCtx && PP && "ASTContext and Preprocessor must be set.");
// We use the primary template, as clang does during code completion.
CodeCompletionResult SymbolCompletion(&getTemplateOrThis(ND), 0);
const auto *CCS = SymbolCompletion.CreateCodeCompletionString(
*ASTCtx, *PP, CodeCompletionContext::CCC_Symbol, *CompletionAllocator,
*CompletionTUInfo,
/*IncludeBriefComments*/ false);
std::string Documentation =
formatDocumentation(*CCS, getDocComment(Ctx, SymbolCompletion,
/*CommentsFromHeaders=*/true));
if (!(S.Flags & Symbol::IndexedForCodeCompletion)) {
if (Opts.StoreAllDocumentation)
S.Documentation = Documentation;
Symbols.insert(S);
return Symbols.find(S.ID);
}
S.Documentation = Documentation;
std::string Signature;
std::string SnippetSuffix;
getSignature(*CCS, &Signature, &SnippetSuffix);
S.Signature = Signature;
S.CompletionSnippetSuffix = SnippetSuffix;
std::string ReturnType = getReturnType(*CCS);
S.ReturnType = ReturnType;
std::optional<OpaqueType> TypeStorage;
if (S.Flags & Symbol::IndexedForCodeCompletion) {
TypeStorage = OpaqueType::fromCompletionResult(*ASTCtx, SymbolCompletion);
if (TypeStorage)
S.Type = TypeStorage->raw();
}
Symbols.insert(S);
setIncludeLocation(S, ND.getLocation());
if (S.SymInfo.Lang == index::SymbolLanguage::ObjC)
FilesWithObjCConstructs.insert(FID);
return Symbols.find(S.ID);
}
void SymbolCollector::addDefinition(const NamedDecl &ND,
const Symbol &DeclSym) {
if (DeclSym.Definition)
return;
const auto &SM = ND.getASTContext().getSourceManager();
auto Loc = nameLocation(ND, SM);
shouldIndexFile(SM.getFileID(Loc));
auto DefLoc = getTokenLocation(Loc);
// If we saw some forward declaration, we end up copying the symbol.
// This is not ideal, but avoids duplicating the "is this a definition" check
// in clang::index. We should only see one definition.
if (!DefLoc)
return;
Symbol S = DeclSym;
// FIXME: use the result to filter out symbols.
S.Definition = *DefLoc;
Symbols.insert(S);
}
bool SymbolCollector::shouldIndexFile(FileID FID) {
if (!Opts.FileFilter)
return true;
auto I = FilesToIndexCache.try_emplace(FID);
if (I.second)
I.first->second = Opts.FileFilter(ASTCtx->getSourceManager(), FID);
return I.first->second;
}
void SymbolCollector::addRef(SymbolID ID, const SymbolRef &SR) {
const auto &SM = ASTCtx->getSourceManager();
// FIXME: use the result to filter out references.
shouldIndexFile(SR.FID);
if (const auto *FE = SM.getFileEntryForID(SR.FID)) {
auto Range = getTokenRange(SR.Loc, SM, ASTCtx->getLangOpts());
Ref R;
R.Location.Start = Range.first;
R.Location.End = Range.second;
R.Location.FileURI = HeaderFileURIs->toURI(FE).c_str();
R.Kind = toRefKind(SR.Roles, SR.Spelled);
R.Container = getSymbolIDCached(SR.Container);
Refs.insert(ID, R);
}
}
SymbolID SymbolCollector::getSymbolIDCached(const Decl *D) {
auto It = DeclToIDCache.try_emplace(D, SymbolID{});
if (It.second)
It.first->second = getSymbolID(D);
return It.first->second;
}
SymbolID SymbolCollector::getSymbolIDCached(const llvm::StringRef MacroName,
const MacroInfo *MI,
const SourceManager &SM) {
auto It = MacroToIDCache.try_emplace(MI, SymbolID{});
if (It.second)
It.first->second = getSymbolID(MacroName, MI, SM);
return It.first->second;
}
} // namespace clangd
} // namespace clang