| //===--- SourceCode.h - Manipulating source code as strings -----*- C++ -*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // Various code that examines C++ source code without using heavy AST machinery |
| // (and often not even the lexer). To be used sparingly! |
| // |
| //===----------------------------------------------------------------------===// |
| #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SOURCECODE_H |
| #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SOURCECODE_H |
| |
| #include "Protocol.h" |
| #include "support/Context.h" |
| #include "support/ThreadsafeFS.h" |
| #include "clang/Basic/Diagnostic.h" |
| #include "clang/Basic/LangOptions.h" |
| #include "clang/Basic/SourceLocation.h" |
| #include "clang/Basic/SourceManager.h" |
| #include "clang/Format/Format.h" |
| #include "clang/Lex/HeaderSearch.h" |
| #include "clang/Tooling/Core/Replacement.h" |
| #include "clang/Tooling/Syntax/Tokens.h" |
| #include "llvm/ADT/StringRef.h" |
| #include "llvm/ADT/StringSet.h" |
| #include "llvm/Support/Error.h" |
| #include "llvm/Support/SHA1.h" |
| #include <string> |
| |
| namespace clang { |
| class SourceManager; |
| |
| namespace clangd { |
| |
| // Digests of source code are generated in many different places. |
| // This alias names the digest type so that details of the hashing function |
| // are not hard-coded at every place that needs to store this information. |
| // A digest occupies 8 bytes. |
| using FileDigest = std::array<uint8_t, 8>; |
| // Returns the digest of Content. |
| FileDigest digest(StringRef Content); |
| // Returns the digest for file FID of SM. The result is optional: presumably |
| // empty when the file's contents are unavailable (confirm in the definition). |
| Optional<FileDigest> digestFile(const SourceManager &SM, FileID FID); |
| |
| // This context variable controls the behavior of the functions in this file |
| // that convert between LSP offsets and native clang byte offsets. |
| // If it is not set, they default to UTF-16 for backwards-compatibility. |
| extern Key<OffsetEncoding> kCurrentOffsetEncoding; |
| |
| // Counts the number of UTF-16 code units needed to represent a string (LSP |
| // specifies string lengths in UTF-16 code units). |
| // The use of UTF-16 may be overridden by the kCurrentOffsetEncoding context |
| // variable. |
| size_t lspLength(StringRef Code); |
| |
| /// Turns a [line, column] pair into an offset in Code. |
| /// |
| /// If P.character exceeds the line length, returns the offset at end-of-line. |
| /// (If !AllowColumnsBeyondLineLength, an error is returned instead.) |
| /// If the line number is out of range, returns an error. |
| /// |
| /// On success, the returned value is in the range [0, Code.size()]. |
| llvm::Expected<size_t> |
| positionToOffset(llvm::StringRef Code, Position P, |
| bool AllowColumnsBeyondLineLength = true); |
| |
| /// Turns an offset in Code into a [line, column] pair; this is the converse |
| /// direction of positionToOffset(). |
| /// The offset must be in range [0, Code.size()]. |
| Position offsetToPosition(llvm::StringRef Code, size_t Offset); |
| |
| /// Turns a SourceLocation into a [line, column] pair. |
| /// The return type cannot carry an error, so an invalid location is not |
| /// reported to the caller. |
| /// FIXME: This should return an error if the location is invalid. |
| Position sourceLocToPosition(const SourceManager &SM, SourceLocation Loc); |
| |
| /// Returns the file location corresponding to \p P. Note that one should take |
| /// care to avoid comparing the result with expansion locations. |
| /// The conversion can fail, in which case an error is returned instead of a |
| /// location (presumably when \p P does not lie within the main file - confirm |
| /// in the definition). |
| llvm::Expected<SourceLocation> sourceLocationInMainFile(const SourceManager &SM, |
| Position P); |
| |
| /// Returns true iff \p Loc is inside the main file. This function handles |
| /// file & macro locations. For macro locations, returns true iff the macro is |
| /// being expanded inside the main file. |
| /// |
| /// The function is usually used to check whether a declaration is inside the |
| /// main file. |
| bool isInsideMainFile(SourceLocation Loc, const SourceManager &SM); |
| |
| /// Returns the #include location through which IncludedFile was loaded. |
| /// Whereas SM.getIncludeLoc() returns the location of the *filename*, which |
| /// may be in a macro, includeHashLoc() returns the location of the #. |
| SourceLocation includeHashLoc(FileID IncludedFile, const SourceManager &SM); |
| |
| /// Returns true if the token at Loc is spelled in the source code. |
| /// This is not the case for: |
| /// * symbols formed via macro concatenation: the spelling location will |
| /// be "<scratch space>"; |
| /// * symbols controlled and defined by a compile command-line option |
| /// `-DName=foo`: the spelling location will be "<command line>". |
| bool isSpelledInSource(SourceLocation Loc, const SourceManager &SM); |
| |
| /// Turns a token range into a half-open range and checks its correctness. |
| /// The resulting range will have only valid source locations on both sides, |
| /// both of which are file locations. |
| /// |
| /// File locations always point to a particular offset in a file, i.e. they |
| /// never refer to a location inside a macro expansion. Turning locations from |
| /// macro expansions into file locations is ambiguous - one can use |
| /// SourceManager::{getExpansion|getFile|getSpelling}Loc. This function |
| /// calls SourceManager::getFileLoc on both ends of \p R to do the conversion. |
| /// |
| /// User input (e.g. cursor position) is expressed as a file location, so this |
| /// function can be viewed as a way to normalize the ranges used in the clang |
| /// AST so that they are comparable with ranges coming from the user input. |
| /// |
| /// The result is optional; presumably it is empty when the correctness check |
| /// fails (confirm in the definition). |
| llvm::Optional<SourceRange> toHalfOpenFileRange(const SourceManager &Mgr, |
| const LangOptions &LangOpts, |
| SourceRange R); |
| |
| /// Returns true iff all of the following conditions hold for \p R: |
| /// - start and end locations are valid, |
| /// - start and end locations are file locations from the same file |
| /// (i.e. expansion locations are not taken into account), |
| /// - start offset <= end offset. |
| /// FIXME: introduce a type for source range with this invariant. |
| bool isValidFileRange(const SourceManager &Mgr, SourceRange R); |
| |
| /// Returns the source code covered by the source range \p R. |
| /// EXPECTS: isValidFileRange(SM, R) == true. |
| llvm::StringRef toSourceCode(const SourceManager &SM, SourceRange R); |
| |
| // Converts a half-open clang source range to an LSP range. |
| // Note that clang also uses closed source ranges, which this function cannot |
| // handle! |
| Range halfOpenToRange(const SourceManager &SM, CharSourceRange R); |
| |
| // Expands range `A` so that it also contains `B`. |
| // `A` is updated in place; `B` is passed by value. |
| void unionRanges(Range &A, Range B); |
| |
| // Converts an offset to a clang line/column (1-based, columns are bytes). |
| // The result is a (line, column) pair. |
| // The offset must be in range [0, Code.size()]. |
| // Prefer to use SourceManager if one is available. |
| std::pair<size_t, size_t> offsetToClangLineColumn(llvm::StringRef Code, |
| size_t Offset); |
| |
| /// Splits a qualified name into {scope, unqualified name}. |
| /// From "a::b::c", return {"a::b::", "c"}. Scope is empty if there's no |
| /// qualifier. |
| std::pair<llvm::StringRef, llvm::StringRef> |
| splitQualifiedName(llvm::StringRef QName); |
| |
| /// Converts the tooling::Replacement \p R into a TextEdit. |
| /// NOTE(review): \p Code is presumably the text that R's offsets refer to - |
| /// confirm against the definition. |
| TextEdit replacementToEdit(StringRef Code, const tooling::Replacement &R); |
| |
| /// Converts a set of tooling::Replacements into a list of TextEdits. |
| /// NOTE(review): \p Code is presumably the text that the replacements' offsets |
| /// refer to - confirm against the definition. |
| std::vector<TextEdit> replacementsToEdits(StringRef Code, |
| const tooling::Replacements &Repls); |
| |
| /// Converts the FixItHint \p FixIt into a TextEdit. |
| /// NOTE(review): \p M and \p L are presumably needed to resolve the hint's |
| /// source locations - confirm against the definition. |
| TextEdit toTextEdit(const FixItHint &FixIt, const SourceManager &M, |
| const LangOptions &L); |
| |
| /// Get the canonical path of \p F. This means: |
| /// |
| /// - Absolute path |
| /// - Symlinks resolved |
| /// - No "." or ".." component |
| /// - No duplicate or trailing directory separator |
| /// |
| /// This function should be used when paths need to be used outside the |
| /// component that generates them, so that paths are normalized as much as |
| /// possible. |
| /// |
| /// The result is optional; presumably it is empty when no canonical path can |
| /// be determined (confirm in the definition). |
| llvm::Optional<std::string> getCanonicalPath(const FileEntry *F, |
| const SourceManager &SourceMgr); |
| |
| /// Chooses the clang-format style we should apply to a certain file. |
| /// This will usually use the filesystem \p TFS to look for .clang-format |
| /// directories. |
| /// FIXME: should we be caching the .clang-format file search? |
| /// This uses format::DefaultFormatStyle and format::DefaultFallbackStyle, |
| /// though the latter may have been overridden in main()! |
| format::FormatStyle getFormatStyleForFile(llvm::StringRef File, |
| llvm::StringRef Content, |
| const ThreadsafeFS &TFS); |
| |
| /// Cleans up and formats the given replacements according to \p Style. |
| /// Returns the resulting replacements, or an error on failure. |
| /// NOTE(review): \p Code is presumably the text that \p Replaces applies to - |
| /// confirm against the definition. |
| llvm::Expected<tooling::Replacements> |
| cleanupAndFormat(StringRef Code, const tooling::Replacements &Replaces, |
| const format::FormatStyle &Style); |
| |
| /// A set of edits generated for a single file. Can verify whether it is safe to |
| /// apply these edits to a code block. |
| struct Edit { |
| /// The replacements that make up this edit. |
| tooling::Replacements Replacements; |
| /// Copy of the code passed at construction; presumably the text the |
| /// Replacements were computed against (confirm against the definitions). |
| std::string InitialCode; |
| |
| Edit() = default; |
| |
| /// Stores a copy of \p Code as InitialCode and moves \p Reps into |
| /// Replacements. |
| Edit(llvm::StringRef Code, tooling::Replacements Reps) |
| : Replacements(std::move(Reps)), InitialCode(Code) {} |
| |
| /// Returns the file contents after changes are applied, or an error. |
| llvm::Expected<std::string> apply() const; |
| |
| /// Represents Replacements as TextEdits that are available for use in LSP. |
| std::vector<TextEdit> asTextEdits() const; |
| |
| /// Checks whether the Replacements are applicable to the given Code. |
| bool canApplyTo(llvm::StringRef Code) const; |
| }; |
| /// A mapping from absolute file path (the one used for accessing the underlying |
| /// VFS) to the Edit for that file. |
| using FileEdits = llvm::StringMap<Edit>; |
| |
| /// Formats the edits and the code around them according to Style. On success, |
| /// changes the Replacements of \p E to the formatted ones. |
| llvm::Error reformatEdit(Edit &E, const format::FormatStyle &Style); |
| |
| /// Applies an incremental update to a text document. |
| /// \p Contents holds the document text and is updated in place; failure is |
| /// reported through the returned llvm::Error. |
| llvm::Error applyChange(std::string &Contents, |
| const TextDocumentContentChangeEvent &Change); |
| |
| /// Collects identifiers with counts in the source code. |
| /// The result maps each identifier to its count. |
| llvm::StringMap<unsigned> collectIdentifiers(llvm::StringRef Content, |
| const format::FormatStyle &Style); |
| |
| /// Collects all ranges of the given identifier in the source code \p Content. |
| std::vector<Range> collectIdentifierRanges(llvm::StringRef Identifier, |
| llvm::StringRef Content, |
| const LangOptions &LangOpts); |
| |
| /// Collects words from the source code. |
| /// Unlike collectIdentifiers, this: |
| /// - also finds text in comments |
| /// - splits text into words |
| /// - drops stopwords like "get" and "for" |
| llvm::StringSet<> collectWords(llvm::StringRef Content); |
| |
| // Something that looks like a word in the source code. |
| // It could be a "real" token that's "live" in the AST, a spelled token consumed |
| // by the preprocessor, or part of a spelled token (e.g. a word in a comment). |
| struct SpelledWord { |
| // (Spelling) location of the start of the word. |
| SourceLocation Location; |
| // The range of the word itself, excluding any quotes. |
| // This is a subrange of the file buffer. |
| llvm::StringRef Text; |
| // Whether this word is likely to refer to an identifier. True if: |
| // - the word is a spelled identifier token |
| // - Text is identifier-like (e.g. "foo_bar") |
| // - Text is surrounded by backticks (e.g. Foo in "// returns `Foo`") |
| bool LikelyIdentifier = false; |
| // Set if the word is contained in a token spelled in the file. |
| // (This should always be true, but comments aren't retained by TokenBuffer). |
| const syntax::Token *PartOfSpelledToken = nullptr; |
| // Set if the word is exactly a token spelled in the file. |
| const syntax::Token *SpelledToken = nullptr; |
| // Set if the word is a token spelled in the file, and that token survives |
| // preprocessing to emit an expanded token spelled the same way. |
| const syntax::Token *ExpandedToken = nullptr; |
| |
| // Find the unique word that contains SpelledLoc or starts/ends there. |
| // The result is optional; presumably it is empty when no such word exists |
| // (confirm in the definition). |
| static llvm::Optional<SpelledWord> touching(SourceLocation SpelledLoc, |
| const syntax::TokenBuffer &TB, |
| const LangOptions &LangOpts); |
| }; |
| |
| /// Returns true if \p TokenName is in the list of reserved keywords of the |
| /// language. |
| bool isKeyword(llvm::StringRef TokenName, const LangOptions &LangOpts); |
| |
| /// Heuristically determines the namespaces visible at a point, without parsing |
| /// Code. This considers using-directives and enclosing namespace-declarations |
| /// that are visible (and not obfuscated) in the file itself (not headers). |
| /// Code should be truncated at the point of interest. |
| /// |
| /// The returned vector is always non-empty. |
| /// - The first element is the namespace that encloses the point: a declaration |
| /// near the point would be within this namespace. |
| /// - The elements are the namespaces in scope at the point: an unqualified |
| /// lookup would search within these namespaces. |
| /// |
| /// Using directives are resolved against all enclosing scopes, but no other |
| /// namespace directives. |
| /// |
| /// example: |
| /// using namespace a; |
| /// namespace foo { |
| /// using namespace b; |
| /// |
| /// visibleNamespaces are {"foo::", "", "a::", "b::", "foo::b::"}, not "a::b::". |
| std::vector<std::string> visibleNamespaces(llvm::StringRef Code, |
| const LangOptions &LangOpts); |
| |
| /// Represents locations that can accept a definition. |
| struct EligibleRegion { |
| /// Namespace that owns all of the EligiblePoints, e.g. |
| /// namespace a{ namespace b {^ void foo();^} } |
| /// It will be "a::b" for both caret locations. |
| std::string EnclosingNamespace; |
| /// Positions in the code marking eligible points to insert a function |
| /// definition. |
| std::vector<Position> EligiblePoints; |
| }; |
| |
| /// Returns the most eligible region to insert a definition for \p |
| /// FullyQualifiedName in the \p Code. |
| /// Pseudo parses \p Code under the hood to determine namespace decls and |
| /// possible insertion points. Chooses the region that matches the longest |
| /// prefix of \p FullyQualifiedName. Returns EOF if there are no shared |
| /// namespaces. |
| /// \p FullyQualifiedName should not contain anonymous namespaces. |
| EligibleRegion getEligiblePoints(llvm::StringRef Code, |
| llvm::StringRef FullyQualifiedName, |
| const LangOptions &LangOpts); |
| |
| /// Describes a macro; this is the result type of locateMacroAt(). |
| struct DefinedMacro { |
| /// Name of the macro. |
| llvm::StringRef Name; |
| /// The MacroInfo for the macro. NOTE(review): non-owning pointer; lifetime |
| /// is presumably tied to the Preprocessor - confirm. |
| const MacroInfo *Info; |
| /// Location of the identifier that names the macro. |
| /// Unlike Info->Location, this translates preamble-patch locations to |
| /// main-file locations. |
| SourceLocation NameLoc; |
| }; |
| /// Gets the macro referenced by \p SpelledTok. It must be a spelled token |
| /// aligned to the beginning of an identifier. |
| /// The result is optional; presumably it is empty when the token does not name |
| /// a macro (confirm in the definition). |
| llvm::Optional<DefinedMacro> locateMacroAt(const syntax::Token &SpelledTok, |
| Preprocessor &PP); |
| |
| /// Infers whether this is a header from the FileName and LangOpts (if |
| /// present). LangOpts defaults to llvm::None. |
| bool isHeaderFile(llvm::StringRef FileName, |
| llvm::Optional<LangOptions> LangOpts = llvm::None); |
| |
| /// Returns true if the given location \p Loc is in a generated protobuf file. |
| bool isProtoFile(SourceLocation Loc, const SourceManager &SourceMgr); |
| |
| /// Determines whether the header \p FE is self-contained. |
| /// This scans source code, and should not be called when using a preamble. |
| /// Prefer to access the cache in IncludeStructure::isSelfContained if you can. |
| bool isSelfContainedHeader(const FileEntry *FE, FileID ID, |
| const SourceManager &SM, HeaderSearch &HeaderInfo); |
| |
| } // namespace clangd |
| } // namespace clang |
| #endif |