Haojian Wu | 5dcc66d | 2019-02-06 09:08:26 +0000 | [diff] [blame] | 1 | //===--- Selection.cpp ----------------------------------------------------===// |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "Selection.h" |
Sam McCall | bb81e70 | 2020-10-20 12:01:48 +0200 | [diff] [blame] | 10 | #include "AST.h" |
Sam McCall | ad97ccf | 2020-04-28 17:49:17 +0200 | [diff] [blame] | 11 | #include "support/Logger.h" |
Haojian Wu | 774acdf | 2020-05-11 11:02:34 +0200 | [diff] [blame] | 12 | #include "support/Trace.h" |
Ilya Biryukov | 2b833d4 | 2022-04-28 08:39:47 +0000 | [diff] [blame] | 13 | #include "clang/AST/ASTConcept.h" |
Sam McCall | 5cd77f9 | 2019-06-27 11:17:13 +0000 | [diff] [blame] | 14 | #include "clang/AST/ASTTypeTraits.h" |
Kadir Cetinkaya | e6b8181 | 2020-02-25 09:33:52 +0100 | [diff] [blame] | 15 | #include "clang/AST/Decl.h" |
Sam McCall | ad9fd32 | 2019-11-15 16:07:34 +0100 | [diff] [blame] | 16 | #include "clang/AST/DeclCXX.h" |
Sam McCall | bbcbb10 | 2019-11-13 20:16:23 +0100 | [diff] [blame] | 17 | #include "clang/AST/Expr.h" |
Sam McCall | e18ab2a | 2019-11-19 16:57:31 +0100 | [diff] [blame] | 18 | #include "clang/AST/ExprCXX.h" |
Sam McCall | 5cd77f9 | 2019-06-27 11:17:13 +0000 | [diff] [blame] | 19 | #include "clang/AST/PrettyPrinter.h" |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 20 | #include "clang/AST/RecursiveASTVisitor.h" |
Sam McCall | 5cd77f9 | 2019-06-27 11:17:13 +0000 | [diff] [blame] | 21 | #include "clang/AST/TypeLoc.h" |
Sam McCall | bbcbb10 | 2019-11-13 20:16:23 +0100 | [diff] [blame] | 22 | #include "clang/Basic/OperatorKinds.h" |
Sam McCall | 7fc8f41 | 2019-07-22 15:55:53 +0000 | [diff] [blame] | 23 | #include "clang/Basic/SourceLocation.h" |
Shaurya Gupta | 06841ea | 2019-07-19 11:41:02 +0000 | [diff] [blame] | 24 | #include "clang/Basic/SourceManager.h" |
| 25 | #include "clang/Basic/TokenKinds.h" |
Sam McCall | 19cefc2 | 2019-09-03 15:34:47 +0000 | [diff] [blame] | 26 | #include "clang/Lex/Lexer.h" |
Sam McCall | abe3c29 | 2019-07-31 17:52:40 +0000 | [diff] [blame] | 27 | #include "clang/Tooling/Syntax/Tokens.h" |
Jan Svoboda | c6fb636 | 2022-01-18 17:58:57 +0100 | [diff] [blame] | 28 | #include "llvm/ADT/BitVector.h" |
Sam McCall | 5cd77f9 | 2019-06-27 11:17:13 +0000 | [diff] [blame] | 29 | #include "llvm/ADT/STLExtras.h" |
Sam McCall | e3e1583 | 2020-05-19 01:30:23 +0200 | [diff] [blame] | 30 | #include "llvm/ADT/StringExtras.h" |
Sam McCall | 3d79fd6 | 2019-09-04 12:15:20 +0000 | [diff] [blame] | 31 | #include "llvm/Support/Casting.h" |
Sam McCall | 7fc8f41 | 2019-07-22 15:55:53 +0000 | [diff] [blame] | 32 | #include "llvm/Support/raw_ostream.h" |
Sam McCall | 5cd77f9 | 2019-06-27 11:17:13 +0000 | [diff] [blame] | 33 | #include <algorithm> |
Kazu Hirata | 71f5573 | 2023-01-07 20:02:20 -0800 | [diff] [blame] | 34 | #include <optional> |
Christopher Di Bella | e9a902c | 2022-04-16 00:22:43 +0000 | [diff] [blame] | 35 | #include <set> |
Sam McCall | 7fc8f41 | 2019-07-22 15:55:53 +0000 | [diff] [blame] | 36 | #include <string> |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 37 | |
| 38 | namespace clang { |
| 39 | namespace clangd { |
| 40 | namespace { |
| 41 | using Node = SelectionTree::Node; |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 42 | |
Haojian Wu | 774acdf | 2020-05-11 11:02:34 +0200 | [diff] [blame] | 43 | // Measure the fraction of selections that were enabled by recovery AST. |
Haojian Wu | 1df0677 | 2020-12-07 10:52:04 +0100 | [diff] [blame] | 44 | void recordMetrics(const SelectionTree &S, const LangOptions &Lang) { |
| 45 | if (!trace::enabled()) |
| 46 | return; |
| 47 | const char *LanguageLabel = Lang.CPlusPlus ? "C++" : Lang.ObjC ? "ObjC" : "C"; |
Haojian Wu | 774acdf | 2020-05-11 11:02:34 +0200 | [diff] [blame] | 48 | static constexpr trace::Metric SelectionUsedRecovery( |
Haojian Wu | 1df0677 | 2020-12-07 10:52:04 +0100 | [diff] [blame] | 49 | "selection_recovery", trace::Metric::Distribution, "language"); |
| 50 | static constexpr trace::Metric RecoveryType( |
| 51 | "selection_recovery_type", trace::Metric::Distribution, "language"); |
Haojian Wu | 774acdf | 2020-05-11 11:02:34 +0200 | [diff] [blame] | 52 | const auto *Common = S.commonAncestor(); |
| 53 | for (const auto *N = Common; N; N = N->Parent) { |
Haojian Wu | 26cf6c1 | 2020-07-13 11:26:45 +0200 | [diff] [blame] | 54 | if (const auto *RE = N->ASTNode.get<RecoveryExpr>()) { |
Haojian Wu | 1df0677 | 2020-12-07 10:52:04 +0100 | [diff] [blame] | 55 | SelectionUsedRecovery.record(1, LanguageLabel); // used recovery ast. |
| 56 | RecoveryType.record(RE->isTypeDependent() ? 0 : 1, LanguageLabel); |
Haojian Wu | 774acdf | 2020-05-11 11:02:34 +0200 | [diff] [blame] | 57 | return; |
| 58 | } |
| 59 | } |
| 60 | if (Common) |
Haojian Wu | 1df0677 | 2020-12-07 10:52:04 +0100 | [diff] [blame] | 61 | SelectionUsedRecovery.record(0, LanguageLabel); // unused. |
Haojian Wu | 774acdf | 2020-05-11 11:02:34 +0200 | [diff] [blame] | 62 | } |
| 63 | |
Haojian Wu | 20f8f46 | 2022-01-04 11:50:17 +0100 | [diff] [blame] | 64 | // Return the range covering a node and all its children. |
Sam McCall | bb41f85 | 2021-06-16 15:24:23 +0200 | [diff] [blame] | 65 | SourceRange getSourceRange(const DynTypedNode &N) { |
| 66 | // MemberExprs to implicitly access anonymous fields should not claim any |
| 67 | // tokens for themselves. Given: |
| 68 | // struct A { struct { int b; }; }; |
| 69 | // The clang AST reports the following nodes for an access to b: |
| 70 | // A().b; |
| 71 | // [----] MemberExpr, base = A().<anonymous>, member = b |
| 72 | // [----] MemberExpr: base = A(), member = <anonymous> |
| 73 | // [-] CXXConstructExpr |
| 74 | // For our purposes, we don't want the second MemberExpr to own any tokens, |
| 75 | // so we reduce its range to match the CXXConstructExpr. |
| 76 | // (It's not clear that changing the clang AST would be correct in general). |
| 77 | if (const auto *ME = N.get<MemberExpr>()) { |
| 78 | if (!ME->getMemberDecl()->getDeclName()) |
| 79 | return ME->getBase() |
| 80 | ? getSourceRange(DynTypedNode::create(*ME->getBase())) |
| 81 | : SourceRange(); |
| 82 | } |
| 83 | return N.getSourceRange(); |
| 84 | } |
| 85 | |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 86 | // An IntervalSet maintains a set of disjoint subranges of an array. |
| 87 | // |
| 88 | // Initially, it contains the entire array. |
| 89 | // [-----------------------------------------------------------] |
| 90 | // |
| 91 | // When a range is erased(), it will typically split the array in two. |
| 92 | // Claim: [--------------------] |
| 93 | // after: [----------------] [-------------------] |
| 94 | // |
| 95 | // erase() returns the segments actually erased. Given the state above: |
| 96 | // Claim: [---------------------------------------] |
| 97 | // Out: [---------] [------] |
| 98 | // After: [-----] [-----------] |
| 99 | // |
| 100 | // It is used to track (expanded) tokens not yet associated with an AST node. |
| 101 | // On traversing an AST node, its token range is erased from the unclaimed set. |
| 102 | // The tokens actually removed are associated with that node, and hit-tested |
| 103 | // against the selection to determine whether the node is selected. |
Nathan Ridge | e70a9f3 | 2019-12-23 13:38:04 -0500 | [diff] [blame] | 104 | template <typename T> class IntervalSet { |
Sam McCall | 5cd77f9 | 2019-06-27 11:17:13 +0000 | [diff] [blame] | 105 | public: |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 106 | IntervalSet(llvm::ArrayRef<T> Range) { UnclaimedRanges.insert(Range); } |
Sam McCall | 905b002 | 2019-11-29 19:59:02 +0100 | [diff] [blame] | 107 | |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 108 | // Removes the elements of Claim from the set, modifying or removing ranges |
| 109 | // that overlap it. |
| 110 | // Returns the continuous subranges of Claim that were actually removed. |
Kirill Bobyrev | ee02e20 | 2020-12-10 13:36:35 +0100 | [diff] [blame] | 111 | llvm::SmallVector<llvm::ArrayRef<T>> erase(llvm::ArrayRef<T> Claim) { |
| 112 | llvm::SmallVector<llvm::ArrayRef<T>> Out; |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 113 | if (Claim.empty()) |
| 114 | return Out; |
| 115 | |
| 116 | // General case: |
| 117 | // Claim: [-----------------] |
| 118 | // UnclaimedRanges: [-A-] [-B-] [-C-] [-D-] [-E-] [-F-] [-G-] |
| 119 | // Overlap: ^first ^second |
| 120 | // Ranges C and D are fully included. Ranges B and E must be trimmed. |
| 121 | auto Overlap = std::make_pair( |
| 122 | UnclaimedRanges.lower_bound({Claim.begin(), Claim.begin()}), // C |
| 123 | UnclaimedRanges.lower_bound({Claim.end(), Claim.end()})); // F |
| 124 | // Rewind to cover B. |
| 125 | if (Overlap.first != UnclaimedRanges.begin()) { |
| 126 | --Overlap.first; |
| 127 | // ...unless B isn't selected at all. |
| 128 | if (Overlap.first->end() <= Claim.begin()) |
Nathan Ridge | e70a9f3 | 2019-12-23 13:38:04 -0500 | [diff] [blame] | 129 | ++Overlap.first; |
Sam McCall | 5cd77f9 | 2019-06-27 11:17:13 +0000 | [diff] [blame] | 130 | } |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 131 | if (Overlap.first == Overlap.second) |
| 132 | return Out; |
Sam McCall | 19daa21 | 2019-11-20 23:25:17 +0100 | [diff] [blame] | 133 | |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 134 | // First, copy all overlapping ranges into the output. |
| 135 | auto OutFirst = Out.insert(Out.end(), Overlap.first, Overlap.second); |
| 136 | // If any of the overlapping ranges were sliced by the claim, split them: |
| 137 | // - restrict the returned range to the claimed part |
| 138 | // - save the unclaimed part so it can be reinserted |
| 139 | llvm::ArrayRef<T> RemainingHead, RemainingTail; |
| 140 | if (Claim.begin() > OutFirst->begin()) { |
| 141 | RemainingHead = {OutFirst->begin(), Claim.begin()}; |
| 142 | *OutFirst = {Claim.begin(), OutFirst->end()}; |
| 143 | } |
| 144 | if (Claim.end() < Out.back().end()) { |
| 145 | RemainingTail = {Claim.end(), Out.back().end()}; |
| 146 | Out.back() = {Out.back().begin(), Claim.end()}; |
Sam McCall | 19daa21 | 2019-11-20 23:25:17 +0100 | [diff] [blame] | 147 | } |
Sam McCall | 19daa21 | 2019-11-20 23:25:17 +0100 | [diff] [blame] | 148 | |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 149 | // Erase all the overlapping ranges (invalidating all iterators). |
| 150 | UnclaimedRanges.erase(Overlap.first, Overlap.second); |
| 151 | // Reinsert ranges that were merely trimmed. |
| 152 | if (!RemainingHead.empty()) |
| 153 | UnclaimedRanges.insert(RemainingHead); |
| 154 | if (!RemainingTail.empty()) |
| 155 | UnclaimedRanges.insert(RemainingTail); |
| 156 | |
| 157 | return Out; |
Sam McCall | 19daa21 | 2019-11-20 23:25:17 +0100 | [diff] [blame] | 158 | } |
| 159 | |
| 160 | private: |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 161 | using TokenRange = llvm::ArrayRef<T>; |
| 162 | struct RangeLess { |
Sam McCall | 1374f7b | 2019-12-03 22:13:45 +0100 | [diff] [blame] | 163 | bool operator()(llvm::ArrayRef<T> L, llvm::ArrayRef<T> R) const { |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 164 | return L.begin() < R.begin(); |
Sam McCall | 905b002 | 2019-11-29 19:59:02 +0100 | [diff] [blame] | 165 | } |
Sam McCall | abe3c29 | 2019-07-31 17:52:40 +0000 | [diff] [blame] | 166 | }; |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 167 | |
| 168 | // Disjoint sorted unclaimed ranges of expanded tokens. |
Nathan Ridge | e70a9f3 | 2019-12-23 13:38:04 -0500 | [diff] [blame] | 169 | std::set<llvm::ArrayRef<T>, RangeLess> UnclaimedRanges; |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 170 | }; |
| 171 | |
| 172 | // Sentinel value for the selectedness of a node where we've seen no tokens yet. |
| 173 | // This resolves to Unselected if no tokens are ever seen. |
| 174 | // But Unselected + Complete -> Partial, while NoTokens + Complete --> Complete. |
| 175 | // This value is never exposed publicly. |
| 176 | constexpr SelectionTree::Selection NoTokens = |
| 177 | static_cast<SelectionTree::Selection>( |
| 178 | static_cast<unsigned char>(SelectionTree::Complete + 1)); |
| 179 | |
| 180 | // Nodes start with NoTokens, and then use this function to aggregate the |
| 181 | // selectedness as more tokens are found. |
| 182 | void update(SelectionTree::Selection &Result, SelectionTree::Selection New) { |
| 183 | if (New == NoTokens) |
| 184 | return; |
| 185 | if (Result == NoTokens) |
| 186 | Result = New; |
| 187 | else if (Result != New) |
| 188 | // Can only be completely selected (or unselected) if all tokens are. |
| 189 | Result = SelectionTree::Partial; |
| 190 | } |
| 191 | |
Sam McCall | be6d07c | 2020-02-23 20:03:00 +0100 | [diff] [blame] | 192 | // As well as comments, don't count semicolons as real tokens. |
| 193 | // They're not properly claimed as expr-statement is missing from the AST. |
| 194 | bool shouldIgnore(const syntax::Token &Tok) { |
Sam McCall | fc7a9f3 | 2022-01-13 08:03:39 +0100 | [diff] [blame] | 195 | switch (Tok.kind()) { |
| 196 | // Even "attached" comments are not considered part of a node's range. |
| 197 | case tok::comment: |
| 198 | // The AST doesn't directly store locations for terminating semicolons. |
| 199 | case tok::semi: |
| 200 | // We don't have locations for cvr-qualifiers: see QualifiedTypeLoc. |
| 201 | case tok::kw_const: |
| 202 | case tok::kw_volatile: |
| 203 | case tok::kw_restrict: |
| 204 | return true; |
| 205 | default: |
| 206 | return false; |
| 207 | } |
Sam McCall | be6d07c | 2020-02-23 20:03:00 +0100 | [diff] [blame] | 208 | } |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 209 | |
Nathan Ridge | e70a9f3 | 2019-12-23 13:38:04 -0500 | [diff] [blame] | 210 | // Determine whether 'Target' is the first expansion of the macro |
| 211 | // argument whose top-level spelling location is 'SpellingLoc'. |
| 212 | bool isFirstExpansion(FileID Target, SourceLocation SpellingLoc, |
| 213 | const SourceManager &SM) { |
| 214 | SourceLocation Prev = SpellingLoc; |
| 215 | while (true) { |
| 216 | // If the arg is expanded multiple times, getMacroArgExpandedLocation() |
| 217 | // returns the first expansion. |
| 218 | SourceLocation Next = SM.getMacroArgExpandedLocation(Prev); |
| 219 | // So if we reach the target, target is the first-expansion of the |
| 220 | // first-expansion ... |
| 221 | if (SM.getFileID(Next) == Target) |
| 222 | return true; |
| 223 | |
| 224 | // Otherwise, if the FileID stops changing, we've reached the innermost |
| 225 | // macro expansion, and Target was on a different branch. |
| 226 | if (SM.getFileID(Next) == SM.getFileID(Prev)) |
| 227 | return false; |
| 228 | |
| 229 | Prev = Next; |
| 230 | } |
| 231 | return false; |
| 232 | } |
| 233 | |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 234 | // SelectionTester can determine whether a range of tokens from the PP-expanded |
| 235 | // stream (corresponding to an AST node) is considered selected. |
| 236 | // |
| 237 | // When the tokens result from macro expansions, the appropriate tokens in the |
| 238 | // main file are examined (macro invocation or args). Similarly for #includes. |
Nathan Ridge | e70a9f3 | 2019-12-23 13:38:04 -0500 | [diff] [blame] | 239 | // However, only the first expansion of a given spelled token is considered |
| 240 | // selected. |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 241 | // |
| 242 | // It tests each token in the range (not just the endpoints) as contiguous |
| 243 | // expanded tokens may not have contiguous spellings (with macros). |
| 244 | // |
| 245 | // Non-token text, and tokens not modeled in the AST (comments, semicolons) |
| 246 | // are ignored when determining selectedness. |
| 247 | class SelectionTester { |
| 248 | public: |
| 249 | // The selection is offsets [SelBegin, SelEnd) in SelFile. |
| 250 | SelectionTester(const syntax::TokenBuffer &Buf, FileID SelFile, |
| 251 | unsigned SelBegin, unsigned SelEnd, const SourceManager &SM) |
Sam McCall | 1e9b837 | 2022-01-11 11:01:02 +0100 | [diff] [blame] | 252 | : SelFile(SelFile), SelFileBounds(SM.getLocForStartOfFile(SelFile), |
| 253 | SM.getLocForEndOfFile(SelFile)), |
| 254 | SM(SM) { |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 255 | // Find all tokens (partially) selected in the file. |
| 256 | auto AllSpelledTokens = Buf.spelledTokens(SelFile); |
| 257 | const syntax::Token *SelFirst = |
| 258 | llvm::partition_point(AllSpelledTokens, [&](const syntax::Token &Tok) { |
| 259 | return SM.getFileOffset(Tok.endLocation()) <= SelBegin; |
| 260 | }); |
| 261 | const syntax::Token *SelLimit = std::partition_point( |
| 262 | SelFirst, AllSpelledTokens.end(), [&](const syntax::Token &Tok) { |
| 263 | return SM.getFileOffset(Tok.location()) < SelEnd; |
| 264 | }); |
serge-sans-paille | 984b800 | 2023-01-09 18:11:07 +0100 | [diff] [blame] | 265 | auto Sel = llvm::ArrayRef(SelFirst, SelLimit); |
Sam McCall | 72f2fb1 | 2020-07-17 12:03:24 +0200 | [diff] [blame] | 266 | // Find which of these are preprocessed to nothing and should be ignored. |
Jan Svoboda | c6fb636 | 2022-01-18 17:58:57 +0100 | [diff] [blame] | 267 | llvm::BitVector PPIgnored(Sel.size(), false); |
Sam McCall | 72f2fb1 | 2020-07-17 12:03:24 +0200 | [diff] [blame] | 268 | for (const syntax::TokenBuffer::Expansion &X : |
Haojian Wu | 61d664c | 2020-07-20 15:11:00 +0200 | [diff] [blame] | 269 | Buf.expansionsOverlapping(Sel)) { |
Sam McCall | 72f2fb1 | 2020-07-17 12:03:24 +0200 | [diff] [blame] | 270 | if (X.Expanded.empty()) { |
| 271 | for (const syntax::Token &Tok : X.Spelled) { |
| 272 | if (&Tok >= SelFirst && &Tok < SelLimit) |
| 273 | PPIgnored[&Tok - SelFirst] = true; |
| 274 | } |
| 275 | } |
| 276 | } |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 277 | // Precompute selectedness and offset for selected spelled tokens. |
Sam McCall | 72f2fb1 | 2020-07-17 12:03:24 +0200 | [diff] [blame] | 278 | for (unsigned I = 0; I < Sel.size(); ++I) { |
| 279 | if (shouldIgnore(Sel[I]) || PPIgnored[I]) |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 280 | continue; |
Sam McCall | 4dedd82 | 2022-01-17 15:13:27 +0100 | [diff] [blame] | 281 | SelectedSpelled.emplace_back(); |
| 282 | Tok &S = SelectedSpelled.back(); |
Sam McCall | 72f2fb1 | 2020-07-17 12:03:24 +0200 | [diff] [blame] | 283 | S.Offset = SM.getFileOffset(Sel[I].location()); |
| 284 | if (S.Offset >= SelBegin && S.Offset + Sel[I].length() <= SelEnd) |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 285 | S.Selected = SelectionTree::Complete; |
| 286 | else |
| 287 | S.Selected = SelectionTree::Partial; |
| 288 | } |
Sam McCall | 4dedd82 | 2022-01-17 15:13:27 +0100 | [diff] [blame] | 289 | MaybeSelectedExpanded = computeMaybeSelectedExpandedTokens(Buf); |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 290 | } |
| 291 | |
| 292 | // Test whether a consecutive range of tokens is selected. |
| 293 | // The tokens are taken from the expanded token stream. |
| 294 | SelectionTree::Selection |
| 295 | test(llvm::ArrayRef<syntax::Token> ExpandedTokens) const { |
Sam McCall | 4dedd82 | 2022-01-17 15:13:27 +0100 | [diff] [blame] | 296 | if (ExpandedTokens.empty()) |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 297 | return NoTokens; |
Sam McCall | 4dedd82 | 2022-01-17 15:13:27 +0100 | [diff] [blame] | 298 | if (SelectedSpelled.empty()) |
| 299 | return SelectionTree::Unselected; |
| 300 | // Cheap (pointer) check whether any of the tokens could touch selection. |
| 301 | // In most cases, the node's overall source range touches ExpandedTokens, |
| 302 | // or we would have failed mayHit(). However now we're only considering |
| 303 | // the *unclaimed* spans of expanded tokens. |
| 304 | // This is a significant performance improvement when a lot of nodes |
| 305 | // surround the selection, including when generated by macros. |
| 306 | if (MaybeSelectedExpanded.empty() || |
| 307 | &ExpandedTokens.front() > &MaybeSelectedExpanded.back() || |
| 308 | &ExpandedTokens.back() < &MaybeSelectedExpanded.front()) { |
| 309 | return SelectionTree::Unselected; |
| 310 | } |
| 311 | |
Haojian Wu | 4cb1686 | 2022-01-27 08:43:31 +0100 | [diff] [blame] | 312 | // The eof token is used as a sentinel. |
| 313 | // In general, source range from an AST node should not claim the eof token, |
| 314 | // but it could occur for unmatched-bracket cases. |
| 315 | // FIXME: fix it in TokenBuffer, expandedTokens(SourceRange) should not |
| 316 | // return the eof token. |
| 317 | if (ExpandedTokens.back().kind() == tok::eof) |
| 318 | ExpandedTokens = ExpandedTokens.drop_back(); |
| 319 | |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 320 | SelectionTree::Selection Result = NoTokens; |
| 321 | while (!ExpandedTokens.empty()) { |
| 322 | // Take consecutive tokens from the same context together for efficiency. |
Sam McCall | 22ac067 | 2022-01-11 10:15:12 +0100 | [diff] [blame] | 323 | SourceLocation Start = ExpandedTokens.front().location(); |
| 324 | FileID FID = SM.getFileID(Start); |
| 325 | // Comparing SourceLocations against bounds is cheaper than getFileID(). |
| 326 | SourceLocation Limit = SM.getComposedLoc(FID, SM.getFileIDSize(FID)); |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 327 | auto Batch = ExpandedTokens.take_while([&](const syntax::Token &T) { |
Sam McCall | 22ac067 | 2022-01-11 10:15:12 +0100 | [diff] [blame] | 328 | return T.location() >= Start && T.location() < Limit; |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 329 | }); |
| 330 | assert(!Batch.empty()); |
| 331 | ExpandedTokens = ExpandedTokens.drop_front(Batch.size()); |
| 332 | |
| 333 | update(Result, testChunk(FID, Batch)); |
| 334 | } |
| 335 | return Result; |
| 336 | } |
| 337 | |
| 338 | // Cheap check whether any of the tokens in R might be selected. |
| 339 | // If it returns false, test() will return NoTokens or Unselected. |
| 340 | // If it returns true, test() may return any value. |
| 341 | bool mayHit(SourceRange R) const { |
Sam McCall | 4dedd82 | 2022-01-17 15:13:27 +0100 | [diff] [blame] | 342 | if (SelectedSpelled.empty() || MaybeSelectedExpanded.empty()) |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 343 | return false; |
Sam McCall | 2b2dbe6 | 2022-01-11 00:09:58 +0100 | [diff] [blame] | 344 | // If the node starts after the selection ends, it is not selected. |
| 345 | // Tokens a macro location might claim are >= its expansion start. |
| 346 | // So if the expansion start > last selected token, we can prune it. |
| 347 | // (This is particularly helpful for GTest's TEST macro). |
| 348 | if (auto B = offsetInSelFile(getExpansionStart(R.getBegin()))) |
Sam McCall | 4dedd82 | 2022-01-17 15:13:27 +0100 | [diff] [blame] | 349 | if (*B > SelectedSpelled.back().Offset) |
Sam McCall | 2b2dbe6 | 2022-01-11 00:09:58 +0100 | [diff] [blame] | 350 | return false; |
| 351 | // If the node ends before the selection begins, it is not selected. |
| 352 | SourceLocation EndLoc = R.getEnd(); |
| 353 | while (EndLoc.isMacroID()) |
| 354 | EndLoc = SM.getImmediateExpansionRange(EndLoc).getEnd(); |
| 355 | // In the rare case that the expansion range is a char range, EndLoc is |
| 356 | // ~one token too far to the right. We may fail to prune, that's OK. |
| 357 | if (auto E = offsetInSelFile(EndLoc)) |
Sam McCall | 4dedd82 | 2022-01-17 15:13:27 +0100 | [diff] [blame] | 358 | if (*E < SelectedSpelled.front().Offset) |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 359 | return false; |
| 360 | return true; |
| 361 | } |
| 362 | |
| 363 | private: |
Sam McCall | 4dedd82 | 2022-01-17 15:13:27 +0100 | [diff] [blame] | 364 | // Plausible expanded tokens that might be affected by the selection. |
| 365 | // This is an overestimate, it may contain tokens that are not selected. |
| 366 | // The point is to allow cheap pruning in test() |
| 367 | llvm::ArrayRef<syntax::Token> |
| 368 | computeMaybeSelectedExpandedTokens(const syntax::TokenBuffer &Toks) { |
| 369 | if (SelectedSpelled.empty()) |
| 370 | return {}; |
| 371 | |
| 372 | auto LastAffectedToken = [&](SourceLocation Loc) { |
| 373 | auto Offset = offsetInSelFile(Loc); |
| 374 | while (Loc.isValid() && !Offset) { |
| 375 | Loc = Loc.isMacroID() ? SM.getImmediateExpansionRange(Loc).getEnd() |
| 376 | : SM.getIncludeLoc(SM.getFileID(Loc)); |
| 377 | Offset = offsetInSelFile(Loc); |
| 378 | } |
| 379 | return Offset; |
| 380 | }; |
| 381 | auto FirstAffectedToken = [&](SourceLocation Loc) { |
| 382 | auto Offset = offsetInSelFile(Loc); |
| 383 | while (Loc.isValid() && !Offset) { |
| 384 | Loc = Loc.isMacroID() ? SM.getImmediateExpansionRange(Loc).getBegin() |
| 385 | : SM.getIncludeLoc(SM.getFileID(Loc)); |
| 386 | Offset = offsetInSelFile(Loc); |
| 387 | } |
| 388 | return Offset; |
| 389 | }; |
| 390 | |
| 391 | const syntax::Token *Start = llvm::partition_point( |
| 392 | Toks.expandedTokens(), |
| 393 | [&, First = SelectedSpelled.front().Offset](const syntax::Token &Tok) { |
| 394 | if (Tok.kind() == tok::eof) |
| 395 | return false; |
| 396 | // Implausible if upperbound(Tok) < First. |
| 397 | if (auto Offset = LastAffectedToken(Tok.location())) |
| 398 | return *Offset < First; |
Rageking8 | c8f4792 | 2022-11-08 15:28:25 +0100 | [diff] [blame] | 399 | // A prefix of the expanded tokens may be from an implicit |
Sam McCall | 4dedd82 | 2022-01-17 15:13:27 +0100 | [diff] [blame] | 400 | // inclusion (e.g. preamble patch, or command-line -include). |
| 401 | return true; |
| 402 | }); |
| 403 | |
| 404 | bool EndInvalid = false; |
| 405 | const syntax::Token *End = std::partition_point( |
| 406 | Start, Toks.expandedTokens().end(), |
| 407 | [&, Last = SelectedSpelled.back().Offset](const syntax::Token &Tok) { |
| 408 | if (Tok.kind() == tok::eof) |
| 409 | return false; |
| 410 | // Plausible if lowerbound(Tok) <= Last. |
| 411 | if (auto Offset = FirstAffectedToken(Tok.location())) |
| 412 | return *Offset <= Last; |
| 413 | // Shouldn't happen: once we've seen tokens traceable to the main |
| 414 | // file, there shouldn't be any more implicit inclusions. |
| 415 | assert(false && "Expanded token could not be resolved to main file!"); |
| 416 | EndInvalid = true; |
| 417 | return true; // conservatively assume this token can overlap |
| 418 | }); |
| 419 | if (EndInvalid) |
| 420 | End = Toks.expandedTokens().end(); |
| 421 | |
serge-sans-paille | 984b800 | 2023-01-09 18:11:07 +0100 | [diff] [blame] | 422 | return llvm::ArrayRef(Start, End); |
Sam McCall | 4dedd82 | 2022-01-17 15:13:27 +0100 | [diff] [blame] | 423 | } |
| 424 | |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 425 | // Hit-test a consecutive range of tokens from a single file ID. |
| 426 | SelectionTree::Selection |
| 427 | testChunk(FileID FID, llvm::ArrayRef<syntax::Token> Batch) const { |
| 428 | assert(!Batch.empty()); |
| 429 | SourceLocation StartLoc = Batch.front().location(); |
| 430 | // There are several possible categories of FileID depending on how the |
| 431 | // preprocessor was used to generate these tokens: |
| 432 | // main file, #included file, macro args, macro bodies. |
| 433 | // We need to identify the main-file tokens that represent Batch, and |
| 434 | // determine whether we want to exclusively claim them. Regular tokens |
| 435 | // represent one AST construct, but a macro invocation can represent many. |
| 436 | |
| 437 | // Handle tokens written directly in the main file. |
| 438 | if (FID == SelFile) { |
Sam McCall | 1e9b837 | 2022-01-11 11:01:02 +0100 | [diff] [blame] | 439 | return testTokenRange(*offsetInSelFile(Batch.front().location()), |
| 440 | *offsetInSelFile(Batch.back().location())); |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 441 | } |
| 442 | |
| 443 | // Handle tokens in another file #included into the main file. |
| 444 | // Check if the #include is selected, but don't claim it exclusively. |
| 445 | if (StartLoc.isFileID()) { |
| 446 | for (SourceLocation Loc = Batch.front().location(); Loc.isValid(); |
| 447 | Loc = SM.getIncludeLoc(SM.getFileID(Loc))) { |
Sam McCall | 1e9b837 | 2022-01-11 11:01:02 +0100 | [diff] [blame] | 448 | if (auto Offset = offsetInSelFile(Loc)) |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 449 | // FIXME: use whole #include directive, not just the filename string. |
Sam McCall | 1e9b837 | 2022-01-11 11:01:02 +0100 | [diff] [blame] | 450 | return testToken(*Offset); |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 451 | } |
| 452 | return NoTokens; |
| 453 | } |
| 454 | |
| 455 | assert(StartLoc.isMacroID()); |
| 456 | // Handle tokens that were passed as a macro argument. |
| 457 | SourceLocation ArgStart = SM.getTopMacroCallerLoc(StartLoc); |
Sam McCall | 1e9b837 | 2022-01-11 11:01:02 +0100 | [diff] [blame] | 458 | if (auto ArgOffset = offsetInSelFile(ArgStart)) { |
Nathan Ridge | e70a9f3 | 2019-12-23 13:38:04 -0500 | [diff] [blame] | 459 | if (isFirstExpansion(FID, ArgStart, SM)) { |
| 460 | SourceLocation ArgEnd = |
| 461 | SM.getTopMacroCallerLoc(Batch.back().location()); |
Sam McCall | 1e9b837 | 2022-01-11 11:01:02 +0100 | [diff] [blame] | 462 | return testTokenRange(*ArgOffset, *offsetInSelFile(ArgEnd)); |
Christian Kühnel | ec4a2c9 | 2021-11-15 15:00:23 +0000 | [diff] [blame] | 463 | } else { // NOLINT(llvm-else-after-return) |
Nathan Ridge | e70a9f3 | 2019-12-23 13:38:04 -0500 | [diff] [blame] | 464 | /* fall through and treat as part of the macro body */ |
| 465 | } |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 466 | } |
| 467 | |
| 468 | // Handle tokens produced by non-argument macro expansion. |
| 469 | // Check if the macro name is selected, don't claim it exclusively. |
Sam McCall | 1e9b837 | 2022-01-11 11:01:02 +0100 | [diff] [blame] | 470 | if (auto ExpansionOffset = offsetInSelFile(getExpansionStart(StartLoc))) |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 471 | // FIXME: also check ( and ) for function-like macros? |
Sam McCall | 1e9b837 | 2022-01-11 11:01:02 +0100 | [diff] [blame] | 472 | return testToken(*ExpansionOffset); |
Christian Kühnel | ec4a2c9 | 2021-11-15 15:00:23 +0000 | [diff] [blame] | 473 | return NoTokens; |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 474 | } |
| 475 | |
| 476 | // Is the closed token range [Begin, End] selected? |
| 477 | SelectionTree::Selection testTokenRange(unsigned Begin, unsigned End) const { |
| 478 | assert(Begin <= End); |
| 479 | // Outside the selection entirely? |
Sam McCall | 4dedd82 | 2022-01-17 15:13:27 +0100 | [diff] [blame] | 480 | if (End < SelectedSpelled.front().Offset || |
| 481 | Begin > SelectedSpelled.back().Offset) |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 482 | return SelectionTree::Unselected; |
| 483 | |
| 484 | // Compute range of tokens. |
| 485 | auto B = llvm::partition_point( |
Sam McCall | 4dedd82 | 2022-01-17 15:13:27 +0100 | [diff] [blame] | 486 | SelectedSpelled, [&](const Tok &T) { return T.Offset < Begin; }); |
| 487 | auto E = std::partition_point(B, SelectedSpelled.end(), [&](const Tok &T) { |
| 488 | return T.Offset <= End; |
| 489 | }); |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 490 | |
| 491 | // Aggregate selectedness of tokens in range. |
Sam McCall | 4dedd82 | 2022-01-17 15:13:27 +0100 | [diff] [blame] | 492 | bool ExtendsOutsideSelection = Begin < SelectedSpelled.front().Offset || |
| 493 | End > SelectedSpelled.back().Offset; |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 494 | SelectionTree::Selection Result = |
| 495 | ExtendsOutsideSelection ? SelectionTree::Unselected : NoTokens; |
| 496 | for (auto It = B; It != E; ++It) |
| 497 | update(Result, It->Selected); |
| 498 | return Result; |
| 499 | } |
| 500 | |
| 501 | // Is the token at `Offset` selected? |
| 502 | SelectionTree::Selection testToken(unsigned Offset) const { |
| 503 | // Outside the selection entirely? |
Sam McCall | 4dedd82 | 2022-01-17 15:13:27 +0100 | [diff] [blame] | 504 | if (Offset < SelectedSpelled.front().Offset || |
| 505 | Offset > SelectedSpelled.back().Offset) |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 506 | return SelectionTree::Unselected; |
| 507 | // Find the token, if it exists. |
| 508 | auto It = llvm::partition_point( |
Sam McCall | 4dedd82 | 2022-01-17 15:13:27 +0100 | [diff] [blame] | 509 | SelectedSpelled, [&](const Tok &T) { return T.Offset < Offset; }); |
| 510 | if (It != SelectedSpelled.end() && It->Offset == Offset) |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 511 | return It->Selected; |
| 512 | return NoTokens; |
| 513 | } |
| 514 | |
Sam McCall | 2b2dbe6 | 2022-01-11 00:09:58 +0100 | [diff] [blame] | 515 | // Decomposes Loc and returns the offset if the file ID is SelFile. |
Kazu Hirata | f71ffd3 | 2023-01-07 20:19:42 -0800 | [diff] [blame] | 516 | std::optional<unsigned> offsetInSelFile(SourceLocation Loc) const { |
Sam McCall | 2b2dbe6 | 2022-01-11 00:09:58 +0100 | [diff] [blame] | 517 | // Decoding Loc with SM.getDecomposedLoc is relatively expensive. |
| 518 | // But SourceLocations for a file are numerically contiguous, so we |
| 519 | // can use cheap integer operations instead. |
Sam McCall | 1e9b837 | 2022-01-11 11:01:02 +0100 | [diff] [blame] | 520 | if (Loc < SelFileBounds.getBegin() || Loc >= SelFileBounds.getEnd()) |
Kazu Hirata | 059a23c | 2022-12-03 11:54:50 -0800 | [diff] [blame] | 521 | return std::nullopt; |
Sam McCall | 2b2dbe6 | 2022-01-11 00:09:58 +0100 | [diff] [blame] | 522 | // FIXME: subtracting getRawEncoding() is dubious, move this logic into SM. |
Sam McCall | 1e9b837 | 2022-01-11 11:01:02 +0100 | [diff] [blame] | 523 | return Loc.getRawEncoding() - SelFileBounds.getBegin().getRawEncoding(); |
| 524 | } |
| 525 | |
| 526 | SourceLocation getExpansionStart(SourceLocation Loc) const { |
| 527 | while (Loc.isMacroID()) |
| 528 | Loc = SM.getImmediateExpansionRange(Loc).getBegin(); |
| 529 | return Loc; |
| 530 | } |
| 531 | |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 532 | struct Tok { |
| 533 | unsigned Offset; |
| 534 | SelectionTree::Selection Selected; |
| 535 | }; |
Sam McCall | 4dedd82 | 2022-01-17 15:13:27 +0100 | [diff] [blame] | 536 | std::vector<Tok> SelectedSpelled; |
| 537 | llvm::ArrayRef<syntax::Token> MaybeSelectedExpanded; |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 538 | FileID SelFile; |
Sam McCall | 1e9b837 | 2022-01-11 11:01:02 +0100 | [diff] [blame] | 539 | SourceRange SelFileBounds; |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 540 | const SourceManager &SM; |
Sam McCall | 5cd77f9 | 2019-06-27 11:17:13 +0000 | [diff] [blame] | 541 | }; |
| 542 | |
Sam McCall | 2ff40ca | 2019-07-24 09:39:11 +0000 | [diff] [blame] | 543 | // Show the type of a node for debugging. |
| 544 | void printNodeKind(llvm::raw_ostream &OS, const DynTypedNode &N) { |
Sam McCall | 7fc8f41 | 2019-07-22 15:55:53 +0000 | [diff] [blame] | 545 | if (const TypeLoc *TL = N.get<TypeLoc>()) { |
| 546 | // TypeLoc is a hierarchy, but has only a single ASTNodeKind. |
| 547 | // Synthesize the name from the Type subclass (except for QualifiedTypeLoc). |
| 548 | if (TL->getTypeLocClass() == TypeLoc::Qualified) |
| 549 | OS << "QualifiedTypeLoc"; |
| 550 | else |
| 551 | OS << TL->getType()->getTypeClassName() << "TypeLoc"; |
| 552 | } else { |
| 553 | OS << N.getNodeKind().asStringRef(); |
| 554 | } |
Sam McCall | 7fc8f41 | 2019-07-22 15:55:53 +0000 | [diff] [blame] | 555 | } |
| 556 | |
Bjorn Pettersson | f0f63ca | 2019-07-27 17:09:15 +0000 | [diff] [blame] | 557 | #ifndef NDEBUG |
Sam McCall | 7fc8f41 | 2019-07-22 15:55:53 +0000 | [diff] [blame] | 558 | std::string printNodeToString(const DynTypedNode &N, const PrintingPolicy &PP) { |
| 559 | std::string S; |
| 560 | llvm::raw_string_ostream OS(S); |
Sam McCall | 2ff40ca | 2019-07-24 09:39:11 +0000 | [diff] [blame] | 561 | printNodeKind(OS, N); |
Sam McCall | 7fc8f41 | 2019-07-22 15:55:53 +0000 | [diff] [blame] | 562 | return std::move(OS.str()); |
| 563 | } |
Bjorn Pettersson | f0f63ca | 2019-07-27 17:09:15 +0000 | [diff] [blame] | 564 | #endif |
Sam McCall | 7fc8f41 | 2019-07-22 15:55:53 +0000 | [diff] [blame] | 565 | |
Nathan Ridge | e70a9f3 | 2019-12-23 13:38:04 -0500 | [diff] [blame] | 566 | bool isImplicit(const Stmt *S) { |
Sam McCall | bbcbb10 | 2019-11-13 20:16:23 +0100 | [diff] [blame] | 567 | // Some Stmts are implicit and shouldn't be traversed, but there's no |
| 568 | // "implicit" attribute on Stmt/Expr. |
| 569 | // Unwrap implicit casts first if present (other nodes too?). |
| 570 | if (auto *ICE = llvm::dyn_cast<ImplicitCastExpr>(S)) |
| 571 | S = ICE->getSubExprAsWritten(); |
| 572 | // Implicit this in a MemberExpr is not filtered out by RecursiveASTVisitor. |
| 573 | // It would be nice if RAV handled this (!shouldTraverseImplicitCode()). |
| 574 | if (auto *CTI = llvm::dyn_cast<CXXThisExpr>(S)) |
| 575 | if (CTI->isImplicit()) |
| 576 | return true; |
Kadir Cetinkaya | 512aa84 | 2021-09-30 15:25:42 +0200 | [diff] [blame] | 577 | // Make sure implicit access of anonymous structs don't end up owning tokens. |
| 578 | if (auto *ME = llvm::dyn_cast<MemberExpr>(S)) { |
| 579 | if (auto *FD = llvm::dyn_cast<FieldDecl>(ME->getMemberDecl())) |
| 580 | if (FD->isAnonymousStructOrUnion()) |
| 581 | // If Base is an implicit CXXThis, then the whole MemberExpr has no |
| 582 | // tokens. If it's a normal e.g. DeclRef, we treat the MemberExpr like |
| 583 | // an implicit cast. |
| 584 | return isImplicit(ME->getBase()); |
| 585 | } |
Sam McCall | bbcbb10 | 2019-11-13 20:16:23 +0100 | [diff] [blame] | 586 | // Refs to operator() and [] are (almost?) always implicit as part of calls. |
| 587 | if (auto *DRE = llvm::dyn_cast<DeclRefExpr>(S)) { |
| 588 | if (auto *FD = llvm::dyn_cast<FunctionDecl>(DRE->getDecl())) { |
| 589 | switch (FD->getOverloadedOperator()) { |
| 590 | case OO_Call: |
| 591 | case OO_Subscript: |
| 592 | return true; |
| 593 | default: |
| 594 | break; |
| 595 | } |
| 596 | } |
| 597 | } |
| 598 | return false; |
| 599 | } |
| 600 | |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 601 | // We find the selection by visiting written nodes in the AST, looking for nodes |
| 602 | // that intersect with the selected character range. |
| 603 | // |
| 604 | // While traversing, we maintain a parent stack. As nodes pop off the stack, |
| 605 | // we decide whether to keep them or not. To be kept, they must either be |
| 606 | // selected or contain some nodes that are. |
| 607 | // |
| 608 | // For simple cases (not inside macros) we prune subtrees that don't intersect. |
| 609 | class SelectionVisitor : public RecursiveASTVisitor<SelectionVisitor> { |
| 610 | public: |
| 611 | // Runs the visitor to gather selected nodes and their ancestors. |
| 612 | // If there is any selection, the root (TUDecl) is the first node. |
Sam McCall | abe3c29 | 2019-07-31 17:52:40 +0000 | [diff] [blame] | 613 | static std::deque<Node> collect(ASTContext &AST, |
| 614 | const syntax::TokenBuffer &Tokens, |
| 615 | const PrintingPolicy &PP, unsigned Begin, |
| 616 | unsigned End, FileID File) { |
| 617 | SelectionVisitor V(AST, Tokens, PP, Begin, End, File); |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 618 | V.TraverseAST(AST); |
| 619 | assert(V.Stack.size() == 1 && "Unpaired push/pop?"); |
| 620 | assert(V.Stack.top() == &V.Nodes.front()); |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 621 | return std::move(V.Nodes); |
| 622 | } |
| 623 | |
| 624 | // We traverse all "well-behaved" nodes the same way: |
| 625 | // - push the node onto the stack |
| 626 | // - traverse its children recursively |
| 627 | // - pop it from the stack |
| 628 | // - hit testing: is intersection(node, selection) - union(children) empty? |
| 629 | // - attach it to the tree if it or any children hit the selection |
| 630 | // |
| 631 | // Two categories of nodes are not "well-behaved": |
| 632 | // - those without source range information, we don't record those |
| 633 | // - those that can't be stored in DynTypedNode. |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 634 | bool TraverseDecl(Decl *X) { |
Christian Kühnel | 7aa2ce0 | 2021-11-15 15:25:12 +0000 | [diff] [blame] | 635 | if (llvm::isa_and_nonnull<TranslationUnitDecl>(X)) |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 636 | return Base::TraverseDecl(X); // Already pushed by constructor. |
Sam McCall | 395fde7 | 2019-06-18 13:37:54 +0000 | [diff] [blame] | 637 | // Base::TraverseDecl will suppress children, but not this node itself. |
Sam McCall | a8c9b9f | 2023-08-31 20:14:35 +0200 | [diff] [blame] | 638 | if (X && X->isImplicit()) { |
| 639 | // Most implicit nodes have only implicit children and can be skipped. |
| 640 | // However there are exceptions (`void foo(Concept auto x)`), and |
| 641 | // the base implementation knows how to find them. |
| 642 | return Base::TraverseDecl(X); |
| 643 | } |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 644 | return traverseNode(X, [&] { return Base::TraverseDecl(X); }); |
| 645 | } |
| 646 | bool TraverseTypeLoc(TypeLoc X) { |
| 647 | return traverseNode(&X, [&] { return Base::TraverseTypeLoc(X); }); |
| 648 | } |
Nathan Ridge | 70d583a | 2020-08-09 20:37:43 -0400 | [diff] [blame] | 649 | bool TraverseTemplateArgumentLoc(const TemplateArgumentLoc &X) { |
| 650 | return traverseNode(&X, |
| 651 | [&] { return Base::TraverseTemplateArgumentLoc(X); }); |
| 652 | } |
Sam McCall | 79f7831 | 2019-06-25 09:36:09 +0000 | [diff] [blame] | 653 | bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc X) { |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 654 | return traverseNode( |
| 655 | &X, [&] { return Base::TraverseNestedNameSpecifierLoc(X); }); |
| 656 | } |
| 657 | bool TraverseConstructorInitializer(CXXCtorInitializer *X) { |
| 658 | return traverseNode( |
| 659 | X, [&] { return Base::TraverseConstructorInitializer(X); }); |
| 660 | } |
Nathan James | d92413a | 2021-01-26 18:58:53 +0000 | [diff] [blame] | 661 | bool TraverseCXXBaseSpecifier(const CXXBaseSpecifier &X) { |
| 662 | return traverseNode(&X, [&] { return Base::TraverseCXXBaseSpecifier(X); }); |
| 663 | } |
Sam McCall | bb81e70 | 2020-10-20 12:01:48 +0200 | [diff] [blame] | 664 | bool TraverseAttr(Attr *X) { |
| 665 | return traverseNode(X, [&] { return Base::TraverseAttr(X); }); |
| 666 | } |
Sam McCall | a8c9b9f | 2023-08-31 20:14:35 +0200 | [diff] [blame] | 667 | bool TraverseConceptReference(ConceptReference *X) { |
| 668 | return traverseNode(X, [&] { return Base::TraverseConceptReference(X); }); |
| 669 | } |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 670 | // Stmt is the same, but this form allows the data recursion optimization. |
| 671 | bool dataTraverseStmtPre(Stmt *X) { |
Sam McCall | bbcbb10 | 2019-11-13 20:16:23 +0100 | [diff] [blame] | 672 | if (!X || isImplicit(X)) |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 673 | return false; |
Sam McCall | 7fc8f41 | 2019-07-22 15:55:53 +0000 | [diff] [blame] | 674 | auto N = DynTypedNode::create(*X); |
| 675 | if (canSafelySkipNode(N)) |
| 676 | return false; |
| 677 | push(std::move(N)); |
Sam McCall | e18ab2a | 2019-11-19 16:57:31 +0100 | [diff] [blame] | 678 | if (shouldSkipChildren(X)) { |
| 679 | pop(); |
| 680 | return false; |
| 681 | } |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 682 | return true; |
| 683 | } |
| 684 | bool dataTraverseStmtPost(Stmt *X) { |
| 685 | pop(); |
| 686 | return true; |
| 687 | } |
Sam McCall | 2ff40ca | 2019-07-24 09:39:11 +0000 | [diff] [blame] | 688 | // QualifiedTypeLoc is handled strangely in RecursiveASTVisitor: the derived |
| 689 | // TraverseTypeLoc is not called for the inner UnqualTypeLoc. |
| 690 | // This means we'd never see 'int' in 'const int'! Work around that here. |
| 691 | // (The reason for the behavior is to avoid traversing the nested Type twice, |
| 692 | // but we ignore TraverseType anyway). |
| 693 | bool TraverseQualifiedTypeLoc(QualifiedTypeLoc QX) { |
| 694 | return traverseNode<TypeLoc>( |
| 695 | &QX, [&] { return TraverseTypeLoc(QX.getUnqualifiedLoc()); }); |
| 696 | } |
David Goldman | 54a962b | 2022-02-09 15:01:17 -0500 | [diff] [blame] | 697 | bool TraverseObjCProtocolLoc(ObjCProtocolLoc PL) { |
| 698 | return traverseNode(&PL, [&] { return Base::TraverseObjCProtocolLoc(PL); }); |
| 699 | } |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 700 | // Uninteresting parts of the AST that don't have locations within them. |
| 701 | bool TraverseNestedNameSpecifier(NestedNameSpecifier *) { return true; } |
| 702 | bool TraverseType(QualType) { return true; } |
| 703 | |
Sam McCall | ab65945 | 2019-08-28 12:05:12 +0000 | [diff] [blame] | 704 | // The DeclStmt for the loop variable claims to cover the whole range |
| 705 | // inside the parens, this causes the range-init expression to not be hit. |
| 706 | // Traverse the loop VarDecl instead, which has the right source range. |
| 707 | bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) { |
| 708 | return traverseNode(S, [&] { |
| 709 | return TraverseStmt(S->getInit()) && TraverseDecl(S->getLoopVariable()) && |
| 710 | TraverseStmt(S->getRangeInit()) && TraverseStmt(S->getBody()); |
| 711 | }); |
| 712 | } |
Sam McCall | af071f0 | 2020-01-13 19:53:52 +0100 | [diff] [blame] | 713 | // OpaqueValueExpr blocks traversal, we must explicitly traverse it. |
| 714 | bool TraverseOpaqueValueExpr(OpaqueValueExpr *E) { |
| 715 | return traverseNode(E, [&] { return TraverseStmt(E->getSourceExpr()); }); |
| 716 | } |
| 717 | // We only want to traverse the *syntactic form* to understand the selection. |
| 718 | bool TraversePseudoObjectExpr(PseudoObjectExpr *E) { |
| 719 | return traverseNode(E, [&] { return TraverseStmt(E->getSyntacticForm()); }); |
| 720 | } |
Ilya Biryukov | 2b833d4 | 2022-04-28 08:39:47 +0000 | [diff] [blame] | 721 | bool TraverseTypeConstraint(const TypeConstraint *C) { |
| 722 | if (auto *E = C->getImmediatelyDeclaredConstraint()) { |
| 723 | // Technically this expression is 'implicit' and not traversed by the RAV. |
| 724 | // However, the range is correct, so we visit expression to avoid adding |
| 725 | // an extra kind to 'DynTypeNode' that hold 'TypeConstraint'. |
| 726 | return TraverseStmt(E); |
| 727 | } |
| 728 | return Base::TraverseTypeConstraint(C); |
| 729 | } |
Sam McCall | ab65945 | 2019-08-28 12:05:12 +0000 | [diff] [blame] | 730 | |
Sam McCall | 13b2a0c | 2022-08-19 14:51:36 +0200 | [diff] [blame] | 731 | // Override child traversal for certain node types. |
| 732 | using RecursiveASTVisitor::getStmtChildren; |
| 733 | // PredefinedExpr like __func__ has a StringLiteral child for its value. |
| 734 | // It's not written, so don't traverse it. |
| 735 | Stmt::child_range getStmtChildren(PredefinedExpr *) { |
| 736 | return {StmtIterator{}, StmtIterator{}}; |
| 737 | } |
| 738 | |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 739 | private: |
| 740 | using Base = RecursiveASTVisitor<SelectionVisitor>; |
Sam McCall | 5cd77f9 | 2019-06-27 11:17:13 +0000 | [diff] [blame] | 741 | |
Sam McCall | abe3c29 | 2019-07-31 17:52:40 +0000 | [diff] [blame] | 742 | SelectionVisitor(ASTContext &AST, const syntax::TokenBuffer &Tokens, |
| 743 | const PrintingPolicy &PP, unsigned SelBegin, unsigned SelEnd, |
| 744 | FileID SelFile) |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 745 | : SM(AST.getSourceManager()), LangOpts(AST.getLangOpts()), |
Bjorn Pettersson | f0f63ca | 2019-07-27 17:09:15 +0000 | [diff] [blame] | 746 | #ifndef NDEBUG |
| 747 | PrintPolicy(PP), |
| 748 | #endif |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 749 | TokenBuf(Tokens), SelChecker(Tokens, SelFile, SelBegin, SelEnd, SM), |
| 750 | UnclaimedExpandedTokens(Tokens.expandedTokens()) { |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 751 | // Ensure we have a node for the TU decl, regardless of traversal scope. |
| 752 | Nodes.emplace_back(); |
| 753 | Nodes.back().ASTNode = DynTypedNode::create(*AST.getTranslationUnitDecl()); |
| 754 | Nodes.back().Parent = nullptr; |
| 755 | Nodes.back().Selected = SelectionTree::Unselected; |
| 756 | Stack.push(&Nodes.back()); |
| 757 | } |
| 758 | |
| 759 | // Generic case of TraverseFoo. Func should be the call to Base::TraverseFoo. |
| 760 | // Node is always a pointer so the generic code can handle any null checks. |
| 761 | template <typename T, typename Func> |
| 762 | bool traverseNode(T *Node, const Func &Body) { |
Sam McCall | 7fc8f41 | 2019-07-22 15:55:53 +0000 | [diff] [blame] | 763 | if (Node == nullptr) |
| 764 | return true; |
| 765 | auto N = DynTypedNode::create(*Node); |
| 766 | if (canSafelySkipNode(N)) |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 767 | return true; |
| 768 | push(DynTypedNode::create(*Node)); |
| 769 | bool Ret = Body(); |
| 770 | pop(); |
| 771 | return Ret; |
| 772 | } |
| 773 | |
Sam McCall | 5cd77f9 | 2019-06-27 11:17:13 +0000 | [diff] [blame] | 774 | // HIT TESTING |
| 775 | // |
| 776 | // We do rough hit testing on the way down the tree to avoid traversing |
| 777 | // subtrees that don't touch the selection (canSafelySkipNode), but |
| 778 | // fine-grained hit-testing is mostly done on the way back up (in pop()). |
| 779 | // This means children get to claim parts of the selection first, and parents |
| 780 | // are only selected if they own tokens that no child owned. |
| 781 | // |
| 782 | // Nodes *usually* nest nicely: a child's getSourceRange() lies within the |
| 783 | // parent's, and a node (transitively) owns all tokens in its range. |
| 784 | // |
Sam McCall | 96f5cc1 | 2022-01-04 22:46:25 +0100 | [diff] [blame] | 785 | // Exception 1: when declarators nest, *inner* declarator is the *outer* type. |
| 786 | // e.g. void foo[5](int) is an array of functions. |
| 787 | // To handle this case, declarators are careful to only claim the tokens they |
| 788 | // own, rather than claim a range and rely on claim ordering. |
Sam McCall | 5cd77f9 | 2019-06-27 11:17:13 +0000 | [diff] [blame] | 789 | // |
| 790 | // Exception 2: siblings both claim the same node. |
| 791 | // e.g. `int x, y;` produces two sibling VarDecls. |
| 792 | // ~~~~~ x |
| 793 | // ~~~~~~~~ y |
| 794 | // Here the first ("leftmost") sibling claims the tokens it wants, and the |
| 795 | // other sibling gets what's left. So selecting "int" only includes the left |
| 796 | // VarDecl in the selection tree. |
| 797 | |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 798 | // An optimization for a common case: nodes outside macro expansions that |
| 799 | // don't intersect the selection may be recursively skipped. |
Sam McCall | 7fc8f41 | 2019-07-22 15:55:53 +0000 | [diff] [blame] | 800 | bool canSafelySkipNode(const DynTypedNode &N) { |
Sam McCall | bb41f85 | 2021-06-16 15:24:23 +0200 | [diff] [blame] | 801 | SourceRange S = getSourceRange(N); |
Kadir Cetinkaya | f5465e7 | 2020-01-13 12:09:30 +0100 | [diff] [blame] | 802 | if (auto *TL = N.get<TypeLoc>()) { |
David Goldman | d5c022d | 2020-10-16 14:14:37 -0400 | [diff] [blame] | 803 | // FIXME: TypeLoc::getBeginLoc()/getEndLoc() are pretty fragile |
| 804 | // heuristics. We should consider only pruning critical TypeLoc nodes, to |
| 805 | // be more robust. |
| 806 | |
David Goldman | d5c022d | 2020-10-16 14:14:37 -0400 | [diff] [blame] | 807 | // AttributedTypeLoc may point to the attribute's range, NOT the modified |
| 808 | // type's range. |
| 809 | if (auto AT = TL->getAs<AttributedTypeLoc>()) |
| 810 | S = AT.getModifiedLoc().getSourceRange(); |
Kadir Cetinkaya | f5465e7 | 2020-01-13 12:09:30 +0100 | [diff] [blame] | 811 | } |
Sam McCall | bb81e70 | 2020-10-20 12:01:48 +0200 | [diff] [blame] | 812 | // SourceRange often doesn't manage to accurately cover attributes. |
| 813 | // Fortunately, attributes are rare. |
| 814 | if (llvm::any_of(getAttributes(N), |
| 815 | [](const Attr *A) { return !A->isImplicit(); })) |
| 816 | return false; |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 817 | if (!SelChecker.mayHit(S)) { |
Haojian Wu | 55b702c | 2022-01-19 15:06:34 +0100 | [diff] [blame] | 818 | dlog("{2}skip: {0} {1}", printNodeToString(N, PrintPolicy), |
| 819 | S.printToString(SM), indent()); |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 820 | return true; |
| 821 | } |
| 822 | return false; |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 823 | } |
| 824 | |
Sam McCall | e18ab2a | 2019-11-19 16:57:31 +0100 | [diff] [blame] | 825 | // There are certain nodes we want to treat as leaves in the SelectionTree, |
| 826 | // although they do have children. |
| 827 | bool shouldSkipChildren(const Stmt *X) const { |
| 828 | // UserDefinedLiteral (e.g. 12_i) has two children (12 and _i). |
| 829 | // Unfortunately TokenBuffer sees 12_i as one token and can't split it. |
| 830 | // So we treat UserDefinedLiteral as a leaf node, owning the token. |
| 831 | return llvm::isa<UserDefinedLiteral>(X); |
| 832 | } |
| 833 | |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 834 | // Pushes a node onto the ancestor stack. Pairs with pop(). |
Haojian Wu | fd598e1 | 2022-01-17 14:27:29 +0100 | [diff] [blame] | 835 | // Performs early hit detection for some nodes (on the earlySourceRange). |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 836 | void push(DynTypedNode Node) { |
Haojian Wu | fd598e1 | 2022-01-17 14:27:29 +0100 | [diff] [blame] | 837 | SourceRange Early = earlySourceRange(Node); |
Haojian Wu | 55b702c | 2022-01-19 15:06:34 +0100 | [diff] [blame] | 838 | dlog("{2}push: {0} {1}", printNodeToString(Node, PrintPolicy), |
| 839 | Node.getSourceRange().printToString(SM), indent()); |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 840 | Nodes.emplace_back(); |
| 841 | Nodes.back().ASTNode = std::move(Node); |
| 842 | Nodes.back().Parent = Stack.top(); |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 843 | Nodes.back().Selected = NoTokens; |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 844 | Stack.push(&Nodes.back()); |
Haojian Wu | fd598e1 | 2022-01-17 14:27:29 +0100 | [diff] [blame] | 845 | claimRange(Early, Nodes.back().Selected); |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 846 | } |
| 847 | |
| 848 | // Pops a node off the ancestor stack, and finalizes it. Pairs with push(). |
Sam McCall | 5cd77f9 | 2019-06-27 11:17:13 +0000 | [diff] [blame] | 849 | // Performs primary hit detection. |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 850 | void pop() { |
| 851 | Node &N = *Stack.top(); |
Sam McCall | 7fc8f41 | 2019-07-22 15:55:53 +0000 | [diff] [blame] | 852 | dlog("{1}pop: {0}", printNodeToString(N.ASTNode, PrintPolicy), indent(-1)); |
Haojian Wu | 20f8f46 | 2022-01-04 11:50:17 +0100 | [diff] [blame] | 853 | claimTokensFor(N.ASTNode, N.Selected); |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 854 | if (N.Selected == NoTokens) |
| 855 | N.Selected = SelectionTree::Unselected; |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 856 | if (N.Selected || !N.Children.empty()) { |
| 857 | // Attach to the tree. |
| 858 | N.Parent->Children.push_back(&N); |
| 859 | } else { |
| 860 | // Neither N any children are selected, it doesn't belong in the tree. |
| 861 | assert(&N == &Nodes.back()); |
| 862 | Nodes.pop_back(); |
| 863 | } |
| 864 | Stack.pop(); |
| 865 | } |
| 866 | |
Haojian Wu | fd598e1 | 2022-01-17 14:27:29 +0100 | [diff] [blame] | 867 | // Returns the range of tokens that this node will claim directly, and |
| 868 | // is not available to the node's children. |
| 869 | // Usually empty, but sometimes children cover tokens but shouldn't own them. |
| 870 | SourceRange earlySourceRange(const DynTypedNode &N) { |
Sam McCall | 62116c8 | 2022-10-19 01:42:31 +0200 | [diff] [blame] | 871 | if (const Decl *VD = N.get<VarDecl>()) { |
Haojian Wu | fd598e1 | 2022-01-17 14:27:29 +0100 | [diff] [blame] | 872 | // We want the name in the var-decl to be claimed by the decl itself and |
| 873 | // not by any children. Ususally, we don't need this, because source |
| 874 | // ranges of children are not overlapped with their parent's. |
| 875 | // An exception is lambda captured var decl, where AutoTypeLoc is |
| 876 | // overlapped with the name loc. |
| 877 | // auto fun = [bar = foo]() { ... } |
| 878 | // ~~~~~~~~~ VarDecl |
| 879 | // ~~~ |- AutoTypeLoc |
Sam McCall | 62116c8 | 2022-10-19 01:42:31 +0200 | [diff] [blame] | 880 | return VD->getLocation(); |
| 881 | } |
| 882 | |
| 883 | // When referring to a destructor ~Foo(), attribute Foo to the destructor |
| 884 | // rather than the TypeLoc nested inside it. |
| 885 | // We still traverse the TypeLoc, because it may contain other targeted |
| 886 | // things like the T in ~Foo<T>(). |
| 887 | if (const auto *CDD = N.get<CXXDestructorDecl>()) |
| 888 | return CDD->getNameInfo().getNamedTypeInfo()->getTypeLoc().getBeginLoc(); |
| 889 | if (const auto *ME = N.get<MemberExpr>()) { |
| 890 | auto NameInfo = ME->getMemberNameInfo(); |
| 891 | if (NameInfo.getName().getNameKind() == |
| 892 | DeclarationName::CXXDestructorName) |
| 893 | return NameInfo.getNamedTypeInfo()->getTypeLoc().getBeginLoc(); |
Haojian Wu | fd598e1 | 2022-01-17 14:27:29 +0100 | [diff] [blame] | 894 | } |
| 895 | |
| 896 | return SourceRange(); |
| 897 | } |
| 898 | |
Haojian Wu | 20f8f46 | 2022-01-04 11:50:17 +0100 | [diff] [blame] | 899 | // Claim tokens for N, after processing its children. |
| 900 | // By default this claims all unclaimed tokens in getSourceRange(). |
| 901 | // We override this if we want to claim fewer tokens (e.g. there are gaps). |
| 902 | void claimTokensFor(const DynTypedNode &N, SelectionTree::Selection &Result) { |
Sam McCall | 96f5cc1 | 2022-01-04 22:46:25 +0100 | [diff] [blame] | 903 | // CXXConstructExpr often shows implicit construction, like `string s;`. |
| 904 | // Don't associate any tokens with it unless there's some syntax like {}. |
| 905 | // This prevents it from claiming 's', its primary location. |
| 906 | if (const auto *CCE = N.get<CXXConstructExpr>()) { |
| 907 | claimRange(CCE->getParenOrBraceRange(), Result); |
| 908 | return; |
| 909 | } |
| 910 | // ExprWithCleanups is always implicit. It often wraps CXXConstructExpr. |
| 911 | // Prevent it claiming 's' in the case above. |
| 912 | if (N.get<ExprWithCleanups>()) |
| 913 | return; |
| 914 | |
| 915 | // Declarators nest "inside out", with parent types inside child ones. |
| 916 | // Instead of claiming the whole range (clobbering parent tokens), carefully |
| 917 | // claim the tokens owned by this node and non-declarator children. |
| 918 | // (We could manipulate traversal order instead, but this is easier). |
| 919 | // |
| 920 | // Non-declarator types nest normally, and are handled like other nodes. |
| 921 | // |
| 922 | // Example: |
| 923 | // Vec<R<int>(*[2])(A<char>)> is a Vec of arrays of pointers to functions, |
Haojian Wu | 7632d19 | 2022-01-05 15:50:07 +0100 | [diff] [blame] | 924 | // which accept A<char> and return R<int>. |
Sam McCall | 96f5cc1 | 2022-01-04 22:46:25 +0100 | [diff] [blame] | 925 | // The TypeLoc hierarchy: |
| 926 | // Vec<R<int>(*[2])(A<char>)> m; |
Sam McCall | bb10e03 | 2022-01-05 16:00:13 +0100 | [diff] [blame] | 927 | // Vec<#####################> TemplateSpecialization Vec |
Sam McCall | 96f5cc1 | 2022-01-04 22:46:25 +0100 | [diff] [blame] | 928 | // --------[2]---------- `-Array |
| 929 | // -------*------------- `-Pointer |
| 930 | // ------(----)--------- `-Paren |
Sam McCall | bb10e03 | 2022-01-05 16:00:13 +0100 | [diff] [blame] | 931 | // ------------(#######) `-Function |
| 932 | // R<###> |-TemplateSpecialization R |
Sam McCall | 96f5cc1 | 2022-01-04 22:46:25 +0100 | [diff] [blame] | 933 | // int | `-Builtin int |
Sam McCall | bb10e03 | 2022-01-05 16:00:13 +0100 | [diff] [blame] | 934 | // A<####> `-TemplateSpecialization A |
Haojian Wu | 7632d19 | 2022-01-05 15:50:07 +0100 | [diff] [blame] | 935 | // char `-Builtin char |
Sam McCall | 96f5cc1 | 2022-01-04 22:46:25 +0100 | [diff] [blame] | 936 | // |
Sam McCall | bb10e03 | 2022-01-05 16:00:13 +0100 | [diff] [blame] | 937 | // In each row |
| 938 | // --- represents unclaimed parts of the SourceRange. |
| 939 | // ### represents parts that children already claimed. |
Haojian Wu | 20f8f46 | 2022-01-04 11:50:17 +0100 | [diff] [blame] | 940 | if (const auto *TL = N.get<TypeLoc>()) { |
Sam McCall | 96f5cc1 | 2022-01-04 22:46:25 +0100 | [diff] [blame] | 941 | if (auto PTL = TL->getAs<ParenTypeLoc>()) { |
| 942 | claimRange(PTL.getLParenLoc(), Result); |
| 943 | claimRange(PTL.getRParenLoc(), Result); |
Haojian Wu | 20f8f46 | 2022-01-04 11:50:17 +0100 | [diff] [blame] | 944 | return; |
| 945 | } |
Sam McCall | 96f5cc1 | 2022-01-04 22:46:25 +0100 | [diff] [blame] | 946 | if (auto ATL = TL->getAs<ArrayTypeLoc>()) { |
| 947 | claimRange(ATL.getBracketsRange(), Result); |
| 948 | return; |
| 949 | } |
| 950 | if (auto PTL = TL->getAs<PointerTypeLoc>()) { |
| 951 | claimRange(PTL.getStarLoc(), Result); |
| 952 | return; |
| 953 | } |
| 954 | if (auto FTL = TL->getAs<FunctionTypeLoc>()) { |
| 955 | claimRange(SourceRange(FTL.getLParenLoc(), FTL.getEndLoc()), Result); |
| 956 | return; |
| 957 | } |
Haojian Wu | 20f8f46 | 2022-01-04 11:50:17 +0100 | [diff] [blame] | 958 | } |
| 959 | claimRange(getSourceRange(N), Result); |
| 960 | } |
| 961 | |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 962 | // Perform hit-testing of a complete Node against the selection. |
| 963 | // This runs for every node in the AST, and must be fast in common cases. |
Sam McCall | 5cd77f9 | 2019-06-27 11:17:13 +0000 | [diff] [blame] | 964 | // This is usually called from pop(), so we can take children into account. |
Sam McCall | 96f5cc1 | 2022-01-04 22:46:25 +0100 | [diff] [blame] | 965 | // The existing state of Result is relevant. |
Sam McCall | 20c5fbb | 2019-10-02 10:01:53 +0000 | [diff] [blame] | 966 | void claimRange(SourceRange S, SelectionTree::Selection &Result) { |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 967 | for (const auto &ClaimedRange : |
| 968 | UnclaimedExpandedTokens.erase(TokenBuf.expandedTokens(S))) |
| 969 | update(Result, SelChecker.test(ClaimedRange)); |
| 970 | |
| 971 | if (Result && Result != NoTokens) |
| 972 | dlog("{1}hit selection: {0}", S.printToString(SM), indent()); |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 973 | } |
| 974 | |
Sam McCall | 7fc8f41 | 2019-07-22 15:55:53 +0000 | [diff] [blame] | 975 | std::string indent(int Offset = 0) { |
| 976 | // Cast for signed arithmetic. |
| 977 | int Amount = int(Stack.size()) + Offset; |
| 978 | assert(Amount >= 0); |
| 979 | return std::string(Amount, ' '); |
| 980 | } |
| 981 | |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 982 | SourceManager &SM; |
| 983 | const LangOptions &LangOpts; |
Bjorn Pettersson | f0f63ca | 2019-07-27 17:09:15 +0000 | [diff] [blame] | 984 | #ifndef NDEBUG |
Sam McCall | 7fc8f41 | 2019-07-22 15:55:53 +0000 | [diff] [blame] | 985 | const PrintingPolicy &PrintPolicy; |
Bjorn Pettersson | f0f63ca | 2019-07-27 17:09:15 +0000 | [diff] [blame] | 986 | #endif |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 987 | const syntax::TokenBuffer &TokenBuf; |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 988 | std::stack<Node *> Stack; |
Sam McCall | c9c714c | 2019-12-03 16:59:52 +0100 | [diff] [blame] | 989 | SelectionTester SelChecker; |
| 990 | IntervalSet<syntax::Token> UnclaimedExpandedTokens; |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 991 | std::deque<Node> Nodes; // Stable pointers as we add more nodes. |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 992 | }; |
| 993 | |
| 994 | } // namespace |
| 995 | |
Sam McCall | e3e1583 | 2020-05-19 01:30:23 +0200 | [diff] [blame] | 996 | llvm::SmallString<256> abbreviatedString(DynTypedNode N, |
| 997 | const PrintingPolicy &PP) { |
| 998 | llvm::SmallString<256> Result; |
| 999 | { |
| 1000 | llvm::raw_svector_ostream OS(Result); |
| 1001 | N.print(OS, PP); |
| 1002 | } |
| 1003 | auto Pos = Result.find('\n'); |
| 1004 | if (Pos != llvm::StringRef::npos) { |
David Blaikie | 1def257 | 2021-07-08 13:30:14 -0700 | [diff] [blame] | 1005 | bool MoreText = !llvm::all_of(Result.str().drop_front(Pos), llvm::isSpace); |
Sam McCall | e3e1583 | 2020-05-19 01:30:23 +0200 | [diff] [blame] | 1006 | Result.resize(Pos); |
| 1007 | if (MoreText) |
| 1008 | Result.append(" …"); |
| 1009 | } |
| 1010 | return Result; |
| 1011 | } |
| 1012 | |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 1013 | void SelectionTree::print(llvm::raw_ostream &OS, const SelectionTree::Node &N, |
| 1014 | int Indent) const { |
| 1015 | if (N.Selected) |
| 1016 | OS.indent(Indent - 1) << (N.Selected == SelectionTree::Complete ? '*' |
| 1017 | : '.'); |
| 1018 | else |
| 1019 | OS.indent(Indent); |
Sam McCall | 2ff40ca | 2019-07-24 09:39:11 +0000 | [diff] [blame] | 1020 | printNodeKind(OS, N.ASTNode); |
Sam McCall | e3e1583 | 2020-05-19 01:30:23 +0200 | [diff] [blame] | 1021 | OS << ' ' << abbreviatedString(N.ASTNode, PrintPolicy) << "\n"; |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 1022 | for (const Node *Child : N.Children) |
| 1023 | print(OS, *Child, Indent + 2); |
| 1024 | } |
| 1025 | |
Sam McCall | 2ff40ca | 2019-07-24 09:39:11 +0000 | [diff] [blame] | 1026 | std::string SelectionTree::Node::kind() const { |
| 1027 | std::string S; |
| 1028 | llvm::raw_string_ostream OS(S); |
| 1029 | printNodeKind(OS, ASTNode); |
| 1030 | return std::move(OS.str()); |
| 1031 | } |
| 1032 | |
Sam McCall | be6d07c | 2020-02-23 20:03:00 +0100 | [diff] [blame] | 1033 | // Decide which selections emulate a "point" query in between characters. |
| 1034 | // If it's ambiguous (the neighboring characters are selectable tokens), returns |
| 1035 | // both possibilities in preference order. |
| 1036 | // Always returns at least one range - if no tokens touched, and empty range. |
| 1037 | static llvm::SmallVector<std::pair<unsigned, unsigned>, 2> |
| 1038 | pointBounds(unsigned Offset, const syntax::TokenBuffer &Tokens) { |
| 1039 | const auto &SM = Tokens.sourceManager(); |
| 1040 | SourceLocation Loc = SM.getComposedLoc(SM.getMainFileID(), Offset); |
| 1041 | llvm::SmallVector<std::pair<unsigned, unsigned>, 2> Result; |
| 1042 | // Prefer right token over left. |
| 1043 | for (const syntax::Token &Tok : |
| 1044 | llvm::reverse(spelledTokensTouching(Loc, Tokens))) { |
| 1045 | if (shouldIgnore(Tok)) |
| 1046 | continue; |
| 1047 | unsigned Offset = Tokens.sourceManager().getFileOffset(Tok.location()); |
| 1048 | Result.emplace_back(Offset, Offset + Tok.length()); |
| 1049 | } |
| 1050 | if (Result.empty()) |
| 1051 | Result.emplace_back(Offset, Offset); |
| 1052 | return Result; |
| 1053 | } |
| 1054 | |
| 1055 | bool SelectionTree::createEach(ASTContext &AST, |
| 1056 | const syntax::TokenBuffer &Tokens, |
| 1057 | unsigned Begin, unsigned End, |
| 1058 | llvm::function_ref<bool(SelectionTree)> Func) { |
| 1059 | if (Begin != End) |
| 1060 | return Func(SelectionTree(AST, Tokens, Begin, End)); |
| 1061 | for (std::pair<unsigned, unsigned> Bounds : pointBounds(Begin, Tokens)) |
| 1062 | if (Func(SelectionTree(AST, Tokens, Bounds.first, Bounds.second))) |
| 1063 | return true; |
| 1064 | return false; |
| 1065 | } |
| 1066 | |
| 1067 | SelectionTree SelectionTree::createRight(ASTContext &AST, |
| 1068 | const syntax::TokenBuffer &Tokens, |
| 1069 | unsigned int Begin, unsigned int End) { |
Kazu Hirata | f71ffd3 | 2023-01-07 20:19:42 -0800 | [diff] [blame] | 1070 | std::optional<SelectionTree> Result; |
Sam McCall | be6d07c | 2020-02-23 20:03:00 +0100 | [diff] [blame] | 1071 | createEach(AST, Tokens, Begin, End, [&](SelectionTree T) { |
| 1072 | Result = std::move(T); |
| 1073 | return true; |
| 1074 | }); |
| 1075 | return std::move(*Result); |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 1076 | } |
| 1077 | |
Sam McCall | abe3c29 | 2019-07-31 17:52:40 +0000 | [diff] [blame] | 1078 | SelectionTree::SelectionTree(ASTContext &AST, const syntax::TokenBuffer &Tokens, |
| 1079 | unsigned Begin, unsigned End) |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 1080 | : PrintPolicy(AST.getLangOpts()) { |
| 1081 | // No fundamental reason the selection needs to be in the main file, |
| 1082 | // but that's all clangd has needed so far. |
Sam McCall | 7fc8f41 | 2019-07-22 15:55:53 +0000 | [diff] [blame] | 1083 | const SourceManager &SM = AST.getSourceManager(); |
| 1084 | FileID FID = SM.getMainFileID(); |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 1085 | PrintPolicy.TerseOutput = true; |
Sam McCall | ca89eb5 | 2019-06-24 13:01:28 +0000 | [diff] [blame] | 1086 | PrintPolicy.IncludeNewlines = false; |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 1087 | |
Sam McCall | 7fc8f41 | 2019-07-22 15:55:53 +0000 | [diff] [blame] | 1088 | dlog("Computing selection for {0}", |
| 1089 | SourceRange(SM.getComposedLoc(FID, Begin), SM.getComposedLoc(FID, End)) |
| 1090 | .printToString(SM)); |
Sam McCall | abe3c29 | 2019-07-31 17:52:40 +0000 | [diff] [blame] | 1091 | Nodes = SelectionVisitor::collect(AST, Tokens, PrintPolicy, Begin, End, FID); |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 1092 | Root = Nodes.empty() ? nullptr : &Nodes.front(); |
Haojian Wu | 1df0677 | 2020-12-07 10:52:04 +0100 | [diff] [blame] | 1093 | recordMetrics(*this, AST.getLangOpts()); |
Sam McCall | 7fc8f41 | 2019-07-22 15:55:53 +0000 | [diff] [blame] | 1094 | dlog("Built selection tree\n{0}", *this); |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 1095 | } |
| 1096 | |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 1097 | const Node *SelectionTree::commonAncestor() const { |
Haojian Wu | 1503a3b | 2019-07-11 12:29:01 +0000 | [diff] [blame] | 1098 | const Node *Ancestor = Root; |
| 1099 | while (Ancestor->Children.size() == 1 && !Ancestor->Selected) |
| 1100 | Ancestor = Ancestor->Children.front(); |
Sam McCall | bdc6b6e | 2019-07-24 12:14:56 +0000 | [diff] [blame] | 1101 | // Returning nullptr here is a bit unprincipled, but it makes the API safer: |
| 1102 | // the TranslationUnitDecl contains all of the preamble, so traversing it is a |
| 1103 | // performance cliff. Callers can check for null and use root() if they want. |
| 1104 | return Ancestor != Root ? Ancestor : nullptr; |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 1105 | } |
| 1106 | |
Nathan Ridge | e70a9f3 | 2019-12-23 13:38:04 -0500 | [diff] [blame] | 1107 | const DeclContext &SelectionTree::Node::getDeclContext() const { |
| 1108 | for (const Node *CurrentNode = this; CurrentNode != nullptr; |
Sam McCall | 9470142 | 2019-07-11 16:04:18 +0000 | [diff] [blame] | 1109 | CurrentNode = CurrentNode->Parent) { |
Nathan Ridge | e70a9f3 | 2019-12-23 13:38:04 -0500 | [diff] [blame] | 1110 | if (const Decl *Current = CurrentNode->ASTNode.get<Decl>()) { |
Sam McCall | 9470142 | 2019-07-11 16:04:18 +0000 | [diff] [blame] | 1111 | if (CurrentNode != this) |
| 1112 | if (auto *DC = dyn_cast<DeclContext>(Current)) |
| 1113 | return *DC; |
Kadir Cetinkaya | 3d73548 | 2021-10-28 14:56:53 +0200 | [diff] [blame] | 1114 | return *Current->getLexicalDeclContext(); |
Sam McCall | 9470142 | 2019-07-11 16:04:18 +0000 | [diff] [blame] | 1115 | } |
Younan Zhang | 9ef2ac3 | 2024-01-11 16:59:18 +0800 | [diff] [blame] | 1116 | if (const auto *LE = CurrentNode->ASTNode.get<LambdaExpr>()) |
| 1117 | if (CurrentNode != this) |
| 1118 | return *LE->getCallOperator(); |
Sam McCall | 9470142 | 2019-07-11 16:04:18 +0000 | [diff] [blame] | 1119 | } |
| 1120 | llvm_unreachable("A tree must always be rooted at TranslationUnitDecl."); |
| 1121 | } |
| 1122 | |
Sam McCall | 91e8eac | 2019-07-26 15:29:52 +0000 | [diff] [blame] | 1123 | const SelectionTree::Node &SelectionTree::Node::ignoreImplicit() const { |
| 1124 | if (Children.size() == 1 && |
Sam McCall | bb41f85 | 2021-06-16 15:24:23 +0200 | [diff] [blame] | 1125 | getSourceRange(Children.front()->ASTNode) == getSourceRange(ASTNode)) |
Sam McCall | 91e8eac | 2019-07-26 15:29:52 +0000 | [diff] [blame] | 1126 | return Children.front()->ignoreImplicit(); |
| 1127 | return *this; |
| 1128 | } |
| 1129 | |
Sam McCall | 1aaef90 | 2019-08-09 23:40:54 +0000 | [diff] [blame] | 1130 | const SelectionTree::Node &SelectionTree::Node::outerImplicit() const { |
Sam McCall | bb41f85 | 2021-06-16 15:24:23 +0200 | [diff] [blame] | 1131 | if (Parent && getSourceRange(Parent->ASTNode) == getSourceRange(ASTNode)) |
Sam McCall | 1aaef90 | 2019-08-09 23:40:54 +0000 | [diff] [blame] | 1132 | return Parent->outerImplicit(); |
| 1133 | return *this; |
| 1134 | } |
| 1135 | |
Sam McCall | 3186e3c | 2019-02-01 15:05:11 +0000 | [diff] [blame] | 1136 | } // namespace clangd |
| 1137 | } // namespace clang |