| //===--- FileDistance.cpp - File contents container -------------*- C++ -*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // The FileDistance structure allows calculating the minimum distance to paths |
| // in a single tree. |
| // We simply walk up the path's ancestors until we find a node whose cost is |
| // known, and add the cost of walking back down. Initialization ensures this |
| // gives the correct path to the roots. |
| // We cache the results, so that the runtime is O(|A|), where A is the set of |
| // all distinct ancestors of visited paths. |
| // |
| // Example after initialization with /=2, /bar=0, DownCost = 1: |
| // / = 2 |
| // /bar = 0 |
| // |
| // After querying /foo/bar and /bar/foo: |
| // / = 2 |
| // /bar = 0 |
| // /bar/foo = 1 |
| // /foo = 3 |
| // /foo/bar = 4 |
| // |
| // URIDistance creates FileDistance lazily for each URI scheme encountered. In |
| // practice this is a small constant factor. |
| // |
| //===-------------------------------------------------------------------------// |
| |
| #include "FileDistance.h" |
| #include "support/Logger.h" |
| #include "llvm/ADT/STLExtras.h" |
| #include <queue> |
| |
| namespace clang { |
| namespace clangd { |
| |
| // Convert a path into the canonical form. |
| // Canonical form is either "/", or "/segment" * N: |
| // C:\foo\bar --> /c:/foo/bar |
| // /foo/ --> /foo |
| // a/b/c --> /a/b/c |
| static llvm::SmallString<128> canonicalize(llvm::StringRef Path) { |
| llvm::SmallString<128> Result = Path.rtrim('/'); |
| native(Result, llvm::sys::path::Style::posix); |
| if (Result.empty() || Result.front() != '/') |
| Result.insert(Result.begin(), '/'); |
| return Result; |
| } |
| |
| constexpr const unsigned FileDistance::Unreachable; |
| const llvm::hash_code FileDistance::RootHash = |
| llvm::hash_value(llvm::StringRef("/")); |
| |
| FileDistance::FileDistance(llvm::StringMap<SourceParams> Sources, |
| const FileDistanceOptions &Opts) |
| : Opts(Opts) { |
| llvm::DenseMap<llvm::hash_code, llvm::SmallVector<llvm::hash_code>> DownEdges; |
| // Compute the best distance following only up edges. |
| // Keep track of down edges, in case we can use them to improve on this. |
| for (const auto &S : Sources) { |
| auto Canonical = canonicalize(S.getKey()); |
| dlog("Source {0} = {1}, MaxUp = {2}", Canonical, S.second.Cost, |
| S.second.MaxUpTraversals); |
| // Walk up to ancestors of this source, assigning cost. |
| llvm::StringRef Rest = Canonical; |
| llvm::hash_code Hash = llvm::hash_value(Rest); |
| for (unsigned I = 0; !Rest.empty(); ++I) { |
| Rest = parent_path(Rest, llvm::sys::path::Style::posix); |
| auto NextHash = llvm::hash_value(Rest); |
| auto &Down = DownEdges[NextHash]; |
| if (!llvm::is_contained(Down, Hash)) |
| Down.push_back(Hash); |
| // We can't just break after MaxUpTraversals, must still set DownEdges. |
| if (I > S.getValue().MaxUpTraversals) { |
| if (Cache.find(Hash) != Cache.end()) |
| break; |
| } else { |
| unsigned Cost = S.getValue().Cost + I * Opts.UpCost; |
| auto R = Cache.try_emplace(Hash, Cost); |
| if (!R.second) { |
| if (Cost < R.first->second) { |
| R.first->second = Cost; |
| } else { |
| // If we're not the best way to get to this path, stop assigning. |
| break; |
| } |
| } |
| } |
| Hash = NextHash; |
| } |
| } |
| // Now propagate scores parent -> child if that's an improvement. |
| // BFS ensures we propagate down chains (must visit parents before children). |
| std::queue<llvm::hash_code> Next; |
| for (auto Child : DownEdges.lookup(llvm::hash_value(llvm::StringRef("")))) |
| Next.push(Child); |
| while (!Next.empty()) { |
| auto Parent = Next.front(); |
| Next.pop(); |
| auto ParentCost = Cache.lookup(Parent); |
| for (auto Child : DownEdges.lookup(Parent)) { |
| if (Parent != RootHash || Opts.AllowDownTraversalFromRoot) { |
| auto &ChildCost = |
| Cache.try_emplace(Child, Unreachable).first->getSecond(); |
| if (ParentCost + Opts.DownCost < ChildCost) |
| ChildCost = ParentCost + Opts.DownCost; |
| } |
| Next.push(Child); |
| } |
| } |
| } |
| |
| unsigned FileDistance::distance(llvm::StringRef Path) { |
| auto Canonical = canonicalize(Path); |
| unsigned Cost = Unreachable; |
| llvm::SmallVector<llvm::hash_code> Ancestors; |
| // Walk up ancestors until we find a path we know the distance for. |
| for (llvm::StringRef Rest = Canonical; !Rest.empty(); |
| Rest = parent_path(Rest, llvm::sys::path::Style::posix)) { |
| auto Hash = llvm::hash_value(Rest); |
| if (Hash == RootHash && !Ancestors.empty() && |
| !Opts.AllowDownTraversalFromRoot) { |
| Cost = Unreachable; |
| break; |
| } |
| auto It = Cache.find(Hash); |
| if (It != Cache.end()) { |
| Cost = It->second; |
| break; |
| } |
| Ancestors.push_back(Hash); |
| } |
| // Now we know the costs for (known node, queried node]. |
| // Fill these in, walking down the directory tree. |
| for (llvm::hash_code Hash : llvm::reverse(Ancestors)) { |
| if (Cost != Unreachable) |
| Cost += Opts.DownCost; |
| Cache.try_emplace(Hash, Cost); |
| } |
| dlog("distance({0} = {1})", Path, Cost); |
| return Cost; |
| } |
| |
| unsigned URIDistance::distance(llvm::StringRef URI) { |
| auto R = Cache.try_emplace(llvm::hash_value(URI), FileDistance::Unreachable); |
| if (!R.second) |
| return R.first->getSecond(); |
| if (auto U = clangd::URI::parse(URI)) { |
| dlog("distance({0} = {1})", URI, U->body()); |
| R.first->second = forScheme(U->scheme()).distance(U->body()); |
| } else { |
| log("URIDistance::distance() of unparseable {0}: {1}", URI, U.takeError()); |
| } |
| return R.first->second; |
| } |
| |
| FileDistance &URIDistance::forScheme(llvm::StringRef Scheme) { |
| auto &Delegate = ByScheme[Scheme]; |
| if (!Delegate) { |
| llvm::StringMap<SourceParams> SchemeSources; |
| for (const auto &Source : Sources) { |
| if (auto U = clangd::URI::create(Source.getKey(), Scheme)) |
| SchemeSources.try_emplace(U->body(), Source.getValue()); |
| else |
| llvm::consumeError(U.takeError()); |
| } |
| dlog("FileDistance for scheme {0}: {1}/{2} sources", Scheme, |
| SchemeSources.size(), Sources.size()); |
| Delegate.reset(new FileDistance(std::move(SchemeSources), Opts)); |
| } |
| return *Delegate; |
| } |
| |
| static std::pair<std::string, int> scopeToPath(llvm::StringRef Scope) { |
| llvm::SmallVector<llvm::StringRef> Split; |
| Scope.split(Split, "::", /*MaxSplit=*/-1, /*KeepEmpty=*/false); |
| return {"/" + llvm::join(Split, "/"), Split.size()}; |
| } |
| |
| static FileDistance |
| createScopeFileDistance(llvm::ArrayRef<std::string> QueryScopes) { |
| FileDistanceOptions Opts; |
| Opts.UpCost = 2; |
| Opts.DownCost = 4; |
| Opts.AllowDownTraversalFromRoot = false; |
| |
| llvm::StringMap<SourceParams> Sources; |
| llvm::StringRef Preferred = |
| QueryScopes.empty() ? "" : QueryScopes.front().c_str(); |
| for (llvm::StringRef S : QueryScopes) { |
| SourceParams Param; |
| // Penalize the global scope even it's preferred, as all projects can define |
| // symbols in it, and there is pattern where using-namespace is used in |
| // place of enclosing namespaces (e.g. in implementation files). |
| if (S == Preferred) |
| Param.Cost = S == "" ? 4 : 0; |
| else if (Preferred.startswith(S) && !S.empty()) |
| continue; // just rely on up-traversals. |
| else |
| Param.Cost = S == "" ? 6 : 2; |
| auto Path = scopeToPath(S); |
| // The global namespace is not 'near' its children. |
| Param.MaxUpTraversals = std::max(Path.second - 1, 0); |
| Sources[Path.first] = std::move(Param); |
| } |
| return FileDistance(std::move(Sources), Opts); |
| } |
| |
| ScopeDistance::ScopeDistance(llvm::ArrayRef<std::string> QueryScopes) |
| : Distance(createScopeFileDistance(QueryScopes)) {} |
| |
| unsigned ScopeDistance::distance(llvm::StringRef SymbolScope) { |
| return Distance.distance(scopeToPath(SymbolScope).first); |
| } |
| |
| } // namespace clangd |
| } // namespace clang |