| //===-- Background.cpp - Build an index in a background thread ------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "index/Background.h" |
| #include "Compiler.h" |
| #include "Config.h" |
| #include "Headers.h" |
| #include "ParsedAST.h" |
| #include "SourceCode.h" |
| #include "Symbol.h" |
| #include "URI.h" |
| #include "index/BackgroundIndexLoader.h" |
| #include "index/FileIndex.h" |
| #include "index/Index.h" |
| #include "index/IndexAction.h" |
| #include "index/MemIndex.h" |
| #include "index/Ref.h" |
| #include "index/Relation.h" |
| #include "index/Serialization.h" |
| #include "index/SymbolCollector.h" |
| #include "support/Context.h" |
| #include "support/Logger.h" |
| #include "support/Path.h" |
| #include "support/Threading.h" |
| #include "support/ThreadsafeFS.h" |
| #include "support/Trace.h" |
| #include "clang/Basic/SourceLocation.h" |
| #include "clang/Basic/SourceManager.h" |
| #include "clang/Driver/Types.h" |
| #include "llvm/ADT/ArrayRef.h" |
| #include "llvm/ADT/DenseSet.h" |
| #include "llvm/ADT/Hashing.h" |
| #include "llvm/ADT/STLExtras.h" |
| #include "llvm/ADT/ScopeExit.h" |
| #include "llvm/ADT/StringMap.h" |
| #include "llvm/ADT/StringRef.h" |
| #include "llvm/ADT/StringSet.h" |
| #include "llvm/Support/Error.h" |
| #include "llvm/Support/Path.h" |
| #include "llvm/Support/Threading.h" |
| #include "llvm/Support/xxhash.h" |
| |
| #include <algorithm> |
| #include <atomic> |
| #include <chrono> |
| #include <condition_variable> |
| #include <cstddef> |
| #include <memory> |
| #include <mutex> |
| #include <numeric> |
| #include <queue> |
| #include <random> |
| #include <string> |
| #include <thread> |
| #include <utility> |
| #include <vector> |
| |
| namespace clang { |
| namespace clangd { |
| namespace { |
| |
| // We cannot use vfs->makeAbsolute because Cmd.FileName is either absolute or |
| // relative to Cmd.Directory, which might not be the same as current working |
| // directory. |
| llvm::SmallString<128> getAbsolutePath(const tooling::CompileCommand &Cmd) { |
| llvm::SmallString<128> AbsolutePath; |
| if (llvm::sys::path::is_absolute(Cmd.Filename)) { |
| AbsolutePath = Cmd.Filename; |
| } else { |
| AbsolutePath = Cmd.Directory; |
| llvm::sys::path::append(AbsolutePath, Cmd.Filename); |
| llvm::sys::path::remove_dots(AbsolutePath, true); |
| } |
| return AbsolutePath; |
| } |
| |
| bool shardIsStale(const LoadedShard &LS, llvm::vfs::FileSystem *FS) { |
| auto Buf = FS->getBufferForFile(LS.AbsolutePath); |
| if (!Buf) { |
| elog("Background-index: Couldn't read {0} to validate stored index: {1}", |
| LS.AbsolutePath, Buf.getError().message()); |
| // There is no point in indexing an unreadable file. |
| return false; |
| } |
| return digest(Buf->get()->getBuffer()) != LS.Digest; |
| } |
| |
| } // namespace |
| |
| BackgroundIndex::BackgroundIndex( |
| const ThreadsafeFS &TFS, const GlobalCompilationDatabase &CDB, |
| BackgroundIndexStorage::Factory IndexStorageFactory, Options Opts) |
| : SwapIndex(std::make_unique<MemIndex>()), TFS(TFS), CDB(CDB), |
| ContextProvider(std::move(Opts.ContextProvider)), |
| IndexedSymbols(IndexContents::All), |
| Rebuilder(this, &IndexedSymbols, Opts.ThreadPoolSize), |
| IndexStorageFactory(std::move(IndexStorageFactory)), |
| Queue(std::move(Opts.OnProgress)), |
| CommandsChanged( |
| CDB.watch([&](const std::vector<std::string> &ChangedFiles) { |
| enqueue(ChangedFiles); |
| })) { |
| assert(Opts.ThreadPoolSize > 0 && "Thread pool size can't be zero."); |
| assert(this->IndexStorageFactory && "Storage factory can not be null!"); |
| for (unsigned I = 0; I < Opts.ThreadPoolSize; ++I) { |
| ThreadPool.runAsync("background-worker-" + llvm::Twine(I + 1), |
| [this, Ctx(Context::current().clone())]() mutable { |
| WithContext BGContext(std::move(Ctx)); |
| Queue.work([&] { Rebuilder.idle(); }); |
| }); |
| } |
| } |
| |
| BackgroundIndex::~BackgroundIndex() { |
| stop(); |
| ThreadPool.wait(); |
| } |
| |
| BackgroundQueue::Task BackgroundIndex::changedFilesTask( |
| const std::vector<std::string> &ChangedFiles) { |
| BackgroundQueue::Task T([this, ChangedFiles] { |
| trace::Span Tracer("BackgroundIndexEnqueue"); |
| |
| llvm::Optional<WithContext> WithProvidedContext; |
| if (ContextProvider) |
| WithProvidedContext.emplace(ContextProvider(/*Path=*/"")); |
| |
| // We're doing this asynchronously, because we'll read shards here too. |
| log("Enqueueing {0} commands for indexing", ChangedFiles.size()); |
| SPAN_ATTACH(Tracer, "files", int64_t(ChangedFiles.size())); |
| |
| auto NeedsReIndexing = loadProject(std::move(ChangedFiles)); |
| // Run indexing for files that need to be updated. |
| std::shuffle(NeedsReIndexing.begin(), NeedsReIndexing.end(), |
| std::mt19937(std::random_device{}())); |
| std::vector<BackgroundQueue::Task> Tasks; |
| Tasks.reserve(NeedsReIndexing.size()); |
| for (const auto &File : NeedsReIndexing) |
| Tasks.push_back(indexFileTask(std::move(File))); |
| Queue.append(std::move(Tasks)); |
| }); |
| |
| T.QueuePri = LoadShards; |
| T.ThreadPri = llvm::ThreadPriority::Default; |
| return T; |
| } |
| |
| static llvm::StringRef filenameWithoutExtension(llvm::StringRef Path) { |
| Path = llvm::sys::path::filename(Path); |
| return Path.drop_back(llvm::sys::path::extension(Path).size()); |
| } |
| |
| BackgroundQueue::Task BackgroundIndex::indexFileTask(std::string Path) { |
| std::string Tag = filenameWithoutExtension(Path).str(); |
| uint64_t Key = llvm::xxHash64(Path); |
| BackgroundQueue::Task T([this, Path(std::move(Path))] { |
| llvm::Optional<WithContext> WithProvidedContext; |
| if (ContextProvider) |
| WithProvidedContext.emplace(ContextProvider(Path)); |
| auto Cmd = CDB.getCompileCommand(Path); |
| if (!Cmd) |
| return; |
| if (auto Error = index(std::move(*Cmd))) |
| elog("Indexing {0} failed: {1}", Path, std::move(Error)); |
| }); |
| T.QueuePri = IndexFile; |
| T.Tag = std::move(Tag); |
| T.Key = Key; |
| return T; |
| } |
| |
| void BackgroundIndex::boostRelated(llvm::StringRef Path) { |
| if (isHeaderFile(Path)) |
| Queue.boost(filenameWithoutExtension(Path), IndexBoostedFile); |
| } |
| |
| /// Given index results from a TU, only update symbols coming from files that |
| /// are different or missing from than \p ShardVersionsSnapshot. Also stores new |
| /// index information on IndexStorage. |
| void BackgroundIndex::update( |
| llvm::StringRef MainFile, IndexFileIn Index, |
| const llvm::StringMap<ShardVersion> &ShardVersionsSnapshot, |
| bool HadErrors) { |
| // Keys are URIs. |
| llvm::StringMap<std::pair<Path, FileDigest>> FilesToUpdate; |
| // Note that sources do not contain any information regarding missing headers, |
| // since we don't even know what absolute path they should fall in. |
| for (const auto &IndexIt : *Index.Sources) { |
| const auto &IGN = IndexIt.getValue(); |
| auto AbsPath = URI::resolve(IGN.URI, MainFile); |
| if (!AbsPath) { |
| elog("Failed to resolve URI: {0}", AbsPath.takeError()); |
| continue; |
| } |
| const auto DigestIt = ShardVersionsSnapshot.find(*AbsPath); |
| // File has different contents, or indexing was successful this time. |
| if (DigestIt == ShardVersionsSnapshot.end() || |
| DigestIt->getValue().Digest != IGN.Digest || |
| (DigestIt->getValue().HadErrors && !HadErrors)) |
| FilesToUpdate[IGN.URI] = {std::move(*AbsPath), IGN.Digest}; |
| } |
| |
| // Shard slabs into files. |
| FileShardedIndex ShardedIndex(std::move(Index)); |
| |
| // Build and store new slabs for each updated file. |
| for (const auto &FileIt : FilesToUpdate) { |
| auto Uri = FileIt.first(); |
| auto IF = ShardedIndex.getShard(Uri); |
| assert(IF && "no shard for file in Index.Sources?"); |
| PathRef Path = FileIt.getValue().first; |
| |
| // Only store command line hash for main files of the TU, since our |
| // current model keeps only one version of a header file. |
| if (Path != MainFile) |
| IF->Cmd.reset(); |
| |
| // We need to store shards before updating the index, since the latter |
| // consumes slabs. |
| // FIXME: Also skip serializing the shard if it is already up-to-date. |
| if (auto Error = IndexStorageFactory(Path)->storeShard(Path, *IF)) |
| elog("Failed to write background-index shard for file {0}: {1}", Path, |
| std::move(Error)); |
| |
| { |
| std::lock_guard<std::mutex> Lock(ShardVersionsMu); |
| const auto &Hash = FileIt.getValue().second; |
| auto DigestIt = ShardVersions.try_emplace(Path); |
| ShardVersion &SV = DigestIt.first->second; |
| // Skip if file is already up to date, unless previous index was broken |
| // and this one is not. |
| if (!DigestIt.second && SV.Digest == Hash && SV.HadErrors && !HadErrors) |
| continue; |
| SV.Digest = Hash; |
| SV.HadErrors = HadErrors; |
| |
| // This can override a newer version that is added in another thread, if |
| // this thread sees the older version but finishes later. This should be |
| // rare in practice. |
| IndexedSymbols.update( |
| Uri, std::make_unique<SymbolSlab>(std::move(*IF->Symbols)), |
| std::make_unique<RefSlab>(std::move(*IF->Refs)), |
| std::make_unique<RelationSlab>(std::move(*IF->Relations)), |
| Path == MainFile); |
| } |
| } |
| } |
| |
| llvm::Error BackgroundIndex::index(tooling::CompileCommand Cmd) { |
| trace::Span Tracer("BackgroundIndex"); |
| SPAN_ATTACH(Tracer, "file", Cmd.Filename); |
| auto AbsolutePath = getAbsolutePath(Cmd); |
| |
| auto FS = TFS.view(Cmd.Directory); |
| auto Buf = FS->getBufferForFile(AbsolutePath); |
| if (!Buf) |
| return llvm::errorCodeToError(Buf.getError()); |
| auto Hash = digest(Buf->get()->getBuffer()); |
| |
| // Take a snapshot of the versions to avoid locking for each file in the TU. |
| llvm::StringMap<ShardVersion> ShardVersionsSnapshot; |
| { |
| std::lock_guard<std::mutex> Lock(ShardVersionsMu); |
| ShardVersionsSnapshot = ShardVersions; |
| } |
| |
| vlog("Indexing {0} (digest:={1})", Cmd.Filename, llvm::toHex(Hash)); |
| ParseInputs Inputs; |
| Inputs.TFS = &TFS; |
| Inputs.CompileCommand = std::move(Cmd); |
| IgnoreDiagnostics IgnoreDiags; |
| auto CI = buildCompilerInvocation(Inputs, IgnoreDiags); |
| if (!CI) |
| return error("Couldn't build compiler invocation"); |
| |
| auto Clang = |
| prepareCompilerInstance(std::move(CI), /*Preamble=*/nullptr, |
| std::move(*Buf), std::move(FS), IgnoreDiags); |
| if (!Clang) |
| return error("Couldn't build compiler instance"); |
| |
| SymbolCollector::Options IndexOpts; |
| // Creates a filter to not collect index results from files with unchanged |
| // digests. |
| IndexOpts.FileFilter = [&ShardVersionsSnapshot](const SourceManager &SM, |
| FileID FID) { |
| const auto *F = SM.getFileEntryForID(FID); |
| if (!F) |
| return false; // Skip invalid files. |
| auto AbsPath = getCanonicalPath(F, SM); |
| if (!AbsPath) |
| return false; // Skip files without absolute path. |
| auto Digest = digestFile(SM, FID); |
| if (!Digest) |
| return false; |
| auto D = ShardVersionsSnapshot.find(*AbsPath); |
| if (D != ShardVersionsSnapshot.end() && D->second.Digest == Digest && |
| !D->second.HadErrors) |
| return false; // Skip files that haven't changed, without errors. |
| return true; |
| }; |
| IndexOpts.CollectMainFileRefs = true; |
| |
| IndexFileIn Index; |
| auto Action = createStaticIndexingAction( |
| IndexOpts, [&](SymbolSlab S) { Index.Symbols = std::move(S); }, |
| [&](RefSlab R) { Index.Refs = std::move(R); }, |
| [&](RelationSlab R) { Index.Relations = std::move(R); }, |
| [&](IncludeGraph IG) { Index.Sources = std::move(IG); }); |
| |
| // We're going to run clang here, and it could potentially crash. |
| // We could use CrashRecoveryContext to try to make indexing crashes nonfatal, |
| // but the leaky "recovery" is pretty scary too in a long-running process. |
| // If crashes are a real problem, maybe we should fork a child process. |
| |
| const FrontendInputFile &Input = Clang->getFrontendOpts().Inputs.front(); |
| if (!Action->BeginSourceFile(*Clang, Input)) |
| return error("BeginSourceFile() failed"); |
| if (llvm::Error Err = Action->Execute()) |
| return Err; |
| |
| Action->EndSourceFile(); |
| |
| Index.Cmd = Inputs.CompileCommand; |
| assert(Index.Symbols && Index.Refs && Index.Sources && |
| "Symbols, Refs and Sources must be set."); |
| |
| log("Indexed {0} ({1} symbols, {2} refs, {3} files)", |
| Inputs.CompileCommand.Filename, Index.Symbols->size(), |
| Index.Refs->numRefs(), Index.Sources->size()); |
| SPAN_ATTACH(Tracer, "symbols", int(Index.Symbols->size())); |
| SPAN_ATTACH(Tracer, "refs", int(Index.Refs->numRefs())); |
| SPAN_ATTACH(Tracer, "sources", int(Index.Sources->size())); |
| |
| bool HadErrors = Clang->hasDiagnostics() && |
| Clang->getDiagnostics().hasUncompilableErrorOccurred(); |
| if (HadErrors) { |
| log("Failed to compile {0}, index may be incomplete", AbsolutePath); |
| for (auto &It : *Index.Sources) |
| It.second.Flags |= IncludeGraphNode::SourceFlag::HadErrors; |
| } |
| update(AbsolutePath, std::move(Index), ShardVersionsSnapshot, HadErrors); |
| |
| Rebuilder.indexedTU(); |
| return llvm::Error::success(); |
| } |
| |
| // Restores shards for \p MainFiles from index storage. Then checks staleness of |
| // those shards and returns a list of TUs that needs to be indexed to update |
| // staleness. |
| std::vector<std::string> |
| BackgroundIndex::loadProject(std::vector<std::string> MainFiles) { |
| // Drop files where background indexing is disabled in config. |
| if (ContextProvider) |
| llvm::erase_if(MainFiles, [&](const std::string &TU) { |
| // Load the config for each TU, as indexing may be selectively enabled. |
| WithContext WithProvidedContext(ContextProvider(TU)); |
| return Config::current().Index.Background == |
| Config::BackgroundPolicy::Skip; |
| }); |
| Rebuilder.startLoading(); |
| // Load shards for all of the mainfiles. |
| const std::vector<LoadedShard> Result = |
| loadIndexShards(MainFiles, IndexStorageFactory, CDB); |
| size_t LoadedShards = 0; |
| { |
| // Update in-memory state. |
| std::lock_guard<std::mutex> Lock(ShardVersionsMu); |
| for (auto &LS : Result) { |
| if (!LS.Shard) |
| continue; |
| auto SS = |
| LS.Shard->Symbols |
| ? std::make_unique<SymbolSlab>(std::move(*LS.Shard->Symbols)) |
| : nullptr; |
| auto RS = LS.Shard->Refs |
| ? std::make_unique<RefSlab>(std::move(*LS.Shard->Refs)) |
| : nullptr; |
| auto RelS = |
| LS.Shard->Relations |
| ? std::make_unique<RelationSlab>(std::move(*LS.Shard->Relations)) |
| : nullptr; |
| ShardVersion &SV = ShardVersions[LS.AbsolutePath]; |
| SV.Digest = LS.Digest; |
| SV.HadErrors = LS.HadErrors; |
| ++LoadedShards; |
| |
| IndexedSymbols.update(URI::create(LS.AbsolutePath).toString(), |
| std::move(SS), std::move(RS), std::move(RelS), |
| LS.CountReferences); |
| } |
| } |
| Rebuilder.loadedShard(LoadedShards); |
| Rebuilder.doneLoading(); |
| |
| auto FS = TFS.view(/*CWD=*/llvm::None); |
| llvm::DenseSet<PathRef> TUsToIndex; |
| // We'll accept data from stale shards, but ensure the files get reindexed |
| // soon. |
| for (auto &LS : Result) { |
| if (!shardIsStale(LS, FS.get())) |
| continue; |
| PathRef TUForFile = LS.DependentTU; |
| assert(!TUForFile.empty() && "File without a TU!"); |
| |
| // FIXME: Currently, we simply schedule indexing on a TU whenever any of |
| // its dependencies needs re-indexing. We might do it smarter by figuring |
| // out a minimal set of TUs that will cover all the stale dependencies. |
| // FIXME: Try looking at other TUs if no compile commands are available |
| // for this TU, i.e TU was deleted after we performed indexing. |
| TUsToIndex.insert(TUForFile); |
| } |
| |
| return {TUsToIndex.begin(), TUsToIndex.end()}; |
| } |
| |
| void BackgroundIndex::profile(MemoryTree &MT) const { |
| IndexedSymbols.profile(MT.child("slabs")); |
| // We don't want to mix memory used by index and symbols, so call base class. |
| MT.child("index").addUsage(SwapIndex::estimateMemoryUsage()); |
| } |
| } // namespace clangd |
| } // namespace clang |