| //===----------------- ModulesBuilder.cpp ------------------------*- C++-*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "ModulesBuilder.h" |
| #include "Compiler.h" |
| #include "support/Logger.h" |
| #include "clang/Frontend/FrontendAction.h" |
| #include "clang/Frontend/FrontendActions.h" |
| #include "clang/Serialization/ASTReader.h" |
| #include "clang/Serialization/ModuleCache.h" |
| #include "llvm/ADT/ScopeExit.h" |
| #include <queue> |
| |
| namespace clang { |
| namespace clangd { |
| |
| namespace { |
| |
| // Create a path to store module files. Generally it should be: |
| // |
| // {TEMP_DIRS}/clangd/module_files/{hashed-file-name}-%%-%%-%%-%%-%%-%%/. |
| // |
| // {TEMP_DIRS} is the temporary directory for the system, e.g., "/var/tmp" |
| // or "C:/TEMP". |
| // |
| // '%%' means random value to make the generated path unique. |
| // |
| // \param MainFile is used to get the root of the project from global |
| // compilation database. |
| // |
| // TODO: Move these module fils out of the temporary directory if the module |
| // files are persistent. |
| llvm::SmallString<256> getUniqueModuleFilesPath(PathRef MainFile) { |
| llvm::SmallString<128> HashedPrefix = llvm::sys::path::filename(MainFile); |
| // There might be multiple files with the same name in a project. So appending |
| // the hash value of the full path to make sure they won't conflict. |
| HashedPrefix += std::to_string(llvm::hash_value(MainFile)); |
| |
| llvm::SmallString<256> ResultPattern; |
| |
| llvm::sys::path::system_temp_directory(/*erasedOnReboot=*/true, |
| ResultPattern); |
| |
| llvm::sys::path::append(ResultPattern, "clangd"); |
| llvm::sys::path::append(ResultPattern, "module_files"); |
| |
| llvm::sys::path::append(ResultPattern, HashedPrefix); |
| |
| ResultPattern.append("-%%-%%-%%-%%-%%-%%"); |
| |
| llvm::SmallString<256> Result; |
| llvm::sys::fs::createUniquePath(ResultPattern, Result, |
| /*MakeAbsolute=*/false); |
| |
| llvm::sys::fs::create_directories(Result); |
| return Result; |
| } |
| |
| // Get a unique module file path under \param ModuleFilesPrefix. |
| std::string getModuleFilePath(llvm::StringRef ModuleName, |
| PathRef ModuleFilesPrefix) { |
| llvm::SmallString<256> ModuleFilePath(ModuleFilesPrefix); |
| auto [PrimaryModuleName, PartitionName] = ModuleName.split(':'); |
| llvm::sys::path::append(ModuleFilePath, PrimaryModuleName); |
| if (!PartitionName.empty()) { |
| ModuleFilePath.append("-"); |
| ModuleFilePath.append(PartitionName); |
| } |
| |
| ModuleFilePath.append(".pcm"); |
| return std::string(ModuleFilePath); |
| } |
| |
| // FailedPrerequisiteModules - stands for the PrerequisiteModules which has |
| // errors happened during the building process. |
| class FailedPrerequisiteModules : public PrerequisiteModules { |
| public: |
| ~FailedPrerequisiteModules() override = default; |
| |
| // We shouldn't adjust the compilation commands based on |
| // FailedPrerequisiteModules. |
| void adjustHeaderSearchOptions(HeaderSearchOptions &Options) const override { |
| } |
| |
| // FailedPrerequisiteModules can never be reused. |
| bool |
| canReuse(const CompilerInvocation &CI, |
| llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem>) const override { |
| return false; |
| } |
| }; |
| |
| struct ModuleFile { |
| ModuleFile(StringRef ModuleName, PathRef ModuleFilePath) |
| : ModuleName(ModuleName.str()), ModuleFilePath(ModuleFilePath.str()) {} |
| |
| ModuleFile() = delete; |
| |
| ModuleFile(const ModuleFile &) = delete; |
| ModuleFile operator=(const ModuleFile &) = delete; |
| |
| // The move constructor is needed for llvm::SmallVector. |
| ModuleFile(ModuleFile &&Other) |
| : ModuleName(std::move(Other.ModuleName)), |
| ModuleFilePath(std::move(Other.ModuleFilePath)) { |
| Other.ModuleName.clear(); |
| Other.ModuleFilePath.clear(); |
| } |
| |
| ModuleFile &operator=(ModuleFile &&Other) { |
| if (this == &Other) |
| return *this; |
| |
| this->~ModuleFile(); |
| new (this) ModuleFile(std::move(Other)); |
| return *this; |
| } |
| |
| ~ModuleFile() { |
| if (!ModuleFilePath.empty()) |
| llvm::sys::fs::remove(ModuleFilePath); |
| } |
| |
| StringRef getModuleName() const { return ModuleName; } |
| |
| StringRef getModuleFilePath() const { return ModuleFilePath; } |
| |
| private: |
| std::string ModuleName; |
| std::string ModuleFilePath; |
| }; |
| |
| // ReusablePrerequisiteModules - stands for PrerequisiteModules for which all |
| // the required modules are built successfully. All the module files |
| // are owned by the modules builder. |
| class ReusablePrerequisiteModules : public PrerequisiteModules { |
| public: |
| ReusablePrerequisiteModules() = default; |
| |
| ReusablePrerequisiteModules(const ReusablePrerequisiteModules &Other) = |
| default; |
| ReusablePrerequisiteModules & |
| operator=(const ReusablePrerequisiteModules &) = default; |
| ReusablePrerequisiteModules(ReusablePrerequisiteModules &&) = delete; |
| ReusablePrerequisiteModules |
| operator=(ReusablePrerequisiteModules &&) = delete; |
| |
| ~ReusablePrerequisiteModules() override = default; |
| |
| void adjustHeaderSearchOptions(HeaderSearchOptions &Options) const override { |
| // Appending all built module files. |
| for (const auto &RequiredModule : RequiredModules) |
| Options.PrebuiltModuleFiles.insert_or_assign( |
| RequiredModule->getModuleName().str(), |
| RequiredModule->getModuleFilePath().str()); |
| } |
| |
| bool canReuse(const CompilerInvocation &CI, |
| llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem>) const override; |
| |
| bool isModuleUnitBuilt(llvm::StringRef ModuleName) const { |
| return BuiltModuleNames.contains(ModuleName); |
| } |
| |
| void addModuleFile(std::shared_ptr<const ModuleFile> ModuleFile) { |
| BuiltModuleNames.insert(ModuleFile->getModuleName()); |
| RequiredModules.emplace_back(std::move(ModuleFile)); |
| } |
| |
| private: |
| llvm::SmallVector<std::shared_ptr<const ModuleFile>, 8> RequiredModules; |
| // A helper class to speedup the query if a module is built. |
| llvm::StringSet<> BuiltModuleNames; |
| }; |
| |
| bool IsModuleFileUpToDate(PathRef ModuleFilePath, |
| const PrerequisiteModules &RequisiteModules, |
| llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS) { |
| HeaderSearchOptions HSOpts; |
| RequisiteModules.adjustHeaderSearchOptions(HSOpts); |
| HSOpts.ForceCheckCXX20ModulesInputFiles = true; |
| HSOpts.ValidateASTInputFilesContent = true; |
| |
| clang::clangd::IgnoreDiagnostics IgnoreDiags; |
| IntrusiveRefCntPtr<DiagnosticsEngine> Diags = |
| CompilerInstance::createDiagnostics(*VFS, new DiagnosticOptions, |
| &IgnoreDiags, |
| /*ShouldOwnClient=*/false); |
| |
| LangOptions LangOpts; |
| LangOpts.SkipODRCheckInGMF = true; |
| |
| FileManager FileMgr(FileSystemOptions(), VFS); |
| |
| SourceManager SourceMgr(*Diags, FileMgr); |
| |
| HeaderSearch HeaderInfo(HSOpts, SourceMgr, *Diags, LangOpts, |
| /*Target=*/nullptr); |
| |
| TrivialModuleLoader ModuleLoader; |
| Preprocessor PP(std::make_shared<PreprocessorOptions>(), *Diags, LangOpts, |
| SourceMgr, HeaderInfo, ModuleLoader); |
| |
| IntrusiveRefCntPtr<ModuleCache> ModCache = createCrossProcessModuleCache(); |
| PCHContainerOperations PCHOperations; |
| ASTReader Reader(PP, *ModCache, /*ASTContext=*/nullptr, |
| PCHOperations.getRawReader(), {}); |
| |
| // We don't need any listener here. By default it will use a validator |
| // listener. |
| Reader.setListener(nullptr); |
| |
| if (Reader.ReadAST(ModuleFilePath, serialization::MK_MainFile, |
| SourceLocation(), |
| ASTReader::ARR_None) != ASTReader::Success) |
| return false; |
| |
| bool UpToDate = true; |
| Reader.getModuleManager().visit([&](serialization::ModuleFile &MF) -> bool { |
| Reader.visitInputFiles( |
| MF, /*IncludeSystem=*/false, /*Complain=*/false, |
| [&](const serialization::InputFile &IF, bool isSystem) { |
| if (!IF.getFile() || IF.isOutOfDate()) |
| UpToDate = false; |
| }); |
| return !UpToDate; |
| }); |
| return UpToDate; |
| } |
| |
| bool IsModuleFilesUpToDate( |
| llvm::SmallVector<PathRef> ModuleFilePaths, |
| const PrerequisiteModules &RequisiteModules, |
| llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS) { |
| return llvm::all_of( |
| ModuleFilePaths, [&RequisiteModules, VFS](auto ModuleFilePath) { |
| return IsModuleFileUpToDate(ModuleFilePath, RequisiteModules, VFS); |
| }); |
| } |
| |
| /// Build a module file for module with `ModuleName`. The information of built |
| /// module file are stored in \param BuiltModuleFiles. |
| llvm::Expected<ModuleFile> |
| buildModuleFile(llvm::StringRef ModuleName, PathRef ModuleUnitFileName, |
| const GlobalCompilationDatabase &CDB, const ThreadsafeFS &TFS, |
| const ReusablePrerequisiteModules &BuiltModuleFiles) { |
| // Try cheap operation earlier to boil-out cheaply if there are problems. |
| auto Cmd = CDB.getCompileCommand(ModuleUnitFileName); |
| if (!Cmd) |
| return llvm::createStringError( |
| llvm::formatv("No compile command for {0}", ModuleUnitFileName)); |
| |
| llvm::SmallString<256> ModuleFilesPrefix = |
| getUniqueModuleFilesPath(ModuleUnitFileName); |
| |
| Cmd->Output = getModuleFilePath(ModuleName, ModuleFilesPrefix); |
| |
| ParseInputs Inputs; |
| Inputs.TFS = &TFS; |
| Inputs.CompileCommand = std::move(*Cmd); |
| |
| IgnoreDiagnostics IgnoreDiags; |
| auto CI = buildCompilerInvocation(Inputs, IgnoreDiags); |
| if (!CI) |
| return llvm::createStringError("Failed to build compiler invocation"); |
| |
| auto FS = Inputs.TFS->view(Inputs.CompileCommand.Directory); |
| auto Buf = FS->getBufferForFile(Inputs.CompileCommand.Filename); |
| if (!Buf) |
| return llvm::createStringError("Failed to create buffer"); |
| |
| // In clang's driver, we will suppress the check for ODR violation in GMF. |
| // See the implementation of RenderModulesOptions in Clang.cpp. |
| CI->getLangOpts().SkipODRCheckInGMF = true; |
| |
| // Hash the contents of input files and store the hash value to the BMI files. |
| // So that we can check if the files are still valid when we want to reuse the |
| // BMI files. |
| CI->getHeaderSearchOpts().ValidateASTInputFilesContent = true; |
| |
| BuiltModuleFiles.adjustHeaderSearchOptions(CI->getHeaderSearchOpts()); |
| |
| CI->getFrontendOpts().OutputFile = Inputs.CompileCommand.Output; |
| auto Clang = |
| prepareCompilerInstance(std::move(CI), /*Preamble=*/nullptr, |
| std::move(*Buf), std::move(FS), IgnoreDiags); |
| if (!Clang) |
| return llvm::createStringError("Failed to prepare compiler instance"); |
| |
| GenerateReducedModuleInterfaceAction Action; |
| Clang->ExecuteAction(Action); |
| |
| if (Clang->getDiagnostics().hasErrorOccurred()) |
| return llvm::createStringError("Compilation failed"); |
| |
| return ModuleFile{ModuleName, Inputs.CompileCommand.Output}; |
| } |
| |
| bool ReusablePrerequisiteModules::canReuse( |
| const CompilerInvocation &CI, |
| llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS) const { |
| if (RequiredModules.empty()) |
| return true; |
| |
| llvm::SmallVector<llvm::StringRef> BMIPaths; |
| for (auto &MF : RequiredModules) |
| BMIPaths.push_back(MF->getModuleFilePath()); |
| return IsModuleFilesUpToDate(BMIPaths, *this, VFS); |
| } |
| |
| class ModuleFileCache { |
| public: |
| ModuleFileCache(const GlobalCompilationDatabase &CDB) : CDB(CDB) {} |
| const GlobalCompilationDatabase &getCDB() const { return CDB; } |
| |
| std::shared_ptr<const ModuleFile> getModule(StringRef ModuleName); |
| |
| void add(StringRef ModuleName, std::shared_ptr<const ModuleFile> ModuleFile) { |
| std::lock_guard<std::mutex> Lock(ModuleFilesMutex); |
| |
| ModuleFiles[ModuleName] = ModuleFile; |
| } |
| |
| void remove(StringRef ModuleName); |
| |
| private: |
| const GlobalCompilationDatabase &CDB; |
| |
| llvm::StringMap<std::weak_ptr<const ModuleFile>> ModuleFiles; |
| // Mutex to guard accesses to ModuleFiles. |
| std::mutex ModuleFilesMutex; |
| }; |
| |
| std::shared_ptr<const ModuleFile> |
| ModuleFileCache::getModule(StringRef ModuleName) { |
| std::lock_guard<std::mutex> Lock(ModuleFilesMutex); |
| |
| auto Iter = ModuleFiles.find(ModuleName); |
| if (Iter == ModuleFiles.end()) |
| return nullptr; |
| |
| if (auto Res = Iter->second.lock()) |
| return Res; |
| |
| ModuleFiles.erase(Iter); |
| return nullptr; |
| } |
| |
| void ModuleFileCache::remove(StringRef ModuleName) { |
| std::lock_guard<std::mutex> Lock(ModuleFilesMutex); |
| |
| ModuleFiles.erase(ModuleName); |
| } |
| |
| class ModuleNameToSourceCache { |
| public: |
| std::string getSourceForModuleName(llvm::StringRef ModuleName) { |
| std::lock_guard<std::mutex> Lock(CacheMutex); |
| auto Iter = ModuleNameToSourceCache.find(ModuleName); |
| if (Iter != ModuleNameToSourceCache.end()) |
| return Iter->second; |
| return ""; |
| } |
| |
| void addEntry(llvm::StringRef ModuleName, PathRef Source) { |
| std::lock_guard<std::mutex> Lock(CacheMutex); |
| ModuleNameToSourceCache[ModuleName] = Source.str(); |
| } |
| |
| void eraseEntry(llvm::StringRef ModuleName) { |
| std::lock_guard<std::mutex> Lock(CacheMutex); |
| ModuleNameToSourceCache.erase(ModuleName); |
| } |
| |
| private: |
| std::mutex CacheMutex; |
| llvm::StringMap<std::string> ModuleNameToSourceCache; |
| }; |
| |
| class CachingProjectModules : public ProjectModules { |
| public: |
| CachingProjectModules(std::unique_ptr<ProjectModules> MDB, |
| ModuleNameToSourceCache &Cache) |
| : MDB(std::move(MDB)), Cache(Cache) { |
| assert(this->MDB && "CachingProjectModules should only be created with a " |
| "valid underlying ProjectModules"); |
| } |
| |
| std::vector<std::string> getRequiredModules(PathRef File) override { |
| return MDB->getRequiredModules(File); |
| } |
| |
| std::string getModuleNameForSource(PathRef File) override { |
| return MDB->getModuleNameForSource(File); |
| } |
| |
| std::string getSourceForModuleName(llvm::StringRef ModuleName, |
| PathRef RequiredSrcFile) override { |
| std::string CachedResult = Cache.getSourceForModuleName(ModuleName); |
| |
| // Verify Cached Result by seeing if the source declaring the same module |
| // as we query. |
| if (!CachedResult.empty()) { |
| std::string ModuleNameOfCachedSource = |
| MDB->getModuleNameForSource(CachedResult); |
| if (ModuleNameOfCachedSource == ModuleName) |
| return CachedResult; |
| |
| // Cached Result is invalid. Clear it. |
| Cache.eraseEntry(ModuleName); |
| } |
| |
| auto Result = MDB->getSourceForModuleName(ModuleName, RequiredSrcFile); |
| Cache.addEntry(ModuleName, Result); |
| |
| return Result; |
| } |
| |
| private: |
| std::unique_ptr<ProjectModules> MDB; |
| ModuleNameToSourceCache &Cache; |
| }; |
| |
| /// Collect the directly and indirectly required module names for \param |
| /// ModuleName in topological order. The \param ModuleName is guaranteed to |
| /// be the last element in \param ModuleNames. |
| llvm::SmallVector<StringRef> getAllRequiredModules(PathRef RequiredSource, |
| CachingProjectModules &MDB, |
| StringRef ModuleName) { |
| llvm::SmallVector<llvm::StringRef> ModuleNames; |
| llvm::StringSet<> ModuleNamesSet; |
| |
| auto VisitDeps = [&](StringRef ModuleName, auto Visitor) -> void { |
| ModuleNamesSet.insert(ModuleName); |
| |
| for (StringRef RequiredModuleName : MDB.getRequiredModules( |
| MDB.getSourceForModuleName(ModuleName, RequiredSource))) |
| if (ModuleNamesSet.insert(RequiredModuleName).second) |
| Visitor(RequiredModuleName, Visitor); |
| |
| ModuleNames.push_back(ModuleName); |
| }; |
| VisitDeps(ModuleName, VisitDeps); |
| |
| return ModuleNames; |
| } |
| |
| } // namespace |
| |
| class ModulesBuilder::ModulesBuilderImpl { |
| public: |
| ModulesBuilderImpl(const GlobalCompilationDatabase &CDB) : Cache(CDB) {} |
| |
| ModuleNameToSourceCache &getProjectModulesCache() { |
| return ProjectModulesCache; |
| } |
| const GlobalCompilationDatabase &getCDB() const { return Cache.getCDB(); } |
| |
| llvm::Error |
| getOrBuildModuleFile(PathRef RequiredSource, StringRef ModuleName, |
| const ThreadsafeFS &TFS, CachingProjectModules &MDB, |
| ReusablePrerequisiteModules &BuiltModuleFiles); |
| |
| private: |
| ModuleFileCache Cache; |
| ModuleNameToSourceCache ProjectModulesCache; |
| }; |
| |
| llvm::Error ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile( |
| PathRef RequiredSource, StringRef ModuleName, const ThreadsafeFS &TFS, |
| CachingProjectModules &MDB, ReusablePrerequisiteModules &BuiltModuleFiles) { |
| if (BuiltModuleFiles.isModuleUnitBuilt(ModuleName)) |
| return llvm::Error::success(); |
| |
| std::string ModuleUnitFileName = |
| MDB.getSourceForModuleName(ModuleName, RequiredSource); |
| /// It is possible that we're meeting third party modules (modules whose |
| /// source are not in the project. e.g, the std module may be a third-party |
| /// module for most project) or something wrong with the implementation of |
| /// ProjectModules. |
| /// FIXME: How should we treat third party modules here? If we want to ignore |
| /// third party modules, we should return true instead of false here. |
| /// Currently we simply bail out. |
| if (ModuleUnitFileName.empty()) |
| return llvm::createStringError( |
| llvm::formatv("Don't get the module unit for module {0}", ModuleName)); |
| |
| // Get Required modules in topological order. |
| auto ReqModuleNames = getAllRequiredModules(RequiredSource, MDB, ModuleName); |
| for (llvm::StringRef ReqModuleName : ReqModuleNames) { |
| if (BuiltModuleFiles.isModuleUnitBuilt(ModuleName)) |
| continue; |
| |
| if (auto Cached = Cache.getModule(ReqModuleName)) { |
| if (IsModuleFileUpToDate(Cached->getModuleFilePath(), BuiltModuleFiles, |
| TFS.view(std::nullopt))) { |
| log("Reusing module {0} from {1}", ModuleName, |
| Cached->getModuleFilePath()); |
| BuiltModuleFiles.addModuleFile(std::move(Cached)); |
| continue; |
| } |
| Cache.remove(ReqModuleName); |
| } |
| |
| llvm::Expected<ModuleFile> MF = buildModuleFile( |
| ModuleName, ModuleUnitFileName, getCDB(), TFS, BuiltModuleFiles); |
| if (llvm::Error Err = MF.takeError()) |
| return Err; |
| |
| log("Built module {0} to {1}", ModuleName, MF->getModuleFilePath()); |
| auto BuiltModuleFile = std::make_shared<const ModuleFile>(std::move(*MF)); |
| Cache.add(ModuleName, BuiltModuleFile); |
| BuiltModuleFiles.addModuleFile(std::move(BuiltModuleFile)); |
| } |
| |
| return llvm::Error::success(); |
| } |
| |
| std::unique_ptr<PrerequisiteModules> |
| ModulesBuilder::buildPrerequisiteModulesFor(PathRef File, |
| const ThreadsafeFS &TFS) { |
| std::unique_ptr<ProjectModules> MDB = Impl->getCDB().getProjectModules(File); |
| if (!MDB) { |
| elog("Failed to get Project Modules information for {0}", File); |
| return std::make_unique<FailedPrerequisiteModules>(); |
| } |
| CachingProjectModules CachedMDB(std::move(MDB), |
| Impl->getProjectModulesCache()); |
| |
| std::vector<std::string> RequiredModuleNames = |
| CachedMDB.getRequiredModules(File); |
| if (RequiredModuleNames.empty()) |
| return std::make_unique<ReusablePrerequisiteModules>(); |
| |
| auto RequiredModules = std::make_unique<ReusablePrerequisiteModules>(); |
| for (llvm::StringRef RequiredModuleName : RequiredModuleNames) { |
| // Return early if there is any error. |
| if (llvm::Error Err = Impl->getOrBuildModuleFile( |
| File, RequiredModuleName, TFS, CachedMDB, *RequiredModules.get())) { |
| elog("Failed to build module {0}; due to {1}", RequiredModuleName, |
| toString(std::move(Err))); |
| return std::make_unique<FailedPrerequisiteModules>(); |
| } |
| } |
| |
| return std::move(RequiredModules); |
| } |
| |
| ModulesBuilder::ModulesBuilder(const GlobalCompilationDatabase &CDB) { |
| Impl = std::make_unique<ModulesBuilderImpl>(CDB); |
| } |
| |
| ModulesBuilder::~ModulesBuilder() {} |
| |
| } // namespace clangd |
| } // namespace clang |