[clang] Delay normalization of `-fmodules-cache-path` (#150123)

This PR is part of an effort to remove file system usage from the
command line parsing code. The reason for that is that it's impossible
to do file system access correctly without a configured VFS, and the VFS
can only be configured after the command line is parsed. I don't want to
intertwine command line parsing and VFS configuration, so I decided to
perform the file system access after the command line is parsed and the
VFS is configured - ideally right before the file system entity is used
for the first time.

This patch delays normalization of the module cache path until
`CompilerInstance` is asked for the cache path in the current
compilation context.
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 337460a..a7c514e 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3281,7 +3281,8 @@
 def fmodules_cache_path : Joined<["-"], "fmodules-cache-path=">, Group<i_Group>,
   Flags<[]>, Visibility<[ClangOption, CC1Option]>,
   MetaVarName<"<directory>">,
-  HelpText<"Specify the module cache path">;
+  HelpText<"Specify the module cache path">,
+  MarshallingInfoString<HeaderSearchOpts<"ModuleCachePath">>;
 def fmodules_user_build_path : Separate<["-"], "fmodules-user-build-path">, Group<i_Group>,
   Flags<[]>, Visibility<[ClangOption, CC1Option]>,
   MetaVarName<"<directory>">,
diff --git a/clang/include/clang/Frontend/CompilerInstance.h b/clang/include/clang/Frontend/CompilerInstance.h
index 9f3d5f9..cbc72a0 100644
--- a/clang/include/clang/Frontend/CompilerInstance.h
+++ b/clang/include/clang/Frontend/CompilerInstance.h
@@ -705,6 +705,8 @@
     GetDependencyDirectives = std::move(Getter);
   }
 
+  static void normalizeModuleCachePath(FileManager &FileMgr, StringRef Path,
+                                       SmallVectorImpl<char> &NormalizedPath);
   std::string getSpecificModuleCachePath(StringRef ModuleHash);
   std::string getSpecificModuleCachePath() {
     return getSpecificModuleCachePath(getInvocation().getModuleHash());
diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp
index 8d1e9d6..0a23e0b 100644
--- a/clang/lib/Frontend/CompilerInstance.cpp
+++ b/clang/lib/Frontend/CompilerInstance.cpp
@@ -553,10 +553,25 @@
     PP->setDependencyDirectivesGetter(*GetDependencyDirectives);
 }
 
+void CompilerInstance::normalizeModuleCachePath(
+    FileManager &FileMgr, StringRef Path,
+    SmallVectorImpl<char> &NormalizedPath) {
+  NormalizedPath.assign(Path.begin(), Path.end());
+  FileMgr.makeAbsolutePath(NormalizedPath);
+  llvm::sys::path::remove_dots(NormalizedPath);
+}
+
 std::string CompilerInstance::getSpecificModuleCachePath(StringRef ModuleHash) {
+  assert(FileMgr && "Specific module cache path requires a FileManager");
+
+  if (getHeaderSearchOpts().ModuleCachePath.empty())
+    return "";
+
   // Set up the module path, including the hash for the module-creation options.
-  SmallString<256> SpecificModuleCache(getHeaderSearchOpts().ModuleCachePath);
-  if (!SpecificModuleCache.empty() && !getHeaderSearchOpts().DisableModuleHash)
+  SmallString<256> SpecificModuleCache;
+  normalizeModuleCachePath(*FileMgr, getHeaderSearchOpts().ModuleCachePath,
+                           SpecificModuleCache);
+  if (!getHeaderSearchOpts().DisableModuleHash)
     llvm::sys::path::append(SpecificModuleCache, ModuleHash);
   return std::string(SpecificModuleCache);
 }
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 8411d00..931766d 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -3315,9 +3315,6 @@
   if (Opts.UseLibcxx)
     GenerateArg(Consumer, OPT_stdlib_EQ, "libc++");
 
-  if (!Opts.ModuleCachePath.empty())
-    GenerateArg(Consumer, OPT_fmodules_cache_path, Opts.ModuleCachePath);
-
   for (const auto &File : Opts.PrebuiltModuleFiles)
     GenerateArg(Consumer, OPT_fmodule_file, File.first + "=" + File.second);
 
@@ -3420,8 +3417,7 @@
 }
 
 static bool ParseHeaderSearchArgs(HeaderSearchOptions &Opts, ArgList &Args,
-                                  DiagnosticsEngine &Diags,
-                                  const std::string &WorkingDir) {
+                                  DiagnosticsEngine &Diags) {
   unsigned NumErrorsBefore = Diags.getNumErrors();
 
   HeaderSearchOptions *HeaderSearchOpts = &Opts;
@@ -3434,17 +3430,6 @@
   if (const Arg *A = Args.getLastArg(OPT_stdlib_EQ))
     Opts.UseLibcxx = (strcmp(A->getValue(), "libc++") == 0);
 
-  // Canonicalize -fmodules-cache-path before storing it.
-  SmallString<128> P(Args.getLastArgValue(OPT_fmodules_cache_path));
-  if (!(P.empty() || llvm::sys::path::is_absolute(P))) {
-    if (WorkingDir.empty())
-      llvm::sys::fs::make_absolute(P);
-    else
-      llvm::sys::fs::make_absolute(WorkingDir, P);
-  }
-  llvm::sys::path::remove_dots(P);
-  Opts.ModuleCachePath = std::string(P);
-
   // Only the -fmodule-file=<name>=<file> form.
   for (const auto *A : Args.filtered(OPT_fmodule_file)) {
     StringRef Val = A->getValue();
@@ -5021,8 +5006,7 @@
   InputKind DashX = Res.getFrontendOpts().DashX;
   ParseTargetArgs(Res.getTargetOpts(), Args, Diags);
   llvm::Triple T(Res.getTargetOpts().Triple);
-  ParseHeaderSearchArgs(Res.getHeaderSearchOpts(), Args, Diags,
-                        Res.getFileSystemOpts().WorkingDir);
+  ParseHeaderSearchArgs(Res.getHeaderSearchOpts(), Args, Diags);
   if (Res.getFrontendOpts().GenReducedBMI ||
       Res.getFrontendOpts().ProgramAction ==
           frontend::GenerateReducedModuleInterface ||
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 8e219e5..2f847ec 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -57,6 +57,7 @@
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Basic/TargetOptions.h"
 #include "clang/Basic/Version.h"
+#include "clang/Frontend/CompilerInstance.h"
 #include "clang/Lex/HeaderSearch.h"
 #include "clang/Lex/HeaderSearchOptions.h"
 #include "clang/Lex/MacroInfo.h"
@@ -1710,9 +1711,13 @@
   const HeaderSearchOptions &HSOpts =
       PP.getHeaderSearchInfo().getHeaderSearchOpts();
 
+  SmallString<256> HSOpts_ModuleCachePath;
+  CompilerInstance::normalizeModuleCachePath(
+      PP.getFileManager(), HSOpts.ModuleCachePath, HSOpts_ModuleCachePath);
+
   AddString(HSOpts.Sysroot, Record);
   AddString(HSOpts.ResourceDir, Record);
-  AddString(HSOpts.ModuleCachePath, Record);
+  AddString(HSOpts_ModuleCachePath, Record);
   AddString(HSOpts.ModuleUserBuildPath, Record);
   Record.push_back(HSOpts.DisableModuleHash);
   Record.push_back(HSOpts.ImplicitModuleMaps);
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
index f32747d..2a48c33 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
@@ -449,9 +449,10 @@
 
     // Use the dependency scanning optimized file system if requested to do so.
     if (DepFS) {
-      StringRef ModulesCachePath =
-          ScanInstance.getHeaderSearchOpts().ModuleCachePath;
-
+      SmallString<256> ModulesCachePath;
+      CompilerInstance::normalizeModuleCachePath(
+          *FileMgr, ScanInstance.getHeaderSearchOpts().ModuleCachePath,
+          ModulesCachePath);
       DepFS->resetBypassedPathPrefix();
       if (!ModulesCachePath.empty())
         DepFS->setBypassedPathPrefix(ModulesCachePath);
diff --git a/clang/test/Modules/modules-cache-path-canonicalization-output.c b/clang/test/Modules/modules-cache-path-canonicalization-output.c
new file mode 100644
index 0000000..ad71b69
--- /dev/null
+++ b/clang/test/Modules/modules-cache-path-canonicalization-output.c
@@ -0,0 +1,18 @@
+// This checks that implicitly-built modules produce identical PCM
+// files regardless of the spelling of the same module cache path.
+
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fsyntax-only %t/tu.c \
+// RUN:   -fmodules-cache-path=%t/cache -fdisable-module-hash
+// RUN: mv %t/cache/M.pcm %t/M.pcm
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fsyntax-only %t/tu.c \
+// RUN:   -fmodules-cache-path=%t/./cache -fdisable-module-hash
+// RUN: diff %t/./cache/M.pcm %t/M.pcm
+
+//--- tu.c
+#include "M.h"
+//--- M.h
+//--- module.modulemap
+module M { header "M.h" }