[Modules][PCH] Hash input files content

Summary:
When files often get touched during builds, the mtime based validation
leads to different problems in implicit modules builds, even when the
content doesn't actually change:

- Modules only: module invalidation due to out of date files. Usually causing rebuild traffic.
- Modules + PCH: build failures because clang cannot rebuild a module if it comes from building a PCH.
- PCH: build failures because clang cannot rebuild a PCH in case one of the input headers has different mtime.

This patch proposes hashing the content of input files (headers and
module maps), which is performed during serialization time. When looking
at input files for validation, clang only computes the hash in case
there's a mtime mismatch.

I've tested a couple of different hash algorithms availble in LLVM in
face of building modules+pch for `#import <Cocoa/Cocoa.h>`:
- `hash_code`: performace diff within the noise, total module cache increased by 0.07%.
- `SHA1`: 5% slowdown. Haven't done real size measurements, but it'd be BLOCK_ID+20 bytes per input file, instead of BLOCK_ID+8 bytes from `hash_code`.
- `MD5`: 3% slowdown. Like above, but BLOCK_ID+16 bytes per input file.

Given the numbers above, the patch uses `hash_code`. The patch also
improves invalidation error msgs to point out which type of problem the
user is facing: "mtime", "size" or "content".

rdar://problem/29320105

Reviewers: dexonsmith, arphaman, rsmith, aprantl

Subscribers: jkorous, cfe-commits, ributzka

Tags: #clang

Differential Revision: https://reviews.llvm.org/D67249

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@374841 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/include/clang/Basic/DiagnosticSerializationKinds.td b/include/clang/Basic/DiagnosticSerializationKinds.td
index 0461d2f..757dbbe 100644
--- a/include/clang/Basic/DiagnosticSerializationKinds.td
+++ b/include/clang/Basic/DiagnosticSerializationKinds.td
@@ -18,13 +18,16 @@
 def err_fe_pch_malformed_block : Error<
     "malformed block record in PCH file: '%0'">, DefaultFatal;
 def err_fe_pch_file_modified : Error<
-    "file '%0' has been modified since the precompiled header '%1' was built">,
+    "file '%0' has been modified since the precompiled header '%1' was built"
+    ": %select{size|mtime|content}2 changed">,
     DefaultFatal;
 def err_fe_module_file_modified : Error<
-    "file '%0' has been modified since the module file '%1' was built">,
+    "file '%0' has been modified since the module file '%1' was built"
+    ": %select{size|mtime|content}2 changed">,
     DefaultFatal;
 def err_fe_ast_file_modified : Error<
-    "file '%0' has been modified since the AST file '%1' was built">,
+    "file '%0' has been modified since the AST file '%1' was built"
+    ": %select{size|mtime|content}2 changed">,
     DefaultFatal;
 def err_fe_pch_file_overridden : Error<
     "file '%0' from the precompiled header has been overridden">;
@@ -399,6 +402,8 @@
 def err_module_no_size_mtime_for_header : Error<
   "cannot emit module %0: %select{size|mtime}1 must be explicitly specified "
   "for missing header file \"%2\"">;
+def err_module_unable_to_hash_content : Error<
+  "failed to hash content for '%0' because memory buffer cannot be retrieved">;
 } // let CategoryName
 } // let Component
 
diff --git a/include/clang/Driver/Options.td b/include/clang/Driver/Options.td
index 22676b6..3ce6fcf 100644
--- a/include/clang/Driver/Options.td
+++ b/include/clang/Driver/Options.td
@@ -1368,6 +1368,28 @@
   HelpText<"Validate the system headers that a module depends on when loading the module">;
 def fno_modules_validate_system_headers : Flag<["-"], "fno-modules-validate-system-headers">,
   Group<i_Group>, Flags<[DriverOption]>;
+
+def fvalidate_ast_input_files_content:
+  Flag <["-"], "fvalidate-ast-input-files-content">,
+  Group<f_Group>, Flags<[CC1Option]>,
+  HelpText<"Compute and store the hash of input files used to build an AST."
+           " Files with mismatching mtime's are considered valid"
+           " if both contents is identical">;
+def fmodules_validate_input_files_content:
+  Flag <["-"], "fmodules-validate-input-files-content">,
+  Group<f_Group>, Flags<[DriverOption]>,
+  HelpText<"Validate PCM input files based on content if mtime differs">;
+def fno_modules_validate_input_files_content:
+  Flag <["-"], "fno_modules-validate-input-files-content">,
+  Group<f_Group>, Flags<[DriverOption]>;
+def fpch_validate_input_files_content:
+  Flag <["-"], "fpch-validate-input-files-content">,
+  Group<f_Group>, Flags<[DriverOption]>,
+  HelpText<"Validate PCH input files based on content if mtime differs">;
+def fno_pch_validate_input_files_content:
+  Flag <["-"], "fno_pch-validate-input-files-content">,
+  Group<f_Group>, Flags<[DriverOption]>;
+
 def fmodules : Flag <["-"], "fmodules">, Group<f_Group>,
   Flags<[DriverOption, CC1Option]>,
   HelpText<"Enable the 'modules' language feature">;
diff --git a/include/clang/Lex/HeaderSearchOptions.h b/include/clang/Lex/HeaderSearchOptions.h
index ed128bc..3c30974 100644
--- a/include/clang/Lex/HeaderSearchOptions.h
+++ b/include/clang/Lex/HeaderSearchOptions.h
@@ -195,6 +195,10 @@
   /// Whether to validate system input files when a module is loaded.
   unsigned ModulesValidateSystemHeaders : 1;
 
+  // Whether the content of input files should be hashed and used to
+  // validate consistency.
+  unsigned ValidateASTInputFilesContent : 1;
+
   /// Whether the module includes debug information (-gmodules).
   unsigned UseDebugInfo : 1;
 
@@ -208,7 +212,8 @@
         UseBuiltinIncludes(true), UseStandardSystemIncludes(true),
         UseStandardCXXIncludes(true), UseLibcxx(false), Verbose(false),
         ModulesValidateOncePerBuildSession(false),
-        ModulesValidateSystemHeaders(false), UseDebugInfo(false),
+        ModulesValidateSystemHeaders(false),
+        ValidateASTInputFilesContent(false), UseDebugInfo(false),
         ModulesValidateDiagnosticOptions(true), ModulesHashContent(false) {}
 
   /// AddPath - Add the \p Path path to the specified \p Group list.
diff --git a/include/clang/Serialization/ASTBitCodes.h b/include/clang/Serialization/ASTBitCodes.h
index 94721f9..eab1ec8 100644
--- a/include/clang/Serialization/ASTBitCodes.h
+++ b/include/clang/Serialization/ASTBitCodes.h
@@ -382,7 +382,10 @@
     /// inside the control block.
     enum InputFileRecordTypes {
       /// An input file.
-      INPUT_FILE = 1
+      INPUT_FILE = 1,
+
+      /// The input file content hash
+      INPUT_FILE_HASH
     };
 
     /// Record types that occur within the AST block itself.
diff --git a/include/clang/Serialization/ASTReader.h b/include/clang/Serialization/ASTReader.h
index 13d1653..7495c2b 100644
--- a/include/clang/Serialization/ASTReader.h
+++ b/include/clang/Serialization/ASTReader.h
@@ -930,6 +930,9 @@
   /// Whether validate system input files.
   bool ValidateSystemInputs;
 
+  /// Whether validate headers and module maps using hash based on contents.
+  bool ValidateASTInputFilesContent;
+
   /// Whether we are allowed to use the global module index.
   bool UseGlobalIndex;
 
@@ -1203,6 +1206,7 @@
 
   struct InputFileInfo {
     std::string Filename;
+    uint64_t ContentHash;
     off_t StoredSize;
     time_t StoredTime;
     bool Overridden;
@@ -1437,6 +1441,8 @@
   void Error(StringRef Msg) const;
   void Error(unsigned DiagID, StringRef Arg1 = StringRef(),
              StringRef Arg2 = StringRef()) const;
+  void Error(unsigned DiagID, StringRef Arg1, StringRef Arg2,
+             unsigned Select) const;
   void Error(llvm::Error &&Err) const;
 
 public:
@@ -1485,7 +1491,9 @@
             StringRef isysroot = "", bool DisableValidation = false,
             bool AllowASTWithCompilerErrors = false,
             bool AllowConfigurationMismatch = false,
-            bool ValidateSystemInputs = false, bool UseGlobalIndex = true,
+            bool ValidateSystemInputs = false,
+            bool ValidateASTInputFilesContent = false,
+            bool UseGlobalIndex = true,
             std::unique_ptr<llvm::Timer> ReadTimer = {});
   ASTReader(const ASTReader &) = delete;
   ASTReader &operator=(const ASTReader &) = delete;
diff --git a/lib/Driver/ToolChains/Clang.cpp b/lib/Driver/ToolChains/Clang.cpp
index dfb3f33..b897eb6 100644
--- a/lib/Driver/ToolChains/Clang.cpp
+++ b/lib/Driver/ToolChains/Clang.cpp
@@ -2777,6 +2777,10 @@
           std::string("-fprebuilt-module-path=") + A->getValue()));
       A->claim();
     }
+    if (Args.hasFlag(options::OPT_fmodules_validate_input_files_content,
+                     options::OPT_fno_modules_validate_input_files_content,
+                     false))
+      CmdArgs.push_back("-fvalidate-ast-input-files-content");
   }
 
   // -fmodule-name specifies the module that is currently being built (or
@@ -4899,6 +4903,10 @@
       Std && (Std->containsValue("c++2a") || Std->containsValue("c++latest"));
   RenderModulesOptions(C, D, Args, Input, Output, CmdArgs, HaveModules);
 
+  if (Args.hasFlag(options::OPT_fpch_validate_input_files_content,
+                   options::OPT_fno_pch_validate_input_files_content, false))
+    CmdArgs.push_back("-fvalidate-ast-input-files-content");
+
   Args.AddLastArg(CmdArgs, options::OPT_fexperimental_new_pass_manager,
                   options::OPT_fno_experimental_new_pass_manager);
 
diff --git a/lib/Frontend/CompilerInstance.cpp b/lib/Frontend/CompilerInstance.cpp
index a7b7114..97b9266 100644
--- a/lib/Frontend/CompilerInstance.cpp
+++ b/lib/Frontend/CompilerInstance.cpp
@@ -510,7 +510,8 @@
       PP, ModuleCache, &Context, PCHContainerRdr, Extensions,
       Sysroot.empty() ? "" : Sysroot.data(), DisablePCHValidation,
       AllowPCHWithCompilerErrors, /*AllowConfigurationMismatch*/ false,
-      HSOpts.ModulesValidateSystemHeaders, UseGlobalModuleIndex));
+      HSOpts.ModulesValidateSystemHeaders, HSOpts.ValidateASTInputFilesContent,
+      UseGlobalModuleIndex));
 
   // We need the external source to be set up before we read the AST, because
   // eagerly-deserialized declarations may use it.
@@ -1492,6 +1493,7 @@
         /*AllowASTWithCompilerErrors=*/false,
         /*AllowConfigurationMismatch=*/false,
         HSOpts.ModulesValidateSystemHeaders,
+        HSOpts.ValidateASTInputFilesContent,
         getFrontendOpts().UseGlobalModuleIndex, std::move(ReadTimer));
     if (hasASTConsumer()) {
       ModuleManager->setDeserializationListener(
diff --git a/lib/Frontend/CompilerInvocation.cpp b/lib/Frontend/CompilerInvocation.cpp
index 9d5987f..0aa4425 100644
--- a/lib/Frontend/CompilerInvocation.cpp
+++ b/lib/Frontend/CompilerInvocation.cpp
@@ -2081,6 +2081,8 @@
       getLastArgUInt64Value(Args, OPT_fbuild_session_timestamp, 0);
   Opts.ModulesValidateSystemHeaders =
       Args.hasArg(OPT_fmodules_validate_system_headers);
+  Opts.ValidateASTInputFilesContent =
+      Args.hasArg(OPT_fvalidate_ast_input_files_content);
   if (const Arg *A = Args.getLastArg(OPT_fmodule_format_EQ))
     Opts.ModuleFormat = A->getValue();
 
diff --git a/lib/Serialization/ASTReader.cpp b/lib/Serialization/ASTReader.cpp
index 60be157..20c6cbe 100644
--- a/lib/Serialization/ASTReader.cpp
+++ b/lib/Serialization/ASTReader.cpp
@@ -1247,6 +1247,12 @@
     Diag(DiagID) << Arg1 << Arg2;
 }
 
+void ASTReader::Error(unsigned DiagID, StringRef Arg1, StringRef Arg2,
+                      unsigned Select) const {
+  if (!Diags.isDiagnosticInFlight())
+    Diag(DiagID) << Arg1 << Arg2 << Select;
+}
+
 void ASTReader::Error(llvm::Error &&Err) const {
   Error(toString(std::move(Err)));
 }
@@ -2241,6 +2247,24 @@
   R.TopLevelModuleMap = static_cast<bool>(Record[5]);
   R.Filename = Blob;
   ResolveImportedPath(F, R.Filename);
+
+  Expected<llvm::BitstreamEntry> MaybeEntry = Cursor.advance();
+  if (!MaybeEntry) // FIXME this drops errors on the floor.
+    consumeError(MaybeEntry.takeError());
+  llvm::BitstreamEntry Entry = MaybeEntry.get();
+  assert(Entry.Kind == llvm::BitstreamEntry::Record &&
+         "expected record type for input file hash");
+
+  Record.clear();
+  if (Expected<unsigned> Maybe = Cursor.readRecord(Entry.ID, Record))
+    assert(static_cast<InputFileRecordTypes>(Maybe.get()) == INPUT_FILE_HASH &&
+           "invalid record type for input file hash");
+  else {
+    // FIXME this drops errors on the floor.
+    consumeError(Maybe.takeError());
+  }
+  R.ContentHash = (static_cast<uint64_t>(Record[1]) << 32) |
+                  static_cast<uint64_t>(Record[0]);
   return R;
 }
 
@@ -2271,6 +2295,7 @@
   bool Overridden = FI.Overridden;
   bool Transient = FI.Transient;
   StringRef Filename = FI.Filename;
+  uint64_t StoredContentHash = FI.ContentHash;
 
   const FileEntry *File = nullptr;
   if (auto FE = FileMgr.getFile(Filename, /*OpenFile=*/false))
@@ -2325,14 +2350,46 @@
     }
   }
 
-  bool IsOutOfDate = false;
+  enum ModificationType {
+    Size,
+    ModTime,
+    Content,
+    None,
+  };
+  auto HasInputFileChanged = [&]() {
+    if (StoredSize != File->getSize())
+      return ModificationType::Size;
+    if (!DisableValidation && StoredTime &&
+        StoredTime != File->getModificationTime()) {
+      // In case the modification time changes but not the content,
+      // accept the cached file as legit.
+      if (ValidateASTInputFilesContent &&
+          StoredContentHash != static_cast<uint64_t>(llvm::hash_code(-1))) {
+        auto MemBuffOrError = FileMgr.getBufferForFile(File);
+        if (!MemBuffOrError) {
+          if (!Complain)
+            return ModificationType::ModTime;
+          std::string ErrorStr = "could not get buffer for file '";
+          ErrorStr += File->getName();
+          ErrorStr += "'";
+          Error(ErrorStr);
+          return ModificationType::ModTime;
+        }
 
+        auto ContentHash = hash_value(MemBuffOrError.get()->getBuffer());
+        if (StoredContentHash == static_cast<uint64_t>(ContentHash))
+          return ModificationType::None;
+        return ModificationType::Content;
+      }
+      return ModificationType::ModTime;
+    }
+    return ModificationType::None;
+  };
+
+  bool IsOutOfDate = false;
+  auto FileChange = HasInputFileChanged();
   // For an overridden file, there is nothing to validate.
-  if (!Overridden && //
-      (StoredSize != File->getSize() ||
-       (StoredTime && StoredTime != File->getModificationTime() &&
-        !DisableValidation)
-       )) {
+  if (!Overridden && FileChange != ModificationType::None) {
     if (Complain) {
       // Build a list of the PCH imports that got us here (in reverse).
       SmallVector<ModuleFile *, 4> ImportStack(1, &F);
@@ -2341,13 +2398,17 @@
 
       // The top-level PCH is stale.
       StringRef TopLevelPCHName(ImportStack.back()->FileName);
-      unsigned DiagnosticKind = moduleKindForDiagnostic(ImportStack.back()->Kind);
+      unsigned DiagnosticKind =
+          moduleKindForDiagnostic(ImportStack.back()->Kind);
       if (DiagnosticKind == 0)
-        Error(diag::err_fe_pch_file_modified, Filename, TopLevelPCHName);
+        Error(diag::err_fe_pch_file_modified, Filename, TopLevelPCHName,
+              (unsigned)FileChange);
       else if (DiagnosticKind == 1)
-        Error(diag::err_fe_module_file_modified, Filename, TopLevelPCHName);
+        Error(diag::err_fe_module_file_modified, Filename, TopLevelPCHName,
+              (unsigned)FileChange);
       else
-        Error(diag::err_fe_ast_file_modified, Filename, TopLevelPCHName);
+        Error(diag::err_fe_ast_file_modified, Filename, TopLevelPCHName,
+              (unsigned)FileChange);
 
       // Print the import stack.
       if (ImportStack.size() > 1 && !Diags.isDiagnosticInFlight()) {
@@ -5192,6 +5253,8 @@
           consumeError(MaybeRecordType.takeError());
         }
         switch ((InputFileRecordTypes)MaybeRecordType.get()) {
+        case INPUT_FILE_HASH:
+          break;
         case INPUT_FILE:
           bool Overridden = static_cast<bool>(Record[3]);
           std::string Filename = Blob;
@@ -12153,7 +12216,7 @@
                      StringRef isysroot, bool DisableValidation,
                      bool AllowASTWithCompilerErrors,
                      bool AllowConfigurationMismatch, bool ValidateSystemInputs,
-                     bool UseGlobalIndex,
+                     bool ValidateASTInputFilesContent, bool UseGlobalIndex,
                      std::unique_ptr<llvm::Timer> ReadTimer)
     : Listener(DisableValidation
                    ? cast<ASTReaderListener>(new SimpleASTReaderListener(PP))
@@ -12167,6 +12230,7 @@
       AllowASTWithCompilerErrors(AllowASTWithCompilerErrors),
       AllowConfigurationMismatch(AllowConfigurationMismatch),
       ValidateSystemInputs(ValidateSystemInputs),
+      ValidateASTInputFilesContent(ValidateASTInputFilesContent),
       UseGlobalIndex(UseGlobalIndex), CurrSwitchCaseStmts(&SwitchCaseStmts) {
   SourceMgr.setExternalSLocEntrySource(this);
 
diff --git a/lib/Serialization/ASTWriter.cpp b/lib/Serialization/ASTWriter.cpp
index 3badff4..5e9e650 100644
--- a/lib/Serialization/ASTWriter.cpp
+++ b/lib/Serialization/ASTWriter.cpp
@@ -1099,6 +1099,7 @@
 
   BLOCK(INPUT_FILES_BLOCK);
   RECORD(INPUT_FILE);
+  RECORD(INPUT_FILE_HASH);
 
   // AST Top-Level Block.
   BLOCK(AST_BLOCK);
@@ -1764,6 +1765,7 @@
   bool IsTransient;
   bool BufferOverridden;
   bool IsTopLevelModuleMap;
+  uint32_t ContentHash[2];
 };
 
 } // namespace
@@ -1787,6 +1789,13 @@
   IFAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // File name
   unsigned IFAbbrevCode = Stream.EmitAbbrev(std::move(IFAbbrev));
 
+  // Create input file hash abbreviation.
+  auto IFHAbbrev = std::make_shared<BitCodeAbbrev>();
+  IFHAbbrev->Add(BitCodeAbbrevOp(INPUT_FILE_HASH));
+  IFHAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
+  IFHAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
+  unsigned IFHAbbrevCode = Stream.EmitAbbrev(std::move(IFHAbbrev));
+
   // Get all ContentCache objects for files, sorted by whether the file is a
   // system one or not. System files go at the back, users files at the front.
   std::deque<InputFileEntry> SortedFiles;
@@ -1810,6 +1819,25 @@
     Entry.BufferOverridden = Cache->BufferOverridden;
     Entry.IsTopLevelModuleMap = isModuleMap(File.getFileCharacteristic()) &&
                                 File.getIncludeLoc().isInvalid();
+
+    auto ContentHash = hash_code(-1);
+    if (PP->getHeaderSearchInfo()
+            .getHeaderSearchOpts()
+            .ValidateASTInputFilesContent) {
+      auto *MemBuff = Cache->getRawBuffer();
+      if (MemBuff)
+        ContentHash = hash_value(MemBuff->getBuffer());
+      else
+        // FIXME: The path should be taken from the FileEntryRef.
+        PP->Diag(SourceLocation(), diag::err_module_unable_to_hash_content)
+            << Entry.File->getName();
+    }
+    auto CH = llvm::APInt(64, ContentHash);
+    Entry.ContentHash[0] =
+        static_cast<uint32_t>(CH.getLoBits(32).getZExtValue());
+    Entry.ContentHash[1] =
+        static_cast<uint32_t>(CH.getHiBits(32).getZExtValue());
+
     if (Entry.IsSystemFile)
       SortedFiles.push_back(Entry);
     else
@@ -1834,17 +1862,26 @@
 
     // Emit size/modification time for this file.
     // And whether this file was overridden.
-    RecordData::value_type Record[] = {
-        INPUT_FILE,
-        InputFileOffsets.size(),
-        (uint64_t)Entry.File->getSize(),
-        (uint64_t)getTimestampForOutput(Entry.File),
-        Entry.BufferOverridden,
-        Entry.IsTransient,
-        Entry.IsTopLevelModuleMap};
+    {
+      RecordData::value_type Record[] = {
+          INPUT_FILE,
+          InputFileOffsets.size(),
+          (uint64_t)Entry.File->getSize(),
+          (uint64_t)getTimestampForOutput(Entry.File),
+          Entry.BufferOverridden,
+          Entry.IsTransient,
+          Entry.IsTopLevelModuleMap};
 
-    // FIXME: The path should be taken from the FileEntryRef.
-    EmitRecordWithPath(IFAbbrevCode, Record, Entry.File->getName());
+      // FIXME: The path should be taken from the FileEntryRef.
+      EmitRecordWithPath(IFAbbrevCode, Record, Entry.File->getName());
+    }
+
+    // Emit content hash for this file.
+    {
+      RecordData::value_type Record[] = {INPUT_FILE_HASH, Entry.ContentHash[0],
+                                         Entry.ContentHash[1]};
+      Stream.EmitRecordWithAbbrev(IFHAbbrevCode, Record);
+    }
   }
 
   Stream.ExitBlock();
diff --git a/test/Modules/validate-file-content.m b/test/Modules/validate-file-content.m
new file mode 100644
index 0000000..327a68a
--- /dev/null
+++ b/test/Modules/validate-file-content.m
@@ -0,0 +1,33 @@
+// REQUIRES: shell
+//
+// Check driver works
+// RUN: %clang -fmodules -fsyntax-only -fmodules-validate-input-files-content %s -### 2>&1 | FileCheck --check-prefix=CHECK-CC1 %s
+// CHECK-CC1: -fvalidate-ast-input-files-content
+//
+// PCH+Modules: Test that a mtime mismatch without content change is fine
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: echo '// m.h' > %t/m.h
+// RUN: echo '#include "m.h"' > %t/a.h
+// RUN: echo 'module m { header "m.h" }' > %t/module.modulemap
+// RUN: %clang_cc1 -emit-pch -fmodules-cache-path=%t/cache -fmodules -fimplicit-module-maps -o %t/a.pch -I %t -x objective-c-header %t/a.h -fvalidate-ast-input-files-content
+// RUN: touch -m -a -t 202901010000 %t/m.h
+// RUN: %clang_cc1 -fsyntax-only -fmodules-cache-path=%t/cache -fmodules -fimplicit-module-maps -I %t -include-pch %t/a.pch %s -verify -fvalidate-ast-input-files-content
+//
+// PCH+Modules: Test that a mtime mismatch with content change
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: echo '// m.h' > %t/m.h
+// RUN: echo '#include "m.h"' > %t/a.h
+// RUN: echo 'module m { header "m.h" }' > %t/module.modulemap
+// RUN: %clang_cc1 -emit-pch -fmodules-cache-path=%t/cache -fmodules -fimplicit-module-maps -o %t/a.pch -I %t -x objective-c-header %t/a.h -fvalidate-ast-input-files-content
+// RUN: echo '// m.x' > %t/m.h
+// RUN: touch -m -a -t 202901010000 %t/m.h
+// RUN: not %clang_cc1 -fsyntax-only -fmodules-cache-path=%t/cache -fmodules -fimplicit-module-maps -I %t -include-pch %t/a.pch %s -fvalidate-ast-input-files-content 2> %t/stderr
+// RUN: FileCheck %s < %t/stderr
+//
+// CHECK: file '[[M_H:.*[/\\]m\.h]]' has been modified since the precompiled header '[[A_PCH:.*/a\.pch]]' was built: content changed
+// CHECK: '[[M_H]]' required by '[[M_PCM:.*[/\\]m.*\.pcm]]'
+// CHECK: '[[M_PCM]]' required by '[[A_PCH]]'
+// CHECK: please rebuild precompiled header '[[A_PCH]]'
+// expected-no-diagnostics
diff --git a/test/PCH/validate-file-content.m b/test/PCH/validate-file-content.m
new file mode 100644
index 0000000..512e0c9
--- /dev/null
+++ b/test/PCH/validate-file-content.m
@@ -0,0 +1,29 @@
+// REQUIRES: shell
+//
+// Check driver works
+// RUN: %clang -x objective-c-header -fsyntax-only -fpch-validate-input-files-content %t/a.h -### 2>&1 | FileCheck --check-prefix=CHECK-CC1 %s
+// CHECK-CC1: -fvalidate-ast-input-files-content
+//
+// PCH only: Test that a mtime mismatch without content change is fine
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: echo '// m.h' > %t/m.h
+// RUN: echo '#include "m.h"' > %t/a.h
+// RUN: %clang_cc1 -emit-pch -o %t/a.pch -I %t -x objective-c-header %t/a.h -fvalidate-ast-input-files-content
+// RUN: touch -m -a -t 202901010000 %t/m.h
+// RUN: %clang_cc1 -fsyntax-only -I %t -include-pch %t/a.pch %s -verify -fvalidate-ast-input-files-content
+//
+// PCH only: Test that a mtime mismatch with content change
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: echo '// m.h' > %t/m.h
+// RUN: echo '#include "m.h"' > %t/a.h
+// RUN: %clang_cc1 -emit-pch -o %t/a.pch -I %t -x objective-c-header %t/a.h -fvalidate-ast-input-files-content
+// RUN: echo '// m.x' > %t/m.h
+// RUN: touch -m -a -t 202901010000 %t/m.h
+// RUN: not %clang_cc1 -fsyntax-only -I %t -include-pch %t/a.pch %s -fvalidate-ast-input-files-content 2> %t/stderr
+// RUN: FileCheck %s < %t/stderr
+//
+// CHECK: file '[[M_H:.*[/\\]m\.h]]' has been modified since the precompiled header '[[A_PCH:.*/a\.pch]]' was built: content changed
+// CHECK: please rebuild precompiled header '[[A_PCH]]'
+// expected-no-diagnostics