diff --git a/clangd/CMakeLists.txt b/clangd/CMakeLists.txt
index 778210f..0461c99 100644
--- a/clangd/CMakeLists.txt
+++ b/clangd/CMakeLists.txt
@@ -62,6 +62,7 @@
   Logger.cpp
   Protocol.cpp
   Quality.cpp
+  Preamble.cpp
   RIFF.cpp
   Selection.cpp
   SemanticHighlighting.cpp
diff --git a/clangd/ClangdServer.cpp b/clangd/ClangdServer.cpp
index 01cac6e..ec77514 100644
--- a/clangd/ClangdServer.cpp
+++ b/clangd/ClangdServer.cpp
@@ -13,6 +13,7 @@
 #include "Format.h"
 #include "FormattedString.h"
 #include "Headers.h"
+#include "Preamble.h"
 #include "Protocol.h"
 #include "SemanticHighlighting.h"
 #include "SourceCode.h"
diff --git a/clangd/ClangdUnit.cpp b/clangd/ClangdUnit.cpp
index bc4ec4b..7cc8f07 100644
--- a/clangd/ClangdUnit.cpp
+++ b/clangd/ClangdUnit.cpp
@@ -53,13 +53,6 @@
 namespace clangd {
 namespace {
 
-bool compileCommandsAreEqual(const tooling::CompileCommand &LHS,
-                             const tooling::CompileCommand &RHS) {
-  // We don't check for Output, it should not matter to clangd.
-  return LHS.Directory == RHS.Directory && LHS.Filename == RHS.Filename &&
-         llvm::makeArrayRef(LHS.CommandLine).equals(RHS.CommandLine);
-}
-
 template <class T> std::size_t getUsedBytes(const std::vector<T> &Vec) {
   return Vec.capacity() * sizeof(T);
 }
@@ -105,10 +98,9 @@
   std::vector<Decl *> TopLevelDecls;
 };
 
-// CollectMainFileMacroExpansions and CollectMainFileMacros are two different
-// classes as CollectMainFileMacroExpansions is only used when building the AST
-// for the main file. CollectMainFileMacros is only used when building the
-// preamble.
+// This collects macro expansions in the main file.
+// (Contrast with CollectMainFileMacros in Preamble.cpp, which collects macro
+// *definitions* in the preamble region of the main file).
 class CollectMainFileMacroExpansions : public PPCallbacks {
   const SourceManager &SM;
   std::vector<SourceLocation> &MainFileMacroLocs;
@@ -127,83 +119,6 @@
   }
 };
 
-class CollectMainFileMacros : public PPCallbacks {
-public:
-  explicit CollectMainFileMacros(const SourceManager &SM,
-                                 std::vector<std::string> *Out)
-      : SM(SM), Out(Out) {}
-
-  void FileChanged(SourceLocation Loc, FileChangeReason,
-                   SrcMgr::CharacteristicKind, FileID Prev) {
-    InMainFile = SM.isWrittenInMainFile(Loc);
-  }
-
-  void MacroDefined(const Token &MacroName, const MacroDirective *MD) {
-    if (InMainFile)
-      MainFileMacros.insert(MacroName.getIdentifierInfo()->getName());
-  }
-
-  void EndOfMainFile() {
-    for (const auto &Entry : MainFileMacros)
-      Out->push_back(Entry.getKey());
-    llvm::sort(*Out);
-  }
-
-private:
-  const SourceManager &SM;
-  bool InMainFile = true;
-  llvm::StringSet<> MainFileMacros;
-  std::vector<std::string> *Out;
-};
-
-class CppFilePreambleCallbacks : public PreambleCallbacks {
-public:
-  CppFilePreambleCallbacks(PathRef File, PreambleParsedCallback ParsedCallback)
-      : File(File), ParsedCallback(ParsedCallback) {
-  }
-
-  IncludeStructure takeIncludes() { return std::move(Includes); }
-
-  std::vector<std::string> takeMainFileMacros() {
-    return std::move(MainFileMacros);
-  }
-
-  CanonicalIncludes takeCanonicalIncludes() { return std::move(CanonIncludes); }
-
-  void AfterExecute(CompilerInstance &CI) override {
-    if (!ParsedCallback)
-      return;
-    trace::Span Tracer("Running PreambleCallback");
-    ParsedCallback(CI.getASTContext(), CI.getPreprocessorPtr(), CanonIncludes);
-  }
-
-  void BeforeExecute(CompilerInstance &CI) override {
-    addSystemHeadersMapping(&CanonIncludes, CI.getLangOpts());
-    SourceMgr = &CI.getSourceManager();
-  }
-
-  std::unique_ptr<PPCallbacks> createPPCallbacks() override {
-    assert(SourceMgr && "SourceMgr must be set at this point");
-    return std::make_unique<PPChainedCallbacks>(
-        collectIncludeStructureCallback(*SourceMgr, &Includes),
-        std::make_unique<CollectMainFileMacros>(*SourceMgr, &MainFileMacros));
-  }
-
-  CommentHandler *getCommentHandler() override {
-    IWYUHandler = collectIWYUHeaderMaps(&CanonIncludes);
-    return IWYUHandler.get();
-  }
-
-private:
-  PathRef File;
-  PreambleParsedCallback ParsedCallback;
-  IncludeStructure Includes;
-  CanonicalIncludes CanonIncludes;
-  std::vector<std::string> MainFileMacros;
-  std::unique_ptr<CommentHandler> IWYUHandler = nullptr;
-  SourceManager *SourceMgr = nullptr;
-};
-
 // When using a preamble, only preprocessor events outside its bounds are seen.
 // This is almost what we want: replaying transitive preprocessing wastes time.
 // However this confuses clang-tidy checks: they don't see any #includes!
@@ -585,16 +500,6 @@
   return CanonIncludes;
 }
 
-PreambleData::PreambleData(PrecompiledPreamble Preamble,
-                           std::vector<Diag> Diags, IncludeStructure Includes,
-                           std::vector<std::string> MainFileMacros,
-                           std::unique_ptr<PreambleFileStatusCache> StatCache,
-                           CanonicalIncludes CanonIncludes)
-    : Preamble(std::move(Preamble)), Diags(std::move(Diags)),
-      Includes(std::move(Includes)), MainFileMacros(std::move(MainFileMacros)),
-      StatCache(std::move(StatCache)), CanonIncludes(std::move(CanonIncludes)) {
-}
-
 ParsedAST::ParsedAST(std::shared_ptr<const PreambleData> Preamble,
                      std::unique_ptr<CompilerInstance> Clang,
                      std::unique_ptr<FrontendAction> Action,
@@ -613,79 +518,6 @@
   assert(this->Action);
 }
 
-std::shared_ptr<const PreambleData>
-buildPreamble(PathRef FileName, CompilerInvocation &CI,
-              std::shared_ptr<const PreambleData> OldPreamble,
-              const tooling::CompileCommand &OldCompileCommand,
-              const ParseInputs &Inputs, bool StoreInMemory,
-              PreambleParsedCallback PreambleCallback) {
-  // Note that we don't need to copy the input contents, preamble can live
-  // without those.
-  auto ContentsBuffer =
-      llvm::MemoryBuffer::getMemBuffer(Inputs.Contents, FileName);
-  auto Bounds =
-      ComputePreambleBounds(*CI.getLangOpts(), ContentsBuffer.get(), 0);
-
-  if (OldPreamble &&
-      compileCommandsAreEqual(Inputs.CompileCommand, OldCompileCommand) &&
-      OldPreamble->Preamble.CanReuse(CI, ContentsBuffer.get(), Bounds,
-                                     Inputs.FS.get())) {
-    vlog("Reusing preamble for file {0}", llvm::Twine(FileName));
-    return OldPreamble;
-  }
-  vlog("Preamble for file {0} cannot be reused. Attempting to rebuild it.",
-       FileName);
-
-  trace::Span Tracer("BuildPreamble");
-  SPAN_ATTACH(Tracer, "File", FileName);
-  StoreDiags PreambleDiagnostics;
-  llvm::IntrusiveRefCntPtr<DiagnosticsEngine> PreambleDiagsEngine =
-      CompilerInstance::createDiagnostics(&CI.getDiagnosticOpts(),
-                                          &PreambleDiagnostics, false);
-
-  // Skip function bodies when building the preamble to speed up building
-  // the preamble and make it smaller.
-  assert(!CI.getFrontendOpts().SkipFunctionBodies);
-  CI.getFrontendOpts().SkipFunctionBodies = true;
-  // We don't want to write comment locations into PCH. They are racy and slow
-  // to read back. We rely on dynamic index for the comments instead.
-  CI.getPreprocessorOpts().WriteCommentListToPCH = false;
-
-  CppFilePreambleCallbacks SerializedDeclsCollector(FileName, PreambleCallback);
-  if (Inputs.FS->setCurrentWorkingDirectory(Inputs.CompileCommand.Directory)) {
-    log("Couldn't set working directory when building the preamble.");
-    // We proceed anyway, our lit-tests rely on results for non-existing working
-    // dirs.
-  }
-
-  llvm::SmallString<32> AbsFileName(FileName);
-  Inputs.FS->makeAbsolute(AbsFileName);
-  auto StatCache = std::make_unique<PreambleFileStatusCache>(AbsFileName);
-  auto BuiltPreamble = PrecompiledPreamble::Build(
-      CI, ContentsBuffer.get(), Bounds, *PreambleDiagsEngine,
-      StatCache->getProducingFS(Inputs.FS),
-      std::make_shared<PCHContainerOperations>(), StoreInMemory,
-      SerializedDeclsCollector);
-
-  // When building the AST for the main file, we do want the function
-  // bodies.
-  CI.getFrontendOpts().SkipFunctionBodies = false;
-
-  if (BuiltPreamble) {
-    vlog("Built preamble of size {0} for file {1}", BuiltPreamble->getSize(),
-         FileName);
-    std::vector<Diag> Diags = PreambleDiagnostics.take();
-    return std::make_shared<PreambleData>(
-        std::move(*BuiltPreamble), std::move(Diags),
-        SerializedDeclsCollector.takeIncludes(),
-        SerializedDeclsCollector.takeMainFileMacros(), std::move(StatCache),
-        SerializedDeclsCollector.takeCanonicalIncludes());
-  } else {
-    elog("Could not build a preamble for file {0}", FileName);
-    return nullptr;
-  }
-}
-
 llvm::Optional<ParsedAST>
 buildAST(PathRef FileName, std::unique_ptr<CompilerInvocation> Invocation,
          llvm::ArrayRef<Diag> CompilerInvocationDiags,
diff --git a/clangd/ClangdUnit.h b/clangd/ClangdUnit.h
index 6fc8777..cf3badf 100644
--- a/clangd/ClangdUnit.h
+++ b/clangd/ClangdUnit.h
@@ -11,64 +11,23 @@
 
 #include "Compiler.h"
 #include "Diagnostics.h"
-#include "FS.h"
-#include "Function.h"
 #include "Headers.h"
 #include "Path.h"
-#include "Protocol.h"
+#include "Preamble.h"
 #include "index/CanonicalIncludes.h"
-#include "index/Index.h"
 #include "clang/Frontend/FrontendAction.h"
 #include "clang/Frontend/PrecompiledPreamble.h"
 #include "clang/Lex/Preprocessor.h"
-#include "clang/Serialization/ASTBitCodes.h"
 #include "clang/Tooling/CompilationDatabase.h"
-#include "clang/Tooling/Core/Replacement.h"
 #include "clang/Tooling/Syntax/Tokens.h"
 #include "llvm/ADT/ArrayRef.h"
 #include <memory>
 #include <string>
 #include <vector>
 
-namespace llvm {
-class raw_ostream;
-
-namespace vfs {
-class FileSystem;
-} // namespace vfs
-} // namespace llvm
-
 namespace clang {
-
-namespace tooling {
-struct CompileCommand;
-} // namespace tooling
-
 namespace clangd {
-
-// Stores Preamble and associated data.
-struct PreambleData {
-  PreambleData(PrecompiledPreamble Preamble, std::vector<Diag> Diags,
-               IncludeStructure Includes,
-               std::vector<std::string> MainFileMacros,
-               std::unique_ptr<PreambleFileStatusCache> StatCache,
-               CanonicalIncludes CanonIncludes);
-
-  tooling::CompileCommand CompileCommand;
-  PrecompiledPreamble Preamble;
-  std::vector<Diag> Diags;
-  // Processes like code completions and go-to-definitions will need #include
-  // information, and their compile action skips preamble range.
-  IncludeStructure Includes;
-  // Macros defined in the preamble section of the main file.
-  // Users care about headers vs main-file, not preamble vs non-preamble.
-  // These should be treated as main-file entities e.g. for code completion.
-  std::vector<std::string> MainFileMacros;
-  // Cache of FS operations performed when building the preamble.
-  // When reusing a preamble, this cache can be consumed to save IO.
-  std::unique_ptr<PreambleFileStatusCache> StatCache;
-  CanonicalIncludes CanonIncludes;
-};
+class SymbolIndex;
 
 /// Stores and provides access to parsed AST.
 class ParsedAST {
@@ -161,23 +120,6 @@
   CanonicalIncludes CanonIncludes;
 };
 
-using PreambleParsedCallback =
-    std::function<void(ASTContext &, std::shared_ptr<clang::Preprocessor>,
-                       const CanonicalIncludes &)>;
-
-/// Rebuild the preamble for the new inputs unless the old one can be reused.
-/// If \p OldPreamble can be reused, it is returned unchanged.
-/// If \p OldPreamble is null, always builds the preamble.
-/// If \p PreambleCallback is set, it will be run on top of the AST while
-/// building the preamble. Note that if the old preamble was reused, no AST is
-/// built and, therefore, the callback will not be executed.
-std::shared_ptr<const PreambleData>
-buildPreamble(PathRef FileName, CompilerInvocation &CI,
-              std::shared_ptr<const PreambleData> OldPreamble,
-              const tooling::CompileCommand &OldCompileCommand,
-              const ParseInputs &Inputs, bool StoreInMemory,
-              PreambleParsedCallback PreambleCallback);
-
 /// Build an AST from provided user inputs. This function does not check if
 /// preamble can be reused, as this function expects that \p Preamble is the
 /// result of calling buildPreamble.
diff --git a/clangd/CodeComplete.cpp b/clangd/CodeComplete.cpp
index 045320f..4340620 100644
--- a/clangd/CodeComplete.cpp
+++ b/clangd/CodeComplete.cpp
@@ -19,7 +19,6 @@
 
 #include "CodeComplete.h"
 #include "AST.h"
-#include "ClangdUnit.h"
 #include "CodeCompletionStrings.h"
 #include "Compiler.h"
 #include "Diagnostics.h"
@@ -28,6 +27,7 @@
 #include "FuzzyMatch.h"
 #include "Headers.h"
 #include "Logger.h"
+#include "Preamble.h"
 #include "Protocol.h"
 #include "Quality.h"
 #include "SourceCode.h"
diff --git a/clangd/CodeComplete.h b/clangd/CodeComplete.h
index 381c97e..dda4f44 100644
--- a/clangd/CodeComplete.h
+++ b/clangd/CodeComplete.h
@@ -22,7 +22,6 @@
 #include "index/Index.h"
 #include "index/Symbol.h"
 #include "index/SymbolOrigin.h"
-#include "clang/Frontend/PrecompiledPreamble.h"
 #include "clang/Sema/CodeCompleteConsumer.h"
 #include "clang/Sema/CodeCompleteOptions.h"
 #include "clang/Tooling/CompilationDatabase.h"
diff --git a/clangd/Compiler.h b/clangd/Compiler.h
index 689514a..6ab1b0f 100644
--- a/clangd/Compiler.h
+++ b/clangd/Compiler.h
@@ -7,8 +7,8 @@
 //===----------------------------------------------------------------------===//
 //
 // Shared utilities for invoking the clang compiler.
-// ClangdUnit takes care of much of this, but some features like CodeComplete
-// run their own compile actions that share logic.
+// Most callers will use this through Preamble/ParsedAST, but some features like
+// CodeComplete run their own compile actions that share these low-level pieces.
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/clangd/Preamble.cpp b/clangd/Preamble.cpp
new file mode 100644
index 0000000..5e890fd
--- /dev/null
+++ b/clangd/Preamble.cpp
@@ -0,0 +1,193 @@
+//===--- Preamble.cpp - Reusing expensive parts of the AST ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Preamble.h"
+#include "Logger.h"
+#include "Trace.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Lex/PPCallbacks.h"
+#include "clang/Lex/PreprocessorOptions.h"
+
+namespace clang {
+namespace clangd {
+namespace {
+
+bool compileCommandsAreEqual(const tooling::CompileCommand &LHS,
+                             const tooling::CompileCommand &RHS) {
+  // Output is deliberately not compared; it should not matter to clangd.
+  return LHS.Directory == RHS.Directory && LHS.Filename == RHS.Filename &&
+         LHS.CommandLine == RHS.CommandLine;
+}
+
+// This collects macro definitions in the *preamble region* of the main file.
+// (Contrast with CollectMainFileMacroExpansions in ClangdUnit.cpp, which
+// collects macro *expansions* in the rest of the main file.)
+class CollectMainFileMacros : public PPCallbacks {
+public:
+  explicit CollectMainFileMacros(const SourceManager &SM,
+                                 std::vector<std::string> *Out)
+      : SM(SM), Out(Out) {}
+
+  void FileChanged(SourceLocation Loc, FileChangeReason,
+                   SrcMgr::CharacteristicKind, FileID Prev) override {
+    InMainFile = SM.isWrittenInMainFile(Loc);
+  }
+
+  void MacroDefined(const Token &MacroName, const MacroDirective *MD) override {
+    if (InMainFile)
+      MainFileMacros.insert(MacroName.getIdentifierInfo()->getName());
+  }
+
+  void EndOfMainFile() override {
+    for (const auto &Entry : MainFileMacros)
+      Out->push_back(Entry.getKey());
+    llvm::sort(*Out);
+  }
+
+private:
+  const SourceManager &SM;
+  bool InMainFile = true; // Refreshed on every FileChanged event.
+  llvm::StringSet<> MainFileMacros; // Deduplicates names until the flush.
+  std::vector<std::string> *Out; // Filled and sorted in EndOfMainFile().
+};
+
+class CppFilePreambleCallbacks : public PreambleCallbacks {
+public:
+  CppFilePreambleCallbacks(PathRef File, PreambleParsedCallback ParsedCallback)
+      : File(File), ParsedCallback(ParsedCallback) {
+  }
+
+  IncludeStructure takeIncludes() { return std::move(Includes); }
+
+  std::vector<std::string> takeMainFileMacros() {
+    return std::move(MainFileMacros);
+  }
+
+  CanonicalIncludes takeCanonicalIncludes() { return std::move(CanonIncludes); }
+
+  void AfterExecute(CompilerInstance &CI) override {
+    if (!ParsedCallback) // No callback was supplied; nothing to run.
+      return;
+    trace::Span Tracer("Running PreambleCallback");
+    ParsedCallback(CI.getASTContext(), CI.getPreprocessorPtr(), CanonIncludes);
+  }
+
+  void BeforeExecute(CompilerInstance &CI) override {
+    addSystemHeadersMapping(&CanonIncludes, CI.getLangOpts());
+    SourceMgr = &CI.getSourceManager(); // Read later by createPPCallbacks().
+  }
+
+  std::unique_ptr<PPCallbacks> createPPCallbacks() override {
+    assert(SourceMgr && "SourceMgr must be set at this point");
+    return std::make_unique<PPChainedCallbacks>(
+        collectIncludeStructureCallback(*SourceMgr, &Includes),
+        std::make_unique<CollectMainFileMacros>(*SourceMgr, &MainFileMacros));
+  }
+
+  CommentHandler *getCommentHandler() override {
+    IWYUHandler = collectIWYUHeaderMaps(&CanonIncludes); // Owned here.
+    return IWYUHandler.get();
+  }
+
+private:
+  PathRef File; // Stored by the constructor; not read within this class.
+  PreambleParsedCallback ParsedCallback; // May be empty; see AfterExecute().
+  IncludeStructure Includes; // Target of collectIncludeStructureCallback().
+  CanonicalIncludes CanonIncludes; // See BeforeExecute(), getCommentHandler().
+  std::vector<std::string> MainFileMacros; // Output of CollectMainFileMacros.
+  std::unique_ptr<CommentHandler> IWYUHandler = nullptr;
+  SourceManager *SourceMgr = nullptr; // Set in BeforeExecute().
+};
+
+} // namespace
+
+PreambleData::PreambleData(PrecompiledPreamble Preamble,
+                           std::vector<Diag> Diags, IncludeStructure Includes,
+                           std::vector<std::string> MainFileMacros,
+                           std::unique_ptr<PreambleFileStatusCache> StatCache,
+                           CanonicalIncludes CanonIncludes)
+    : Preamble(std::move(Preamble)), Diags(std::move(Diags)),
+      Includes(std::move(Includes)), MainFileMacros(std::move(MainFileMacros)),
+      StatCache(std::move(StatCache)), CanonIncludes(std::move(CanonIncludes)) {
+} // All arguments are moved into members; CompileCommand is left default.
+
+std::shared_ptr<const PreambleData>
+buildPreamble(PathRef FileName, CompilerInvocation &CI,
+              std::shared_ptr<const PreambleData> OldPreamble,
+              const tooling::CompileCommand &OldCompileCommand,
+              const ParseInputs &Inputs, bool StoreInMemory,
+              PreambleParsedCallback PreambleCallback) {
+  // Note that we don't need to copy the input contents; the preamble can live
+  // without them.
+  auto ContentsBuffer =
+      llvm::MemoryBuffer::getMemBuffer(Inputs.Contents, FileName);
+  auto Bounds =
+      ComputePreambleBounds(*CI.getLangOpts(), ContentsBuffer.get(), 0);
+
+  if (OldPreamble &&
+      compileCommandsAreEqual(Inputs.CompileCommand, OldCompileCommand) &&
+      OldPreamble->Preamble.CanReuse(CI, ContentsBuffer.get(), Bounds,
+                                     Inputs.FS.get())) {
+    vlog("Reusing preamble for file {0}", FileName);
+    return OldPreamble;
+  }
+  vlog("Preamble for file {0} cannot be reused. Attempting to rebuild it.",
+       FileName);
+
+  trace::Span Tracer("BuildPreamble");
+  SPAN_ATTACH(Tracer, "File", FileName);
+  StoreDiags PreambleDiagnostics;
+  llvm::IntrusiveRefCntPtr<DiagnosticsEngine> PreambleDiagsEngine =
+      CompilerInstance::createDiagnostics(&CI.getDiagnosticOpts(),
+                                          &PreambleDiagnostics, false);
+
+  // Skip function bodies when building the preamble to speed up building
+  // the preamble and make it smaller.
+  assert(!CI.getFrontendOpts().SkipFunctionBodies);
+  CI.getFrontendOpts().SkipFunctionBodies = true;
+  // We don't want to write comment locations into PCH. They are racy and slow
+  // to read back. We rely on dynamic index for the comments instead.
+  CI.getPreprocessorOpts().WriteCommentListToPCH = false;
+
+  CppFilePreambleCallbacks SerializedDeclsCollector(FileName, PreambleCallback);
+  if (Inputs.FS->setCurrentWorkingDirectory(Inputs.CompileCommand.Directory)) {
+    log("Couldn't set working directory when building the preamble.");
+    // We proceed anyway, our lit-tests rely on results for non-existing working
+    // dirs.
+  }
+
+  llvm::SmallString<32> AbsFileName(FileName);
+  Inputs.FS->makeAbsolute(AbsFileName);
+  auto StatCache = std::make_unique<PreambleFileStatusCache>(AbsFileName);
+  auto BuiltPreamble = PrecompiledPreamble::Build(
+      CI, ContentsBuffer.get(), Bounds, *PreambleDiagsEngine,
+      StatCache->getProducingFS(Inputs.FS),
+      std::make_shared<PCHContainerOperations>(), StoreInMemory,
+      SerializedDeclsCollector);
+
+  // When building the AST for the main file, we do want the function
+  // bodies.
+  CI.getFrontendOpts().SkipFunctionBodies = false;
+
+  if (!BuiltPreamble) {
+    elog("Could not build a preamble for file {0}", FileName);
+    return nullptr;
+  }
+
+  vlog("Built preamble of size {0} for file {1}", BuiltPreamble->getSize(),
+       FileName);
+  std::vector<Diag> Diags = PreambleDiagnostics.take();
+  return std::make_shared<PreambleData>(
+      std::move(*BuiltPreamble), std::move(Diags),
+      SerializedDeclsCollector.takeIncludes(),
+      SerializedDeclsCollector.takeMainFileMacros(), std::move(StatCache),
+      SerializedDeclsCollector.takeCanonicalIncludes());
+}
+
+} // namespace clangd
+} // namespace clang
diff --git a/clangd/Preamble.h b/clangd/Preamble.h
new file mode 100644
index 0000000..c1632ff
--- /dev/null
+++ b/clangd/Preamble.h
@@ -0,0 +1,88 @@
+//===--- Preamble.h - Reusing expensive parts of the AST ---------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The vast majority of code in a typical translation unit is in the headers
+// included at the top of the file.
+//
+// The preamble optimization says that we can parse this code once, and reuse
+// the result multiple times. The preamble is invalidated by changes to the
+// code in the preamble region, to the compile command, or to files on disk.
+//
+// This is the most important optimization in clangd: it allows operations like
+// code-completion to have sub-second latency. It is supported by the
+// PrecompiledPreamble functionality in clang, which wraps the techniques used
+// by PCH files, modules, etc. into a convenient interface.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_PREAMBLE_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_PREAMBLE_H
+
+#include "Compiler.h"
+#include "Diagnostics.h"
+#include "FS.h"
+#include "Headers.h"
+#include "index/CanonicalIncludes.h"
+#include "clang/Frontend/PrecompiledPreamble.h"
+#include "clang/Tooling/CompilationDatabase.h"
+#include <functional>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace clang {
+namespace clangd {
+
+/// The parsed preamble and associated data.
+///
+/// As we must avoid re-parsing the preamble, any information that can only
+/// be obtained during parsing must be eagerly captured and stored here.
+struct PreambleData {
+  PreambleData(PrecompiledPreamble Preamble, std::vector<Diag> Diags,
+               IncludeStructure Includes,
+               std::vector<std::string> MainFileMacros,
+               std::unique_ptr<PreambleFileStatusCache> StatCache,
+               CanonicalIncludes CanonIncludes);
+
+  tooling::CompileCommand CompileCommand; // Not set by the constructor.
+  PrecompiledPreamble Preamble; // The result of PrecompiledPreamble::Build().
+  std::vector<Diag> Diags; // Diagnostics captured during the build.
+  // Processes like code completions and go-to-definitions will need #include
+  // information, and their compile action skips preamble range.
+  IncludeStructure Includes;
+  // Macros defined in the preamble section of the main file.
+  // Users care about headers vs main-file, not preamble vs non-preamble.
+  // These should be treated as main-file entities e.g. for code completion.
+  std::vector<std::string> MainFileMacros;
+  // Cache of FS operations performed when building the preamble.
+  // When reusing a preamble, this cache can be consumed to save IO.
+  std::unique_ptr<PreambleFileStatusCache> StatCache;
+  CanonicalIncludes CanonIncludes; // Mappings gathered during the build.
+};
+
+using PreambleParsedCallback =
+    std::function<void(ASTContext &, std::shared_ptr<clang::Preprocessor>,
+                       const CanonicalIncludes &)>;
+
+/// Build a preamble for the new inputs unless an old one can be reused.
+/// If \p OldPreamble can be reused, it is returned unchanged.
+/// If \p OldPreamble is null, always builds the preamble.
+/// If \p PreambleCallback is set, it will be run on top of the AST while
+/// building the preamble. Note that if the old preamble was reused, no AST is
+/// built and, therefore, the callback will not be executed.
+std::shared_ptr<const PreambleData>
+buildPreamble(PathRef FileName, CompilerInvocation &CI,
+              std::shared_ptr<const PreambleData> OldPreamble,
+              const tooling::CompileCommand &OldCompileCommand,
+              const ParseInputs &Inputs, bool StoreInMemory,
+              PreambleParsedCallback PreambleCallback);
+
+
+} // namespace clangd
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_PREAMBLE_H
diff --git a/clangd/TUScheduler.cpp b/clangd/TUScheduler.cpp
index 7052fec..98f3945 100644
--- a/clangd/TUScheduler.cpp
+++ b/clangd/TUScheduler.cpp
@@ -43,10 +43,12 @@
 
 #include "TUScheduler.h"
 #include "Cancellation.h"
+#include "ClangdUnit.h"
 #include "Compiler.h"
 #include "Diagnostics.h"
 #include "GlobalCompilationDatabase.h"
 #include "Logger.h"
+#include "Preamble.h"
 #include "Trace.h"
 #include "index/CanonicalIncludes.h"
 #include "clang/Frontend/CompilerInvocation.h"
diff --git a/clangd/TUScheduler.h b/clangd/TUScheduler.h
index e02250d..3103af9 100644
--- a/clangd/TUScheduler.h
+++ b/clangd/TUScheduler.h
@@ -9,7 +9,7 @@
 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_TUSCHEDULER_H
 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_TUSCHEDULER_H
 
-#include "ClangdUnit.h"
+#include "Compiler.h"
 #include "Diagnostics.h"
 #include "Function.h"
 #include "GlobalCompilationDatabase.h"
@@ -24,6 +24,8 @@
 
 namespace clang {
 namespace clangd {
+class ParsedAST;
+struct PreambleData;
 
 /// Returns a number of a default async threads to use for TUScheduler.
 /// Returned value is always >= 1 (i.e. will not cause requests to be processed
diff --git a/clangd/unittests/TUSchedulerTests.cpp b/clangd/unittests/TUSchedulerTests.cpp
index ff28cbb..5b7f8a7 100644
--- a/clangd/unittests/TUSchedulerTests.cpp
+++ b/clangd/unittests/TUSchedulerTests.cpp
@@ -12,6 +12,7 @@
 #include "Diagnostics.h"
 #include "Matchers.h"
 #include "Path.h"
+#include "Preamble.h"
 #include "TUScheduler.h"
 #include "TestFS.h"
 #include "Threading.h"
