[Coverage] Store compilation dir separately in coverage mapping

We currently always store absolute filenames in coverage mapping.  This
is problematic for several reasons. It poses a problem for distributed
compilation, as source locations might vary across machines.  We are
also duplicating the path prefix, potentially wasting space.

This change modifies how we store filenames in coverage mapping. Rather
than absolute paths, it stores the compilation directory and file paths
as given to the compiler, either relative or absolute. Later when
reading the coverage mapping information, we recombine relative paths
with the compilation directory. This approach is similar to the
handling of DW_AT_comp_dir in DWARF.

Finally, we also provide a new option, -fprofile-compilation-dir, akin
to -fdebug-compilation-dir, which can be used to manually override the
compilation directory; this is useful in distributed compilation cases.

Differential Revision: https://reviews.llvm.org/D95753

GitOrigin-RevId: 97ec8fa5bb07e3f5bf25ddcb216b545cd3d03b65
diff --git a/docs/CoverageMappingFormat.rst b/docs/CoverageMappingFormat.rst
index 4dc9dc3..0dc5ce6 100644
--- a/docs/CoverageMappingFormat.rst
+++ b/docs/CoverageMappingFormat.rst
@@ -266,7 +266,16 @@
    [32 x i8] c"..." ; Encoded data (dissected later)
   }, section "__llvm_covmap", align 8
 
-The current version of the format is version 5. There is one difference from version 4:
+The current version of the format is version 6.
+
+There is one difference between versions 6 and 5:
+
+* The first entry in the filename list is the compilation directory. When the
+  filename is relative, the compilation directory is combined with the relative
+  path to get an absolute path. This can reduce size by omitting the duplicate
+  prefix in filenames.
+
+There is one difference between versions 5 and 4:
 
 * The notion of branch region has been introduced along with a corresponding
   region kind.  Branch regions encode two counters, one to track how many
diff --git a/include/llvm/ProfileData/Coverage/CoverageMapping.h b/include/llvm/ProfileData/Coverage/CoverageMapping.h
index 09f2167..957dfe9 100644
--- a/include/llvm/ProfileData/Coverage/CoverageMapping.h
+++ b/include/llvm/ProfileData/Coverage/CoverageMapping.h
@@ -996,7 +996,10 @@
   Version4 = 3,
   // Branch regions referring to two counters are added
   Version5 = 4,
-  // The current version is Version5.
+  // Compilation directory is stored separately and combined with relative
+  // filenames to produce an absolute file path.
+  Version6 = 5,
+  // The current version is Version6.
   CurrentVersion = INSTR_PROF_COVMAP_VERSION
 };
 
diff --git a/include/llvm/ProfileData/Coverage/CoverageMappingReader.h b/include/llvm/ProfileData/Coverage/CoverageMappingReader.h
index 3a611bc..86a3c4a 100644
--- a/include/llvm/ProfileData/Coverage/CoverageMappingReader.h
+++ b/include/llvm/ProfileData/Coverage/CoverageMappingReader.h
@@ -125,14 +125,14 @@
 
 /// Reader for the raw coverage mapping data.
 class RawCoverageMappingReader : public RawCoverageReader {
-  ArrayRef<StringRef> TranslationUnitFilenames;
+  ArrayRef<std::string> &TranslationUnitFilenames;
   std::vector<StringRef> &Filenames;
   std::vector<CounterExpression> &Expressions;
   std::vector<CounterMappingRegion> &MappingRegions;
 
 public:
   RawCoverageMappingReader(StringRef MappingData,
-                           ArrayRef<StringRef> TranslationUnitFilenames,
+                           ArrayRef<std::string> &TranslationUnitFilenames,
                            std::vector<StringRef> &Filenames,
                            std::vector<CounterExpression> &Expressions,
                            std::vector<CounterMappingRegion> &MappingRegions)
@@ -174,10 +174,8 @@
           FilenamesBegin(FilenamesBegin), FilenamesSize(FilenamesSize) {}
   };
 
-  using DecompressedData = std::vector<std::unique_ptr<SmallVector<char, 0>>>;
-
 private:
-  std::vector<StringRef> Filenames;
+  std::vector<std::string> Filenames;
   std::vector<ProfileMappingRecord> MappingRecords;
   InstrProfSymtab ProfileNames;
   size_t CurrentRecord = 0;
@@ -190,10 +188,6 @@
   // D69471, which can split up function records into multiple sections on ELF.
   std::string FuncRecords;
 
-  // Used to tie the lifetimes of decompressed strings to the lifetime of this
-  // BinaryCoverageReader instance.
-  DecompressedData Decompressed;
-
   BinaryCoverageReader(std::string &&FuncRecords)
       : FuncRecords(std::move(FuncRecords)) {}
 
@@ -216,20 +210,20 @@
 
 /// Reader for the raw coverage filenames.
 class RawCoverageFilenamesReader : public RawCoverageReader {
-  std::vector<StringRef> &Filenames;
+  std::vector<std::string> &Filenames;
 
   // Read an uncompressed sequence of filenames.
-  Error readUncompressed(uint64_t NumFilenames);
+  Error readUncompressed(CovMapVersion Version, uint64_t NumFilenames);
 
 public:
-  RawCoverageFilenamesReader(StringRef Data, std::vector<StringRef> &Filenames)
+  RawCoverageFilenamesReader(StringRef Data,
+                             std::vector<std::string> &Filenames)
       : RawCoverageReader(Data), Filenames(Filenames) {}
   RawCoverageFilenamesReader(const RawCoverageFilenamesReader &) = delete;
   RawCoverageFilenamesReader &
   operator=(const RawCoverageFilenamesReader &) = delete;
 
-  Error read(CovMapVersion Version,
-             BinaryCoverageReader::DecompressedData &Decompressed);
+  Error read(CovMapVersion Version);
 };
 
 } // end namespace coverage
diff --git a/include/llvm/ProfileData/Coverage/CoverageMappingWriter.h b/include/llvm/ProfileData/Coverage/CoverageMappingWriter.h
index 303e518..1420675 100644
--- a/include/llvm/ProfileData/Coverage/CoverageMappingWriter.h
+++ b/include/llvm/ProfileData/Coverage/CoverageMappingWriter.h
@@ -27,10 +27,10 @@
 /// Writer of the filenames section for the instrumentation
 /// based code coverage.
 class CoverageFilenamesSectionWriter {
-  ArrayRef<StringRef> Filenames;
+  ArrayRef<std::string> Filenames;
 
 public:
-  CoverageFilenamesSectionWriter(ArrayRef<StringRef> Filenames);
+  CoverageFilenamesSectionWriter(ArrayRef<std::string> Filenames);
 
   /// Write encoded filenames to the given output stream. If \p Compress is
   /// true, attempt to compress the filenames.
diff --git a/include/llvm/ProfileData/InstrProfData.inc b/include/llvm/ProfileData/InstrProfData.inc
index 3ee7af7..9112c9f 100644
--- a/include/llvm/ProfileData/InstrProfData.inc
+++ b/include/llvm/ProfileData/InstrProfData.inc
@@ -649,7 +649,7 @@
 /* Indexed profile format version (start from 1). */
 #define INSTR_PROF_INDEX_VERSION 7
 /* Coverage mapping format version (start from 0). */
-#define INSTR_PROF_COVMAP_VERSION 4
+#define INSTR_PROF_COVMAP_VERSION 5
 
 /* Profile version is always of type uint64_t. Reserve the upper 8 bits in the
  * version for other variants of profile. We set the lowest bit of the upper 8
diff --git a/lib/ProfileData/Coverage/CoverageMappingReader.cpp b/lib/ProfileData/Coverage/CoverageMappingReader.cpp
index 1acdcb4..942f8e7 100644
--- a/lib/ProfileData/Coverage/CoverageMappingReader.cpp
+++ b/lib/ProfileData/Coverage/CoverageMappingReader.cpp
@@ -97,9 +97,7 @@
   return Error::success();
 }
 
-Error RawCoverageFilenamesReader::read(
-    CovMapVersion Version,
-    BinaryCoverageReader::DecompressedData &Decompressed) {
+Error RawCoverageFilenamesReader::read(CovMapVersion Version) {
   uint64_t NumFilenames;
   if (auto Err = readSize(NumFilenames))
     return Err;
@@ -107,7 +105,7 @@
     return make_error<CoverageMapError>(coveragemap_error::malformed);
 
   if (Version < CovMapVersion::Version4)
-    return readUncompressed(NumFilenames);
+    return readUncompressed(Version, NumFilenames);
 
   // The uncompressed length may exceed the size of the encoded filenames.
   // Skip size validation.
@@ -124,11 +122,8 @@
       return make_error<CoverageMapError>(
           coveragemap_error::decompression_failed);
 
-    // Allocate memory for the decompressed filenames. Transfer ownership of
-    // the memory to BinaryCoverageReader.
-    auto DecompressedStorage = std::make_unique<SmallVector<char, 0>>();
-    SmallVectorImpl<char> &StorageBuf = *DecompressedStorage.get();
-    Decompressed.push_back(std::move(DecompressedStorage));
+    // Allocate memory for the decompressed filenames.
+    SmallVector<char, 0> StorageBuf;
 
     // Read compressed filenames.
     StringRef CompressedFilenames = Data.substr(0, CompressedLen);
@@ -143,19 +138,40 @@
 
     StringRef UncompressedFilenames(StorageBuf.data(), StorageBuf.size());
     RawCoverageFilenamesReader Delegate(UncompressedFilenames, Filenames);
-    return Delegate.readUncompressed(NumFilenames);
+    return Delegate.readUncompressed(Version, NumFilenames);
   }
 
-  return readUncompressed(NumFilenames);
+  return readUncompressed(Version, NumFilenames);
 }
 
-Error RawCoverageFilenamesReader::readUncompressed(uint64_t NumFilenames) {
+Error RawCoverageFilenamesReader::readUncompressed(CovMapVersion Version,
+                                                   uint64_t NumFilenames) {
   // Read uncompressed filenames.
-  for (size_t I = 0; I < NumFilenames; ++I) {
-    StringRef Filename;
-    if (auto Err = readString(Filename))
+  if (Version < CovMapVersion::Version6) {
+    for (size_t I = 0; I < NumFilenames; ++I) {
+      StringRef Filename;
+      if (auto Err = readString(Filename))
+        return Err;
+      Filenames.push_back(Filename.str());
+    }
+  } else {
+    StringRef CWD;
+    if (auto Err = readString(CWD))
       return Err;
-    Filenames.push_back(Filename);
+    Filenames.push_back(CWD.str());
+
+    for (size_t I = 1; I < NumFilenames; ++I) {
+      StringRef Filename;
+      if (auto Err = readString(Filename))
+        return Err;
+      if (sys::path::is_absolute(Filename)) {
+        Filenames.push_back(Filename.str());
+      } else {
+        SmallString<256> P(CWD);
+        llvm::sys::path::append(P, Filename);
+        Filenames.push_back(static_cast<std::string>(P));
+      }
+    }
   }
   return Error::success();
 }
@@ -481,9 +497,8 @@
   //
   // Returns a pointer to the next \c CovHeader if it exists, or to an address
   // greater than \p CovEnd if not.
-  virtual Expected<const char *>
-  readCoverageHeader(const char *CovBuf, const char *CovBufEnd,
-                     BinaryCoverageReader::DecompressedData &Decompressed) = 0;
+  virtual Expected<const char *> readCoverageHeader(const char *CovBuf,
+                                                    const char *CovBufEnd) = 0;
 
   // Read function records.
   //
@@ -505,7 +520,7 @@
   static Expected<std::unique_ptr<CovMapFuncRecordReader>>
   get(CovMapVersion Version, InstrProfSymtab &P,
       std::vector<BinaryCoverageReader::ProfileMappingRecord> &R,
-      std::vector<StringRef> &F);
+      std::vector<std::string> &F);
 };
 
 // A class for reading coverage mapping function records for a module.
@@ -519,7 +534,7 @@
   // in \c Records.
   DenseMap<NameRefType, size_t> FunctionRecords;
   InstrProfSymtab &ProfileNames;
-  std::vector<StringRef> &Filenames;
+  std::vector<std::string> &Filenames;
   std::vector<BinaryCoverageReader::ProfileMappingRecord> &Records;
 
   // Maps a hash of the filenames in a TU to a \c FileRange. The range
@@ -579,14 +594,13 @@
   VersionedCovMapFuncRecordReader(
       InstrProfSymtab &P,
       std::vector<BinaryCoverageReader::ProfileMappingRecord> &R,
-      std::vector<StringRef> &F)
+      std::vector<std::string> &F)
       : ProfileNames(P), Filenames(F), Records(R) {}
 
   ~VersionedCovMapFuncRecordReader() override = default;
 
-  Expected<const char *> readCoverageHeader(
-      const char *CovBuf, const char *CovBufEnd,
-      BinaryCoverageReader::DecompressedData &Decompressed) override {
+  Expected<const char *> readCoverageHeader(const char *CovBuf,
+                                            const char *CovBufEnd) override {
     using namespace support;
 
     if (CovBuf + sizeof(CovMapHeader) > CovBufEnd)
@@ -615,7 +629,7 @@
     size_t FilenamesBegin = Filenames.size();
     StringRef FilenameRegion(CovBuf, FilenamesSize);
     RawCoverageFilenamesReader Reader(FilenameRegion, Filenames);
-    if (auto Err = Reader.read(Version, Decompressed))
+    if (auto Err = Reader.read(Version))
       return std::move(Err);
     CovBuf += FilenamesSize;
     FilenameRange FileRange(FilenamesBegin, Filenames.size() - FilenamesBegin);
@@ -721,7 +735,7 @@
 Expected<std::unique_ptr<CovMapFuncRecordReader>> CovMapFuncRecordReader::get(
     CovMapVersion Version, InstrProfSymtab &P,
     std::vector<BinaryCoverageReader::ProfileMappingRecord> &R,
-    std::vector<StringRef> &F) {
+    std::vector<std::string> &F) {
   using namespace coverage;
 
   switch (Version) {
@@ -732,6 +746,7 @@
   case CovMapVersion::Version3:
   case CovMapVersion::Version4:
   case CovMapVersion::Version5:
+  case CovMapVersion::Version6:
     // Decompress the name data.
     if (Error E = P.create(P.getNameData()))
       return std::move(E);
@@ -747,6 +762,9 @@
     else if (Version == CovMapVersion::Version5)
       return std::make_unique<VersionedCovMapFuncRecordReader<
           CovMapVersion::Version5, IntPtrT, Endian>>(P, R, F);
+    else if (Version == CovMapVersion::Version6)
+      return std::make_unique<VersionedCovMapFuncRecordReader<
+          CovMapVersion::Version6, IntPtrT, Endian>>(P, R, F);
   }
   llvm_unreachable("Unsupported version");
 }
@@ -755,8 +773,7 @@
 static Error readCoverageMappingData(
     InstrProfSymtab &ProfileNames, StringRef CovMap, StringRef FuncRecords,
     std::vector<BinaryCoverageReader::ProfileMappingRecord> &Records,
-    std::vector<StringRef> &Filenames,
-    BinaryCoverageReader::DecompressedData &Decompressed) {
+    std::vector<std::string> &Filenames) {
   using namespace coverage;
 
   // Read the records in the coverage data section.
@@ -782,8 +799,7 @@
     // header.
     //
     // Return a pointer to the next coverage header.
-    auto NextOrErr =
-        Reader->readCoverageHeader(CovBuf, CovBufEnd, Decompressed);
+    auto NextOrErr = Reader->readCoverageHeader(CovBuf, CovBufEnd);
     if (auto E = NextOrErr.takeError())
       return E;
     CovBuf = NextOrErr.get();
@@ -810,25 +826,23 @@
     if (Error E =
             readCoverageMappingData<uint32_t, support::endianness::little>(
                 Reader->ProfileNames, Coverage, FuncRecordsRef,
-                Reader->MappingRecords, Reader->Filenames,
-                Reader->Decompressed))
+                Reader->MappingRecords, Reader->Filenames))
       return std::move(E);
   } else if (BytesInAddress == 4 && Endian == support::endianness::big) {
     if (Error E = readCoverageMappingData<uint32_t, support::endianness::big>(
             Reader->ProfileNames, Coverage, FuncRecordsRef,
-            Reader->MappingRecords, Reader->Filenames, Reader->Decompressed))
+            Reader->MappingRecords, Reader->Filenames))
       return std::move(E);
   } else if (BytesInAddress == 8 && Endian == support::endianness::little) {
     if (Error E =
             readCoverageMappingData<uint64_t, support::endianness::little>(
                 Reader->ProfileNames, Coverage, FuncRecordsRef,
-                Reader->MappingRecords, Reader->Filenames,
-                Reader->Decompressed))
+                Reader->MappingRecords, Reader->Filenames))
       return std::move(E);
   } else if (BytesInAddress == 8 && Endian == support::endianness::big) {
     if (Error E = readCoverageMappingData<uint64_t, support::endianness::big>(
             Reader->ProfileNames, Coverage, FuncRecordsRef,
-            Reader->MappingRecords, Reader->Filenames, Reader->Decompressed))
+            Reader->MappingRecords, Reader->Filenames))
       return std::move(E);
   } else
     return make_error<CoverageMapError>(coveragemap_error::malformed);
@@ -1075,10 +1089,9 @@
   Expressions.clear();
   MappingRegions.clear();
   auto &R = MappingRecords[CurrentRecord];
-  RawCoverageMappingReader Reader(
-      R.CoverageMapping,
-      makeArrayRef(Filenames).slice(R.FilenamesBegin, R.FilenamesSize),
-      FunctionsFilenames, Expressions, MappingRegions);
+  auto F = makeArrayRef(Filenames).slice(R.FilenamesBegin, R.FilenamesSize);
+  RawCoverageMappingReader Reader(R.CoverageMapping, F, FunctionsFilenames,
+                                  Expressions, MappingRegions);
   if (auto Err = Reader.read())
     return Err;
 
diff --git a/lib/ProfileData/Coverage/CoverageMappingWriter.cpp b/lib/ProfileData/Coverage/CoverageMappingWriter.cpp
index 65b83d1..6a9258f 100644
--- a/lib/ProfileData/Coverage/CoverageMappingWriter.cpp
+++ b/lib/ProfileData/Coverage/CoverageMappingWriter.cpp
@@ -27,7 +27,7 @@
 using namespace coverage;
 
 CoverageFilenamesSectionWriter::CoverageFilenamesSectionWriter(
-    ArrayRef<StringRef> Filenames)
+    ArrayRef<std::string> Filenames)
     : Filenames(Filenames) {
 #ifndef NDEBUG
   StringSet<> NameSet;
diff --git a/test/tools/llvm-cov/Inputs/binary-formats.v6.linux64l b/test/tools/llvm-cov/Inputs/binary-formats.v6.linux64l
new file mode 100755
index 0000000..bd00cd0
--- /dev/null
+++ b/test/tools/llvm-cov/Inputs/binary-formats.v6.linux64l
Binary files differ
diff --git a/test/tools/llvm-cov/binary-formats.c b/test/tools/llvm-cov/binary-formats.c
index 009583c..a5bfc01 100644
--- a/test/tools/llvm-cov/binary-formats.c
+++ b/test/tools/llvm-cov/binary-formats.c
@@ -8,5 +8,6 @@
 // RUN: llvm-cov show %S/Inputs/binary-formats.macho64l -instr-profile %t.profdata -path-equivalence=/tmp,%S %s | FileCheck %s
 // RUN: llvm-cov show %S/Inputs/binary-formats.macho32b -instr-profile %t.profdata -path-equivalence=/tmp,%S %s | FileCheck %s
 // RUN: llvm-cov show %S/Inputs/binary-formats.v3.macho64l -instr-profile %t.profdata -path-equivalence=/tmp,%S %s | FileCheck %s
+// RUN: llvm-cov show %S/Inputs/binary-formats.v6.linux64l -instr-profile %t.profdata -path-equivalence=/tmp,%S %s | FileCheck %s
 
 // RUN: llvm-cov export %S/Inputs/binary-formats.macho64l -instr-profile %t.profdata | FileCheck %S/Inputs/binary-formats.canonical.json
diff --git a/unittests/ProfileData/CoverageMappingTest.cpp b/unittests/ProfileData/CoverageMappingTest.cpp
index cbe9c1e..44b7a13 100644
--- a/unittests/ProfileData/CoverageMappingTest.cpp
+++ b/unittests/ProfileData/CoverageMappingTest.cpp
@@ -129,6 +129,7 @@
 struct CoverageMappingTest : ::testing::TestWithParam<std::pair<bool, bool>> {
   bool UseMultipleReaders;
   StringMap<unsigned> Files;
+  std::vector<std::string> Filenames;
   std::vector<InputFunctionCoverageData> InputFunctions;
   std::vector<OutputFunctionCoverageData> OutputFunctions;
 
@@ -146,7 +147,7 @@
     auto R = Files.find(Name);
     if (R != Files.end())
       return R->second;
-    unsigned Index = Files.size();
+    unsigned Index = Files.size() + 1;
     Files.try_emplace(Name, Index);
     return Index;
   }
@@ -200,11 +201,12 @@
 
   void readCoverageRegions(const std::string &Coverage,
                            OutputFunctionCoverageData &Data) {
-    SmallVector<StringRef, 8> Filenames(Files.size());
+    Filenames.resize(Files.size() + 1);
     for (const auto &E : Files)
-      Filenames[E.getValue()] = E.getKey();
+      Filenames[E.getValue()] = E.getKey().str();
     std::vector<CounterExpression> Expressions;
-    RawCoverageMappingReader Reader(Coverage, Filenames, Data.Filenames,
+    ArrayRef<std::string> FilenameRefs = llvm::makeArrayRef(Filenames);
+    RawCoverageMappingReader Reader(Coverage, FilenameRefs, Data.Filenames,
                                     Expressions, Data.Regions);
     EXPECT_THAT_ERROR(Reader.read(), Succeeded());
   }
@@ -895,7 +897,7 @@
                                           std::pair<bool, bool>({true, true})),);
 
 TEST(CoverageMappingTest, filename_roundtrip) {
-  std::vector<StringRef> Paths({"a", "b", "c", "d", "e"});
+  std::vector<std::string> Paths({"", "a", "b", "c", "d", "e"});
 
   for (bool Compress : {false, true}) {
     std::string EncodedFilenames;
@@ -905,16 +907,12 @@
       Writer.write(OS, Compress);
     }
 
-    std::vector<StringRef> ReadFilenames;
+    std::vector<std::string> ReadFilenames;
     RawCoverageFilenamesReader Reader(EncodedFilenames, ReadFilenames);
-    BinaryCoverageReader::DecompressedData Decompressed;
-    EXPECT_THAT_ERROR(Reader.read(CovMapVersion::CurrentVersion, Decompressed),
-                      Succeeded());
-    if (!Compress)
-      ASSERT_EQ(Decompressed.size(), 0U);
+    EXPECT_THAT_ERROR(Reader.read(CovMapVersion::CurrentVersion), Succeeded());
 
     ASSERT_EQ(ReadFilenames.size(), Paths.size());
-    for (unsigned I = 0; I < Paths.size(); ++I)
+    for (unsigned I = 1; I < Paths.size(); ++I)
       ASSERT_TRUE(ReadFilenames[I] == Paths[I]);
   }
 }