[llvm] Prevent duplicate files in debug line header in dwarf 5: another attempt

Another attempt to land the changes in debug line header to prevent duplicate
files in Dwarf 5. I rolled back my previous commit because of a mistake in
generating the object file in a test. Meanwhile, I addressed some offline
comments and changed the implementation; the largest difference is that
MCDwarfLineTableHeader does not keep DwarfVersion but gets it as a parameter. I
also merged the patch to fix two lld tests that will start to fail into this
patch.

Original Commit:

https://reviews.llvm.org/D59515

Original Message:
Motivation: In previous dwarf versions, file name indexes started from 1, and
the primary source file was not explicit. Dwarf 5 standard (6.2.4) prescribes
the primary source file to be explicitly given an entry with an index number 0.

The current implementation honors the specification by just duplicating the
main source file, once with index number 0, and later maybe with another
index number. While this is compliant with the letter of the standard, the
duplication causes problems for consumers of this information such as lldb.
(Some files are duplicated, and only some of the duplicates have a line table,
although all of them refer to the same file.)

With this change, dwarf 5 debug line section files always start from 0, and
the zeroth entry is not duplicated whenever possible. This requires different
handling of dwarf 4 and dwarf 5 during generation (e.g. when a function returns
index zero for a file name, it signals an error in dwarf 4, but not in dwarf
5). However, I think the minor complication is worth it, because it enables all
consumers (lldb, gdb, dwarfdump, objdump, and so on) to treat all files in the
file name list homogeneously.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@358732 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
index 0fc34a8..1503a92 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
@@ -243,6 +243,9 @@
     bool hasFileAtIndex(uint64_t FileIndex) const;
 
     /// Extracts filename by its index in filename table in prologue.
+    /// In Dwarf 4, the files are 1-indexed and the current compilation file
+    /// name is not represented in the list. In Dwarf 5, the files are
+    /// 0-indexed and the primary source file has the index 0.
     /// Returns true on success.
     bool getFileNameByIndex(uint64_t FileIndex, const char *CompDir,
                             DILineInfoSpecifier::FileLineInfoKind Kind,
@@ -275,6 +278,8 @@
     SequenceVector Sequences;
 
   private:
+    const llvm::DWARFDebugLine::FileNameEntry &
+    getFileNameEntry(uint64_t Index) const;
     uint32_t findRowInSeq(const DWARFDebugLine::Sequence &Seq,
                           object::SectionedAddress Address) const;
     Optional<StringRef>
diff --git a/include/llvm/MC/MCDwarf.h b/include/llvm/MC/MCDwarf.h
index 63040cb..2a24347 100644
--- a/include/llvm/MC/MCDwarf.h
+++ b/include/llvm/MC/MCDwarf.h
@@ -41,11 +41,14 @@
 class SMLoc;
 class SourceMgr;
 
-/// Instances of this class represent the name of the dwarf
-/// .file directive and its associated dwarf file number in the MC file,
-/// and MCDwarfFile's are created and uniqued by the MCContext class where
-/// the file number for each is its index into the vector of DwarfFiles (note
-/// index 0 is not used and not a valid dwarf file number).
+/// Instances of this class represent the name of the dwarf .file directive and
+/// its associated dwarf file number in the MC file. MCDwarfFile's are created
+/// and uniqued by the MCContext class. In Dwarf 4 file numbers start from 1;
+/// i.e. the entry with file number 1 is the first element in the vector of
+/// DwarfFiles and there is no MCDwarfFile with file number 0. In Dwarf 5 file
+/// numbers start from 0, with the MCDwarfFile with file number 0 being the
+/// primary source file, and file numbers correspond to their index in the
+/// vector.
 struct MCDwarfFile {
   // The base name of the file without its directory path.
   std::string Name;
@@ -225,6 +228,7 @@
   Expected<unsigned> tryGetFile(StringRef &Directory, StringRef &FileName,
                                 Optional<MD5::MD5Result> Checksum,
                                 Optional<StringRef> Source,
+                                uint16_t DwarfVersion,
                                 unsigned FileNumber = 0);
   std::pair<MCSymbol *, MCSymbol *>
   Emit(MCStreamer *MCOS, MCDwarfLineTableParams Params,
@@ -245,6 +249,18 @@
     return MCDwarfFiles.empty() || (HasAllMD5 == HasAnyMD5);
   }
 
+  void setRootFile(StringRef Directory, StringRef FileName,
+                   Optional<MD5::MD5Result> Checksum,
+                   Optional<StringRef> Source) {
+    CompilationDir = Directory;
+    RootFile.Name = FileName;
+    RootFile.DirIndex = 0;
+    RootFile.Checksum = Checksum;
+    RootFile.Source = Source;
+    trackMD5Usage(Checksum.hasValue());
+    HasSource = Source.hasValue();
+  }
+
 private:
   void emitV2FileDirTables(MCStreamer *MCOS) const;
   void emitV5FileDirTables(MCStreamer *MCOS, Optional<MCDwarfLineStr> &LineStr) const;
@@ -252,24 +268,23 @@
 
 class MCDwarfDwoLineTable {
   MCDwarfLineTableHeader Header;
+  bool HasSplitLineTable = false;
 
 public:
   void maybeSetRootFile(StringRef Directory, StringRef FileName,
-                        Optional<MD5::MD5Result> Checksum, Optional<StringRef> Source) {
+                        Optional<MD5::MD5Result> Checksum,
+                        Optional<StringRef> Source) {
     if (!Header.RootFile.Name.empty())
       return;
-    Header.CompilationDir = Directory;
-    Header.RootFile.Name = FileName;
-    Header.RootFile.DirIndex = 0;
-    Header.RootFile.Checksum = Checksum;
-    Header.RootFile.Source = Source;
-    Header.trackMD5Usage(Checksum.hasValue());
-    Header.HasSource = Source.hasValue();
+    Header.setRootFile(Directory, FileName, Checksum, Source);
   }
 
   unsigned getFile(StringRef Directory, StringRef FileName,
-                   Optional<MD5::MD5Result> Checksum, Optional<StringRef> Source) {
-    return cantFail(Header.tryGetFile(Directory, FileName, Checksum, Source));
+                   Optional<MD5::MD5Result> Checksum, uint16_t DwarfVersion,
+                   Optional<StringRef> Source) {
+    HasSplitLineTable = true;
+    return cantFail(Header.tryGetFile(Directory, FileName, Checksum, Source,
+                                      DwarfVersion));
   }
 
   void Emit(MCStreamer &MCOS, MCDwarfLineTableParams Params,
@@ -291,12 +306,13 @@
   Expected<unsigned> tryGetFile(StringRef &Directory, StringRef &FileName,
                                 Optional<MD5::MD5Result> Checksum,
                                 Optional<StringRef> Source,
+                                uint16_t DwarfVersion,
                                 unsigned FileNumber = 0);
   unsigned getFile(StringRef &Directory, StringRef &FileName,
                    Optional<MD5::MD5Result> Checksum, Optional<StringRef> Source,
-                   unsigned FileNumber = 0) {
+                   uint16_t DwarfVersion, unsigned FileNumber = 0) {
     return cantFail(tryGetFile(Directory, FileName, Checksum, Source,
-                               FileNumber));
+                               DwarfVersion, FileNumber));
   }
 
   void setRootFile(StringRef Directory, StringRef FileName,
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index eec1028..b14eadd 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -315,7 +315,9 @@
     addSectionOffset(getUnitDie(), dwarf::DW_AT_stmt_list, 0);
   }
   return SplitLineTable->getFile(File->getDirectory(), File->getFilename(),
-                                 getMD5AsBytes(File), File->getSource());
+                                 getMD5AsBytes(File),
+                                 Asm->OutContext.getDwarfVersion(),
+                                 File->getSource());
 }
 
 void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) {
@@ -397,7 +399,6 @@
     return;
 
   unsigned FileID = getOrCreateSourceID(File);
-  assert(FileID && "Invalid file id");
   addUInt(Die, dwarf::DW_AT_decl_file, None, FileID);
   addUInt(Die, dwarf::DW_AT_decl_line, None, Line);
 }
diff --git a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index 47df83e..84e13ad 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -969,14 +969,29 @@
 }
 
 bool DWARFDebugLine::LineTable::hasFileAtIndex(uint64_t FileIndex) const {
+  uint16_t DwarfVersion = Prologue.getVersion();
+  assert(DwarfVersion != 0 && "LineTable has no dwarf version information");
+  if (DwarfVersion >= 5)
+    return FileIndex < Prologue.FileNames.size();
   return FileIndex != 0 && FileIndex <= Prologue.FileNames.size();
 }
 
+const llvm::DWARFDebugLine::FileNameEntry &
+DWARFDebugLine::LineTable::getFileNameEntry(uint64_t Index) const {
+  uint16_t DwarfVersion = Prologue.getVersion();
+  assert(DwarfVersion != 0 && "LineTable has no dwarf version information");
+  // Unlike previous versions, in Dwarf 5 the file name table is 0-indexed.
+  if (DwarfVersion >= 5)
+    return Prologue.FileNames[Index];
+  else
+    return Prologue.FileNames[Index - 1];
+}
+
 Optional<StringRef> DWARFDebugLine::LineTable::getSourceByIndex(uint64_t FileIndex,
                                                                 FileLineInfoKind Kind) const {
   if (Kind == FileLineInfoKind::None || !hasFileAtIndex(FileIndex))
     return None;
-  const FileNameEntry &Entry = Prologue.FileNames[FileIndex - 1];
+  const FileNameEntry &Entry = getFileNameEntry(FileIndex);
   if (Optional<const char *> source = Entry.Source.getAsCString())
     return StringRef(*source);
   return None;
@@ -996,7 +1011,7 @@
                                                    std::string &Result) const {
   if (Kind == FileLineInfoKind::None || !hasFileAtIndex(FileIndex))
     return false;
-  const FileNameEntry &Entry = Prologue.FileNames[FileIndex - 1];
+  const FileNameEntry &Entry = getFileNameEntry(FileIndex);
   StringRef FileName = Entry.Name.getAsCString().getValue();
   if (Kind != FileLineInfoKind::AbsoluteFilePath ||
       isPathAbsoluteOnWindowsOrPosix(FileName)) {
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index ef5c466..350502d 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -1190,7 +1190,8 @@
   MCDwarfLineTable &Table = getContext().getMCDwarfLineTable(CUID);
   unsigned NumFiles = Table.getMCDwarfFiles().size();
   Expected<unsigned> FileNoOrErr =
-      Table.tryGetFile(Directory, Filename, Checksum, Source, FileNo);
+      Table.tryGetFile(Directory, Filename, Checksum, Source,
+                       getContext().getDwarfVersion(), FileNo);
   if (!FileNoOrErr)
     return FileNoOrErr.takeError();
   FileNo = FileNoOrErr.get();
diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp
index 4d16715..6ca7d3d 100644
--- a/lib/MC/MCContext.cpp
+++ b/lib/MC/MCContext.cpp
@@ -605,7 +605,8 @@
                                            Optional<StringRef> Source,
                                            unsigned CUID) {
   MCDwarfLineTable &Table = MCDwarfLineTablesCUMap[CUID];
-  return Table.tryGetFile(Directory, FileName, Checksum, Source, FileNumber);
+  return Table.tryGetFile(Directory, FileName, Checksum, Source, DwarfVersion,
+                          FileNumber);
 }
 
 /// isValidDwarfFileNumber - takes a dwarf file number and returns true if it
@@ -613,7 +614,7 @@
 bool MCContext::isValidDwarfFileNumber(unsigned FileNumber, unsigned CUID) {
   const MCDwarfLineTable &LineTable = getMCDwarfLineTable(CUID);
   if (FileNumber == 0)
-    return getDwarfVersion() >= 5 && LineTable.hasRootFile();
+    return getDwarfVersion() >= 5;
   if (FileNumber >= LineTable.getMCDwarfFiles().size())
     return false;
 
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp
index 262c520..e8edf6b 100644
--- a/lib/MC/MCDwarf.cpp
+++ b/lib/MC/MCDwarf.cpp
@@ -259,7 +259,7 @@
 
 void MCDwarfDwoLineTable::Emit(MCStreamer &MCOS, MCDwarfLineTableParams Params,
                                MCSection *Section) const {
-  if (Header.MCDwarfFiles.empty())
+  if (!HasSplitLineTable)
     return;
   Optional<MCDwarfLineStr> NoLineStr(None);
   MCOS.SwitchSection(Section);
@@ -538,8 +538,17 @@
                                                 StringRef &FileName,
                                                 Optional<MD5::MD5Result> Checksum,
                                                 Optional<StringRef> Source,
+                                                uint16_t DwarfVersion,
                                                 unsigned FileNumber) {
-  return Header.tryGetFile(Directory, FileName, Checksum, Source, FileNumber);
+  return Header.tryGetFile(Directory, FileName, Checksum, Source, DwarfVersion,
+                           FileNumber);
+}
+
+// True if RootFile is set and its name and checksum match (Directory unused).
+bool isRootFile(const MCDwarfFile &RootFile, StringRef &Directory,
+                StringRef &FileName, Optional<MD5::MD5Result> Checksum) {
+  return !RootFile.Name.empty() && StringRef(RootFile.Name) == FileName &&
+         RootFile.Checksum == Checksum;
+}
 
 Expected<unsigned>
@@ -547,6 +556,7 @@
                                    StringRef &FileName,
                                    Optional<MD5::MD5Result> Checksum,
                                    Optional<StringRef> Source,
+                                   uint16_t DwarfVersion,
                                    unsigned FileNumber) {
   if (Directory == CompilationDir)
     Directory = "";
@@ -561,6 +571,8 @@
     trackMD5Usage(Checksum.hasValue());
     HasSource = (Source != None);
   }
+  if (isRootFile(RootFile, Directory, FileName, Checksum) && DwarfVersion >= 5)
+    return 0;
   if (FileNumber == 0) {
     // File numbers start with 1 and/or after any file numbers
     // allocated by inline-assembler .file directives.
diff --git a/test/MC/ARM/dwarf-asm-multiple-sections.s b/test/MC/ARM/dwarf-asm-multiple-sections.s
index cff8f00..f995958 100644
--- a/test/MC/ARM/dwarf-asm-multiple-sections.s
+++ b/test/MC/ARM/dwarf-asm-multiple-sections.s
@@ -2,9 +2,9 @@
 // RUN: llvm-dwarfdump -v %t | FileCheck -check-prefix DWARF -check-prefix DWARF45 %s
 // RUN: llvm-dwarfdump --debug-line %t | FileCheck -check-prefix DWARF-DL -check-prefix DWARF-DL-5 -DDWVER=5 -DDWFILE=0 %s
 // RUN: llvm-objdump -r %t | FileCheck -check-prefix RELOC -check-prefix RELOC5 %s
-// RUN: llvm-mc < %s -triple=armv7-linux-gnueabi -filetype=obj -o %t -g -fdebug-compilation-dir=/tmp
+// RUN: llvm-mc < %s -triple=armv7-linux-gnueabi -filetype=obj -o %t -g -dwarf-version 4 -fdebug-compilation-dir=/tmp
 // RUN: llvm-dwarfdump -v %t | FileCheck -check-prefix DWARF -check-prefix DWARF45 %s
-// RUN: llvm-dwarfdump --debug-line %t | FileCheck -check-prefix DWARF-DL -DDWVER=4 -DDWFILE=1 %s
+// RUN: llvm-dwarfdump --debug-line %t | FileCheck -check-prefix DWARF-DL -check-prefix DWARF-DL-4 -DDWVER=4 -DDWFILE=1 %s
 // RUN: llvm-objdump -r %t | FileCheck -check-prefix RELOC -check-prefix RELOC4 %s
 // RUN: llvm-mc < %s -triple=armv7-linux-gnueabi -filetype=obj -o %t -g -dwarf-version 3 -fdebug-compilation-dir=/tmp
 // RUN: llvm-dwarfdump -v %t | FileCheck -check-prefix DWARF -check-prefix DWARF3 %s
@@ -57,10 +57,14 @@
 // DWARF-DL-5:    include_directories[  0] = "/tmp"
 // DWARF-DL:      file_names[  [[DWFILE]]]:
 // DWARF-DL:      name: "{{(<stdin>|-)}}"
-// DWARF-DL:      0x0000000000000000     17      0      1   0   0  is_stmt
-// DWARF-DL-NEXT: 0x0000000000000004     17      0      1   0   0  is_stmt end_sequence
-// DWARF-DL-NEXT: 0x0000000000000000     21      0      1   0   0  is_stmt
-// DWARF-DL-NEXT: 0x0000000000000004     21      0      1   0   0  is_stmt end_sequence
+// DWARF-DL-5:      0x0000000000000000     17      0      0   0   0  is_stmt
+// DWARF-DL-5-NEXT: 0x0000000000000004     17      0      0   0   0  is_stmt end_sequence
+// DWARF-DL-5-NEXT: 0x0000000000000000     21      0      0   0   0  is_stmt
+// DWARF-DL-5-NEXT: 0x0000000000000004     21      0      0   0   0  is_stmt end_sequence
+// DWARF-DL-4:      0x0000000000000000     17      0      1   0   0  is_stmt
+// DWARF-DL-4-NEXT: 0x0000000000000004     17      0      1   0   0  is_stmt end_sequence
+// DWARF-DL-4-NEXT: 0x0000000000000000     21      0      1   0   0  is_stmt
+// DWARF-DL-4-NEXT: 0x0000000000000004     21      0      1   0   0  is_stmt end_sequence
 
 
 // DWARF: .debug_ranges contents:
diff --git a/test/MC/ELF/debug-mixed-md5.ll b/test/MC/ELF/debug-mixed-md5.ll
index 2ec8141..d48e42c 100644
--- a/test/MC/ELF/debug-mixed-md5.ll
+++ b/test/MC/ELF/debug-mixed-md5.ll
@@ -1,8 +1,7 @@
 ; RUN: %llc_dwarf -filetype=asm -dwarf-version=5 %s -o - | FileCheck %s -check-prefix=ASM
 ; RUN: %llc_dwarf -filetype=obj -dwarf-version=5 %s -o - | llvm-dwarfdump -debug-line - | FileCheck %s -check-prefix=OBJ
 ; ASM: .file 0 "{{.+}}" md5
-; ASM: .file 1 "{{.+}}" md5
-; ASM: .file 2 "t1.cpp"
+; ASM: .file 1 "t1.cpp"
 ; ASM-NOT: md5
 ; OBJ: file_names[ 0]:
 ; OBJ-NOT: md5
diff --git a/test/MC/ELF/dwarf-file0.s b/test/MC/ELF/dwarf-file0.s
index 1a3afb6..f98fdcc 100644
--- a/test/MC/ELF/dwarf-file0.s
+++ b/test/MC/ELF/dwarf-file0.s
@@ -19,16 +19,14 @@
 # CHECK:       file_names[ 1]:
 # CHECK-NEXT:  name: "header.h"
 # CHECK-NEXT:  dir_index: 1
-# CHECK:       file_names[ 2]:
-# CHECK-NEXT:  name: "root.cpp"
+# CHECK-4:     file_names[ 2]:
+# CHECK-4-NEXT: name: "root.cpp"
 # CHECK-4-NEXT: dir_index: 2
-# CHECK-5-NEXT: dir_index: 0
 
 # ASM-NOT: .file
 # ASM-5:   .file 0 "/test" "root.cpp"
 # ASM:     .file 1 "/include" "header.h"
 # ASM-4:   .file 2 "/test" "root.cpp"
-# ASM-5:   .file 2 "root.cpp"
 # ASM-NOT: .file
 
 # WARN:      file 0 not supported prior to DWARF-5
diff --git a/test/tools/llvm-objdump/Inputs/embedded-source b/test/tools/llvm-objdump/Inputs/embedded-source
index 072b5a9..8274a66 100644
--- a/test/tools/llvm-objdump/Inputs/embedded-source
+++ b/test/tools/llvm-objdump/Inputs/embedded-source
Binary files differ
diff --git a/test/tools/llvm-objdump/X86/function-sections-line-numbers.s b/test/tools/llvm-objdump/X86/function-sections-line-numbers.s
index b932a5d..4660744 100644
--- a/test/tools/llvm-objdump/X86/function-sections-line-numbers.s
+++ b/test/tools/llvm-objdump/X86/function-sections-line-numbers.s
@@ -30,8 +30,7 @@
 _Z2f1v:                                 # @_Z2f1v
 .Lfunc_begin0:
 	.file	0 "/home/avl" "test.cpp" md5 0xefae234cc05b45384d782316d3a5d338
-	.file	1 "test.cpp" md5 0xefae234cc05b45384d782316d3a5d338
-	.loc	1 1 0                   # test.cpp:1:0
+	.loc	0 1 0                   # test.cpp:1:0
 	.cfi_startproc
 # %bb.0:                                # %entry
 	pushq	%rbp
@@ -40,7 +39,7 @@
 	movq	%rsp, %rbp
 	.cfi_def_cfa_register %rbp
 .Ltmp0:
-	.loc	1 1 12 prologue_end     # test.cpp:1:12
+	.loc	0 1 12 prologue_end     # test.cpp:1:12
 	popq	%rbp
 	.cfi_def_cfa %rsp, 8
 	retq
@@ -55,7 +54,7 @@
 	.type	_Z2f2v,@function
 _Z2f2v:                                 # @_Z2f2v
 .Lfunc_begin1:
-	.loc	1 2 0                   # test.cpp:2:0
+	.loc	0 2 0                   # test.cpp:2:0
 	.cfi_startproc
 # %bb.0:                                # %entry
 	pushq	%rbp
@@ -64,7 +63,7 @@
 	movq	%rsp, %rbp
 	.cfi_def_cfa_register %rbp
 .Ltmp2:
-	.loc	1 2 12 prologue_end     # test.cpp:2:12
+	.loc	0 2 12 prologue_end     # test.cpp:2:12
 	popq	%rbp
 	.cfi_def_cfa %rsp, 8
 	retq