[llvm-objdump] - Implement -z/--disassemble-zeroes.

This is https://bugs.llvm.org/show_bug.cgi?id=37151.

The GNU objdump documentation says "Normally the disassembly output will skip
blocks of zeroes", but llvm-objdump currently prints them.

The patch implements the -z/--disassemble-zeroes option and switches the default to always
skip blocks of zeroes.

Differential revision: https://reviews.llvm.org/D56083

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@350823 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/test/CodeGen/Mips/micromips-b-range.ll b/test/CodeGen/Mips/micromips-b-range.ll
index f761d1c..5831ae8 100644
--- a/test/CodeGen/Mips/micromips-b-range.ll
+++ b/test/CodeGen/Mips/micromips-b-range.ll
@@ -44,8 +44,7 @@
 ; CHECK-NEXT:    9a:	ff fd 00 00 	lw	$ra, 0($sp)
 ; CHECK-NEXT:    9e:	00 01 0f 3c 	jr	$1
 ; CHECK-NEXT:    a2:	33 bd 00 08 	addiu	$sp, $sp, 8
-
-; CHECK:      10466:	00 00 00 00 	nop
+; CHECK:                ...
 ; CHECK-NEXT: 1046a:	94 00 00 02 	b	8 <foo+0x10472>
 ; CHECK-NEXT: 1046e:	00 00 00 00 	nop
 ; CHECK-NEXT: 10472:	33 bd ff f8 	addiu	$sp, $sp, -8
diff --git a/test/MC/Mips/cpsetup.s b/test/MC/Mips/cpsetup.s
index 907e4fe..c963df0 100644
--- a/test/MC/Mips/cpsetup.s
+++ b/test/MC/Mips/cpsetup.s
@@ -1,5 +1,5 @@
 # RUN: llvm-mc -triple mips-unknown-linux -target-abi o32 -filetype=obj -o - %s | \
-# RUN:   llvm-objdump -d -r - | FileCheck -check-prefixes=ALL,O32 %s
+# RUN:   llvm-objdump -d -r -z - | FileCheck -check-prefixes=ALL,O32 %s
 
 # RUN: llvm-mc -triple mips-unknown-linux -target-abi o32 %s | \
 # RUN:   FileCheck -check-prefixes=ALL,ASM,ASM-O32 %s
@@ -7,14 +7,14 @@
 # FIXME: Now we check .cpsetup expansion for `-mno-shared` case only.
 #        We also need to implement/check the `-mshared` case.
 # RUN: llvm-mc -triple mips64-unknown-linux -target-abi n32 -filetype=obj -o - %s | \
-# RUN:   llvm-objdump -d -r - | \
+# RUN:   llvm-objdump -d -r -z - | \
 # RUN:   FileCheck -check-prefixes=ALL,NXX,N32 %s
 
 # RUN: llvm-mc -triple mips64-unknown-linux -target-abi n32 %s | \
 # RUN:   FileCheck -check-prefixes=ALL,ASM,ASM-N32 %s
 
 # RUN: llvm-mc -triple mips64-unknown-linux %s -filetype=obj -o - | \
-# RUN:   llvm-objdump -d -r - | \
+# RUN:   llvm-objdump -d -r -z - | \
 # RUN:   FileCheck -check-prefixes=ALL,NXX,N64 %s
 
 # RUN: llvm-mc -triple mips64-unknown-linux %s | \
diff --git a/test/MC/Mips/nacl-mask.s b/test/MC/Mips/nacl-mask.s
index 604b5c8..e7eba37 100644
--- a/test/MC/Mips/nacl-mask.s
+++ b/test/MC/Mips/nacl-mask.s
@@ -1,5 +1,5 @@
 # RUN: llvm-mc -filetype=obj -triple=mipsel-unknown-nacl %s \
-# RUN:   | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s
+# RUN:   | llvm-objdump -disassemble -z -no-show-raw-insn - | FileCheck %s
 
 # This test tests that address-masking sandboxing is added when given assembly
 # input.
diff --git a/test/MC/X86/disassemble-zeroes.s b/test/MC/X86/disassemble-zeroes.s
new file mode 100644
index 0000000..2ecfdad
--- /dev/null
+++ b/test/MC/X86/disassemble-zeroes.s
@@ -0,0 +1,81 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t
+// RUN: llvm-objdump -d %t | FileCheck %s --check-prefix=NODISASM
+
+// The exact rules for skipping zero bytes can be found in the llvm-objdump
+// source code. This test checks that we follow these rules and can force
+// disassembly of zero blocks with the -z and --disassemble-zeroes options.
+
+// NODISASM:      Disassembly of section .text:
+// NODISASM-NEXT:  0000000000000000 main:
+// NODISASM-NEXT:   0:  00 00               addb %al, (%rax)
+// NODISASM-NEXT:   2:  00 00               addb %al, (%rax)
+// NODISASM-NEXT:   4:  00 00               addb %al, (%rax)
+// NODISASM-NEXT:   6:  00 90 00 00 00 00   addb %dl, (%rax)
+// NODISASM-NEXT:       ...
+// NODISASM-NEXT:   20: 90                  nop
+// NODISASM-NEXT:       ...
+// NODISASM:      0000000000000031 foo:
+// NODISASM-NEXT:   31: 00 00               addb %al, (%rax)
+// NODISASM-NEXT:   33: 00 00               addb %al, (%rax)
+// NODISASM:      0000000000000035 bar:
+// NODISASM-NEXT:       ...
+
+// Check that with -z we disassemble blocks of zeroes.
+// RUN: llvm-objdump -d -z %t | FileCheck %s --check-prefix=DISASM
+
+// DISASM: Disassembly of section .text:
+// DISASM-NEXT: 0000000000000000 main:
+// DISASM-NEXT:   0: 00 00              addb %al, (%rax)
+// DISASM-NEXT:   2: 00 00              addb %al, (%rax)
+// DISASM-NEXT:   4: 00 00              addb %al, (%rax)
+// DISASM-NEXT:   6: 00 90 00 00 00 00  addb %dl, (%rax)
+// DISASM-NEXT:   c: 00 00              addb %al, (%rax)
+// DISASM-NEXT:   e: 00 00              addb %al, (%rax)
+// DISASM-NEXT:  10: 00 00              addb %al, (%rax)
+// DISASM-NEXT:  12: 00 00              addb %al, (%rax)
+// DISASM-NEXT:  14: 00 00              addb %al, (%rax)
+// DISASM-NEXT:  16: 00 00              addb %al, (%rax)
+// DISASM-NEXT:  18: 00 00              addb %al, (%rax)
+// DISASM-NEXT:  1a: 00 00              addb %al, (%rax)
+// DISASM-NEXT:  1c: 00 00              addb %al, (%rax)
+// DISASM-NEXT:  1e: 00 00              addb %al, (%rax)
+// DISASM-NEXT:  20: 90                 nop
+// DISASM-NEXT:  21: 00 00              addb %al, (%rax)
+// DISASM-NEXT:  23: 00 00              addb %al, (%rax)
+// DISASM-NEXT:  25: 00 00              addb %al, (%rax)
+// DISASM-NEXT:  27: 00 00              addb %al, (%rax)
+// DISASM-NEXT:  29: 00 00              addb %al, (%rax)
+// DISASM-NEXT:  2b: 00 00              addb %al, (%rax)
+// DISASM-NEXT:  2d: 00 00              addb %al, (%rax)
+// DISASM-NEXT:  2f: 00 00              addb %al, (%rax)
+// DISASM:      0000000000000031 foo:
+// DISASM-NEXT:  31: 00 00              addb %al, (%rax)
+// DISASM-NEXT:  33: 00 00              addb %al, (%rax)
+// DISASM:      0000000000000035 bar:
+// DISASM-NEXT:  35: 00 00              addb %al, (%rax)
+// DISASM-NEXT:  37: 00 00              addb %al, (%rax)
+// DISASM-NEXT:  39: 00 00              addb %al, (%rax)
+// DISASM-NEXT:  3b: 00 00              addb %al, (%rax)
+
+// Check that --disassemble-zeroes works as an alias for -z.
+// RUN: llvm-objdump -d --disassemble-zeroes %t | FileCheck %s --check-prefix=DISASM
+
+.text
+.globl main
+.type main, @function
+main:
+ .long 0
+ .byte 0
+ .byte 0
+ .byte 0
+ nop
+ .quad 0
+ .quad 0
+ .quad 0
+ nop
+ .quad 0
+ .quad 0
+foo:
+ .long 0
+bar:
+ .quad 0
diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp
index 22fd1eb..21359a7 100644
--- a/tools/llvm-objdump/llvm-objdump.cpp
+++ b/tools/llvm-objdump/llvm-objdump.cpp
@@ -265,8 +265,17 @@
     StartAddress("start-address", cl::desc("Disassemble beginning at address"),
                  cl::value_desc("address"), cl::init(0));
 cl::opt<unsigned long long>
-    StopAddress("stop-address", cl::desc("Stop disassembly at address"),
+    StopAddress("stop-address",
+                cl::desc("Stop disassembly at address"),
                 cl::value_desc("address"), cl::init(UINT64_MAX));
+
+cl::opt<bool> DisassembleZeroes("disassemble-zeroes",
+                                cl::desc("Do not skip blocks of zeroes when "
+                                         "disassembling"));
+cl::alias DisassembleZeroesShort("z",
+                                 cl::desc("Alias for --disassemble-zeroes"),
+                                 cl::aliasopt(DisassembleZeroes));
+
 static StringRef ToolName;
 
 typedef std::vector<std::tuple<uint64_t, StringRef, uint8_t>> SectionSymbolsTy;
@@ -1298,6 +1307,29 @@
   }
 }
 
+// Normally the disassembly output will skip blocks of zeroes. This function
+// returns the number of zero bytes that can be skipped when dumping the
+// disassembly of the instructions in Buf.
+static size_t countSkippableZeroBytes(ArrayRef<uint8_t> Buf) {
+  // When -z or --disassemble-zeroes is given we always disassemble zeroes.
+  if (DisassembleZeroes)
+    return 0;
+
+  // Find the number of leading zeroes.
+  size_t N = 0;
+  while (N < Buf.size() && !Buf[N])
+    ++N;
+
+  // We only skip a block of zero bytes when we see at least 8 of them
+  // in a row; shorter runs are disassembled as usual.
+  if (N < 8)
+    return 0;
+
+  // We skip zeroes in multiples of 4 because we do not want to truncate
+  // an instruction if it starts with a zero byte.
+  return N & ~0x3;
+}
+
 static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
   if (StartAddress > StopAddress)
     error("Start address should be less than stop address");
@@ -1732,6 +1764,14 @@
         if (Index >= End)
           break;
 
+        if (size_t N =
+                countSkippableZeroBytes(Bytes.slice(Index, End - Index))) {
+          outs() << "\t\t..." << '\n';
+          Index += N;
+          if (Index >= End)
+            break;
+        }
+
         // Disassemble a real instruction or a data when disassemble all is
         // provided
         bool Disassembled = DisAsm->getInstruction(Inst, Size, Bytes.slice(Index),