[ELF] Add -z separate-loadable-segments to complement separate-code and noseparate-code

D64906 allows PT_LOAD to have overlapping p_offset ranges. In the
default R RX RW RW layout + -z noseparate-code case, we do not tail pad
segments when transitioning to another segment. This can save at most
3*maxPageSize bytes.

a) Before D64906, we tail pad R, RX and the first RW.
b) With -z separate-code, we tail pad R and RX, but not the first RW (RELRO).

In some cases, b) saves one file page; in others, it wastes one virtual
memory page. The waste is a concern on Fuchsia: because it uses
compressed binaries, it doesn't benefit from the saved file page.

This patch adds -z separate-loadable-segments to restore the behavior before
D64906. It can affect section addresses and can thus be used as a
debugging mechanism (see PR43214 and the ld.so partition bug in
crbug.com/998712).

Reviewed By: jakehehrlich, ruiu

Differential Revision: https://reviews.llvm.org/D67481

git-svn-id: https://llvm.org/svn/llvm-project/lld/trunk@372807 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/ELF/Config.h b/ELF/Config.h
index 6ea1533..0c68a84 100644
--- a/ELF/Config.h
+++ b/ELF/Config.h
@@ -61,6 +61,9 @@
 // For tracking ARM Float Argument PCS
 enum class ARMVFPArgKind { Default, Base, VFP, ToolChain };
 
+// For -z noseparate-code, -z separate-code and -z separate-loadable-segments.
+enum class SeparateSegmentKind { None, Code, Loadable }; // None is the default
+
 struct SymbolVersion {
   llvm::StringRef name;
   bool isExternCpp;
@@ -209,7 +212,6 @@
   bool zOrigin;
   bool zRelro;
   bool zRodynamic;
-  bool zSeparateCode;
   bool zText;
   bool zRetpolineplt;
   bool zWxneeded;
@@ -222,6 +224,7 @@
   Target2Policy target2;
   ARMVFPArgKind armVFPArgs = ARMVFPArgKind::Default;
   BuildIdKind buildId = BuildIdKind::None;
+  SeparateSegmentKind zSeparate;
   ELFKind ekind = ELFNoneKind;
   uint16_t emachine = llvm::ELF::EM_NONE;
   llvm::Optional<uint64_t> imageBase;
diff --git a/ELF/Driver.cpp b/ELF/Driver.cpp
index e5f3feb..b2c7288 100644
--- a/ELF/Driver.cpp
+++ b/ELF/Driver.cpp
@@ -380,16 +380,30 @@
   return Default;
 }
 
+static SeparateSegmentKind getZSeparate(opt::InputArgList &args) {
+  for (auto *arg : args.filtered_reverse(OPT_z)) { // reverse scan: last wins
+    StringRef v = arg->getValue();
+    if (v == "noseparate-code")
+      return SeparateSegmentKind::None;
+    if (v == "separate-code")
+      return SeparateSegmentKind::Code;
+    if (v == "separate-loadable-segments")
+      return SeparateSegmentKind::Loadable;
+  }
+  return SeparateSegmentKind::None; // no such -z option: noseparate-code
+}
+
 static bool isKnownZFlag(StringRef s) {
   return s == "combreloc" || s == "copyreloc" || s == "defs" ||
          s == "execstack" || s == "global" || s == "hazardplt" ||
          s == "ifunc-noplt" || s == "initfirst" || s == "interpose" ||
          s == "keep-text-section-prefix" || s == "lazy" || s == "muldefs" ||
-         s == "separate-code" || s == "nocombreloc" || s == "nocopyreloc" ||
-         s == "nodefaultlib" || s == "nodelete" || s == "nodlopen" ||
-         s == "noexecstack" || s == "nokeep-text-section-prefix" ||
-         s == "norelro" || s == "noseparate-code" || s == "notext" ||
-         s == "now" || s == "origin" || s == "relro" || s == "retpolineplt" ||
+         s == "separate-code" || s == "separate-loadable-segments" ||
+         s == "nocombreloc" || s == "nocopyreloc" || s == "nodefaultlib" ||
+         s == "nodelete" || s == "nodlopen" || s == "noexecstack" ||
+         s == "nokeep-text-section-prefix" || s == "norelro" ||
+         s == "noseparate-code" || s == "notext" || s == "now" ||
+         s == "origin" || s == "relro" || s == "retpolineplt" ||
          s == "rodynamic" || s == "text" || s == "undefs" || s == "wxneeded" ||
          s.startswith("common-page-size=") || s.startswith("max-page-size=") ||
          s.startswith("stack-size=");
@@ -950,7 +964,7 @@
   config->zRelro = getZFlag(args, "relro", "norelro", true);
   config->zRetpolineplt = hasZOption(args, "retpolineplt");
   config->zRodynamic = hasZOption(args, "rodynamic");
-  config->zSeparateCode = getZFlag(args, "separate-code", "noseparate-code", false);
+  config->zSeparate = getZSeparate(args);
   config->zStackSize = args::getZOptionValue(args, OPT_z, "stack-size", 0);
   config->zText = getZFlag(args, "text", "notext", true);
   config->zWxneeded = hasZOption(args, "wxneeded");
diff --git a/ELF/Writer.cpp b/ELF/Writer.cpp
index 1a5504b..c51ef14 100644
--- a/ELF/Writer.cpp
+++ b/ELF/Writer.cpp
@@ -589,7 +589,8 @@
     return;
 
   if (!config->oFormatBinary) {
-    writeTrapInstr();
+    if (config->zSeparate != SeparateSegmentKind::None)
+      writeTrapInstr();
     writeHeader();
     writeSections();
   } else {
@@ -2233,7 +2234,8 @@
       // maximum page size boundary so that we can find the ELF header at the
       // start. We cannot benefit from overlapping p_offset ranges with the
       // previous segment anyway.
-      if ((config->zSeparateCode && prev &&
+      if (config->zSeparate == SeparateSegmentKind::Loadable ||
+          (config->zSeparate == SeparateSegmentKind::Code && prev &&
            (prev->p_flags & PF_X) != (p->p_flags & PF_X)) ||
           cmd->type == SHT_LLVM_PART_EHDR)
         cmd->addrExpr = [] {
@@ -2342,7 +2344,8 @@
     // If this is a last section of the last executable segment and that
     // segment is the last loadable segment, align the offset of the
     // following section to avoid loading non-segments parts of the file.
-    if (config->zSeparateCode && lastRX && lastRX->lastSec == sec)
+    if (config->zSeparate != SeparateSegmentKind::None && lastRX &&
+        lastRX->lastSec == sec)
       off = alignTo(off, config->commonPageSize);
   }
 
@@ -2614,9 +2617,6 @@
 // We'll leave other pages in segments as-is because the rest will be
 // overwritten by output sections.
 template <class ELFT> void Writer<ELFT>::writeTrapInstr() {
-  if (!config->zSeparateCode)
-    return;
-
   for (Partition &part : partitions) {
     // Fill the last page.
     for (PhdrEntry *p : part.phdrs)
diff --git a/docs/ld.lld.1 b/docs/ld.lld.1
index 74a5140..1746ae9 100644
--- a/docs/ld.lld.1
+++ b/docs/ld.lld.1
@@ -591,18 +591,21 @@
 Use wrapper functions for symbol.
 .It Fl z Ar option
 Linker option extensions.
-.Bl -tag -width indent
+.Bl -tag -width indent -compact
+.Pp
 .It Cm execstack
 Make the main stack executable.
 Stack permissions are recorded in the
 .Dv PT_GNU_STACK
 segment.
+.Pp
 .It Cm global
 Sets the
 .Dv DF_1_GLOBAL flag in the
 .Dv DYNAMIC
 section.
 Different loaders can decide how to handle this flag on their own.
+.Pp
 .It Cm ifunc-noplt
 Do not emit PLT entries for ifunc symbols.
 Instead, emit text relocations referencing the resolver.
@@ -611,64 +614,78 @@
 This option must be combined with the
 .Fl z Li notext
 option.
+.Pp
 .It Cm initfirst
 Sets the
 .Dv DF_1_INITFIRST
 flag to indicate the module should be initialized first.
+.Pp
 .It Cm interpose
 Set the
 .Dv DF_1_INTERPOSE
 flag to indicate to the runtime linker that the object is an interposer.
 During symbol resolution interposers are searched after the application
 but before other dependencies.
+.Pp
 .It Cm muldefs
 Do not error if a symbol is defined multiple times.
 The first definition will be used.
 This is a synonym for
 .Fl -allow-multiple-definition.
+.Pp
 .It Cm nocombreloc
 Disable combining and sorting multiple relocation sections.
+.Pp
 .It Cm nocopyreloc
 Disable the creation of copy relocations.
+.Pp
 .It Cm nodefaultlib
 Set the
 .Dv DF_1_NODEFLIB
 flag to indicate that default library search paths should be ignored.
+.Pp
 .It Cm nodelete
 Set the
 .Dv DF_1_NODELETE
 flag to indicate that the object cannot be unloaded from a process.
+.Pp
 .It Cm nodlopen
 Set the
 .Dv DF_1_NOOPEN
 flag to indicate that the object may not be opened by
 .Xr dlopen 3 .
+.Pp
 .It Cm norelro
 Do not indicate that portions of the object shold be mapped read-only
 after initial relocation processing.
 The object will omit the
 .Dv PT_GNU_RELRO
 segment.
+.Pp
 .It Cm notext
 Allow relocations against read-only segments.
 Sets the
 .Dv DT_TEXTREL flag in the
 .Dv DYNAMIC
 section.
+.Pp
 .It Cm now
 Set the
 .Dv DF_BIND_NOW
 flag to indicate that the run-time loader should perform all relocation
 processing as part of object initialization.
 By default relocations may be performed on demand.
+.Pp
 .It Cm origin
 Set the
 .Dv DF_ORIGIN
 flag to indicate that the object requires
 $ORIGIN
 processing.
+.Pp
 .It Cm retpolineplt
 Emit retpoline format PLT entries as a mitigation for CVE-2017-5715.
+.Pp
 .It Cm rodynamic
 Make the
 .Li .dynamic
@@ -676,6 +693,18 @@
 The
 .Dv DT_DEBUG
 tag will not be emitted.
+.Pp
+.It Cm separate-loadable-segments
+.It Cm separate-code
+.It Cm noseparate-code
+Specify whether two adjacent PT_LOAD segments are allowed to overlap in pages.
+.Cm noseparate-code
+(default) allows overlap.
+.Cm separate-code
+allows overlap between two executable segments, or two non-executable segments.
+.Cm separate-loadable-segments
+disallows overlap.
+.Pp
 .It Cm stack-size Ns = Ns Ar size
 Set the main thread's stack size to
 .Ar size .
@@ -683,9 +712,11 @@
 .Ar size .
 .Dv PT_GNU_STACK
 program segment.
+.Pp
 .It Cm text
 Do not allow relocations against read-only segments.
 This is the default.
+.Pp
 .It Cm wxneeded
 Create a
 .Dv PT_OPENBSD_WXNEEDED
diff --git a/test/ELF/fill-trap.s b/test/ELF/fill-trap.s
index 89660e7..bd26c3f 100644
--- a/test/ELF/fill-trap.s
+++ b/test/ELF/fill-trap.s
@@ -12,6 +12,11 @@
 # RUN: llvm-readobj -l %t | FileCheck %s --check-prefixes=CHECK,PAD
 # RUN: od -Ax -x -N16 -j0x1ff0 %t | FileCheck %s --check-prefix=FILL
 
+## -z separate-loadable-segments pads all segments, including the text segment.
+# RUN: ld.lld %t.o -z separate-loadable-segments -o %t
+# RUN: llvm-readobj -l %t | FileCheck %s --check-prefixes=CHECK,PAD
+# RUN: od -Ax -x -N16 -j0x1ff0 %t | FileCheck %s --check-prefix=FILL
+
 # RUN: ld.lld %t.o -z separate-code -z noseparate-code -o %t
 # RUN: llvm-readobj -l %t | FileCheck %s --check-prefixes=CHECK,NOPAD
 
diff --git a/test/ELF/separate-segments.s b/test/ELF/separate-segments.s
new file mode 100644
index 0000000..a0c910f
--- /dev/null
+++ b/test/ELF/separate-segments.s
@@ -0,0 +1,33 @@
+# REQUIRES: x86
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o
+
+## -z noseparate-code is the default. All PT_LOAD can have overlapping p_offset
+## ranges at runtime.
+# RUN: ld.lld -pie %t.o -o %t
+# RUN: llvm-readelf -l %t | FileCheck --check-prefix=NONE %s
+# NONE:      LOAD 0x000000 0x0000000000000000 0x0000000000000000 0x000245 0x000245 R   0x1000
+# NONE-NEXT: LOAD 0x000248 0x0000000000001248 0x0000000000001248 0x000001 0x000001 R E 0x1000
+# NONE-NEXT: LOAD 0x000250 0x0000000000002250 0x0000000000002250 0x000080 0x000080 RW  0x1000
+# NONE-NEXT: LOAD 0x0002d0 0x00000000000032d0 0x00000000000032d0 0x000001 0x000001 RW  0x1000
+
+## -z separate-code makes text segment (RX) separate.
+## The two RW can have overlapping p_offset ranges at runtime.
+# RUN: ld.lld -pie %t.o -z separate-code -o %t
+# RUN: llvm-readelf -l %t | FileCheck --check-prefix=CODE %s
+# CODE:      LOAD 0x000000 0x0000000000000000 0x0000000000000000 0x000245 0x000245 R   0x1000
+# CODE-NEXT: LOAD 0x001000 0x0000000000001000 0x0000000000001000 0x000001 0x000001 R E 0x1000
+# CODE-NEXT: LOAD 0x002000 0x0000000000002000 0x0000000000002000 0x000080 0x000080 RW  0x1000
+# CODE-NEXT: LOAD 0x002080 0x0000000000003080 0x0000000000003080 0x000001 0x000001 RW  0x1000
+
+## -z separate-loadable-segments makes all segments separate.
+# RUN: ld.lld -pie %t.o -z separate-loadable-segments -o %t
+# RUN: llvm-readelf -l %t | FileCheck --check-prefix=ALL %s
+# ALL:       LOAD 0x000000 0x0000000000000000 0x0000000000000000 0x000245 0x000245 R   0x1000
+# ALL-NEXT:  LOAD 0x001000 0x0000000000001000 0x0000000000001000 0x000001 0x000001 R E 0x1000
+# ALL-NEXT:  LOAD 0x002000 0x0000000000002000 0x0000000000002000 0x000080 0x000080 RW  0x1000
+# ALL-NEXT:  LOAD 0x003000 0x0000000000003000 0x0000000000003000 0x000001 0x000001 RW  0x1000
+
+nop
+
+.data
+.byte 0