[ELF] Support placing .lbss/.lrodata/.ldata after .bss

https://reviews.llvm.org/D150510 places .lrodata before .rodata to
minimize the number of permission transitions in the memory image.
However, this layout is less ideal for -fno-pic code (which is still
important).

Small code model -fno-pic code has R_X86_64_32S relocations with a range
of `[0,2**31)` (if we ignore the negative area). Placing `.lrodata`
earlier exerts relocation pressure on such code. Non-x86 64-bit
architectures generally have a similar `[0,2**31)` limitation if they
don't use PC-relative relocations.

If we place .lrodata later, we will need one extra PT_LOAD. Two layouts
are appealing:

* .bss/.lbss/.lrodata/.ldata (GNU ld)
* .bss/.ldata/.lbss/.lrodata

The GNU ld layout has the nice property that there is only one BSS
(except .tbss/.relro_padding). Add -z lrodata-after-bss to support
this layout.

Since a read-only PT_LOAD segment (for large data sections) may appear
after RW PT_LOAD segments. The placement of `_etext` has to be adjusted.

Pull Request: https://github.com/llvm/llvm-project/pull/81224

GitOrigin-RevId: 78762357d449cfcd11426c8e152302a27f2e7d4d
diff --git a/ELF/Config.h b/ELF/Config.h
index fcca8c4..691ebfc 100644
--- a/ELF/Config.h
+++ b/ELF/Config.h
@@ -310,6 +310,7 @@
   bool zInitfirst;
   bool zInterpose;
   bool zKeepTextSectionPrefix;
+  bool zLrodataAfterBss;
   bool zNodefaultlib;
   bool zNodelete;
   bool zNodlopen;
diff --git a/ELF/Driver.cpp b/ELF/Driver.cpp
index 4bb9b7a..24faa17 100644
--- a/ELF/Driver.cpp
+++ b/ELF/Driver.cpp
@@ -1436,6 +1436,8 @@
   config->zInterpose = hasZOption(args, "interpose");
   config->zKeepTextSectionPrefix = getZFlag(
       args, "keep-text-section-prefix", "nokeep-text-section-prefix", false);
+  config->zLrodataAfterBss =
+      getZFlag(args, "lrodata-after-bss", "nolrodata-after-bss", false);
   config->zNodefaultlib = hasZOption(args, "nodefaultlib");
   config->zNodelete = hasZOption(args, "nodelete");
   config->zNodlopen = hasZOption(args, "nodlopen");
diff --git a/ELF/Writer.cpp b/ELF/Writer.cpp
index 5b7dfd3..0bbf43d 100644
--- a/ELF/Writer.cpp
+++ b/ELF/Writer.cpp
@@ -911,11 +911,12 @@
   RF_NOT_ALLOC = 1 << 26,
   RF_PARTITION = 1 << 18, // Partition number (8 bits)
   RF_NOT_SPECIAL = 1 << 17,
-  RF_WRITE = 1 << 16,
-  RF_EXEC_WRITE = 1 << 15,
-  RF_EXEC = 1 << 14,
-  RF_RODATA = 1 << 13,
-  RF_LARGE = 1 << 12,
+  RF_LARGE_ALT = 1 << 15,
+  RF_WRITE = 1 << 14,
+  RF_EXEC_WRITE = 1 << 13,
+  RF_EXEC = 1 << 12,
+  RF_RODATA = 1 << 11,
+  RF_LARGE = 1 << 10,
   RF_NOT_RELRO = 1 << 9,
   RF_NOT_TLS = 1 << 8,
   RF_BSS = 1 << 7,
@@ -974,8 +975,14 @@
     if (osec.type == SHT_PROGBITS)
       rank |= RF_RODATA;
     // Among PROGBITS sections, place .lrodata further from .text.
-    if (!(osec.flags & SHF_X86_64_LARGE && config->emachine == EM_X86_64))
-      rank |= RF_LARGE;
+    // For -z lrodata-after-bss, place .lrodata after .lbss like GNU ld. This
+    // layout has one extra PT_LOAD, but alleviates relocation overflow
+    // pressure for absolute relocations referencing small data from -fno-pic
+    // relocatable files.
+    if (osec.flags & SHF_X86_64_LARGE && config->emachine == EM_X86_64)
+      rank |= config->zLrodataAfterBss ? RF_LARGE_ALT : 0;
+    else
+      rank |= config->zLrodataAfterBss ? 0 : RF_LARGE;
   } else if (isExec) {
     rank |= isWrite ? RF_EXEC_WRITE : RF_EXEC;
   } else {
@@ -988,10 +995,15 @@
       osec.relro = true;
     else
       rank |= RF_NOT_RELRO;
-    // Place .ldata and .lbss after .bss. Making .bss closer to .text alleviates
-    // relocation overflow pressure.
-    if (osec.flags & SHF_X86_64_LARGE && config->emachine == EM_X86_64)
-      rank |= RF_LARGE;
+    // Place .ldata and .lbss after .bss. Making .bss closer to .text
+    // alleviates relocation overflow pressure.
+    // For -z lrodata-after-bss, place .lbss/.lrodata/.ldata after .bss.
+    // .bss/.lbss being adjacent reuses the NOBITS size optimization.
+    if (osec.flags & SHF_X86_64_LARGE && config->emachine == EM_X86_64) {
+      rank |= config->zLrodataAfterBss
+                  ? (osec.type == SHT_NOBITS ? 1 : RF_LARGE_ALT)
+                  : RF_LARGE;
+    }
   }
 
   // Within TLS sections, or within other RelRo sections, or within non-RelRo
@@ -1103,7 +1115,7 @@
   }
 
   PhdrEntry *last = nullptr;
-  PhdrEntry *lastRO = nullptr;
+  OutputSection *lastRO = nullptr;
   auto isLarge = [](OutputSection *osec) {
     return config->emachine == EM_X86_64 && osec->flags & SHF_X86_64_LARGE;
   };
@@ -1112,17 +1124,18 @@
       if (p->p_type != PT_LOAD)
         continue;
       last = p;
-      if (!(p->p_flags & PF_W))
-        lastRO = p;
+      if (!(p->p_flags & PF_W) && p->lastSec && !isLarge(p->lastSec))
+        lastRO = p->lastSec;
     }
   }
 
   if (lastRO) {
-    // _etext is the first location after the last read-only loadable segment.
+    // _etext is the first location after the last read-only loadable segment
+    // that does not contain large sections.
     if (ElfSym::etext1)
-      ElfSym::etext1->section = lastRO->lastSec;
+      ElfSym::etext1->section = lastRO;
     if (ElfSym::etext2)
-      ElfSym::etext2->section = lastRO->lastSec;
+      ElfSym::etext2->section = lastRO;
   }
 
   if (last) {
diff --git a/docs/ld.lld.1 b/docs/ld.lld.1
index 12b17dd..e4d39e4 100644
--- a/docs/ld.lld.1
+++ b/docs/ld.lld.1
@@ -791,6 +791,9 @@
 During symbol resolution interposers are searched after the application
 but before other dependencies.
 .Pp
+.It Cm lrodata-after-bss
+Place .lrodata after .bss.
+.Pp
 .It Cm muldefs
 Do not error if a symbol is defined multiple times.
 The first definition will be used.
diff --git a/test/ELF/lto/codemodel.ll b/test/ELF/lto/codemodel.ll
index a35f877..cf7d0e4 100644
--- a/test/ELF/lto/codemodel.ll
+++ b/test/ELF/lto/codemodel.ll
@@ -2,8 +2,8 @@
 ; RUN: llvm-as %s -o %t.o
 ; RUN: ld.lld %t.o -o %ts -mllvm -code-model=small
 ; RUN: ld.lld %t.o -o %tl -mllvm -code-model=large
-; RUN: llvm-objdump --no-print-imm-hex -d %ts | FileCheck %s --check-prefix=CHECK-SMALL
-; RUN: llvm-objdump --no-print-imm-hex -d %tl | FileCheck %s --check-prefix=CHECK-LARGE
+; RUN: llvm-objdump -d %ts | FileCheck %s --check-prefix=CHECK-SMALL
+; RUN: llvm-objdump -d %tl | FileCheck %s --check-prefix=CHECK-LARGE
 
 target triple = "x86_64-unknown-linux-gnu"
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
@@ -13,8 +13,8 @@
 define ptr @_start() nounwind readonly {
 entry:
 ; CHECK-SMALL-LABEL:  <_start>:
-; CHECK-SMALL: movl    $2097440, %eax
+; CHECK-SMALL: movl    ${{.*}}, %eax
 ; CHECK-LARGE-LABEL: <_start>:
-; CHECK-LARGE: movabsq $2097440, %rax
+; CHECK-LARGE: movabsq ${{.*}}, %rax
     ret ptr @data
 }
diff --git a/test/ELF/x86-64-section-layout.s b/test/ELF/x86-64-section-layout.s
index 0ba6053..b03d3e6 100644
--- a/test/ELF/x86-64-section-layout.s
+++ b/test/ELF/x86-64-section-layout.s
@@ -12,9 +12,12 @@
 # RUN: ld.lld --section-start=.note=0x200300 a1.o -o a1
 # RUN: llvm-readelf -S -sX a1 | FileCheck %s --check-prefix=CHECK1
 
-# RUN: ld.lld -T b.lds -z norelro a.o -o b
+# RUN: ld.lld -T b.lds -z norelro a.o -z lrodata-after-bss -z nolrodata-after-bss -o b --fatal-warnings
 # RUN: llvm-readelf -S -l b | FileCheck %s --check-prefix=CHECK2
 
+# RUN: ld.lld --section-start=.note=0x200300 a.o -z lrodata-after-bss -o a3
+# RUN: llvm-readelf -S -l -sX a3 | FileCheck %s --check-prefix=CHECK3
+
 # CHECK:       Name              Type            Address          Off    Size   ES Flg Lk Inf Al
 # CHECK-NEXT:                    NULL            0000000000000000 000000 000000 00      0   0  0
 # CHECK-NEXT:  .note             NOTE            0000000000200300 000300 000001 00   A  0   0  1
@@ -80,6 +83,39 @@
 # CHECK2-NEXT:   LOAD  0x000305 0x0000000000200305 0x0000000000200305 0x001805 0x002a06 RW  0x1000
 # CHECK2-NEXT:   TLS   0x000305 0x0000000000200305 0x0000000000200305 0x000001 0x000003 R   0x1
 
+# CHECK3:       Name              Type            Address          Off    Size   ES Flg Lk Inf Al
+# CHECK3-NEXT:                    NULL            0000000000000000 000000 000000 00      0   0  0
+# CHECK3-NEXT:  .note             NOTE            0000000000200300 000300 000001 00   A  0   0  1
+# CHECK3-NEXT:  .rodata           PROGBITS        0000000000200301 000301 000001 00   A  0   0  1
+# CHECK3-NEXT:  .text             PROGBITS        0000000000201304 000304 000001 00  AX  0   0  4
+# CHECK3-NEXT:  .tdata            PROGBITS        0000000000202305 000305 000001 00 WAT  0   0  1
+# CHECK3-NEXT:  .tbss             NOBITS          0000000000202306 000306 000002 00 WAT  0   0  1
+# CHECK3-NEXT:  .relro_padding    NOBITS          0000000000202306 000306 000cfa 00  WA  0   0  1
+# CHECK3-NEXT:  .data             PROGBITS        0000000000203306 000306 000001 00  WA  0   0  1
+# CHECK3-NEXT:  .bss              NOBITS          0000000000203307 000307 001800 00  WA  0   0  1
+## We spend (size(.bss) + size(.lbss)) % MAXPAGESIZE bytes.
+# CHECK3-NEXT:  .lbss             NOBITS          0000000000204b07 000307 001201 00 WAl  0   0  1
+# CHECK3-NEXT:  .lrodata          PROGBITS        0000000000206d08 000d08 000002 00  Al  0   0  1
+# CHECK3-NEXT:  .ldata            PROGBITS        0000000000207d0a 000d0a 000002 00 WAl  0   0  1
+# CHECK3-NEXT:  .ldata2           PROGBITS        0000000000207d0c 000d0c 000001 00 WAl  0   0  1
+# CHECK3-NEXT:  .comment          PROGBITS        0000000000000000 000d0d {{.*}} 01  MS  0   0  1
+
+# CHECK3:       Program Headers:
+# CHECK3-NEXT:    Type  Offset   VirtAddr           PhysAddr           FileSiz  MemSiz   Flg Align
+# CHECK3-NEXT:    PHDR  0x000040 0x0000000000200040 0x0000000000200040 {{.*}}   {{.*}}   R   0x8
+# CHECK3-NEXT:    LOAD  0x000000 0x0000000000200000 0x0000000000200000 0x000302 0x000302 R   0x1000
+# CHECK3-NEXT:    LOAD  0x000304 0x0000000000201304 0x0000000000201304 0x000001 0x000001 R E 0x1000
+# CHECK3-NEXT:    LOAD  0x000305 0x0000000000202305 0x0000000000202305 0x000001 0x000cfb RW  0x1000
+# CHECK3-NEXT:    LOAD  0x000306 0x0000000000203306 0x0000000000203306 0x000001 0x002a02 RW  0x1000
+# CHECK3-NEXT:    LOAD  0x000d08 0x0000000000206d08 0x0000000000206d08 0x000002 0x000002 R   0x1000
+# CHECK3-NEXT:    LOAD  0x000d0a 0x0000000000207d0a 0x0000000000207d0a 0x000003 0x000003 RW  0x1000
+# CHECK3-NEXT:    TLS   0x000305 0x0000000000202305 0x0000000000202305 0x000001 0x000003 R   0x1
+
+# CHECK3:       0000000000201304     0 NOTYPE  GLOBAL DEFAULT [[#]] (.text)   _start
+# CHECK3-NEXT:  0000000000201305     0 NOTYPE  GLOBAL DEFAULT [[#]] (.text)   _etext
+# CHECK3-NEXT:  0000000000203307     0 NOTYPE  GLOBAL DEFAULT [[#]] (.data)   _edata
+# CHECK3-NEXT:  0000000000207d0d     0 NOTYPE  GLOBAL DEFAULT [[#]] (.ldata2) _end
+
 #--- a.s
 .globl _start, _etext, _edata, _end
 _start: