[ELF] Better resemble GNU ld when placing orphan sections into memory regions

An orphan section should be placed in the same memory region as its
anchor section if the latter specifies the memory region explicitly.
If there is no explicit assignment for the anchor section in the linker
script, its memory region is selected by matching attributes, and the
same should be done for the orphan section.

Before the patch, some scripts that were handled smoothly in GNU ld
caused an "error: no memory region specified for section" in lld.

Differential Revision: https://reviews.llvm.org/D112925

GitOrigin-RevId: d2dd36bbbe508ba97ab0adc5a834306f6fdc3a78
diff --git a/ELF/LinkerScript.cpp b/ELF/LinkerScript.cpp
index 189a703..1810d8a 100644
--- a/ELF/LinkerScript.cpp
+++ b/ELF/LinkerScript.cpp
@@ -882,34 +882,41 @@
 
 // This function searches for a memory region to place the given output
 // section in. If found, a pointer to the appropriate memory region is
-// returned. Otherwise, a nullptr is returned.
-MemoryRegion *LinkerScript::findMemoryRegion(OutputSection *sec) {
+// returned in the first member of the pair. Otherwise, that member is nullptr.
+// The second member of the pair is a hint that should be passed to the
+// subsequent call of this method.
+std::pair<MemoryRegion *, MemoryRegion *>
+LinkerScript::findMemoryRegion(OutputSection *sec, MemoryRegion *hint) {
   // If a memory region name was specified in the output section command,
   // then try to find that region first.
   if (!sec->memoryRegionName.empty()) {
     if (MemoryRegion *m = memoryRegions.lookup(sec->memoryRegionName))
-      return m;
+      return {m, m};
     error("memory region '" + sec->memoryRegionName + "' not declared");
-    return nullptr;
+    return {nullptr, nullptr};
   }
 
   // If at least one memory region is defined, all sections must
   // belong to some memory region. Otherwise, we don't need to do
   // anything for memory regions.
   if (memoryRegions.empty())
-    return nullptr;
+    return {nullptr, nullptr};
+
+  // An allocatable orphan section should continue the previous memory region.
+  if (sec->sectionIndex == UINT32_MAX && (sec->flags & SHF_ALLOC) && hint)
+    return {hint, hint};
 
   // See if a region can be found by matching section flags.
   for (auto &pair : memoryRegions) {
     MemoryRegion *m = pair.second;
     if ((m->flags & sec->flags) && (m->negFlags & sec->flags) == 0)
-      return m;
+      return {m, nullptr};
   }
 
   // Otherwise, no suitable region was found.
   if (sec->flags & SHF_ALLOC)
     error("no memory region specified for section '" + sec->name + "'");
-  return nullptr;
+  return {nullptr, nullptr};
 }
 
 static OutputSection *findFirstSection(PhdrEntry *load) {
@@ -1132,6 +1139,7 @@
 
 void LinkerScript::adjustSectionsAfterSorting() {
   // Try and find an appropriate memory region to assign offsets in.
+  MemoryRegion *hint = nullptr;
   for (BaseCommand *base : sectionCommands) {
     if (auto *sec = dyn_cast<OutputSection>(base)) {
       if (!sec->lmaRegionName.empty()) {
@@ -1140,7 +1148,7 @@
         else
           error("memory region '" + sec->lmaRegionName + "' not declared");
       }
-      sec->memRegion = findMemoryRegion(sec);
+      std::tie(sec->memRegion, hint) = findMemoryRegion(sec, hint);
     }
   }
 
diff --git a/ELF/LinkerScript.h b/ELF/LinkerScript.h
index d2487ae..e6fec02 100644
--- a/ELF/LinkerScript.h
+++ b/ELF/LinkerScript.h
@@ -272,7 +272,8 @@
 
   std::vector<size_t> getPhdrIndices(OutputSection *sec);
 
-  MemoryRegion *findMemoryRegion(OutputSection *sec);
+  std::pair<MemoryRegion *, MemoryRegion *>
+  findMemoryRegion(OutputSection *sec, MemoryRegion *hint);
 
   void switchTo(OutputSection *sec);
   uint64_t advance(uint64_t size, unsigned align);
diff --git a/ELF/Writer.cpp b/ELF/Writer.cpp
index ff16922..19a71ad 100644
--- a/ELF/Writer.cpp
+++ b/ELF/Writer.cpp
@@ -1256,10 +1256,13 @@
   // Consider all existing sections with the same proximity.
   int proximity = getRankProximity(sec, *i);
   unsigned sortRank = sec->sortRank;
-  if (script->hasPhdrsCommands())
-    // Prevent the orphan section to be placed before the found section because
-    // that can result in adding it to a previous segment and changing flags of
-    // that segment, for example, making a read-only segment writable.
+  if (script->hasPhdrsCommands() || !script->memoryRegions.empty())
+    // Prevent the orphan section from being placed before the found section. If
+    // custom program headers are defined, that helps to avoid adding it to a
+    // previous segment and changing flags of that segment, for example, making
+    // a read-only segment writable. If memory regions are defined, an orphan
+    // section should continue the same region as the found section to better
+    // resemble the behavior of GNU ld.
     sortRank = std::max(sortRank, foundSec->sortRank);
   for (; i != e; ++i) {
     auto *curSec = dyn_cast<OutputSection>(*i);
diff --git a/test/ELF/linkerscript/orphan-memory.test b/test/ELF/linkerscript/orphan-memory.test
new file mode 100644
index 0000000..77c0326
--- /dev/null
+++ b/test/ELF/linkerscript/orphan-memory.test
@@ -0,0 +1,118 @@
+REQUIRES: x86
+
+RUN: split-file %s %ts
+RUN: llvm-mc -filetype=obj -triple=x86_64 %ts/s -o %t.o
+
+## Check that despite having a lower sort rank, an orphan section '.init_array'
+## is placed after '.data' and '.data2' and in the same memory region.
+
+## Also check that a non-SHF_ALLOC orphan section '.nonalloc' is not placed in
+## a memory region. Both defined memory regions are exhausted after all expected
+## sections are added, thus, trying to put any unexpected section would lead to
+## an error.
+
+RUN: ld.lld -o %t -T %ts/t %t.o
+RUN: llvm-readelf -S %t | FileCheck %s
+
+CHECK: Name        Type       Address          Off           Size
+CHECK: .text       PROGBITS   0000000000008000 {{[0-9a-f]+}} 000004
+CHECK: .data       PROGBITS   0000000000009000 {{[0-9a-f]+}} 000008
+CHECK: .data2      PROGBITS   0000000000009008 {{[0-9a-f]+}} 00000c
+CHECK: .init_array INIT_ARRAY 0000000000009014 {{[0-9a-f]+}} 000010
+CHECK: .nonalloc   PROGBITS   0000000000000000 {{[0-9a-f]+}} 000010
+
+## Check that attributes of memory regions are ignored for orphan sections when
+## the anchor section specifies the memory region explicitly. This seems to
+## contradict https://sourceware.org/binutils/docs/ld/MEMORY.html, but better
+## resembles the way GNU ld actually works.
+
+RUN: ld.lld -o %t2 -T %ts/t2 %t.o
+RUN: llvm-readelf -S %t2 | FileCheck %s
+
+## Same as the previous case, but now properties of sections conflict with
+## memory region attributes. Still, orphan sections are placed in the same
+## regions as their anchors.
+
+RUN: ld.lld -o %t3 -T %ts/t3 %t.o
+RUN: llvm-readelf -S %t3 | FileCheck %s
+
+## Check that when memory regions for anchor sections are not specified
+## explicitly and are selected by attributes, orphan sections are also assigned
+## to memory regions by matching properties.
+
+RUN: ld.lld -o %t4 -T %ts/t4 %t.o
+RUN: llvm-readelf -S %t4 | FileCheck %s --check-prefix=CHECK4
+
+CHECK4: Name        Type       Address          Off           Size
+CHECK4: .text       PROGBITS   0000000000008000 {{[0-9a-f]+}} 000004
+CHECK4: .init_array INIT_ARRAY 0000000000009000 {{[0-9a-f]+}} 000010
+CHECK4: .data       PROGBITS   0000000000009010 {{[0-9a-f]+}} 000008
+CHECK4: .data2      PROGBITS   0000000000009018 {{[0-9a-f]+}} 00000c
+CHECK4: .nonalloc   PROGBITS   0000000000000000 {{[0-9a-f]+}} 000010
+
+#--- s
+  .text
+  .zero 4
+
+  .data
+  .zero 8
+
+  .section .data2,"aw",@progbits
+  .zero 0xc
+
+  .section .init_array,"aw",@init_array
+  .zero 0x10
+
+  .section .nonalloc,""
+  .zero 0x10
+
+#--- t
+MEMORY
+{
+  TEXT : ORIGIN = 0x8000, LENGTH = 0x4
+  DATA : ORIGIN = 0x9000, LENGTH = 0x24
+}
+
+SECTIONS
+{
+  .text : { *(.text) } > TEXT
+  .data : { *(.data) } > DATA
+}
+
+#--- t2
+MEMORY
+{
+  TEXT (rwx) : ORIGIN = 0x8000, LENGTH = 0x4
+  DATA (rwx) : ORIGIN = 0x9000, LENGTH = 0x24
+}
+
+SECTIONS
+{
+  .text : { *(.text) } > TEXT
+  .data : { *(.data) } > DATA
+}
+
+#--- t3
+MEMORY
+{
+  TEXT (!w) : ORIGIN = 0x8000, LENGTH = 0x4
+  DATA (!w) : ORIGIN = 0x9000, LENGTH = 0x24
+}
+
+SECTIONS
+{
+  .text : { *(.text) } > TEXT
+  .data : { *(.data) } > DATA
+}
+
+#--- t4
+MEMORY
+{
+  TEXT (rx)  : ORIGIN = 0x8000, LENGTH = 0x4
+  DATA (w!x) : ORIGIN = 0x9000, LENGTH = 0x24
+}
+
+SECTIONS
+{
+  .text : { *(.text) }
+}