Merging r361090:

------------------------------------------------------------------------
r361090 | maskray | 2019-05-17 20:16:00 -0700 (Fri, 17 May 2019) | 34 lines

[ARM][AArch64] Revert Android Bionic PT_TLS overaligning hack

This reverts D53906.

D53906 increased p_align of PT_TLS on ARM/AArch64 to 32/64 to make the
static TLS layout compatible with Android Bionic's ELF TLS. However,
this may cause glibc ARM/AArch64 programs to crash (see PR41527).

The faulty PT_TLS in the executable satisfies p_vaddr%p_align != 0. The
remainder is normally 0 but may be non-zero with the hack in place. The
problem is that we increase PT_TLS's p_align after OutputSections'
addresses are fixed (assignAddress()). It is possible that
p_vaddr%old_p_align == 0 while p_vaddr%new_p_align != 0.

For a thread local variable defined in the executable, the TLS offset
computed by lld (local exec) differs from the TLS offset glibc computes
for another module (initial exec/general dynamic). Note: PR41527 said
the bug affects initial exec but actually general dynamic is affected as
well.

(glibc is correct in that it computes offsets that satisfy
`offset%p_align == p_vaddr%p_align`, which is a basic ELF requirement.
This hack appears to work on FreeBSD rtld, musl<=1.1.22, and Bionic, but
that is just because they (and lld) incorrectly compute offsets that
satisfy `offset%p_align == 0` instead.)

Android developers are fine with reverting D53906 here and carrying it
in their tree until they figure out a long-term solution (e.g. a dummy
.tdata with sh_addralign=64 sh_size={0,1} in crtbegin*.o files. The
overhead is now insignificant after D62059).

Reviewed By: rprichard, srhines

Differential Revision: https://reviews.llvm.org/D62055
------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/lld/branches/release_80@362858 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/ELF/InputSection.cpp b/ELF/InputSection.cpp
index 4148413..ca2f49c 100644
--- a/ELF/InputSection.cpp
+++ b/ELF/InputSection.cpp
@@ -580,10 +580,6 @@
     // Variant 1. The thread pointer points to a TCB with a fixed 2-word size,
     // followed by a variable amount of alignment padding, followed by the TLS
     // segment.
-    //
-    // NB: While the ARM/AArch64 ABI formally has a 2-word TCB size, lld
-    // effectively increases the TCB size to 8 words for Android compatibility.
-    // It accomplishes this by increasing the segment's alignment.
     return alignTo(Config->Wordsize * 2, Out::TlsPhdr->p_align);
   case EM_386:
   case EM_X86_64:
diff --git a/ELF/Writer.cpp b/ELF/Writer.cpp
index a3ef3f1..36ba025 100644
--- a/ELF/Writer.cpp
+++ b/ELF/Writer.cpp
@@ -2197,17 +2197,6 @@
     }
 
     if (P->p_type == PT_TLS && P->p_memsz) {
-      if (!Config->Shared &&
-          (Config->EMachine == EM_ARM || Config->EMachine == EM_AARCH64)) {
-        // On ARM/AArch64, reserve extra space (8 words) between the thread
-        // pointer and an executable's TLS segment by overaligning the segment.
-        // This reservation is needed for backwards compatibility with Android's
-        // TCB, which allocates several slots after the thread pointer (e.g.
-        // TLS_SLOT_STACK_GUARD==5). For simplicity, this overalignment is also
-        // done on other operating systems.
-        P->p_align = std::max<uint64_t>(P->p_align, Config->Wordsize * 8);
-      }
-
       // The TLS pointer goes after PT_TLS for variant 2 targets. At least glibc
       // will align it, so round up the size to make sure the offsets are
       // correct.
diff --git a/test/ELF/aarch64-cortex-a53-843419-tlsrelax.s b/test/ELF/aarch64-cortex-a53-843419-tlsrelax.s
index 2db5c7e..bff72d3 100644
--- a/test/ELF/aarch64-cortex-a53-843419-tlsrelax.s
+++ b/test/ELF/aarch64-cortex-a53-843419-tlsrelax.s
@@ -26,9 +26,9 @@
 // CHECK: _start:
 // CHECK-NEXT:   210ff8:        41 d0 3b d5     mrs     x1, TPIDR_EL0
 // CHECK-NEXT:   210ffc:        00 00 a0 d2     movz    x0, #0, lsl #16
-// CHECK-NEXT:   211000:        01 08 80 f2     movk    x1, #64
+// CHECK-NEXT:   211000:        01 02 80 f2     movk    x1, #16
 // CHECK-NEXT:   211004:        00 00 a0 d2     movz    x0, #0, lsl #16
-// CHECK-NEXT:   211008:        01 08 80 f2     movk    x1, #64
+// CHECK-NEXT:   211008:        01 02 80 f2     movk    x1, #16
 // CHECK-NEXT:   21100c:        c0 03 5f d6     ret
 
  .type  v,@object
diff --git a/test/ELF/aarch64-tls-gdle.s b/test/ELF/aarch64-tls-gdle.s
index 882ec8c..19fdc1d 100644
--- a/test/ELF/aarch64-tls-gdle.s
+++ b/test/ELF/aarch64-tls-gdle.s
@@ -9,11 +9,11 @@
 #RELOC:      Relocations [
 #RELOC-NEXT: ]
 
-# TCB size = 64 and foo is first element from TLS register.
+# TCB size = 16 and foo is first element from TLS register.
 # CHECK: Disassembly of section .text:
 # CHECK: _start:
 # CHECK:  210000:	00 00 a0 d2	movz	x0, #0, lsl #16
-# CHECK:  210004:	00 08 80 f2 	movk	x0, #64
+# CHECK:  210004:	00 02 80 f2 	movk	x0, #16
 # CHECK:  210008:	1f 20 03 d5 	nop
 # CHECK:  21000c:	1f 20 03 d5 	nop
 
diff --git a/test/ELF/aarch64-tls-iele.s b/test/ELF/aarch64-tls-iele.s
index 0229d66..9fec4ee 100644
--- a/test/ELF/aarch64-tls-iele.s
+++ b/test/ELF/aarch64-tls-iele.s
@@ -9,13 +9,13 @@
 # RELOC:      Relocations [
 # RELOC-NEXT: ]
 
-# TCB size = 64 and foo is first element from TLS register.
+# TCB size = 16 and foo is first element from TLS register.
 # CHECK: Disassembly of section .text:
 # CHECK: _start:
 # CHECK-NEXT: 210000:  00 00 a0 d2   movz   x0, #0, lsl #16
-# CHECK-NEXT: 210004:  80 08 80 f2   movk   x0, #68
+# CHECK-NEXT: 210004:  80 02 80 f2   movk   x0, #20
 # CHECK-NEXT: 210008:  00 00 a0 d2   movz   x0, #0, lsl #16
-# CHECK-NEXT: 21000c:  00 08 80 f2   movk   x0, #64
+# CHECK-NEXT: 21000c:  00 02 80 f2   movk   x0, #16
 
 .section .tdata
 .align 2
diff --git a/test/ELF/aarch64-tls-le.s b/test/ELF/aarch64-tls-le.s
index 49c322f..eda1375 100644
--- a/test/ELF/aarch64-tls-le.s
+++ b/test/ELF/aarch64-tls-le.s
@@ -17,12 +17,12 @@
  add x0, x0, :tprel_hi12:v2
  add x0, x0, :tprel_lo12_nc:v2
 
-# TCB size = 64 and foo is first element from TLS register.
+# TCB size = 16 and foo is first element from TLS register.
 #CHECK: Disassembly of section .text:
 #CHECK: _start:
 #CHECK:  210000: 40 d0 3b d5     mrs     x0, TPIDR_EL0
 #CHECK:  210004: 00 00 40 91     add     x0, x0, #0, lsl #12
-#CHECK:  210008: 00 00 01 91     add     x0, x0, #64
+#CHECK:  210008: 00 40 00 91     add     x0, x0, #16
 #CHECK:  21000c: 40 d0 3b d5     mrs     x0, TPIDR_EL0
 #CHECK:  210010: 00 fc 7f 91     add     x0, x0, #4095, lsl #12
 #CHECK:  210014: 00 e0 3f 91     add     x0, x0, #4088
@@ -36,9 +36,9 @@
 .word  0
 .size  v1, 4
 
-# The current offset from the thread pointer is 68. Raise it to just below the
+# The current offset from the thread pointer is 20. Raise it to just below the
 # 24-bit limit.
-.space (0xfffff8 - 68)
+.space (0xfffff8 - 20)
 
 .type   v2,@object
 .globl  v2
diff --git a/test/ELF/aarch64-tlsld-ldst.s b/test/ELF/aarch64-tlsld-ldst.s
index 8ebdc2f..3144ca5 100644
--- a/test/ELF/aarch64-tlsld-ldst.s
+++ b/test/ELF/aarch64-tlsld-ldst.s
@@ -26,27 +26,27 @@
 
 // CHECK: _start:
 // CHECK-NEXT:    210000:       48 d0 3b d5     mrs     x8, TPIDR_EL0
-// 0x0 + c40 = 0xc40       = tcb (64-bytes) + var0
-// CHECK-NEXT:    210004:       08 01 40 91     add x8, x8, #0, lsl #12
-// CHECK-NEXT:    210008:       14 11 c3 3d     ldr q20, [x8, #3136]
-// 0x1000 + 0x850 = 0x1850 = tcb + var1
-// CHECK-NEXT:    21000c:       08 05 40 91     add x8, x8, #1, lsl #12
-// CHECK-NEXT:    210010:       00 29 44 f9     ldr x0, [x8, #2128]
-// 0x2000 + 0x458 = 0x2458 = tcb + var2
-// CHECK-NEXT:    210014:       08 09 40 91     add x8, x8, #2, lsl #12
-// CHECK-NEXT:    210018:       00 59 44 b9     ldr w0, [x8, #1112]
-// 0x3000 + 0x5c  = 0x305c = tcb + var3
-// CHECK-NEXT:    21001c:       08 0d 40 91     add x8, x8, #3, lsl #12
-// CHECK-NEXT:    210020:       00 b9 40 79     ldrh  w0, [x8, #92]
-// 0x3000 + 0xc5e = 0x3c5e = tcb + var4
-// CHECK-NEXT:    210024:       08 0d 40 91     add x8, x8, #3, lsl #12
-// CHECK-NEXT:    210028:       00 79 71 39     ldrb  w0, [x8, #3166]
+// 0x0 + 0xc10 = 0xc10     = tcb (16 bytes) + var0
+// CHECK-NEXT:    210004:       08 01 40 91     add     x8, x8, #0, lsl #12
+// CHECK-NEXT:    210008:       14 05 c3 3d     ldr     q20, [x8, #3088]
+// 0x1000 + 0x820 = 0x1820 = tcb + var1
+// CHECK-NEXT:    21000c:       08 05 40 91     add     x8, x8, #1, lsl #12
+// CHECK-NEXT:    210010:       00 11 44 f9     ldr     x0, [x8, #2080]
+// 0x2000 + 0x428 = 0x2428 = tcb + var2
+// CHECK-NEXT:    210014:       08 09 40 91     add     x8, x8, #2, lsl #12
+// CHECK-NEXT:    210018:       00 29 44 b9     ldr     w0, [x8, #1064]
+// 0x3000 + 0x2c  = 0x302c = tcb + var3
+// CHECK-NEXT:    21001c:       08 0d 40 91     add     x8, x8, #3, lsl #12
+// CHECK-NEXT:    210020:       00 59 40 79     ldrh    w0, [x8, #44]
+// 0x3000 + 0xc2e = 0x3c2e = tcb + var4
+// CHECK-NEXT:    210024:       08 0d 40 91     add     x8, x8, #3, lsl #12
+// CHECK-NEXT:    210028:       00 b9 70 39     ldrb    w0, [x8, #3118]
 
-// CHECK-SYMS:      0000000000000c00    16 TLS     GLOBAL DEFAULT    2 var0
-// CHECK-SYMS-NEXT: 0000000000001810     8 TLS     GLOBAL DEFAULT    2 var1
-// CHECK-SYMS-NEXT: 0000000000002418     4 TLS     GLOBAL DEFAULT    2 var2
-// CHECK-SYMS-NEXT: 000000000000301c     2 TLS     GLOBAL DEFAULT    2 var3
-// CHECK-SYMS-NEXT: 0000000000003c1e     1 TLS     GLOBAL DEFAULT    2 var4
+// CHECK-SYMS:      0000000000000c00     0 TLS     GLOBAL DEFAULT    2 var0
+// CHECK-SYMS-NEXT: 0000000000001810     4 TLS     GLOBAL DEFAULT    2 var1
+// CHECK-SYMS-NEXT: 0000000000002418     2 TLS     GLOBAL DEFAULT    2 var2
+// CHECK-SYMS-NEXT: 000000000000301c     1 TLS     GLOBAL DEFAULT    2 var3
+// CHECK-SYMS-NEXT: 0000000000003c1e     0 TLS     GLOBAL DEFAULT    2 var4
 
         .globl var0
         .globl var1
@@ -59,12 +59,12 @@
         .type var3,@object
 
 .section .tbss,"awT",@nobits
-        .balign 64
+        .balign 16
         .space 1024 * 3
 var0:
         .quad 0
         .quad 0
-        .size var0, 16
+        .size var1, 16
         .space 1024 * 3
 var1:
         .quad 0
@@ -72,14 +72,14 @@
         .space 1024 * 3
 var2:
         .word 0
-        .size var2, 4
+        .size var1, 4
 
         .space 1024 * 3
 var3:
         .hword 0
-        .size var3, 2
+        .size var2, 2
         .space 1024 * 3
 var4:
         .byte 0
-        .size var4, 1
+        .size var3, 1
         .space 1024 * 3
diff --git a/test/ELF/arm-tls-le32.s b/test/ELF/arm-tls-le32.s
index f9a5fa9..7834ded 100644
--- a/test/ELF/arm-tls-le32.s
+++ b/test/ELF/arm-tls-le32.s
@@ -69,9 +69,9 @@
 
 // CHECK: Disassembly of section .text:
 // CHECK-NEXT: _start:
-// offset of x from Thread pointer = (TcbSize + 0x0 = 0x20)
-// CHECK-NEXT:   11000:         20 00 00 00
-// offset of z from Thread pointer = (TcbSize + 0x8 = 0x28)
-// CHECK-NEXT:   11004:         28 00 00 00
-// offset of y from Thread pointer = (TcbSize + 0x4 = 0x24)
-// CHECK-NEXT:   11008:         24 00 00 00
+// offset of x from Thread pointer = (TcbSize + 0x0 = 0x8)
+// CHECK-NEXT:   11000:         08 00 00 00
+// offset of z from Thread pointer = (TcbSize + 0x8 = 0x10)
+// CHECK-NEXT:   11004:         10 00 00 00
+// offset of y from Thread pointer = (TcbSize + 0x4 = 0xc)
+// CHECK-NEXT:   11008:         0c 00 00 00
diff --git a/test/ELF/arm-tls-norelax-ie-le.s b/test/ELF/arm-tls-norelax-ie-le.s
index 11c3e4f..4a52f54 100644
--- a/test/ELF/arm-tls-norelax-ie-le.s
+++ b/test/ELF/arm-tls-norelax-ie-le.s
@@ -37,5 +37,5 @@
  .type x2, %object
 
 // CHECK: Contents of section .got:
-// x1 at offset 0x20 from TP, x2 at offset 0x24 from TP. Offsets include TCB size of 0x20
-// CHECK-NEXT: 13064 20000000 24000000
+// x1 at offset 8 from TP, x2 at offset 0xc from TP. Offsets include TCB size of 8
+// CHECK-NEXT: 13064 08000000 0c000000