[LLD][PowerPC] Fix bug in PC-Relative initial exec

There is a bug when initial exec is relaxed to local exec in the following
situation:

extern __thread unsigned TGlobal;
unsigned getConst(unsigned*);
unsigned addVal(unsigned, unsigned*);

unsigned GetAddrT() {
  return addVal(getConst(&TGlobal), &TGlobal);
}

__thread unsigned TGlobal;

unsigned getConst(unsigned* A) {
  return *A + 3;
}

unsigned addVal(unsigned A, unsigned* B) {
  return A + *B;
}

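The problem is in InitExec.c (the first listing, which defines GetAddrT), but
Def.c (the second listing, which defines TGlobal, getConst and addVal) is
required if you want to link the example and see the problem.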
To compile everything:
clang -O3 -mcpu=pwr10 -c InitExec.c
clang -O3 -mcpu=pwr10 -c Def.c
ld.lld InitExec.o Def.o -o IeToLe

If you objdump the problem object file:
$ llvm-objdump -dr --mcpu=pwr10 InitExec.o
you will get the following assembly:
0000000000000000 <GetAddrT>:
       0: a6 02 08 7c  	mflr 0
       4: f0 ff c1 fb  	std 30, -16(1)
       8: 10 00 01 f8  	std 0, 16(1)
       c: d1 ff 21 f8  	stdu 1, -48(1)
      10: 00 00 10 04 00 00 60 e4      	pld 3, 0(0), 1
		0000000000000010:  R_PPC64_GOT_TPREL_PCREL34	TGlobal
      18: 14 6a c3 7f  	add 30, 3, 13
		0000000000000019:  R_PPC64_TLS	TGlobal
      1c: 78 f3 c3 7f  	mr	3, 30
      20: 01 00 00 48  	bl 0x20
		0000000000000020:  R_PPC64_REL24_NOTOC	getConst
      24: 78 f3 c4 7f  	mr	4, 30
      28: 30 00 21 38  	addi 1, 1, 48
      2c: 10 00 01 e8  	ld 0, 16(1)
      30: f0 ff c1 eb  	ld 30, -16(1)
      34: a6 03 08 7c  	mtlr 0
      38: 00 00 00 48  	b 0x38
		0000000000000038:  R_PPC64_REL24_NOTOC	addVal
The lines of interest are:
      10: 00 00 10 04 00 00 60 e4      	pld 3, 0(0), 1
		0000000000000010:  R_PPC64_GOT_TPREL_PCREL34	TGlobal
      18: 14 6a c3 7f  	add 30, 3, 13
		0000000000000019:  R_PPC64_TLS	TGlobal
      1c: 78 f3 c3 7f  	mr	3, 30
Once linked, these instructions are turned into:
10010210: ff ff 03 06 00 90 6d 38      	paddi 3, 13, -28672, 0
10010218: 00 00 00 60  	nop
1001021c: 78 f3 c3 7f  	mr	3, 30
The problem is that register 30 is never set after the optimization.

Therefore it is not correct to relax the above instructions by replacing
the add instruction with a nop.
Instead the add instruction should be replaced with a copy (mr) instruction.
If the add uses the same register as input and as output then it is safe to
continue to replace the add with a nop.

Reviewed By: MaskRay

Differential Revision: https://reviews.llvm.org/D95262

GitOrigin-RevId: f21704e080a04580ef837822244a624c6e2e7cac
diff --git a/ELF/Arch/PPC64.cpp b/ELF/Arch/PPC64.cpp
index 03ecc81..a0c2d16 100644
--- a/ELF/Arch/PPC64.cpp
+++ b/ELF/Arch/PPC64.cpp
@@ -920,7 +920,15 @@
       // that comes before it will already have computed the address of the
       // symbol.
       if (secondaryOp == 266) {
-        write32(loc - 1, NOP);
+        // Check if the add uses the same result register as the input register.
+        uint32_t rt = (tlsInstr & 0x03E00000) >> 21; // bits 6-10
+        uint32_t ra = (tlsInstr & 0x001F0000) >> 16; // bits 11-15
+        if (ra == rt) {
+          write32(loc - 1, NOP);
+        } else {
+          // mr rt, ra
+          write32(loc - 1, 0x7C000378 | (rt << 16) | (ra << 21) | (ra << 11));
+        }
       } else {
         uint32_t dFormOp = getPPCDFormOp(secondaryOp);
         if (dFormOp == 0)
diff --git a/test/ELF/ppc64-tls-le-relax.s b/test/ELF/ppc64-tls-le-relax.s
new file mode 100644
index 0000000..3c3bc33
--- /dev/null
+++ b/test/ELF/ppc64-tls-le-relax.s
@@ -0,0 +1,81 @@
+# REQUIRES: ppc
+# RUN: split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=powerpc64le %t/initexec -o %t/initexec.o
+# RUN: llvm-mc -filetype=obj -triple=powerpc64le %t/defs -o %t/defs.o
+# RUN: ld.lld %t/initexec.o %t/defs.o -o %t/out
+# RUN: llvm-objdump -d --mcpu=pwr10 --no-show-raw-insn %t/out | FileCheck %s
+# CHECK-LABEL: <GetAddrT>:
+# CHECK:         mflr 0
+# CHECK-NEXT:    std 30, -16(1)
+# CHECK-NEXT:    std 0, 16(1)
+# CHECK-NEXT:    stdu 1, -48(1)
+# CHECK-NEXT:    paddi 3, 13, -28672, 0
+# CHECK-NEXT:    mr 30, 3
+# CHECK-NEXT:    mr 3, 30
+# CHECK-NEXT:    bl
+# CHECK-NEXT:    mr 4, 30
+# CHECK-NEXT:    addi 1, 1, 48
+# CHECK-NEXT:    ld 0, 16(1)
+# CHECK-NEXT:    ld 30, -16(1)
+# CHECK-NEXT:    mtlr 0
+# CHECK-NEXT:    b
+## Generated From:
+## extern __thread unsigned TGlobal;
+## unsigned getConst(unsigned*);
+## unsigned addVal(unsigned, unsigned*);
+## unsigned GetAddrT() {
+##   return addVal(getConst(&TGlobal), &TGlobal);
+## }
+//--- initexec
+  mflr 0
+  std 30, -16(1)
+  std 0, 16(1)
+  stdu 1, -48(1)
+  pld 3, TGlobal@got@tprel@pcrel(0), 1
+  add 30, 3, TGlobal@tls@pcrel
+  mr      3, 30
+  bl getConst@notoc
+  mr      4, 30
+  addi 1, 1, 48
+  ld 0, 16(1)
+  ld 30, -16(1)
+  mtlr 0
+  b addVal@notoc
+## Generated From:
+## __thread unsigned TGlobal;
+## unsigned getConst(unsigned* A) {
+##   return *A + 3;
+## }
+## unsigned addVal(unsigned A, unsigned* B) {
+##   return A + *B;
+## }
+//--- defs
+.globl  getConst
+  lwz 3, 0(3)
+  addi 3, 3, 3
+  clrldi  3, 3, 32
+  blr
+.globl  addVal
+  lwz 4, 0(4)
+  add 3, 4, 3
+  clrldi  3, 3, 32
+  blr
+.section        .tbss,"awT",@nobits
+.globl  TGlobal
+.p2align        2
+  .long   0
+  .size   TGlobal, 4
diff --git a/test/ELF/ppc64-tls-pcrel-ie.s b/test/ELF/ppc64-tls-pcrel-ie.s
index 93a286a..eaa8619 100644
--- a/test/ELF/ppc64-tls-pcrel-ie.s
+++ b/test/ELF/ppc64-tls-pcrel-ie.s
@@ -54,9 +54,9 @@
 # LE-RELOC: There are no relocations in this file.
-# LE-SYM: Symbol table '.symtab' contains 7 entries:
-# LE-SYM: 5: 0000000000000000     0 TLS     GLOBAL DEFAULT     6 x
-# LE-SYM: 6: 0000000000000004     0 TLS     GLOBAL DEFAULT     6 y
+# LE-SYM: Symbol table '.symtab' contains 8 entries:
+# LE-SYM: 6: 0000000000000000     0 TLS     GLOBAL DEFAULT     6 x
+# LE-SYM: 7: 0000000000000004     0 TLS     GLOBAL DEFAULT     6 y
 # LE-GOT: could not find section '.got'
@@ -74,6 +74,20 @@
 	add 3, 3, x@tls@pcrel
+# IE-LABEL: <IEAddrCopy>:
+# IE-NEXT:    pld 3, 12488(0), 1
+# IE-NEXT:    add 4, 3, 13
+# IE-NEXT:    blr
+# LE-LABEL: <IEAddrCopy>:
+# LE-NEXT:    paddi 3, 13, -28672, 0
+# LE-NEXT:    mr 4, 3
+# LE-NEXT:    blr
+.section .text_addr, "ax", %progbits
+	pld 3, x@got@tprel@pcrel(0), 1
+	add 4, 3, x@tls@pcrel
+	blr
 # IE-LABEL: <IEVal>:
 # IE-NEXT:    pld 3, 8408(0), 1
 # IE-NEXT:    lwzx 3, 3, 13