[ELF][ARM] Add support for architecture v6m thunks

ARM Architecture v6m is used by the smallest microcontrollers such as the
Cortex-M0. It is Thumb only (no Thumb 2), which prevents it from using the
existing Thumb 2 range extension thunks as these use the Thumb 2 movt/movw
instructions. Range extension thunks are not usually needed for
microcontrollers due to the small amount of flash and RAM on the device;
however, if code is copied from flash into RAM then a range extension thunk
is required to call that code.

This change adds support for v6m range extension thunks. The procedure call
standard AAPCS permits a thunk to corrupt the intra-procedure-call scratch
register r12 (referred to as ip in the AAPCS). Most Thumb instructions do
not permit access to high registers (r8 - r15) so the thunks must spill
some low registers (r0 - r7) to perform the control transfer.

Fixes pr39922

Differential Revision: https://reviews.llvm.org/D55555

git-svn-id: https://llvm.org/svn/llvm-project/lld/trunk@349337 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/ELF/Thunks.cpp b/ELF/Thunks.cpp
index c8e7dca..5486f23 100644
--- a/ELF/Thunks.cpp
+++ b/ELF/Thunks.cpp
@@ -184,6 +184,25 @@
   bool isCompatibleWith(uint32_t RelocType) const override;
+// Implementations of Thunks for Arm v6-M. Only Thumb instructions are permitted
+class ThumbV6MABSLongThunk final : public ThumbThunk {
+  ThumbV6MABSLongThunk(Symbol &Dest) : ThumbThunk(Dest) {}
+  uint32_t sizeLong() override { return 12; }
+  void writeLong(uint8_t *Buf) override;
+  void addSymbols(ThunkSection &IS) override;
+class ThumbV6MPILongThunk final : public ThumbThunk {
+  ThumbV6MPILongThunk(Symbol &Dest) : ThumbThunk(Dest) {}
+  uint32_t sizeLong() override { return 16; }
+  void writeLong(uint8_t *Buf) override;
+  void addSymbols(ThunkSection &IS) override;
 // MIPS LA25 thunk
 class MipsThunk final : public Thunk {
@@ -544,6 +563,56 @@
   return RelocType != R_ARM_THM_JUMP19 && RelocType != R_ARM_THM_JUMP24;
+void ThumbV6MABSLongThunk::writeLong(uint8_t *Buf) {
+  // Most Thumb instructions cannot access the high registers r8 - r15. As the
+  // only register we can corrupt is r12 we must instead spill a low register
+  // to the stack to use as a scratch register. We push r1, even though we do
+  // not need it, to get a stack slot to hold the address that is popped to pc.
+  const uint8_t Data[] = {
+      0x03, 0xb4,            // push {r0, r1} ; Obtain scratch registers
+      0x01, 0x48,            // ldr r0, [pc, #4] ; L1
+      0x01, 0x90,            // str r0, [sp, #4] ; SP + 4 = S
+      0x01, 0xbd,            // pop {r0, pc} ; restore r0 and branch to dest
+      0x00, 0x00, 0x00, 0x00 // L1: .word S
+  };
+  uint64_t S = getARMThunkDestVA(Destination);
+  memcpy(Buf, Data, sizeof(Data));
+  Target->relocateOne(Buf + 8, R_ARM_ABS32, S);
+void ThumbV6MABSLongThunk::addSymbols(ThunkSection &IS) {
+  addSymbol(Saver.save("__Thumbv6MABSLongThunk_" + Destination.getName()),
+            STT_FUNC, 1, IS);
+  addSymbol("$t", STT_NOTYPE, 0, IS);
+  addSymbol("$d", STT_NOTYPE, 8, IS);
+void ThumbV6MPILongThunk::writeLong(uint8_t *Buf) {
+  // Most Thumb instructions cannot access the high registers r8 - r15. As the
+  // only register we can corrupt is ip (r12) we must instead spill a low
+  // register to the stack to use as a scratch register.
+  const uint8_t Data[] = {
+      0x01, 0xb4,             // P:  push {r0}        ; Obtain scratch register
+      0x02, 0x48,             //     ldr r0, [pc, #8] ; L2
+      0x84, 0x46,             //     mov ip, r0       ; low to high register
+      0x01, 0xbc,             //     pop {r0}         ; restore scratch register
+      0xe7, 0x44,             // L1: add pc, ip       ; transfer control
+      0xc0, 0x46,             //     nop              ; pad to 4-byte boundary
+      0x00, 0x00, 0x00, 0x00, // L2: .word S - (P + (L1 - P) + 4)
+  };
+  uint64_t S = getARMThunkDestVA(Destination);
+  uint64_t P = getThunkTargetSym()->getVA() & ~0x1;
+  memcpy(Buf, Data, sizeof(Data));
+  Target->relocateOne(Buf + 12, R_ARM_REL32, S - P - 12);
+void ThumbV6MPILongThunk::addSymbols(ThunkSection &IS) {
+  addSymbol(Saver.save("__Thumbv6MPILongThunk_" + Destination.getName()),
+            STT_FUNC, 1, IS);
+  addSymbol("$t", STT_NOTYPE, 0, IS);
+  addSymbol("$d", STT_NOTYPE, 12, IS);
 // Write MIPS LA25 thunk code to call PIC function from the non-PIC one.
 void MipsThunk::writeTo(uint8_t *Buf) {
   uint64_t S = Destination.getVA();
@@ -678,6 +747,24 @@
         " not supported for Armv5 or Armv6 targets");
+// Create a thunk for Thumb long branch on V6-M.
+// Arm Architecture v6-M only supports Thumb instructions. This means
+// - MOVT and MOVW instructions cannot be used.
+// - Only a limited number of instructions can access registers r8 and above
+// - No interworking support is needed (all Thumb).
+static Thunk *addThunkV6M(RelType Reloc, Symbol &S) {
+  switch (Reloc) {
+  case R_ARM_THM_JUMP19:
+  case R_ARM_THM_JUMP24:
+  case R_ARM_THM_CALL:
+    if (Config->Pic)
+      return make<ThumbV6MPILongThunk>(S);
+    return make<ThumbV6MABSLongThunk>(S);
+  }
+  fatal("relocation " + toString(Reloc) + " to " + toString(S) +
+        " not supported for Armv6-M targets");
 // Creates a thunk for Thumb-ARM interworking or branch range extension.
 static Thunk *addThunkArm(RelType Reloc, Symbol &S) {
   // Decide which Thunk is needed based on:
@@ -692,14 +779,14 @@
   //   either Arm or Thumb.
   // Position independent Thunks if we require position independent code.
+  // Handle architectures that have restrictions on the instructions that they
+  // can use in Thunks. The flags below are set by reading the BuildAttributes
+  // of the input objects. InputFiles.cpp contains the mapping from ARM
+  // architecture to flag.
   if (!Config->ARMHasMovtMovw) {
     if (!Config->ARMJ1J2BranchEncoding)
       return addThunkPreArmv7(Reloc, S);
-    else
-      // The Armv6-m architecture (Cortex-M0) does not have Arm instructions or
-      // support the MOVT MOVW instructions so it cannot use any of the Thunks
-      // currently implemented.
-      fatal("thunks not supported for architecture Armv6-m");
+    return addThunkV6M(Reloc, S);
   switch (Reloc) {
diff --git a/test/ELF/arm-thumb-nov6thunk.s b/test/ELF/arm-thumb-nov6thunk.s
deleted file mode 100644
index 8c1d1f8..0000000
--- a/test/ELF/arm-thumb-nov6thunk.s
+++ /dev/null
@@ -1,39 +0,0 @@
-// REQUIRES: arm
-// RUN: llvm-mc -arm-add-build-attributes -filetype=obj -triple=armv6m-none-eabi %s -o %t
-// RUN: echo "SECTIONS { \
-// RUN:       . = SIZEOF_HEADERS; \
-// RUN:       .text_low : { *(.text_low) *(.text_low2) } \
-// RUN:       .text_high 0x2000000 : { *(.text_high) *(.text_high2) } \
-// RUN:       } " > %t.script
-// RUN: not ld.lld --script %t.script %t -o %t2 2>&1 | FileCheck %s
-// CHECK:  error: thunks not supported for architecture Armv6-m
-// Range extension thunks are not currently supported on Armv6-m due to a
-// combination of Armv6-m being aimed at low-end microcontrollers that typically
-// have < 512 Kilobytes of memory, and the restrictions of the instruction set
-// that make thunks inefficient. The main restriction is that the
-// interprocedural scratch register r12 (ip) cannot be accessed from many
-// instructions so we must use the stack to avoid corrupting the program.
-// A v6-m Thunk would look like
-//     push {r0, r1} ; Make 8-bytes of stack for restoring r0, and destination
-//     ldr r0, [pc, #4] ; L1
-//     str r0, [sp, #4] ; store destination address into sp + 4
-//     pop {r0, pc} ; restore r0 and load pc with destination
-// L1: .word destination
- .syntax unified
- .section .text_low, "ax", %progbits
- .thumb
- .type _start, %function
- .globl _start
- bl far
- .section .text_high, "ax", %progbits
- .globl far
- .type far, %function
- bx lr
diff --git a/test/ELF/arm-thumb-thunk-v6m.s b/test/ELF/arm-thumb-thunk-v6m.s
new file mode 100644
index 0000000..bf5afd5
--- /dev/null
+++ b/test/ELF/arm-thumb-thunk-v6m.s
@@ -0,0 +1,61 @@
+// REQUIRES: arm
+// RUN: llvm-mc -arm-add-build-attributes -filetype=obj -triple=armv6m-none-eabi %s -o %t
+// RUN: echo "SECTIONS { \
+// RUN:       . = SIZEOF_HEADERS; \
+// RUN:       .text_low : { *(.text_low) *(.text_low2) } \
+// RUN:       .text_high 0x2000000 : { *(.text_high) *(.text_high2) } \
+// RUN:       } " > %t.script
+// RUN: ld.lld --script %t.script %t -o %t2
+// RUN: llvm-objdump -d %t2 -triple=armv6m-none-eabi | FileCheck %s
+// RUN: ld.lld --script %t.script %t -o %t3 --pie
+// RUN: llvm-objdump -d %t3 -triple=armv6m-none-eabi | FileCheck -check-prefix=CHECK-PI %s
+// Range extension thunks for Arm Architecture v6m. Only Thumb instructions
+// are permitted, and few of them can access the high registers (r8 - r15).
+// This means that the thunks have to spill low registers (r0 - r7) in order
+// to perform the transfer of control.
+ .syntax unified
+ .section .text_low, "ax", %progbits
+ .thumb
+ .type _start, %function
+ .balign 4
+ .globl _start
+ bl far
+ .section .text_high, "ax", %progbits
+ .globl far
+ .type far, %function
+ bx lr
+// CHECK: Disassembly of section .text_low:
+// CHECK-NEXT: _start:
+// CHECK-NEXT:       94:        00 f0 00 f8     bl      #0
+// CHECK: __Thumbv6MABSLongThunk_far:
+// CHECK-NEXT:       98:        03 b4   push    {r0, r1}
+// CHECK-NEXT:       9a:        01 48   ldr     r0, [pc, #4]
+// CHECK-NEXT:       9c:        01 90   str     r0, [sp, #4]
+// CHECK-NEXT:       9e:        01 bd   pop     {r0, pc}
+// CHECK:       a0:     01 00 00 02     .word   0x02000001
+// CHECK: Disassembly of section .text_high:
+// CHECK-NEXT: far:
+// CHECK-NEXT:  2000000:        70 47   bx      lr
+// CHECK-PI: Disassembly of section .text_low:
+// CHECK-PI-NEXT: _start:
+// CHECK-PI-NEXT:      130:     00 f0 00 f8     bl      #0
+// CHECK-PI: __Thumbv6MPILongThunk_far:
+// CHECK-PI-NEXT:      134:     01 b4   push    {r0}
+// CHECK-PI-NEXT:      136:     02 48   ldr     r0, [pc, #8]
+// CHECK-PI-NEXT:      138:     84 46   mov     r12, r0
+// CHECK-PI-NEXT:      13a:     01 bc   pop     {r0}
+// pc = pc (0x13c + 4) + r12 (0x1fffec1) = 0x2000001 = far
+// CHECK-PI-NEXT:      13c:     e7 44   add     pc, r12
+// CHECK-PI-NEXT:      13e:     c0 46   mov     r8, r8
+// CHECK-PI:           140:     c1 fe ff 01     .word   0x01fffec1
+// CHECK-PI: Disassembly of section .text_high:
+// CHECK-PI-NEXT: far:
+// CHECK-PI-NEXT:  2000000:     70 47   bx      lr