| ## Check that in lite mode llvm-bolt updates function references in |
| ## non-optimized code. |
| |
| # RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o |
| # RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \ |
| # RUN: --defsym COMPACT=1 %s -o %t.compact.o |
| # RUN: link_fdata %s %t.o %t.fdata |
| # RUN: llvm-strip --strip-unneeded %t*.o |
| # RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -static |
| # RUN: %clang %cflags %t.compact.o -o %t.compact.exe -Wl,-q -static |
| # RUN: llvm-bolt %t.exe -o %t.bolt --data %t.fdata --lite |
| # RUN: llvm-bolt %t.compact.exe -o %t.compact.bolt --data %t.fdata --lite \ |
| # RUN: --compact-code-model |
| # RUN: llvm-objdump -d --disassemble-symbols=cold_function %t.exe \ |
| # RUN: | FileCheck %s --check-prefix=CHECK-INPUT |
| # RUN: llvm-objdump -d --disassemble-symbols=cold_function %t.bolt \ |
| # RUN: | FileCheck %s |
| # RUN: llvm-objdump -d --disassemble-symbols=_start.org.0 %t.bolt \ |
| # RUN: | FileCheck %s --check-prefix=CHECK-PATCH |
| # RUN: llvm-objdump -d %t.compact.bolt \ |
| # RUN: | FileCheck %s --check-prefix=CHECK-COMPACT |
| |
| ## In compact mode, make sure we do not create an unnecessary patch thunk. |
| # CHECK-COMPACT-NOT: <_start.org.0> |
| |
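| ## Verify that the number of FDEs matches the number of functions in the output |
| ## binary. There are three original functions (_start, cold_function and |
| ## far_func) and two optimized ones (_start and far_func, the two functions |
| ## that have profile data in this file). |
| ## NOTE: at the moment we are emitting extra FDEs for patched functions, thus |
| ## there is one more FDE for _start, for a total of six. |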
| # RUN: llvm-readelf -u %t.bolt | grep -wc FDE \ |
| # RUN: | FileCheck --check-prefix=CHECK-FDE %s |
| # CHECK-FDE: 6 |
| |
| ## In lite mode, optimized code will be separated from the original .text by |
| ## over 128MB, making it impossible for call/bl instructions in cold functions |
| ## to reach optimized functions directly. |
| |
| ## _start: profiled function. The profile record placed right after its label |
| ## is extracted by link_fdata and handed to llvm-bolt through --data. Per the |
| ## checks below, the code left at the original address is expected to be |
| ## turned into a thunk that objdump lists under the name _start.org.0. |
| .text |
| .globl _start |
| .type _start, %function |
| _start: |
| # FDATA: 0 [unknown] 0 1 _start 0 0 100 |
| .cfi_startproc |
| |
| ## Check that the code at the original location is converted into a |
| ## veneer/thunk. |
| ## The expected thunk materializes an address in x16 (adrp + add) and |
| ## branches to it. |
| # CHECK-PATCH-LABEL: <_start.org.0> |
| # CHECK-PATCH-NEXT: adrp x16 |
| # CHECK-PATCH-NEXT: add x16, x16, |
| # CHECK-PATCH-NEXT: br x16 |
| ## Function body: call cold_function unless x0 == 1, then return via x30. |
| cmp x0, 1 |
| b.eq .L0 |
| bl cold_function |
| .L0: |
| ret x30 |
| .cfi_endproc |
| .size _start, .-_start |
| |
| ## Cold non-optimized function with references to hot functions. |
| ## It has no profile record of its own in this file. The default-prefix checks |
| ## below are matched against the llvm-bolt output (%t.bolt); the INPUT-prefixed |
| ## checks are matched against the linker output (%t.exe) and pin what each |
| ## sequence looked like before llvm-bolt touched it. |
| # CHECK: Disassembly of section .bolt.org.text: |
| # CHECK-LABEL: <cold_function> |
| .globl cold_function |
| .type cold_function, %function |
| cold_function: |
| .cfi_startproc |
| |
| ## Absolute 64-bit function pointer reference. |
| ## We check for the lower 16 bits of _start to be zeros after update. |
| ## The INPUT-prefixed NOT check establishes that the same movk did not carry a |
| ## zero immediate in the input binary, so the zero seen in the output is the |
| ## result of the update. |
| movz x0, :abs_g3:_start |
| movk x0, :abs_g2_nc:_start |
| movk x0, :abs_g1_nc:_start |
| # CHECK-INPUT-NOT: movk x0, #0x0{{$}} |
| # CHECK: movk x0, #0x0{{$}} |
| movk x0, :abs_g0_nc:_start |
| |
| ## Relaxable address reference. |
| ## In the input the pair appears as nop + adr; in the output it must be |
| ## adrp + add again. ADDR captures the adrp target printed here, and the x2, |
| ## x3 and x4 checks further down reuse it, so all of them must print the same |
| ## adrp target. |
| # CHECK-INPUT: nop |
| # CHECK-INPUT-NEXT: adr x1 |
| # CHECK-NEXT: adrp x1, [[ADDR:0x[0-9a-f]+]] <{{.*}}> |
| # CHECK-NEXT: add x1 |
| adrp x1, _start |
| add x1, x1, :lo12:_start |
| |
| ## Non-relaxable address reference. |
| ## far_func is defined after the 128MB gap, and the input keeps adrp + add. |
| # CHECK-INPUT-NEXT: adrp x2 |
| # CHECK-INPUT-NEXT: add x2 |
| # CHECK-NEXT: adrp x2, [[ADDR]] |
| # CHECK-NEXT: add x2 |
| adrp x2, far_func |
| add x2, x2, :lo12:far_func |
| |
| ## Check that fully-relaxed GOT reference is converted into ADRP+ADD. |
| adrp x3, :got:_start |
| ldr x3, [x3, #:got_lo12:_start] |
| # CHECK-INPUT-NEXT: nop |
| # CHECK-INPUT-NEXT: adr x3 |
| # CHECK-NEXT: adrp x3, [[ADDR]] |
| # CHECK-NEXT: add x3 |
| |
| ## Check that partially-relaxed GOT reference is converted into ADRP+ADD. |
| adrp x4, :got:far_func |
| ldr x4, [x4, #:got_lo12:far_func] |
| # CHECK-INPUT-NEXT: adrp x4 |
| # CHECK-INPUT-NEXT: add x4 |
| # CHECK-NEXT: adrp x4, [[ADDR]] |
| # CHECK-NEXT: add x4 |
| |
| ## Check that non-relaxable GOT load is left intact. |
| ## The nop between adrp and ldr separates the pair. The NOT check below covers |
| ## the text between the adrp x5 match and the following nop, i.e. the adrp |
| ## operand must not be ADDR. |
| adrp x5, :got:far_func |
| nop |
| ldr x5, [x5, #:got_lo12:far_func] |
| # CHECK-INPUT-NEXT: adrp x5 |
| # CHECK-INPUT-NEXT: nop |
| # CHECK-INPUT-NEXT: ldr x5 |
| # CHECK-NEXT: adrp x5 |
| # CHECK-NOT: [[ADDR]] |
| # CHECK-NEXT: nop |
| # CHECK-NEXT: ldr x5 |
| |
| ## Since _start is relocated further than 128MB from the call site, we check |
| ## that the call is converted into a call to its original version. That original |
| ## version should contain a veneer/thunk code that we check separately. |
| bl _start |
| # CHECK-INPUT-NEXT: bl {{.*}} <_start> |
| # CHECK-NEXT: bl {{.*}} <_start.org.0> |
| |
| ## Same as above, but the instruction is a tail call. |
| b _start |
| # CHECK-INPUT-NEXT: b {{.*}} <_start> |
| # CHECK-NEXT: b {{.*}} <_start.org.0> |
| |
| ## Quick test for conditional tail calls. A proper test is being added in: |
| ## https://github.com/llvm/llvm-project/pull/139565 |
| ## For now check that llvm-bolt doesn't choke on CTCs. |
| ## These three branches to _start are assembled only when COMPACT is not |
| ## defined, i.e. they are absent from the object built with |
| ## --defsym COMPACT=1. No output checks follow them. |
| .ifndef COMPACT |
| b.eq _start |
| cbz x0, _start |
| tbz x0, 42, _start |
| .endif |
| |
| .cfi_endproc |
| .size cold_function, .-cold_function |
| |
| ## Padding between cold_function and far_func. It is emitted only when COMPACT |
| ## is not defined, so the object built with --defsym COMPACT=1 has no gap. |
| .ifndef COMPACT |
| ## Reserve 128MB of space to make functions that follow unreachable by ADRs in |
| ## code that precedes this gap. |
| ## 0x8000000 bytes == 128MB. |
| .space 0x8000000 |
| .endif |
| |
| ## far_func: second profiled function; its body is a single return. It is |
| ## the target of the far_func references made from cold_function. |
| .globl far_func |
| .type far_func, %function |
| far_func: |
| # FDATA: 0 [unknown] 0 1 far_func 0 0 100 |
| .cfi_startproc |
| ret x30 |
| .cfi_endproc |
| .size far_func, .-far_func |