| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc -mtriple=armv7a-none-eabi %s -o - | FileCheck %s |
| |
| ; Callee shared by the many-argument tests: it takes six i32 parameters. The |
| ; checks in those tests show the first four passed in r0-r3 and the last two |
| ; stored to the stack. |
| declare i32 @many_args_callee(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) |
| |
| ; Caller and callee have the same signature, so a plain `tail` call is |
| ; expected to become a branch, with the two stack arguments stored at [sp] and |
| ; [sp, #4] and no stack adjustment. |
| define i32 @many_args_tail(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) { |
| ; CHECK-LABEL: many_args_tail: |
| ; CHECK: @ %bb.0: |
| ; CHECK-NEXT: mov r0, #5 |
| ; CHECK-NEXT: mov r1, #2 |
| ; CHECK-NEXT: str r0, [sp] |
| ; CHECK-NEXT: mov r0, #6 |
| ; CHECK-NEXT: str r0, [sp, #4] |
| ; CHECK-NEXT: mov r0, #1 |
| ; CHECK-NEXT: mov r2, #3 |
| ; CHECK-NEXT: mov r3, #4 |
| ; CHECK-NEXT: b many_args_callee |
| %ret = tail call i32 @many_args_callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6) |
| ret i32 %ret |
| } |
| |
| ; Same call as in @many_args_tail, but marked musttail. The expected code is |
| ; identical: a branch to the callee with no frame set up. |
| define i32 @many_args_musttail(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) { |
| ; CHECK-LABEL: many_args_musttail: |
| ; CHECK: @ %bb.0: |
| ; CHECK-NEXT: mov r0, #5 |
| ; CHECK-NEXT: mov r1, #2 |
| ; CHECK-NEXT: str r0, [sp] |
| ; CHECK-NEXT: mov r0, #6 |
| ; CHECK-NEXT: str r0, [sp, #4] |
| ; CHECK-NEXT: mov r0, #1 |
| ; CHECK-NEXT: mov r2, #3 |
| ; CHECK-NEXT: mov r3, #4 |
| ; CHECK-NEXT: b many_args_callee |
| %ret = musttail call i32 @many_args_callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6) |
| ret i32 %ret |
| } |
| |
| ; This function has more arguments than its tail-callee. This isn't valid for |
| ; the musttail attribute, but can still be tail-called as a non-guaranteed |
| ; optimisation, because the outgoing arguments to @many_args_callee fit in the |
| ; stack space allocated by the caller of @more_args_tail. |
| define i32 @more_args_tail(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6) { |
| ; CHECK-LABEL: more_args_tail: |
| ; CHECK: @ %bb.0: |
| ; CHECK-NEXT: mov r0, #5 |
| ; CHECK-NEXT: mov r1, #2 |
| ; CHECK-NEXT: str r0, [sp] |
| ; CHECK-NEXT: mov r0, #6 |
| ; CHECK-NEXT: str r0, [sp, #4] |
| ; CHECK-NEXT: mov r0, #1 |
| ; CHECK-NEXT: mov r2, #3 |
| ; CHECK-NEXT: mov r3, #4 |
| ; CHECK-NEXT: b many_args_callee |
| %ret = tail call i32 @many_args_callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6) |
| ret i32 %ret |
| } |
| |
| ; Again, this isn't valid for musttail, but can be tail-called in practice |
| ; because the stack size is the same: the caller takes three i64 arguments |
| ; while the callee takes six i32 arguments. |
| define i32 @different_args_tail(i64 %0, i64 %1, i64 %2) { |
| ; CHECK-LABEL: different_args_tail: |
| ; CHECK: @ %bb.0: |
| ; CHECK-NEXT: mov r0, #5 |
| ; CHECK-NEXT: mov r1, #2 |
| ; CHECK-NEXT: str r0, [sp] |
| ; CHECK-NEXT: mov r0, #6 |
| ; CHECK-NEXT: str r0, [sp, #4] |
| ; CHECK-NEXT: mov r0, #1 |
| ; CHECK-NEXT: mov r2, #3 |
| ; CHECK-NEXT: mov r3, #4 |
| ; CHECK-NEXT: b many_args_callee |
| %ret = tail call i32 @many_args_callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6) |
| ret i32 %ret |
| } |
| |
| ; Here, the caller requires less stack space for its arguments than the |
| ; callee, so it would not be valid to do a tail-call. The expected code is |
| ; therefore an ordinary call (bl) with the outgoing stack arguments placed in |
| ; a locally allocated 8-byte area. |
| define i32 @fewer_args_tail(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4) { |
| ; CHECK-LABEL: fewer_args_tail: |
| ; CHECK: @ %bb.0: |
| ; CHECK-NEXT: .save {r11, lr} |
| ; CHECK-NEXT: push {r11, lr} |
| ; CHECK-NEXT: .pad #8 |
| ; CHECK-NEXT: sub sp, sp, #8 |
| ; CHECK-NEXT: mov r1, #6 |
| ; CHECK-NEXT: mov r0, #5 |
| ; CHECK-NEXT: strd r0, r1, [sp] |
| ; CHECK-NEXT: mov r0, #1 |
| ; CHECK-NEXT: mov r1, #2 |
| ; CHECK-NEXT: mov r2, #3 |
| ; CHECK-NEXT: mov r3, #4 |
| ; CHECK-NEXT: bl many_args_callee |
| ; CHECK-NEXT: add sp, sp, #8 |
| ; CHECK-NEXT: pop {r11, pc} |
| %ret = tail call i32 @many_args_callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6) |
| ret i32 %ret |
| } |
| |
| ; Callee that returns a { double, double } indirectly through an sret pointer |
| ; argument. |
| declare void @sret_callee(ptr sret({ double, double }) align 8) |
| |
| ; Functions which return by sret can be tail-called because the incoming sret |
| ; pointer gets passed through to the callee. The expected output is a single |
| ; branch with no other instructions. |
| define void @sret_caller_tail(ptr sret({ double, double }) align 8 %result) { |
| ; CHECK-LABEL: sret_caller_tail: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: b sret_callee |
| entry: |
| tail call void @sret_callee(ptr sret({ double, double }) align 8 %result) |
| ret void |
| } |
| |
| ; Same as @sret_caller_tail, but with the call marked musttail. The expected |
| ; output is the same single branch. |
| define void @sret_caller_musttail(ptr sret({ double, double }) align 8 %result) { |
| ; CHECK-LABEL: sret_caller_musttail: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: b sret_callee |
| entry: |
| musttail call void @sret_callee(ptr sret({ double, double }) align 8 %result) |
| ret void |
| } |
| |
| ; Clang only uses byval for arguments of 65 bytes or larger, but we test with a |
| ; 20 byte struct to keep the tests more readable. This size was chosen to still |
| ; make sure that it will be split between registers and the stack, to test all |
| ; of the interesting code paths in the backend. |
| %twenty_bytes = type { [5 x i32] } |
| ; Callee taking a single %twenty_bytes argument by value. |
| declare void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4) |
| |
| ; Functions with byval parameters can be tail-called, because the value is |
| ; actually passed in registers and the stack in the same way for the caller and |
| ; callee. Within @large_caller the first 16 bytes of the argument are spilled |
| ; to the local stack frame, but for the tail-call they are passed in r0-r3, so |
| ; it's safe to de-allocate that memory before the call. |
| ; Note that the STM uses base-register writeback (sp!), so storing the four |
| ; registers also moves sp back up by 16 bytes, undoing the SUB before the |
| ; branch. |
| ; TODO: The SUB and STM instructions are unnecessary and could be optimised |
| ; out, but the behaviour of this is still correct. |
| define void @large_caller(%twenty_bytes* byval(%twenty_bytes) align 4 %a) { |
| ; CHECK-LABEL: large_caller: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: .pad #16 |
| ; CHECK-NEXT: sub sp, sp, #16 |
| ; CHECK-NEXT: stm sp!, {r0, r1, r2, r3} |
| ; CHECK-NEXT: b large_callee |
| entry: |
| musttail call void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %a) |
| ret void |
| } |
| |
| ; As above, but with some inline asm to test that the arguments in r0-r3 are |
| ; re-loaded before the call. The asm clobbers r0-r3, so the copy spilled by the |
| ; STM is popped back into those registers (which also releases the 16 bytes of |
| ; stack) before the branch. |
| define void @large_caller_check_regs(%twenty_bytes* byval(%twenty_bytes) align 4 %a) { |
| ; CHECK-LABEL: large_caller_check_regs: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: .pad #16 |
| ; CHECK-NEXT: sub sp, sp, #16 |
| ; CHECK-NEXT: stm sp, {r0, r1, r2, r3} |
| ; CHECK-NEXT: @APP |
| ; CHECK-NEXT: @NO_APP |
| ; CHECK-NEXT: pop {r0, r1, r2, r3} |
| ; CHECK-NEXT: b large_callee |
| entry: |
| tail call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3}"() |
| musttail call void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %a) |
| ret void |
| } |
| |
| ; The IR for this one looks dodgy, because it has an alloca passed to a |
| ; musttail function, but it is passed as a byval argument, so will be copied |
| ; into the stack space allocated by @large_caller_new_value's caller, so is |
| ; valid. |
| ; In the expected code the first four words of the new value (0, 1, 2, 3) are |
| ; materialised directly in r0-r3, and the fifth word is copied from the local |
| ; alloca (sp+16) to the incoming stack argument slot (sp+36) before the local |
| ; frame is released. |
| define void @large_caller_new_value(%twenty_bytes* byval(%twenty_bytes) align 4 %a) { |
| ; CHECK-LABEL: large_caller_new_value: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: .pad #36 |
| ; CHECK-NEXT: sub sp, sp, #36 |
| ; CHECK-NEXT: add r12, sp, #20 |
| ; CHECK-NEXT: stm r12, {r0, r1, r2, r3} |
| ; CHECK-NEXT: mov r0, #4 |
| ; CHECK-NEXT: add r1, sp, #36 |
| ; CHECK-NEXT: str r0, [sp, #16] |
| ; CHECK-NEXT: mov r0, #3 |
| ; CHECK-NEXT: str r0, [sp, #12] |
| ; CHECK-NEXT: mov r0, #2 |
| ; CHECK-NEXT: str r0, [sp, #8] |
| ; CHECK-NEXT: mov r0, #1 |
| ; CHECK-NEXT: str r0, [sp, #4] |
| ; CHECK-NEXT: mov r0, #0 |
| ; CHECK-NEXT: str r0, [sp] |
| ; CHECK-NEXT: mov r0, sp |
| ; CHECK-NEXT: add r0, r0, #16 |
| ; CHECK-NEXT: mov r3, #3 |
| ; CHECK-NEXT: ldr r2, [r0], #4 |
| ; CHECK-NEXT: str r2, [r1], #4 |
| ; CHECK-NEXT: mov r0, #0 |
| ; CHECK-NEXT: mov r1, #1 |
| ; CHECK-NEXT: mov r2, #2 |
| ; CHECK-NEXT: add sp, sp, #36 |
| ; CHECK-NEXT: b large_callee |
| entry: |
| %y = alloca %twenty_bytes, align 4 |
| store i32 0, ptr %y, align 4 |
| %0 = getelementptr inbounds i8, ptr %y, i32 4 |
| store i32 1, ptr %0, align 4 |
| %1 = getelementptr inbounds i8, ptr %y, i32 8 |
| store i32 2, ptr %1, align 4 |
| %2 = getelementptr inbounds i8, ptr %y, i32 12 |
| store i32 3, ptr %2, align 4 |
| %3 = getelementptr inbounds i8, ptr %y, i32 16 |
| store i32 4, ptr %3, align 4 |
| musttail call void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %y) |
| ret void |
| } |
| |
| ; The caller forwards its two byval arguments to the callee in swapped order, |
| ; so each one has to end up where the other one arrived. In the expected code |
| ; both arguments are first copied into temporaries in the local frame (at sp |
| ; and sp+20), and only then written to their outgoing locations: r0-r3 plus |
| ; the word at sp+72 for the first outgoing argument, and the 20 bytes at sp+76 |
| ; for the second. |
| declare void @two_byvals_callee(%twenty_bytes* byval(%twenty_bytes) align 4, %twenty_bytes* byval(%twenty_bytes) align 4) |
| define void @swap_byvals(%twenty_bytes* byval(%twenty_bytes) align 4 %a, %twenty_bytes* byval(%twenty_bytes) align 4 %b) { |
| ; CHECK-LABEL: swap_byvals: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: .pad #16 |
| ; CHECK-NEXT: sub sp, sp, #16 |
| ; CHECK-NEXT: .save {r4, r5, r11, lr} |
| ; CHECK-NEXT: push {r4, r5, r11, lr} |
| ; CHECK-NEXT: .pad #40 |
| ; CHECK-NEXT: sub sp, sp, #40 |
| ; CHECK-NEXT: add r12, sp, #56 |
| ; CHECK-NEXT: add lr, sp, #20 |
| ; CHECK-NEXT: stm r12, {r0, r1, r2, r3} |
| ; CHECK-NEXT: add r0, sp, #56 |
| ; CHECK-NEXT: mov r12, sp |
| ; CHECK-NEXT: ldr r1, [r0], #4 |
| ; CHECK-NEXT: mov r2, r12 |
| ; CHECK-NEXT: str r1, [r2], #4 |
| ; CHECK-NEXT: add r3, sp, #20 |
| ; CHECK-NEXT: ldr r1, [r0], #4 |
| ; CHECK-NEXT: add r4, sp, #76 |
| ; CHECK-NEXT: str r1, [r2], #4 |
| ; CHECK-NEXT: ldr r1, [r0], #4 |
| ; CHECK-NEXT: str r1, [r2], #4 |
| ; CHECK-NEXT: ldr r1, [r0], #4 |
| ; CHECK-NEXT: str r1, [r2], #4 |
| ; CHECK-NEXT: ldr r1, [r0], #4 |
| ; CHECK-NEXT: add r0, sp, #76 |
| ; CHECK-NEXT: str r1, [r2], #4 |
| ; CHECK-NEXT: mov r2, lr |
| ; CHECK-NEXT: ldr r1, [r0], #4 |
| ; CHECK-NEXT: str r1, [r2], #4 |
| ; CHECK-NEXT: ldr r1, [r0], #4 |
| ; CHECK-NEXT: str r1, [r2], #4 |
| ; CHECK-NEXT: ldr r1, [r0], #4 |
| ; CHECK-NEXT: str r1, [r2], #4 |
| ; CHECK-NEXT: ldr r1, [r0], #4 |
| ; CHECK-NEXT: str r1, [r2], #4 |
| ; CHECK-NEXT: ldr r1, [r0], #4 |
| ; CHECK-NEXT: str r1, [r2], #4 |
| ; CHECK-NEXT: ldm r3, {r0, r1, r2, r3} |
| ; CHECK-NEXT: ldr r5, [r12], #4 |
| ; CHECK-NEXT: str r5, [r4], #4 |
| ; CHECK-NEXT: ldr r5, [r12], #4 |
| ; CHECK-NEXT: str r5, [r4], #4 |
| ; CHECK-NEXT: ldr r5, [r12], #4 |
| ; CHECK-NEXT: str r5, [r4], #4 |
| ; CHECK-NEXT: ldr r5, [r12], #4 |
| ; CHECK-NEXT: str r5, [r4], #4 |
| ; CHECK-NEXT: ldr r5, [r12], #4 |
| ; CHECK-NEXT: str r5, [r4], #4 |
| ; CHECK-NEXT: add r5, lr, #16 |
| ; CHECK-NEXT: add r12, sp, #72 |
| ; CHECK-NEXT: ldr r4, [r5], #4 |
| ; CHECK-NEXT: str r4, [r12], #4 |
| ; CHECK-NEXT: add sp, sp, #40 |
| ; CHECK-NEXT: pop {r4, r5, r11, lr} |
| ; CHECK-NEXT: add sp, sp, #16 |
| ; CHECK-NEXT: b two_byvals_callee |
| entry: |
| musttail call void @two_byvals_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %b, %twenty_bytes* byval(%twenty_bytes) align 4 %a) |
| ret void |
| } |
| |
| ; A forwarded byval arg, but at a different offset on the stack, so it needs to |
| ; be copied to the local stack frame first. This can't be musttail because of |
| ; the different signatures, but is still tail-called as an optimisation. |
| ; In the expected code the 20 bytes are copied from sp+28 to the bottom of the |
| ; local frame, the first four words are then loaded into r0-r3, and the fifth |
| ; word is stored to the stack argument slot at sp+40. |
| declare void @shift_byval_callee(%twenty_bytes* byval(%twenty_bytes) align 4) |
| define void @shift_byval(i32 %a, %twenty_bytes* byval(%twenty_bytes) align 4 %b) { |
| ; CHECK-LABEL: shift_byval: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: .pad #12 |
| ; CHECK-NEXT: sub sp, sp, #12 |
| ; CHECK-NEXT: .save {r4, lr} |
| ; CHECK-NEXT: push {r4, lr} |
| ; CHECK-NEXT: .pad #20 |
| ; CHECK-NEXT: sub sp, sp, #20 |
| ; CHECK-NEXT: add r0, sp, #28 |
| ; CHECK-NEXT: add lr, sp, #40 |
| ; CHECK-NEXT: stm r0, {r1, r2, r3} |
| ; CHECK-NEXT: add r0, sp, #28 |
| ; CHECK-NEXT: mov r1, sp |
| ; CHECK-NEXT: ldr r2, [r0], #4 |
| ; CHECK-NEXT: add r12, r1, #16 |
| ; CHECK-NEXT: str r2, [r1], #4 |
| ; CHECK-NEXT: ldr r2, [r0], #4 |
| ; CHECK-NEXT: str r2, [r1], #4 |
| ; CHECK-NEXT: ldr r2, [r0], #4 |
| ; CHECK-NEXT: str r2, [r1], #4 |
| ; CHECK-NEXT: ldr r2, [r0], #4 |
| ; CHECK-NEXT: str r2, [r1], #4 |
| ; CHECK-NEXT: ldr r2, [r0], #4 |
| ; CHECK-NEXT: str r2, [r1], #4 |
| ; CHECK-NEXT: ldm sp, {r0, r1, r2, r3} |
| ; CHECK-NEXT: ldr r4, [r12], #4 |
| ; CHECK-NEXT: str r4, [lr], #4 |
| ; CHECK-NEXT: add sp, sp, #20 |
| ; CHECK-NEXT: pop {r4, lr} |
| ; CHECK-NEXT: add sp, sp, #12 |
| ; CHECK-NEXT: b shift_byval_callee |
| entry: |
| tail call void @shift_byval_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %b) |
| ret void |
| } |
| |
| ; A global object passed to a byval argument, so it must be copied, but doesn't |
| ; need a stack temporary. In the expected code the first four words are loaded |
| ; straight from @large_global into r0-r3 and the fifth word is copied from the |
| ; global to the stack argument slot at sp+24. |
| @large_global = external global %twenty_bytes |
| define void @large_caller_from_global(%twenty_bytes* byval(%twenty_bytes) align 4 %a) { |
| ; CHECK-LABEL: large_caller_from_global: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: .pad #16 |
| ; CHECK-NEXT: sub sp, sp, #16 |
| ; CHECK-NEXT: .save {r4, lr} |
| ; CHECK-NEXT: push {r4, lr} |
| ; CHECK-NEXT: add r12, sp, #8 |
| ; CHECK-NEXT: add lr, sp, #24 |
| ; CHECK-NEXT: stm r12, {r0, r1, r2, r3} |
| ; CHECK-NEXT: movw r3, :lower16:large_global |
| ; CHECK-NEXT: movt r3, :upper16:large_global |
| ; CHECK-NEXT: add r12, r3, #16 |
| ; CHECK-NEXT: ldm r3, {r0, r1, r2, r3} |
| ; CHECK-NEXT: ldr r4, [r12], #4 |
| ; CHECK-NEXT: str r4, [lr], #4 |
| ; CHECK-NEXT: pop {r4, lr} |
| ; CHECK-NEXT: add sp, sp, #16 |
| ; CHECK-NEXT: b large_callee |
| entry: |
| musttail call void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4 @large_global) |
| ret void |
| } |