| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc -mtriple=riscv32 %s -o - | FileCheck %s --check-prefix=RV32 |
| ; RUN: llc -mtriple=riscv64 %s -o - | FileCheck %s --check-prefix=RV64 |
| |
| declare i32 @many_args_callee(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9) |
| |
define i32 @many_args_tail(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9) {
; RV32-LABEL: many_args_tail:
; RV32: # %bb.0:
; RV32-NEXT: li a0, 8
; RV32-NEXT: li t0, 9
; RV32-NEXT: li a1, 1
; RV32-NEXT: li a2, 2
; RV32-NEXT: li a3, 3
; RV32-NEXT: li a4, 4
; RV32-NEXT: li a5, 5
; RV32-NEXT: li a6, 6
; RV32-NEXT: sw a0, 0(sp)
; RV32-NEXT: li a7, 7
; RV32-NEXT: sw t0, 4(sp)
; RV32-NEXT: li a0, 0
; RV32-NEXT: tail many_args_callee
;
; RV64-LABEL: many_args_tail:
; RV64: # %bb.0:
; RV64-NEXT: li a0, 8
; RV64-NEXT: li t0, 9
; RV64-NEXT: li a1, 1
; RV64-NEXT: li a2, 2
; RV64-NEXT: li a3, 3
; RV64-NEXT: li a4, 4
; RV64-NEXT: li a5, 5
; RV64-NEXT: li a6, 6
; RV64-NEXT: sd a0, 0(sp)
; RV64-NEXT: li a7, 7
; RV64-NEXT: sd t0, 8(sp)
; RV64-NEXT: li a0, 0
; RV64-NEXT: tail many_args_callee
; Caller and callee both take 10 i32 args: 8 go in a0-a7 and 2 go on the
; stack, so the outgoing stack arguments fit in the caller's own incoming
; argument area and both targets lower this to a `tail` jump (no call/ret).
%ret = tail call i32 @many_args_callee(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9)
ret i32 %ret
}
| |
define i32 @many_args_musttail(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9) {
; RV32-LABEL: many_args_musttail:
; RV32: # %bb.0:
; RV32-NEXT: li a0, 8
; RV32-NEXT: li t0, 9
; RV32-NEXT: li a1, 1
; RV32-NEXT: li a2, 2
; RV32-NEXT: li a3, 3
; RV32-NEXT: li a4, 4
; RV32-NEXT: li a5, 5
; RV32-NEXT: li a6, 6
; RV32-NEXT: sw a0, 0(sp)
; RV32-NEXT: li a7, 7
; RV32-NEXT: sw t0, 4(sp)
; RV32-NEXT: li a0, 0
; RV32-NEXT: tail many_args_callee
;
; RV64-LABEL: many_args_musttail:
; RV64: # %bb.0:
; RV64-NEXT: li a0, 8
; RV64-NEXT: li t0, 9
; RV64-NEXT: li a1, 1
; RV64-NEXT: li a2, 2
; RV64-NEXT: li a3, 3
; RV64-NEXT: li a4, 4
; RV64-NEXT: li a5, 5
; RV64-NEXT: li a6, 6
; RV64-NEXT: sd a0, 0(sp)
; RV64-NEXT: li a7, 7
; RV64-NEXT: sd t0, 8(sp)
; RV64-NEXT: li a0, 0
; RV64-NEXT: tail many_args_callee
; Same argument layout as @many_args_tail, but the call is marked musttail:
; the signatures match, so the guaranteed tail call must (and does) lower to
; a `tail` jump on both RV32 and RV64.
%ret = musttail call i32 @many_args_callee(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9)
ret i32 %ret
}
| |
; This function has more arguments than its tail-callee. This isn't valid for
; the musttail attribute, but can still be tail-called as a non-guaranteed
; optimisation, because the outgoing arguments to @many_args_callee fit in the
; stack space allocated by the caller of @more_args_tail.
define i32 @more_args_tail(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9) {
; RV32-LABEL: more_args_tail:
; RV32: # %bb.0:
; RV32-NEXT: li a0, 8
; RV32-NEXT: li t0, 9
; RV32-NEXT: li a1, 1
; RV32-NEXT: li a2, 2
; RV32-NEXT: li a3, 3
; RV32-NEXT: li a4, 4
; RV32-NEXT: li a5, 5
; RV32-NEXT: li a6, 6
; RV32-NEXT: sw a0, 0(sp)
; RV32-NEXT: li a7, 7
; RV32-NEXT: sw t0, 4(sp)
; RV32-NEXT: li a0, 0
; RV32-NEXT: tail many_args_callee
;
; RV64-LABEL: more_args_tail:
; RV64: # %bb.0:
; RV64-NEXT: li a0, 8
; RV64-NEXT: li t0, 9
; RV64-NEXT: li a1, 1
; RV64-NEXT: li a2, 2
; RV64-NEXT: li a3, 3
; RV64-NEXT: li a4, 4
; RV64-NEXT: li a5, 5
; RV64-NEXT: li a6, 6
; RV64-NEXT: sd a0, 0(sp)
; RV64-NEXT: li a7, 7
; RV64-NEXT: sd t0, 8(sp)
; RV64-NEXT: li a0, 0
; RV64-NEXT: tail many_args_callee
; Both targets emit `tail`: the two outgoing stack arguments (8 and 9) are
; written into the caller's incoming stack-argument area before the jump.
%ret = tail call i32 @many_args_callee(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9)
ret i32 %ret
}
| |
| ; Again, this isn't valid for musttail, but can be tail-called in practice |
| ; because the stack size is the same. |
define i32 @different_args_tail_32bit(i64 %0, i64 %1, i64 %2, i64 %3, i64 %4) nounwind {
; RV32-LABEL: different_args_tail_32bit:
; RV32: # %bb.0:
; RV32-NEXT: li a0, 8
; RV32-NEXT: li t0, 9
; RV32-NEXT: li a1, 1
; RV32-NEXT: li a2, 2
; RV32-NEXT: li a3, 3
; RV32-NEXT: li a4, 4
; RV32-NEXT: li a5, 5
; RV32-NEXT: li a6, 6
; RV32-NEXT: sw a0, 0(sp)
; RV32-NEXT: li a7, 7
; RV32-NEXT: sw t0, 4(sp)
; RV32-NEXT: li a0, 0
; RV32-NEXT: tail many_args_callee
;
; RV64-LABEL: different_args_tail_32bit:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -32
; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64-NEXT: li a0, 9
; RV64-NEXT: li t0, 8
; RV64-NEXT: li a1, 1
; RV64-NEXT: li a2, 2
; RV64-NEXT: li a3, 3
; RV64-NEXT: li a4, 4
; RV64-NEXT: li a5, 5
; RV64-NEXT: li a6, 6
; RV64-NEXT: li a7, 7
; RV64-NEXT: sd t0, 0(sp)
; RV64-NEXT: sd a0, 8(sp)
; RV64-NEXT: li a0, 0
; RV64-NEXT: call many_args_callee
; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 32
; RV64-NEXT: ret
; RV32: the five incoming i64 args overflow the 8 argument registers, so the
; caller already has a stack-argument area big enough for the callee's two
; words and a `tail` jump is emitted. RV64: all five i64 args fit in
; registers, so there is no incoming stack-argument area to reuse and a
; normal call (with its own frame) is emitted instead.
%ret = tail call i32 @many_args_callee(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9)
ret i32 %ret
}
| |
define i32 @different_args_tail_64bit(i128 %0, i128 %1, i128 %2, i128 %3, i128 %4) nounwind {
; RV32-LABEL: different_args_tail_64bit:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: li a0, 9
; RV32-NEXT: li t0, 8
; RV32-NEXT: li a1, 1
; RV32-NEXT: li a2, 2
; RV32-NEXT: li a3, 3
; RV32-NEXT: li a4, 4
; RV32-NEXT: li a5, 5
; RV32-NEXT: li a6, 6
; RV32-NEXT: li a7, 7
; RV32-NEXT: sw t0, 0(sp)
; RV32-NEXT: sw a0, 4(sp)
; RV32-NEXT: li a0, 0
; RV32-NEXT: call many_args_callee
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: different_args_tail_64bit:
; RV64: # %bb.0:
; RV64-NEXT: li a0, 8
; RV64-NEXT: li t0, 9
; RV64-NEXT: li a1, 1
; RV64-NEXT: li a2, 2
; RV64-NEXT: li a3, 3
; RV64-NEXT: li a4, 4
; RV64-NEXT: li a5, 5
; RV64-NEXT: li a6, 6
; RV64-NEXT: sd a0, 0(sp)
; RV64-NEXT: li a7, 7
; RV64-NEXT: sd t0, 8(sp)
; RV64-NEXT: li a0, 0
; RV64-NEXT: tail many_args_callee
; Mirror image of @different_args_tail_32bit using i128 args: on RV64 the
; five i128 args overflow the registers, giving the caller a reusable
; stack-argument area and thus a `tail` jump; on RV32 a normal call is used.
%ret = tail call i32 @many_args_callee(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9)
ret i32 %ret
}
| |
; Here, the caller requires less stack space for its arguments than the
; callee, so it would not be valid to do a tail-call.
define i32 @fewer_args_tail(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4) nounwind {
; RV32-LABEL: fewer_args_tail:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: li a0, 9
; RV32-NEXT: li t0, 8
; RV32-NEXT: li a1, 1
; RV32-NEXT: li a2, 2
; RV32-NEXT: li a3, 3
; RV32-NEXT: li a4, 4
; RV32-NEXT: li a5, 5
; RV32-NEXT: li a6, 6
; RV32-NEXT: li a7, 7
; RV32-NEXT: sw t0, 0(sp)
; RV32-NEXT: sw a0, 4(sp)
; RV32-NEXT: li a0, 0
; RV32-NEXT: call many_args_callee
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: fewer_args_tail:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -32
; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64-NEXT: li a0, 9
; RV64-NEXT: li t0, 8
; RV64-NEXT: li a1, 1
; RV64-NEXT: li a2, 2
; RV64-NEXT: li a3, 3
; RV64-NEXT: li a4, 4
; RV64-NEXT: li a5, 5
; RV64-NEXT: li a6, 6
; RV64-NEXT: li a7, 7
; RV64-NEXT: sd t0, 0(sp)
; RV64-NEXT: sd a0, 8(sp)
; RV64-NEXT: li a0, 0
; RV64-NEXT: call many_args_callee
; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 32
; RV64-NEXT: ret
; The caller's 5 i32 args all fit in registers on both targets, so there is
; no incoming stack-argument area for the callee's two stack words to live
; in: both RV32 and RV64 must allocate a frame and emit a normal call.
%ret = tail call i32 @many_args_callee(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9)
ret i32 %ret
}
| |
| declare void @foo(i32, i32, i32, i32, i32, i32, i32, i32, i32) |
| |
; Makes a normal call to @foo (9th arg = 1 goes on the local stack), then a
; musttail call to @foo (9th arg = 2). For the musttail call the stack
; argument is written into the caller's *incoming* argument area (48(sp) on
; RV32, 80(sp) on RV64 -- above the restored frame), the first eight args are
; restored from callee-saved registers, and the frame is torn down before the
; `tail foo` jump.
define void @bar(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8) nounwind {
; RV32-LABEL: bar:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -48
; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s5, 20(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: mv s0, a7
; RV32-NEXT: mv s1, a6
; RV32-NEXT: mv s2, a5
; RV32-NEXT: mv s3, a4
; RV32-NEXT: mv s4, a3
; RV32-NEXT: mv s5, a2
; RV32-NEXT: mv s6, a1
; RV32-NEXT: mv s7, a0
; RV32-NEXT: li a0, 1
; RV32-NEXT: sw a0, 0(sp)
; RV32-NEXT: mv a0, s7
; RV32-NEXT: call foo
; RV32-NEXT: li a0, 2
; RV32-NEXT: sw a0, 48(sp)
; RV32-NEXT: mv a0, s7
; RV32-NEXT: mv a1, s6
; RV32-NEXT: mv a2, s5
; RV32-NEXT: mv a3, s4
; RV32-NEXT: mv a4, s3
; RV32-NEXT: mv a5, s2
; RV32-NEXT: mv a6, s1
; RV32-NEXT: mv a7, s0
; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s5, 20(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s6, 16(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s7, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 48
; RV32-NEXT: tail foo
;
; RV64-LABEL: bar:
; RV64: # %bb.0: # %entry
; RV64-NEXT: addi sp, sp, -80
; RV64-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s1, 56(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s2, 48(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s3, 40(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s4, 32(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s5, 24(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s6, 16(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s7, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: mv s0, a7
; RV64-NEXT: mv s1, a6
; RV64-NEXT: mv s2, a5
; RV64-NEXT: mv s3, a4
; RV64-NEXT: mv s4, a3
; RV64-NEXT: mv s5, a2
; RV64-NEXT: mv s6, a1
; RV64-NEXT: mv s7, a0
; RV64-NEXT: li a0, 1
; RV64-NEXT: sd a0, 0(sp)
; RV64-NEXT: mv a0, s7
; RV64-NEXT: call foo
; RV64-NEXT: li a0, 2
; RV64-NEXT: sd a0, 80(sp)
; RV64-NEXT: mv a0, s7
; RV64-NEXT: mv a1, s6
; RV64-NEXT: mv a2, s5
; RV64-NEXT: mv a3, s4
; RV64-NEXT: mv a4, s3
; RV64-NEXT: mv a5, s2
; RV64-NEXT: mv a6, s1
; RV64-NEXT: mv a7, s0
; RV64-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s2, 48(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s3, 40(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s4, 32(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s5, 24(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s6, 16(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s7, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 80
; RV64-NEXT: tail foo
entry:
call void @foo(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 1)
musttail call void @foo(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 2)
ret void
}
| |
| declare void @sret_callee(ptr sret({ double, double }) align 8) |
| |
| ; Functions which return by sret can be tail-called because the incoming sret |
| ; pointer gets passed through to the callee. |
define void @sret_caller_tail(ptr sret({ double, double }) align 8 %result) {
; RV32-LABEL: sret_caller_tail:
; RV32: # %bb.0: # %entry
; RV32-NEXT: tail sret_callee
;
; RV64-LABEL: sret_caller_tail:
; RV64: # %bb.0: # %entry
; RV64-NEXT: tail sret_callee
entry:
; The incoming sret pointer is forwarded unchanged (already in a0), so the
; whole function collapses to a single `tail` jump on both targets.
tail call void @sret_callee(ptr sret({ double, double }) align 8 %result)
ret void
}
| |
define void @sret_caller_musttail(ptr sret({ double, double }) align 8 %result) {
; RV32-LABEL: sret_caller_musttail:
; RV32: # %bb.0: # %entry
; RV32-NEXT: tail sret_callee
;
; RV64-LABEL: sret_caller_musttail:
; RV64: # %bb.0: # %entry
; RV64-NEXT: tail sret_callee
entry:
; As @sret_caller_tail, but with musttail: the guaranteed tail call likewise
; lowers to a bare `tail` jump since the sret pointer is passed through.
musttail call void @sret_callee(ptr sret({ double, double }) align 8 %result)
ret void
}
| |
| %twenty_bytes = type { [5 x i32] } |
| declare void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4) |
| |
| ; Functions with byval parameters can be tail-called, because the value is |
| ; actually passed in registers in the same way for the caller and callee. |
define void @large_caller(%twenty_bytes* byval(%twenty_bytes) align 4 %a) {
; RV32-LABEL: large_caller:
; RV32: # %bb.0: # %entry
; RV32-NEXT: tail large_callee
;
; RV64-LABEL: large_caller:
; RV64: # %bb.0: # %entry
; RV64-NEXT: tail large_callee
entry:
; The byval argument is forwarded as-is, so no copy is needed and the
; musttail call becomes a single `tail` jump on both targets.
musttail call void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %a)
ret void
}
| |
; As above, but with some inline asm to test that argument registers
; clobbered by the asm are re-loaded before the call.
define void @large_caller_check_regs(%twenty_bytes* byval(%twenty_bytes) align 4 %a) nounwind {
; RV32-LABEL: large_caller_check_regs:
; RV32: # %bb.0: # %entry
; RV32-NEXT: #APP
; RV32-NEXT: #NO_APP
; RV32-NEXT: tail large_callee
;
; RV64-LABEL: large_caller_check_regs:
; RV64: # %bb.0: # %entry
; RV64-NEXT: #APP
; RV64-NEXT: #NO_APP
; RV64-NEXT: tail large_callee
entry:
; NOTE(review): the "~{r4}" clobber looks copied from an ARM version of this
; test -- r4 is not a RISC-V register name, and no reload is visible between
; #NO_APP and the tail jump; confirm the intended clobber (e.g. a0/s-reg).
tail call void asm sideeffect "", "~{r4}"()
musttail call void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %a)
ret void
}
| |
| ; The IR for this one looks dodgy, because it has an alloca passed to a |
| ; musttail function, but it is passed as a byval argument, so will be copied |
| ; into the stack space allocated by @large_caller_new_value's caller, so is |
| ; valid. |
define void @large_caller_new_value(%twenty_bytes* byval(%twenty_bytes) align 4 %a) nounwind {
; RV32-LABEL: large_caller_new_value:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -32
; RV32-NEXT: li a1, 1
; RV32-NEXT: li a2, 2
; RV32-NEXT: li a3, 3
; RV32-NEXT: li a4, 4
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: sw a1, 16(sp)
; RV32-NEXT: sw a2, 20(sp)
; RV32-NEXT: sw a3, 24(sp)
; RV32-NEXT: sw a4, 28(sp)
; RV32-NEXT: sw a4, 16(a0)
; RV32-NEXT: sw zero, 0(a0)
; RV32-NEXT: sw a1, 4(a0)
; RV32-NEXT: sw a2, 8(a0)
; RV32-NEXT: sw a3, 12(a0)
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: tail large_callee
;
; RV64-LABEL: large_caller_new_value:
; RV64: # %bb.0: # %entry
; RV64-NEXT: addi sp, sp, -32
; RV64-NEXT: li a1, 1
; RV64-NEXT: li a2, 2
; RV64-NEXT: li a3, 3
; RV64-NEXT: li a4, 4
; RV64-NEXT: sw zero, 12(sp)
; RV64-NEXT: sw a1, 16(sp)
; RV64-NEXT: sw a2, 20(sp)
; RV64-NEXT: sw a3, 24(sp)
; RV64-NEXT: sw a4, 28(sp)
; RV64-NEXT: sw a4, 16(a0)
; RV64-NEXT: sw zero, 0(a0)
; RV64-NEXT: sw a1, 4(a0)
; RV64-NEXT: sw a2, 8(a0)
; RV64-NEXT: sw a3, 12(a0)
; RV64-NEXT: addi sp, sp, 32
; RV64-NEXT: tail large_callee
entry:
; Build the 20-byte local %y = {0,1,2,3,4} in a stack temporary, then copy
; its five words into the memory addressed by the incoming byval pointer in
; a0 before the `tail` jump, so the callee sees the new value.
%y = alloca %twenty_bytes, align 4
store i32 0, ptr %y, align 4
%0 = getelementptr inbounds i8, ptr %y, i32 4
store i32 1, ptr %0, align 4
%1 = getelementptr inbounds i8, ptr %y, i32 8
store i32 2, ptr %1, align 4
%2 = getelementptr inbounds i8, ptr %y, i32 12
store i32 3, ptr %2, align 4
%3 = getelementptr inbounds i8, ptr %y, i32 16
store i32 4, ptr %3, align 4
musttail call void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %y)
ret void
}
| |
| declare void @two_byvals_callee(%twenty_bytes* byval(%twenty_bytes) align 4, %twenty_bytes* byval(%twenty_bytes) align 4) |
define void @swap_byvals(%twenty_bytes* byval(%twenty_bytes) align 4 %a, %twenty_bytes* byval(%twenty_bytes) align 4 %b) {
; RV32-LABEL: swap_byvals:
; RV32: # %bb.0: # %entry
; RV32-NEXT: mv a2, a0
; RV32-NEXT: mv a0, a1
; RV32-NEXT: mv a1, a2
; RV32-NEXT: tail two_byvals_callee
;
; RV64-LABEL: swap_byvals:
; RV64: # %bb.0: # %entry
; RV64-NEXT: mv a2, a0
; RV64-NEXT: mv a0, a1
; RV64-NEXT: mv a1, a2
; RV64-NEXT: tail two_byvals_callee
entry:
; Passing the two byval args in swapped order only requires exchanging the
; a0/a1 pointers (via a2 as scratch) -- no memory copy -- before the jump.
musttail call void @two_byvals_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %b, %twenty_bytes* byval(%twenty_bytes) align 4 %a)
ret void
}
| |
| ; A forwarded byval arg, but in a different argument register, so it needs to |
| ; be moved between registers first. This can't be musttail because of the |
| ; different signatures, but is still tail-called as an optimisation. |
| declare void @shift_byval_callee(%twenty_bytes* byval(%twenty_bytes) align 4) |
define void @shift_byval(i32 %a, %twenty_bytes* byval(%twenty_bytes) align 4 %b) {
; RV32-LABEL: shift_byval:
; RV32: # %bb.0: # %entry
; RV32-NEXT: mv a0, a1
; RV32-NEXT: tail shift_byval_callee
;
; RV64-LABEL: shift_byval:
; RV64: # %bb.0: # %entry
; RV64-NEXT: mv a0, a1
; RV64-NEXT: tail shift_byval_callee
entry:
; The byval pointer just shifts from the a1 slot to the a0 slot; %a is
; dropped and a `tail` jump is still emitted despite the signature mismatch.
tail call void @shift_byval_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %b)
ret void
}
| |
| ; A global object passed to a byval argument, so it must be copied, but doesn't |
| ; need a stack temporary. |
| @large_global = external global %twenty_bytes |
define void @large_caller_from_global(%twenty_bytes* byval(%twenty_bytes) align 4 %a) {
; RV32-LABEL: large_caller_from_global:
; RV32: # %bb.0: # %entry
; RV32-NEXT: lui a1, %hi(large_global)
; RV32-NEXT: addi a1, a1, %lo(large_global)
; RV32-NEXT: lw a2, 16(a1)
; RV32-NEXT: sw a2, 16(a0)
; RV32-NEXT: lw a2, 12(a1)
; RV32-NEXT: sw a2, 12(a0)
; RV32-NEXT: lw a2, 8(a1)
; RV32-NEXT: sw a2, 8(a0)
; RV32-NEXT: lw a2, 4(a1)
; RV32-NEXT: sw a2, 4(a0)
; RV32-NEXT: lw a1, 0(a1)
; RV32-NEXT: sw a1, 0(a0)
; RV32-NEXT: tail large_callee
;
; RV64-LABEL: large_caller_from_global:
; RV64: # %bb.0: # %entry
; RV64-NEXT: lui a1, %hi(large_global)
; RV64-NEXT: addi a1, a1, %lo(large_global)
; RV64-NEXT: lw a2, 16(a1)
; RV64-NEXT: sw a2, 16(a0)
; RV64-NEXT: lw a2, 12(a1)
; RV64-NEXT: sw a2, 12(a0)
; RV64-NEXT: lw a2, 8(a1)
; RV64-NEXT: sw a2, 8(a0)
; RV64-NEXT: lw a2, 4(a1)
; RV64-NEXT: sw a2, 4(a0)
; RV64-NEXT: lw a1, 0(a1)
; RV64-NEXT: sw a1, 0(a0)
; RV64-NEXT: tail large_callee
entry:
; Copies the five 32-bit words of @large_global into the memory addressed by
; the incoming byval pointer in a0 (no new stack temporary needed), then
; emits a `tail` jump on both targets.
musttail call void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4 @large_global)
ret void
}