blob: 4c27822525511cf18f43939f1e0bf007b119e000 [file] [log] [blame] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=riscv32 %s -o - | FileCheck %s --check-prefix=RV32
; RUN: llc -mtriple=riscv64 %s -o - | FileCheck %s --check-prefix=RV64
; Callee taking ten i32 arguments: on both RV32 and RV64 the first eight go
; in a0-a7 and the last two (8 and 9) are passed on the stack.
declare i32 @many_args_callee(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9)
; Caller and callee signatures match, so the outgoing stack arguments (8 and 9)
; are written straight into the caller's own incoming stack-argument area
; (stores to 0(sp)/4(sp) resp. 0(sp)/8(sp) with no frame setup) and the call
; is emitted as a true tail call ("tail" pseudo).
define i32 @many_args_tail(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9) {
; RV32-LABEL: many_args_tail:
; RV32: # %bb.0:
; RV32-NEXT: li a0, 8
; RV32-NEXT: li t0, 9
; RV32-NEXT: li a1, 1
; RV32-NEXT: li a2, 2
; RV32-NEXT: li a3, 3
; RV32-NEXT: li a4, 4
; RV32-NEXT: li a5, 5
; RV32-NEXT: li a6, 6
; RV32-NEXT: sw a0, 0(sp)
; RV32-NEXT: li a7, 7
; RV32-NEXT: sw t0, 4(sp)
; RV32-NEXT: li a0, 0
; RV32-NEXT: tail many_args_callee
;
; RV64-LABEL: many_args_tail:
; RV64: # %bb.0:
; RV64-NEXT: li a0, 8
; RV64-NEXT: li t0, 9
; RV64-NEXT: li a1, 1
; RV64-NEXT: li a2, 2
; RV64-NEXT: li a3, 3
; RV64-NEXT: li a4, 4
; RV64-NEXT: li a5, 5
; RV64-NEXT: li a6, 6
; RV64-NEXT: sd a0, 0(sp)
; RV64-NEXT: li a7, 7
; RV64-NEXT: sd t0, 8(sp)
; RV64-NEXT: li a0, 0
; RV64-NEXT: tail many_args_callee
%ret = tail call i32 @many_args_callee(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9)
ret i32 %ret
}
; Same call as @many_args_tail but using musttail, which *guarantees* the tail
; call rather than treating it as an optimisation. The generated code is
; identical on both targets.
define i32 @many_args_musttail(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9) {
; RV32-LABEL: many_args_musttail:
; RV32: # %bb.0:
; RV32-NEXT: li a0, 8
; RV32-NEXT: li t0, 9
; RV32-NEXT: li a1, 1
; RV32-NEXT: li a2, 2
; RV32-NEXT: li a3, 3
; RV32-NEXT: li a4, 4
; RV32-NEXT: li a5, 5
; RV32-NEXT: li a6, 6
; RV32-NEXT: sw a0, 0(sp)
; RV32-NEXT: li a7, 7
; RV32-NEXT: sw t0, 4(sp)
; RV32-NEXT: li a0, 0
; RV32-NEXT: tail many_args_callee
;
; RV64-LABEL: many_args_musttail:
; RV64: # %bb.0:
; RV64-NEXT: li a0, 8
; RV64-NEXT: li t0, 9
; RV64-NEXT: li a1, 1
; RV64-NEXT: li a2, 2
; RV64-NEXT: li a3, 3
; RV64-NEXT: li a4, 4
; RV64-NEXT: li a5, 5
; RV64-NEXT: li a6, 6
; RV64-NEXT: sd a0, 0(sp)
; RV64-NEXT: li a7, 7
; RV64-NEXT: sd t0, 8(sp)
; RV64-NEXT: li a0, 0
; RV64-NEXT: tail many_args_callee
%ret = musttail call i32 @many_args_callee(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9)
ret i32 %ret
}
; This function has more arguments than its tail-callee. This isn't valid for
; the musttail attribute, but can still be tail-called as a non-guaranteed
; optimisation, because the outgoing arguments to @many_args_callee fit in the
; stack space allocated by the caller of @more_args_tail.
; NOTE(review): the function below actually declares the same ten-i32
; signature as @many_args_callee — confirm the intended argument counts.
; Plain `tail call` (non-guaranteed): the outgoing stack arguments fit in this
; function's incoming stack-argument area, so codegen is identical to
; @many_args_tail and the tail call is taken on both targets.
define i32 @more_args_tail(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9) {
; RV32-LABEL: more_args_tail:
; RV32: # %bb.0:
; RV32-NEXT: li a0, 8
; RV32-NEXT: li t0, 9
; RV32-NEXT: li a1, 1
; RV32-NEXT: li a2, 2
; RV32-NEXT: li a3, 3
; RV32-NEXT: li a4, 4
; RV32-NEXT: li a5, 5
; RV32-NEXT: li a6, 6
; RV32-NEXT: sw a0, 0(sp)
; RV32-NEXT: li a7, 7
; RV32-NEXT: sw t0, 4(sp)
; RV32-NEXT: li a0, 0
; RV32-NEXT: tail many_args_callee
;
; RV64-LABEL: more_args_tail:
; RV64: # %bb.0:
; RV64-NEXT: li a0, 8
; RV64-NEXT: li t0, 9
; RV64-NEXT: li a1, 1
; RV64-NEXT: li a2, 2
; RV64-NEXT: li a3, 3
; RV64-NEXT: li a4, 4
; RV64-NEXT: li a5, 5
; RV64-NEXT: li a6, 6
; RV64-NEXT: sd a0, 0(sp)
; RV64-NEXT: li a7, 7
; RV64-NEXT: sd t0, 8(sp)
; RV64-NEXT: li a0, 0
; RV64-NEXT: tail many_args_callee
%ret = tail call i32 @many_args_callee(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9)
ret i32 %ret
}
; Again, this isn't valid for musttail, but can be tail-called in practice
; because the stack size is the same.
; Five i64 arguments. On RV32 these occupy eight registers plus stack, giving
; the caller a stack-argument area the callee's two stack arguments fit in, so
; the tail call is taken. On RV64 all five fit in registers, the caller has no
; incoming stack-argument area, and the tail call is NOT taken: a frame is set
; up and a normal "call" is emitted instead.
define i32 @different_args_tail_32bit(i64 %0, i64 %1, i64 %2, i64 %3, i64 %4) nounwind {
; RV32-LABEL: different_args_tail_32bit:
; RV32: # %bb.0:
; RV32-NEXT: li a0, 8
; RV32-NEXT: li t0, 9
; RV32-NEXT: li a1, 1
; RV32-NEXT: li a2, 2
; RV32-NEXT: li a3, 3
; RV32-NEXT: li a4, 4
; RV32-NEXT: li a5, 5
; RV32-NEXT: li a6, 6
; RV32-NEXT: sw a0, 0(sp)
; RV32-NEXT: li a7, 7
; RV32-NEXT: sw t0, 4(sp)
; RV32-NEXT: li a0, 0
; RV32-NEXT: tail many_args_callee
;
; RV64-LABEL: different_args_tail_32bit:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -32
; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64-NEXT: li a0, 9
; RV64-NEXT: li t0, 8
; RV64-NEXT: li a1, 1
; RV64-NEXT: li a2, 2
; RV64-NEXT: li a3, 3
; RV64-NEXT: li a4, 4
; RV64-NEXT: li a5, 5
; RV64-NEXT: li a6, 6
; RV64-NEXT: li a7, 7
; RV64-NEXT: sd t0, 0(sp)
; RV64-NEXT: sd a0, 8(sp)
; RV64-NEXT: li a0, 0
; RV64-NEXT: call many_args_callee
; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 32
; RV64-NEXT: ret
%ret = tail call i32 @many_args_callee(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9)
ret i32 %ret
}
; Mirror image of the previous test, using five i128 arguments: on RV64 the
; caller has a stack-argument area large enough for the callee, so the tail
; call is taken; on RV32 it is not, and a frame plus a normal "call" is
; emitted instead.
define i32 @different_args_tail_64bit(i128 %0, i128 %1, i128 %2, i128 %3, i128 %4) nounwind {
; RV32-LABEL: different_args_tail_64bit:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: li a0, 9
; RV32-NEXT: li t0, 8
; RV32-NEXT: li a1, 1
; RV32-NEXT: li a2, 2
; RV32-NEXT: li a3, 3
; RV32-NEXT: li a4, 4
; RV32-NEXT: li a5, 5
; RV32-NEXT: li a6, 6
; RV32-NEXT: li a7, 7
; RV32-NEXT: sw t0, 0(sp)
; RV32-NEXT: sw a0, 4(sp)
; RV32-NEXT: li a0, 0
; RV32-NEXT: call many_args_callee
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: different_args_tail_64bit:
; RV64: # %bb.0:
; RV64-NEXT: li a0, 8
; RV64-NEXT: li t0, 9
; RV64-NEXT: li a1, 1
; RV64-NEXT: li a2, 2
; RV64-NEXT: li a3, 3
; RV64-NEXT: li a4, 4
; RV64-NEXT: li a5, 5
; RV64-NEXT: li a6, 6
; RV64-NEXT: sd a0, 0(sp)
; RV64-NEXT: li a7, 7
; RV64-NEXT: sd t0, 8(sp)
; RV64-NEXT: li a0, 0
; RV64-NEXT: tail many_args_callee
%ret = tail call i32 @many_args_callee(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9)
ret i32 %ret
}
; Here, the caller requires less stack space for its arguments than the
; callee, so it would not be valid to do a tail-call.
; Five i32 arguments, all in registers: this function has no incoming
; stack-argument area for the callee's stack arguments to live in, so on both
; targets the tail call is rejected and a frame plus a normal "call" is
; emitted.
define i32 @fewer_args_tail(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4) nounwind {
; RV32-LABEL: fewer_args_tail:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: li a0, 9
; RV32-NEXT: li t0, 8
; RV32-NEXT: li a1, 1
; RV32-NEXT: li a2, 2
; RV32-NEXT: li a3, 3
; RV32-NEXT: li a4, 4
; RV32-NEXT: li a5, 5
; RV32-NEXT: li a6, 6
; RV32-NEXT: li a7, 7
; RV32-NEXT: sw t0, 0(sp)
; RV32-NEXT: sw a0, 4(sp)
; RV32-NEXT: li a0, 0
; RV32-NEXT: call many_args_callee
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: fewer_args_tail:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -32
; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64-NEXT: li a0, 9
; RV64-NEXT: li t0, 8
; RV64-NEXT: li a1, 1
; RV64-NEXT: li a2, 2
; RV64-NEXT: li a3, 3
; RV64-NEXT: li a4, 4
; RV64-NEXT: li a5, 5
; RV64-NEXT: li a6, 6
; RV64-NEXT: li a7, 7
; RV64-NEXT: sd t0, 0(sp)
; RV64-NEXT: sd a0, 8(sp)
; RV64-NEXT: li a0, 0
; RV64-NEXT: call many_args_callee
; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 32
; RV64-NEXT: ret
%ret = tail call i32 @many_args_callee(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9)
ret i32 %ret
}
; Callee taking nine i32 arguments: eight in registers, one on the stack.
declare void @foo(i32, i32, i32, i32, i32, i32, i32, i32, i32)
; Two calls to @foo: a normal call (ninth arg = 1) followed by a musttail
; call (ninth arg = 2). The eight incoming register arguments are preserved
; in s0-s7 across the first call. Note the musttail's stack argument is
; stored at 48(sp) / 80(sp) -- i.e. into this function's OWN incoming
; stack-argument slot just above its frame -- before the epilogue restores
; the saved registers and the "tail" jump is taken.
define void @bar(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8) nounwind {
; RV32-LABEL: bar:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -48
; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s5, 20(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s6, 16(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s7, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: mv s0, a7
; RV32-NEXT: mv s1, a6
; RV32-NEXT: mv s2, a5
; RV32-NEXT: mv s3, a4
; RV32-NEXT: mv s4, a3
; RV32-NEXT: mv s5, a2
; RV32-NEXT: mv s6, a1
; RV32-NEXT: mv s7, a0
; RV32-NEXT: li a0, 1
; RV32-NEXT: sw a0, 0(sp)
; RV32-NEXT: mv a0, s7
; RV32-NEXT: call foo
; RV32-NEXT: li a0, 2
; RV32-NEXT: sw a0, 48(sp)
; RV32-NEXT: mv a0, s7
; RV32-NEXT: mv a1, s6
; RV32-NEXT: mv a2, s5
; RV32-NEXT: mv a3, s4
; RV32-NEXT: mv a4, s3
; RV32-NEXT: mv a5, s2
; RV32-NEXT: mv a6, s1
; RV32-NEXT: mv a7, s0
; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s5, 20(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s6, 16(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s7, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 48
; RV32-NEXT: tail foo
;
; RV64-LABEL: bar:
; RV64: # %bb.0: # %entry
; RV64-NEXT: addi sp, sp, -80
; RV64-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s1, 56(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s2, 48(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s3, 40(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s4, 32(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s5, 24(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s6, 16(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s7, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: mv s0, a7
; RV64-NEXT: mv s1, a6
; RV64-NEXT: mv s2, a5
; RV64-NEXT: mv s3, a4
; RV64-NEXT: mv s4, a3
; RV64-NEXT: mv s5, a2
; RV64-NEXT: mv s6, a1
; RV64-NEXT: mv s7, a0
; RV64-NEXT: li a0, 1
; RV64-NEXT: sd a0, 0(sp)
; RV64-NEXT: mv a0, s7
; RV64-NEXT: call foo
; RV64-NEXT: li a0, 2
; RV64-NEXT: sd a0, 80(sp)
; RV64-NEXT: mv a0, s7
; RV64-NEXT: mv a1, s6
; RV64-NEXT: mv a2, s5
; RV64-NEXT: mv a3, s4
; RV64-NEXT: mv a4, s3
; RV64-NEXT: mv a5, s2
; RV64-NEXT: mv a6, s1
; RV64-NEXT: mv a7, s0
; RV64-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s1, 56(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s2, 48(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s3, 40(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s4, 32(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s5, 24(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s6, 16(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s7, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 80
; RV64-NEXT: tail foo
entry:
; Non-tail call: must return here, so registers are saved/restored around it.
call void @foo(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 1)
; Guaranteed tail call: forwarded args plus a new ninth stack argument.
musttail call void @foo(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 2)
ret void
}
; Callee returning indirectly through an sret pointer argument.
declare void @sret_callee(ptr sret({ double, double }) align 8)
; Functions which return by sret can be tail-called because the incoming sret
; pointer gets passed through to the callee.
define void @sret_caller_tail(ptr sret({ double, double }) align 8 %result) {
; RV32-LABEL: sret_caller_tail:
; RV32: # %bb.0: # %entry
; RV32-NEXT: tail sret_callee
;
; RV64-LABEL: sret_caller_tail:
; RV64: # %bb.0: # %entry
; RV64-NEXT: tail sret_callee
entry:
tail call void @sret_callee(ptr sret({ double, double }) align 8 %result)
ret void
}
; As above, but with musttail; the generated code is identical (bare "tail"
; jump, sret pointer forwarded untouched in a0).
define void @sret_caller_musttail(ptr sret({ double, double }) align 8 %result) {
; RV32-LABEL: sret_caller_musttail:
; RV32: # %bb.0: # %entry
; RV32-NEXT: tail sret_callee
;
; RV64-LABEL: sret_caller_musttail:
; RV64: # %bb.0: # %entry
; RV64-NEXT: tail sret_callee
entry:
musttail call void @sret_callee(ptr sret({ double, double }) align 8 %result)
ret void
}
; 20-byte aggregate used as a byval argument in the tests below.
%twenty_bytes = type { [5 x i32] }
declare void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4)
; Functions with byval parameters can be tail-called, because the value is
; actually passed in registers in the same way for the caller and callee.
; Here the incoming byval pointer in a0 is forwarded untouched, so the tail
; call needs no copy at all.
define void @large_caller(%twenty_bytes* byval(%twenty_bytes) align 4 %a) {
; RV32-LABEL: large_caller:
; RV32: # %bb.0: # %entry
; RV32-NEXT: tail large_callee
;
; RV64-LABEL: large_caller:
; RV64: # %bb.0: # %entry
; RV64-NEXT: tail large_callee
entry:
musttail call void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %a)
ret void
}
; As above, but with some inline asm intended to test that the argument
; register is re-loaded before the call. (NOTE(review): the clobber names
; "r4", an ARM register — verify it has any effect on RISC-V.)
; As @large_caller but with an empty inline-asm statement before the tail
; call. NOTE(review): the clobber list names "r4", an ARM register name — on
; RISC-V this appears not to clobber anything (only #APP/#NO_APP is emitted
; and the byval argument stays in a0); confirm whether a RISC-V register was
; intended here.
define void @large_caller_check_regs(%twenty_bytes* byval(%twenty_bytes) align 4 %a) nounwind {
; RV32-LABEL: large_caller_check_regs:
; RV32: # %bb.0: # %entry
; RV32-NEXT: #APP
; RV32-NEXT: #NO_APP
; RV32-NEXT: tail large_callee
;
; RV64-LABEL: large_caller_check_regs:
; RV64: # %bb.0: # %entry
; RV64-NEXT: #APP
; RV64-NEXT: #NO_APP
; RV64-NEXT: tail large_callee
entry:
tail call void asm sideeffect "", "~{r4}"()
musttail call void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %a)
ret void
}
; The IR for this one looks dodgy, because it has an alloca passed to a
; musttail function, but it is passed as a byval argument, so will be copied
; into the stack space allocated by @large_caller_new_value's caller, so is
; valid.
; The IR for this one looks dodgy, because it has an alloca passed to a
; musttail function, but it is passed as a byval argument, so will be copied
; into the stack space allocated by @large_caller_new_value's caller, so is
; valid. The generated code builds %y in a local frame (stores to
; 12(sp)..28(sp)), copies all 20 bytes into the incoming byval buffer pointed
; to by a0, tears the frame down, and tail-calls.
define void @large_caller_new_value(%twenty_bytes* byval(%twenty_bytes) align 4 %a) nounwind {
; RV32-LABEL: large_caller_new_value:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -32
; RV32-NEXT: li a1, 1
; RV32-NEXT: li a2, 2
; RV32-NEXT: li a3, 3
; RV32-NEXT: li a4, 4
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: sw a1, 16(sp)
; RV32-NEXT: sw a2, 20(sp)
; RV32-NEXT: sw a3, 24(sp)
; RV32-NEXT: sw a4, 28(sp)
; RV32-NEXT: sw a4, 16(a0)
; RV32-NEXT: sw zero, 0(a0)
; RV32-NEXT: sw a1, 4(a0)
; RV32-NEXT: sw a2, 8(a0)
; RV32-NEXT: sw a3, 12(a0)
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: tail large_callee
;
; RV64-LABEL: large_caller_new_value:
; RV64: # %bb.0: # %entry
; RV64-NEXT: addi sp, sp, -32
; RV64-NEXT: li a1, 1
; RV64-NEXT: li a2, 2
; RV64-NEXT: li a3, 3
; RV64-NEXT: li a4, 4
; RV64-NEXT: sw zero, 12(sp)
; RV64-NEXT: sw a1, 16(sp)
; RV64-NEXT: sw a2, 20(sp)
; RV64-NEXT: sw a3, 24(sp)
; RV64-NEXT: sw a4, 28(sp)
; RV64-NEXT: sw a4, 16(a0)
; RV64-NEXT: sw zero, 0(a0)
; RV64-NEXT: sw a1, 4(a0)
; RV64-NEXT: sw a2, 8(a0)
; RV64-NEXT: sw a3, 12(a0)
; RV64-NEXT: addi sp, sp, 32
; RV64-NEXT: tail large_callee
entry:
; Build a fresh %twenty_bytes value { 0, 1, 2, 3, 4 } on the local stack.
%y = alloca %twenty_bytes, align 4
store i32 0, ptr %y, align 4
%0 = getelementptr inbounds i8, ptr %y, i32 4
store i32 1, ptr %0, align 4
%1 = getelementptr inbounds i8, ptr %y, i32 8
store i32 2, ptr %1, align 4
%2 = getelementptr inbounds i8, ptr %y, i32 12
store i32 3, ptr %2, align 4
%3 = getelementptr inbounds i8, ptr %y, i32 16
store i32 4, ptr %3, align 4
musttail call void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %y)
ret void
}
; Callee taking two byval aggregates.
declare void @two_byvals_callee(%twenty_bytes* byval(%twenty_bytes) align 4, %twenty_bytes* byval(%twenty_bytes) align 4)
; Tail call passing the two incoming byval arguments in swapped order: only
; the pointers in a0/a1 are exchanged (via a2 as scratch); no bytes are
; copied.
define void @swap_byvals(%twenty_bytes* byval(%twenty_bytes) align 4 %a, %twenty_bytes* byval(%twenty_bytes) align 4 %b) {
; RV32-LABEL: swap_byvals:
; RV32: # %bb.0: # %entry
; RV32-NEXT: mv a2, a0
; RV32-NEXT: mv a0, a1
; RV32-NEXT: mv a1, a2
; RV32-NEXT: tail two_byvals_callee
;
; RV64-LABEL: swap_byvals:
; RV64: # %bb.0: # %entry
; RV64-NEXT: mv a2, a0
; RV64-NEXT: mv a0, a1
; RV64-NEXT: mv a1, a2
; RV64-NEXT: tail two_byvals_callee
entry:
musttail call void @two_byvals_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %b, %twenty_bytes* byval(%twenty_bytes) align 4 %a)
ret void
}
; A forwarded byval arg, but in a different argument register, so it needs to
; be moved between registers first. This can't be musttail because of the
; different signatures, but is still tail-called as an optimisation.
; A forwarded byval arg, but in a different argument register, so it needs to
; be moved between registers first. This can't be musttail because of the
; different signatures, but is still tail-called as an optimisation.
declare void @shift_byval_callee(%twenty_bytes* byval(%twenty_bytes) align 4)
define void @shift_byval(i32 %a, %twenty_bytes* byval(%twenty_bytes) align 4 %b) {
; RV32-LABEL: shift_byval:
; RV32: # %bb.0: # %entry
; RV32-NEXT: mv a0, a1
; RV32-NEXT: tail shift_byval_callee
;
; RV64-LABEL: shift_byval:
; RV64: # %bb.0: # %entry
; RV64-NEXT: mv a0, a1
; RV64-NEXT: tail shift_byval_callee
entry:
tail call void @shift_byval_callee(%twenty_bytes* byval(%twenty_bytes) align 4 %b)
ret void
}
; A global object passed to a byval argument, so it must be copied, but doesn't
; need a stack temporary.
@large_global = external global %twenty_bytes
; A global object passed to a byval argument, so it must be copied, but
; doesn't need a stack temporary: the 20 bytes are copied from the global
; (address materialised via %hi/%lo) into this function's incoming byval
; buffer pointed to by a0, then the tail call is taken.
define void @large_caller_from_global(%twenty_bytes* byval(%twenty_bytes) align 4 %a) {
; RV32-LABEL: large_caller_from_global:
; RV32: # %bb.0: # %entry
; RV32-NEXT: lui a1, %hi(large_global)
; RV32-NEXT: addi a1, a1, %lo(large_global)
; RV32-NEXT: lw a2, 16(a1)
; RV32-NEXT: sw a2, 16(a0)
; RV32-NEXT: lw a2, 12(a1)
; RV32-NEXT: sw a2, 12(a0)
; RV32-NEXT: lw a2, 8(a1)
; RV32-NEXT: sw a2, 8(a0)
; RV32-NEXT: lw a2, 4(a1)
; RV32-NEXT: sw a2, 4(a0)
; RV32-NEXT: lw a1, 0(a1)
; RV32-NEXT: sw a1, 0(a0)
; RV32-NEXT: tail large_callee
;
; RV64-LABEL: large_caller_from_global:
; RV64: # %bb.0: # %entry
; RV64-NEXT: lui a1, %hi(large_global)
; RV64-NEXT: addi a1, a1, %lo(large_global)
; RV64-NEXT: lw a2, 16(a1)
; RV64-NEXT: sw a2, 16(a0)
; RV64-NEXT: lw a2, 12(a1)
; RV64-NEXT: sw a2, 12(a0)
; RV64-NEXT: lw a2, 8(a1)
; RV64-NEXT: sw a2, 8(a0)
; RV64-NEXT: lw a2, 4(a1)
; RV64-NEXT: sw a2, 4(a0)
; RV64-NEXT: lw a1, 0(a1)
; RV64-NEXT: sw a1, 0(a0)
; RV64-NEXT: tail large_callee
entry:
musttail call void @large_callee(%twenty_bytes* byval(%twenty_bytes) align 4 @large_global)
ret void
}