; blob: d98a0ab3594faeec5c14baa09af1323564054fb6 [file] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=riscv32 %s -o - | FileCheck %s --check-prefix=RV32
; RUN: llc -mtriple=riscv64 %s -o - | FileCheck %s --check-prefix=RV64
; Test that musttail with indirect args (fp128 on RV32) forwards the incoming
; pointer instead of creating a new stack temporary. Without this fix, the
; pointer would dangle after the tail call deallocates the caller's frame.
; Callee prototype shared by the single-fp128 tests in this file.
declare i32 @callee_musttail_indirect(fp128 %a)
; fp128 is indirect on RV32 (too large for registers), direct on RV64.
; On RV32, musttail must forward the incoming indirect pointer (a0) directly,
; so both targets are expected to emit nothing but the tail call.
define i32 @caller_musttail_indirect(fp128 %a) nounwind {
; RV32-LABEL: caller_musttail_indirect:
; RV32: # %bb.0:
; RV32-NEXT: tail callee_musttail_indirect
;
; RV64-LABEL: caller_musttail_indirect:
; RV64: # %bb.0:
; RV64-NEXT: tail callee_musttail_indirect
%call = musttail call i32 @callee_musttail_indirect(fp128 %a)
ret i32 %call
}
; Verify that a non-musttail tail call with indirect args does NOT become a
; tail call on RV32 (this is the PR #184972 fix - indirect args are unsafe for
; regular tail calls). The RV32 output materializes the fp128 constant in a
; stack temporary at 0..12(sp) and passes its address in a0, so the frame has
; to stay alive across an ordinary call. On RV64 the constant is passed
; directly in a0/a1 and the tail call is still emitted.
; NOTE(review): the callee returns i32 but this caller returns void and drops
; %call -- presumably deliberate, confirm.
define void @caller_no_musttail_indirect() nounwind {
; RV32-LABEL: caller_no_musttail_indirect:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -32
; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32-NEXT: lui a1, 262128
; RV32-NEXT: mv a0, sp
; RV32-NEXT: sw zero, 0(sp)
; RV32-NEXT: sw zero, 4(sp)
; RV32-NEXT: sw zero, 8(sp)
; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: call callee_musttail_indirect
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-LABEL: caller_no_musttail_indirect:
; RV64: # %bb.0:
; RV64-NEXT: lui a1, 16383
; RV64-NEXT: slli a1, a1, 36
; RV64-NEXT: li a0, 0
; RV64-NEXT: tail callee_musttail_indirect
%call = tail call i32 @callee_musttail_indirect(fp128 0xL00000000000000003FFF000000000000)
ret void
}
; Verify that a non-musttail tail call forwarding an indirect arg from the
; caller's own parameters also does NOT tail call on RV32. The RV32 output
; copies the 16 bytes behind the incoming pointer into a temporary in this
; function's frame and passes the temporary's address in a0; that temporary
; would be deallocated by a tail call, so an ordinary call is expected.
; On RV64 the value is passed directly and the tail call is emitted.
define i32 @caller_no_musttail_forward_indirect(fp128 %a) nounwind {
; RV32-LABEL: caller_no_musttail_forward_indirect:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -32
; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
; RV32-NEXT: lw a1, 0(a0)
; RV32-NEXT: lw a2, 4(a0)
; RV32-NEXT: lw a3, 8(a0)
; RV32-NEXT: lw a4, 12(a0)
; RV32-NEXT: mv a0, sp
; RV32-NEXT: sw a1, 0(sp)
; RV32-NEXT: sw a2, 4(sp)
; RV32-NEXT: sw a3, 8(sp)
; RV32-NEXT: sw a4, 12(sp)
; RV32-NEXT: call callee_musttail_indirect
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-LABEL: caller_no_musttail_forward_indirect:
; RV64: # %bb.0:
; RV64-NEXT: tail callee_musttail_indirect
%call = tail call i32 @callee_musttail_indirect(fp128 %a)
ret i32 %call
}
; Test musttail with two indirect fp128 args on RV32. Both pointers must be
; forwarded. Exercises the DenseMap with two distinct OrigArgIndex values.
; The arguments are passed in their original order, so no register moves are
; expected on either target.
declare i32 @callee_musttail_two_indirect(fp128 %a, fp128 %b)
define i32 @caller_musttail_two_indirect(fp128 %a, fp128 %b) nounwind {
; RV32-LABEL: caller_musttail_two_indirect:
; RV32: # %bb.0:
; RV32-NEXT: tail callee_musttail_two_indirect
;
; RV64-LABEL: caller_musttail_two_indirect:
; RV64: # %bb.0:
; RV64-NEXT: tail callee_musttail_two_indirect
%call = musttail call i32 @callee_musttail_two_indirect(fp128 %a, fp128 %b)
ret i32 %call
}
; Test musttail with mixed direct (i32 in register) + indirect (fp128) args.
; Confirms OrigArgIndex lookup works when not all args are indirect.
; Both arguments stay in place, so only the tail call is expected.
declare i32 @callee_musttail_mixed(i32 %x, fp128 %a)
define i32 @caller_musttail_mixed(i32 %x, fp128 %a) nounwind {
; RV32-LABEL: caller_musttail_mixed:
; RV32: # %bb.0:
; RV32-NEXT: tail callee_musttail_mixed
;
; RV64-LABEL: caller_musttail_mixed:
; RV64: # %bb.0:
; RV64-NEXT: tail callee_musttail_mixed
%call = musttail call i32 @callee_musttail_mixed(i32 %x, fp128 %a)
ret i32 %call
}
; Test musttail with i128 on RV32 (indirect, split into 4 x i32 parts).
; Forwarding %a unchanged should produce a bare tail call on both targets.
declare i64 @callee_musttail_i128(i128 %a)
define i64 @caller_musttail_i128(i128 %a) nounwind {
; RV32-LABEL: caller_musttail_i128:
; RV32: # %bb.0:
; RV32-NEXT: tail callee_musttail_i128
;
; RV64-LABEL: caller_musttail_i128:
; RV64: # %bb.0:
; RV64-NEXT: tail callee_musttail_i128
%call = musttail call i64 @callee_musttail_i128(i128 %a)
ret i64 %call
}
; Test musttail with i128 (indirect+split on RV32) plus a trailing i32 direct arg.
; Exercises the split-skip logic followed by a normal register arg.
; Both arguments keep their positions, so only the tail call is expected.
declare i64 @callee_musttail_i128_and_i32(i128 %a, i32 %x)
define i64 @caller_musttail_i128_and_i32(i128 %a, i32 %x) nounwind {
; RV32-LABEL: caller_musttail_i128_and_i32:
; RV32: # %bb.0:
; RV32-NEXT: tail callee_musttail_i128_and_i32
;
; RV64-LABEL: caller_musttail_i128_and_i32:
; RV64: # %bb.0:
; RV64-NEXT: tail callee_musttail_i128_and_i32
%call = musttail call i64 @callee_musttail_i128_and_i32(i128 %a, i32 %x)
ret i64 %call
}
; Test musttail with two indirect args SWAPPED. The pointers must be exchanged
; before the tail call. This exercises the OrigArgIndex -> Argument::getArgNo()
; resolution in LowerCall.
; Expected RV32 output swaps the two pointers a0/a1 through a2; expected RV64
; output swaps the register pairs a0/a1 and a2/a3 through a4/a5.
define i32 @caller_musttail_two_indirect_swapped(fp128 %a, fp128 %b) nounwind {
; RV32-LABEL: caller_musttail_two_indirect_swapped:
; RV32: # %bb.0:
; RV32-NEXT: mv a2, a0
; RV32-NEXT: mv a0, a1
; RV32-NEXT: mv a1, a2
; RV32-NEXT: tail callee_musttail_two_indirect
;
; RV64-LABEL: caller_musttail_two_indirect_swapped:
; RV64: # %bb.0:
; RV64-NEXT: mv a4, a1
; RV64-NEXT: mv a5, a0
; RV64-NEXT: mv a0, a2
; RV64-NEXT: mv a1, a3
; RV64-NEXT: mv a2, a5
; RV64-NEXT: mv a3, a4
; RV64-NEXT: tail callee_musttail_two_indirect
%call = musttail call i32 @callee_musttail_two_indirect(fp128 %b, fp128 %a)
ret i32 %call
}
; Test musttail with three indirect args rotated: call @f(%c, %a, %b).
; All three pointers need to be shuffled. The expected RV32 output rotates
; a0/a1/a2 using a3 as scratch; the expected RV64 output rotates the three
; register pairs a0-a5 using a6/a7 as scratch.
declare i32 @callee_musttail_three_indirect(fp128 %a, fp128 %b, fp128 %c)
define i32 @caller_musttail_three_indirect_rotated(fp128 %a, fp128 %b, fp128 %c) nounwind {
; RV32-LABEL: caller_musttail_three_indirect_rotated:
; RV32: # %bb.0:
; RV32-NEXT: mv a3, a1
; RV32-NEXT: mv a1, a0
; RV32-NEXT: mv a0, a2
; RV32-NEXT: mv a2, a3
; RV32-NEXT: tail callee_musttail_three_indirect
;
; RV64-LABEL: caller_musttail_three_indirect_rotated:
; RV64: # %bb.0:
; RV64-NEXT: mv a6, a3
; RV64-NEXT: mv a7, a2
; RV64-NEXT: mv a3, a1
; RV64-NEXT: mv a2, a0
; RV64-NEXT: mv a0, a4
; RV64-NEXT: mv a1, a5
; RV64-NEXT: mv a4, a7
; RV64-NEXT: mv a5, a6
; RV64-NEXT: tail callee_musttail_three_indirect
%call = musttail call i32 @callee_musttail_three_indirect(fp128 %c, fp128 %a, fp128 %b)
ret i32 %call
}
; Test musttail with mixed direct + indirect args where the indirect args
; are swapped but the direct arg stays in place. In the expected output a0
; (%x) is never touched; only the registers carrying %a and %b are exchanged.
declare i32 @callee_musttail_mixed_two_indirect(i32 %x, fp128 %a, fp128 %b)
define i32 @caller_musttail_mixed_swap_indirect(i32 %x, fp128 %a, fp128 %b) nounwind {
; RV32-LABEL: caller_musttail_mixed_swap_indirect:
; RV32: # %bb.0:
; RV32-NEXT: mv a3, a1
; RV32-NEXT: mv a1, a2
; RV32-NEXT: mv a2, a3
; RV32-NEXT: tail callee_musttail_mixed_two_indirect
;
; RV64-LABEL: caller_musttail_mixed_swap_indirect:
; RV64: # %bb.0:
; RV64-NEXT: mv a5, a2
; RV64-NEXT: mv a6, a1
; RV64-NEXT: mv a1, a3
; RV64-NEXT: mv a2, a4
; RV64-NEXT: mv a3, a6
; RV64-NEXT: mv a4, a5
; RV64-NEXT: tail callee_musttail_mixed_two_indirect
%call = musttail call i32 @callee_musttail_mixed_two_indirect(i32 %x, fp128 %b, fp128 %a)
ret i32 %call
}
; Test musttail with swapped i128 on RV32 (split indirect args).
; The expected output has the same shape as the swapped fp128 test: RV32
; exchanges the two pointers a0/a1, RV64 exchanges the pairs a0/a1 and a2/a3.
declare i64 @callee_musttail_two_i128(i128 %a, i128 %b)
define i64 @caller_musttail_two_i128_swapped(i128 %a, i128 %b) nounwind {
; RV32-LABEL: caller_musttail_two_i128_swapped:
; RV32: # %bb.0:
; RV32-NEXT: mv a2, a0
; RV32-NEXT: mv a0, a1
; RV32-NEXT: mv a1, a2
; RV32-NEXT: tail callee_musttail_two_i128
;
; RV64-LABEL: caller_musttail_two_i128_swapped:
; RV64: # %bb.0:
; RV64-NEXT: mv a4, a1
; RV64-NEXT: mv a5, a0
; RV64-NEXT: mv a0, a2
; RV64-NEXT: mv a1, a3
; RV64-NEXT: mv a2, a5
; RV64-NEXT: mv a3, a4
; RV64-NEXT: tail callee_musttail_two_i128
%call = musttail call i64 @callee_musttail_two_i128(i128 %b, i128 %a)
ret i64 %call
}
; Test musttail passing the same indirect arg to both positions.
; NOTE(review): on RV32 the expected output makes a1 an alias of a0, so the
; callee receives two separate fp128 parameters backed by the same 16 bytes.
; Elsewhere in this file (caller_musttail_computed_and_forwarded) the RV32
; lowering stores a computed value through one incoming pointer while
; forwarding the other; a callee doing that here would silently overwrite its
; second argument. Presumably the psABI lets a callee modify by-reference
; arguments, in which case aliasing them is unsafe -- TODO confirm and decide
; whether the duplicate needs its own copy.
define i32 @caller_musttail_two_indirect_dup(fp128 %a, fp128 %b) nounwind {
; RV32-LABEL: caller_musttail_two_indirect_dup:
; RV32: # %bb.0:
; RV32-NEXT: mv a1, a0
; RV32-NEXT: tail callee_musttail_two_indirect
;
; RV64-LABEL: caller_musttail_two_indirect_dup:
; RV64: # %bb.0:
; RV64-NEXT: mv a2, a0
; RV64-NEXT: mv a3, a1
; RV64-NEXT: tail callee_musttail_two_indirect
%call = musttail call i32 @callee_musttail_two_indirect(fp128 %a, fp128 %a)
ret i32 %call
}
; Test musttail with enough indirect args to spill to the stack (9 fp128 on
; RV32 uses a0-a7 for the first 8 pointers, 9th goes on the stack).
; The RV32 output reloads the ninth pointer from 0(sp) and stores it straight
; back to the same slot before the tail call.
; NOTE(review): the RV64 expectations below look like a miscompile captured by
; the autogenerated checks rather than intended behaviour. sp is lowered by 32
; and s0/s1/s2 are spilled at 24/16/8(sp); the outgoing stack arguments are
; then stored at 0..72(sp) -- offsets that only make sense relative to the
; incoming sp -- before sp is restored. Those stores overwrite the spill slots,
; so the following reloads of s0/s1/s2 read argument data, and the stack
; arguments appear to land 32 bytes below where the callee will look. The RV32
; output has no frame, so the problem is not visible there. TODO confirm
; against llc and fix the lowering before committing these checks.
declare void @callee_musttail_nine_indirect(fp128, fp128, fp128, fp128, fp128, fp128, fp128, fp128, fp128)
define void @caller_musttail_nine_indirect(fp128 %a, fp128 %b, fp128 %c, fp128 %d, fp128 %e, fp128 %f, fp128 %g, fp128 %h, fp128 %i) nounwind {
; RV32-LABEL: caller_musttail_nine_indirect:
; RV32: # %bb.0:
; RV32-NEXT: lw t0, 0(sp)
; RV32-NEXT: sw t0, 0(sp)
; RV32-NEXT: tail callee_musttail_nine_indirect
;
; RV64-LABEL: caller_musttail_nine_indirect:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -32
; RV64-NEXT: sd s0, 24(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s1, 16(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s2, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: ld t0, 32(sp)
; RV64-NEXT: ld t1, 40(sp)
; RV64-NEXT: ld t2, 96(sp)
; RV64-NEXT: ld t3, 104(sp)
; RV64-NEXT: ld t4, 64(sp)
; RV64-NEXT: ld t5, 72(sp)
; RV64-NEXT: ld t6, 80(sp)
; RV64-NEXT: ld s0, 88(sp)
; RV64-NEXT: ld s1, 48(sp)
; RV64-NEXT: ld s2, 56(sp)
; RV64-NEXT: sd t2, 64(sp)
; RV64-NEXT: sd t3, 72(sp)
; RV64-NEXT: sd t4, 32(sp)
; RV64-NEXT: sd t5, 40(sp)
; RV64-NEXT: sd t6, 48(sp)
; RV64-NEXT: sd s0, 56(sp)
; RV64-NEXT: sd t0, 0(sp)
; RV64-NEXT: sd t1, 8(sp)
; RV64-NEXT: sd s1, 16(sp)
; RV64-NEXT: sd s2, 24(sp)
; RV64-NEXT: ld s0, 24(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s1, 16(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s2, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 32
; RV64-NEXT: tail callee_musttail_nine_indirect
musttail call void @callee_musttail_nine_indirect(fp128 %a, fp128 %b, fp128 %c, fp128 %d, fp128 %e, fp128 %f, fp128 %g, fp128 %h, fp128 %i)
ret void
}
; Test musttail swapping the first (register) and last (stack-spilled) args.
; The RV32 output loads the ninth pointer from 0(sp) into a0 and writes the
; old a0 into that stack slot.
; NOTE(review): the RV64 expectations below show the same suspicious pattern
; as the unswapped nine-argument test: sp is lowered by 32 with s0/s1/s2
; spilled at 24/16/8(sp), yet the outgoing stack arguments are stored at
; 0..72(sp) before sp is restored. Those stores overwrite the spill slots that
; are reloaded immediately afterwards, and the stack arguments appear to land
; 32 bytes below where the callee will look. TODO confirm against llc and fix
; the lowering before committing these checks.
define void @caller_musttail_nine_indirect_swap_first_last(fp128 %a, fp128 %b, fp128 %c, fp128 %d, fp128 %e, fp128 %f, fp128 %g, fp128 %h, fp128 %i) nounwind {
; RV32-LABEL: caller_musttail_nine_indirect_swap_first_last:
; RV32: # %bb.0:
; RV32-NEXT: lw t0, 0(sp)
; RV32-NEXT: mv t1, a0
; RV32-NEXT: mv a0, t0
; RV32-NEXT: sw t1, 0(sp)
; RV32-NEXT: tail callee_musttail_nine_indirect
;
; RV64-LABEL: caller_musttail_nine_indirect_swap_first_last:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -32
; RV64-NEXT: sd s0, 24(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s1, 16(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s2, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: ld t0, 96(sp)
; RV64-NEXT: ld t1, 104(sp)
; RV64-NEXT: ld t2, 32(sp)
; RV64-NEXT: ld t3, 40(sp)
; RV64-NEXT: ld t4, 64(sp)
; RV64-NEXT: ld t5, 72(sp)
; RV64-NEXT: ld t6, 80(sp)
; RV64-NEXT: ld s0, 88(sp)
; RV64-NEXT: ld s1, 48(sp)
; RV64-NEXT: ld s2, 56(sp)
; RV64-NEXT: sd a0, 64(sp)
; RV64-NEXT: sd a1, 72(sp)
; RV64-NEXT: sd t4, 32(sp)
; RV64-NEXT: sd t5, 40(sp)
; RV64-NEXT: sd t6, 48(sp)
; RV64-NEXT: sd s0, 56(sp)
; RV64-NEXT: sd t2, 0(sp)
; RV64-NEXT: sd t3, 8(sp)
; RV64-NEXT: mv a0, t0
; RV64-NEXT: mv a1, t1
; RV64-NEXT: sd s1, 16(sp)
; RV64-NEXT: sd s2, 24(sp)
; RV64-NEXT: ld s0, 24(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s1, 16(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s2, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 32
; RV64-NEXT: tail callee_musttail_nine_indirect
musttail call void @callee_musttail_nine_indirect(fp128 %i, fp128 %b, fp128 %c, fp128 %d, fp128 %e, fp128 %f, fp128 %g, fp128 %h, fp128 %a)
ret void
}
; Test musttail where the indirect arg is a computed value, not a forwarded
; formal parameter. The computed value must be stored into the incoming
; indirect pointer before tail calling.
; In the RV32 output the incoming pointer is kept in s0 across the __addtf3
; libcall, the 16-byte result is copied from the stack temporary into the
; memory s0 points at, and s0 is passed on in a0. On RV64 the libcall result
; is already in a0/a1, so it is handed to the callee without further moves.
define i32 @caller_musttail_computed(fp128 %a) nounwind {
; RV32-LABEL: caller_musttail_computed:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -64
; RV32-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
; RV32-NEXT: mv s0, a0
; RV32-NEXT: lw a0, 0(a0)
; RV32-NEXT: lw a3, 4(s0)
; RV32-NEXT: lw a4, 8(s0)
; RV32-NEXT: lw a5, 12(s0)
; RV32-NEXT: sw a0, 8(sp)
; RV32-NEXT: sw a0, 24(sp)
; RV32-NEXT: sw a3, 12(sp)
; RV32-NEXT: sw a4, 16(sp)
; RV32-NEXT: sw a5, 20(sp)
; RV32-NEXT: addi a0, sp, 40
; RV32-NEXT: addi a1, sp, 24
; RV32-NEXT: addi a2, sp, 8
; RV32-NEXT: sw a3, 28(sp)
; RV32-NEXT: sw a4, 32(sp)
; RV32-NEXT: sw a5, 36(sp)
; RV32-NEXT: call __addtf3
; RV32-NEXT: lw a0, 40(sp)
; RV32-NEXT: lw a1, 44(sp)
; RV32-NEXT: lw a2, 48(sp)
; RV32-NEXT: lw a3, 52(sp)
; RV32-NEXT: sw a0, 0(s0)
; RV32-NEXT: sw a1, 4(s0)
; RV32-NEXT: mv a0, s0
; RV32-NEXT: sw a2, 8(s0)
; RV32-NEXT: sw a3, 12(s0)
; RV32-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 64
; RV32-NEXT: tail callee_musttail_indirect
;
; RV64-LABEL: caller_musttail_computed:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: mv a2, a0
; RV64-NEXT: mv a3, a1
; RV64-NEXT: call __addtf3
; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: tail callee_musttail_indirect
%sum = fadd fp128 %a, %a
%r = musttail call i32 @callee_musttail_indirect(fp128 %sum)
ret i32 %r
}
; Test musttail with a computed i128 on RV32 (split indirect). The add result
; must be stored back into the incoming pointer.
; The RV32 output loads the four words through a0, performs the carry-
; propagating increment in registers, and writes the four words back through
; the same a0 before the tail call; no stack frame is created. On RV64 the
; value lives in a0/a1 and is incremented in place.
define i64 @caller_musttail_computed_i128(i128 %a) nounwind {
; RV32-LABEL: caller_musttail_computed_i128:
; RV32: # %bb.0:
; RV32-NEXT: lw a1, 0(a0)
; RV32-NEXT: lw a2, 4(a0)
; RV32-NEXT: lw a3, 8(a0)
; RV32-NEXT: lw a4, 12(a0)
; RV32-NEXT: addi a1, a1, 1
; RV32-NEXT: seqz a5, a1
; RV32-NEXT: add a2, a2, a5
; RV32-NEXT: or a5, a1, a2
; RV32-NEXT: seqz a5, a5
; RV32-NEXT: add a5, a3, a5
; RV32-NEXT: sltu a3, a5, a3
; RV32-NEXT: add a3, a4, a3
; RV32-NEXT: sw a1, 0(a0)
; RV32-NEXT: sw a2, 4(a0)
; RV32-NEXT: sw a5, 8(a0)
; RV32-NEXT: sw a3, 12(a0)
; RV32-NEXT: tail callee_musttail_i128
;
; RV64-LABEL: caller_musttail_computed_i128:
; RV64: # %bb.0:
; RV64-NEXT: addi a0, a0, 1
; RV64-NEXT: seqz a2, a0
; RV64-NEXT: add a1, a1, a2
; RV64-NEXT: tail callee_musttail_i128
%sum = add i128 %a, 1
%r = musttail call i64 @callee_musttail_i128(i128 %sum)
ret i64 %r
}
; Test musttail with one computed and one forwarded indirect arg.
; Position 0 gets the fadd result (stored into %a's incoming pointer),
; position 1 gets %b's incoming pointer forwarded directly.
; In the RV32 output s1 holds %a's incoming pointer and s0 holds %b's; the
; __addtf3 result is written through s1, then a0 = s1 and a1 = s0.
define i32 @caller_musttail_computed_and_forwarded(fp128 %a, fp128 %b) nounwind {
; RV32-LABEL: caller_musttail_computed_and_forwarded:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -64
; RV32-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s1, 52(sp) # 4-byte Folded Spill
; RV32-NEXT: mv s0, a1
; RV32-NEXT: mv s1, a0
; RV32-NEXT: lw a3, 0(a1)
; RV32-NEXT: lw a4, 4(a1)
; RV32-NEXT: lw a5, 8(a1)
; RV32-NEXT: lw a6, 12(a1)
; RV32-NEXT: lw a0, 0(a0)
; RV32-NEXT: lw a1, 4(s1)
; RV32-NEXT: lw a2, 8(s1)
; RV32-NEXT: lw a7, 12(s1)
; RV32-NEXT: sw a0, 16(sp)
; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: sw a2, 24(sp)
; RV32-NEXT: sw a7, 28(sp)
; RV32-NEXT: addi a0, sp, 32
; RV32-NEXT: addi a1, sp, 16
; RV32-NEXT: mv a2, sp
; RV32-NEXT: sw a3, 0(sp)
; RV32-NEXT: sw a4, 4(sp)
; RV32-NEXT: sw a5, 8(sp)
; RV32-NEXT: sw a6, 12(sp)
; RV32-NEXT: call __addtf3
; RV32-NEXT: lw a0, 32(sp)
; RV32-NEXT: lw a1, 36(sp)
; RV32-NEXT: lw a2, 40(sp)
; RV32-NEXT: lw a3, 44(sp)
; RV32-NEXT: sw a0, 0(s1)
; RV32-NEXT: sw a1, 4(s1)
; RV32-NEXT: mv a0, s1
; RV32-NEXT: mv a1, s0
; RV32-NEXT: sw a2, 8(s1)
; RV32-NEXT: sw a3, 12(s1)
; RV32-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 52(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 64
; RV32-NEXT: tail callee_musttail_two_indirect
;
; RV64-LABEL: caller_musttail_computed_and_forwarded:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -32
; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: mv s0, a3
; RV64-NEXT: mv s1, a2
; RV64-NEXT: call __addtf3
; RV64-NEXT: mv a2, s1
; RV64-NEXT: mv a3, s0
; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 32
; RV64-NEXT: tail callee_musttail_two_indirect
%sum = fadd fp128 %a, %b
%r = musttail call i32 @callee_musttail_two_indirect(fp128 %sum, fp128 %b)
ret i32 %r
}
; Test musttail with one forwarded and one computed indirect arg (reversed).
; Position 0 forwards %a, position 1 gets the computed value.
; In the RV32 output s1 holds %a's incoming pointer and s0 holds %b's; the
; __addtf3 result is written through s0 (the position-1 slot), then a0 = s1
; and a1 = s0.
define i32 @caller_musttail_forwarded_and_computed(fp128 %a, fp128 %b) nounwind {
; RV32-LABEL: caller_musttail_forwarded_and_computed:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -64
; RV32-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s1, 52(sp) # 4-byte Folded Spill
; RV32-NEXT: mv s0, a1
; RV32-NEXT: mv s1, a0
; RV32-NEXT: lw a3, 0(a1)
; RV32-NEXT: lw a4, 4(a1)
; RV32-NEXT: lw a5, 8(a1)
; RV32-NEXT: lw a6, 12(a1)
; RV32-NEXT: lw a0, 0(a0)
; RV32-NEXT: lw a1, 4(s1)
; RV32-NEXT: lw a2, 8(s1)
; RV32-NEXT: lw a7, 12(s1)
; RV32-NEXT: sw a0, 16(sp)
; RV32-NEXT: sw a1, 20(sp)
; RV32-NEXT: sw a2, 24(sp)
; RV32-NEXT: sw a7, 28(sp)
; RV32-NEXT: addi a0, sp, 32
; RV32-NEXT: addi a1, sp, 16
; RV32-NEXT: mv a2, sp
; RV32-NEXT: sw a3, 0(sp)
; RV32-NEXT: sw a4, 4(sp)
; RV32-NEXT: sw a5, 8(sp)
; RV32-NEXT: sw a6, 12(sp)
; RV32-NEXT: call __addtf3
; RV32-NEXT: lw a0, 32(sp)
; RV32-NEXT: lw a1, 36(sp)
; RV32-NEXT: lw a2, 40(sp)
; RV32-NEXT: lw a3, 44(sp)
; RV32-NEXT: sw a0, 0(s0)
; RV32-NEXT: sw a1, 4(s0)
; RV32-NEXT: mv a0, s1
; RV32-NEXT: mv a1, s0
; RV32-NEXT: sw a2, 8(s0)
; RV32-NEXT: sw a3, 12(s0)
; RV32-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 52(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 64
; RV32-NEXT: tail callee_musttail_two_indirect
;
; RV64-LABEL: caller_musttail_forwarded_and_computed:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -32
; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: mv s0, a1
; RV64-NEXT: mv s1, a0
; RV64-NEXT: call __addtf3
; RV64-NEXT: mv a2, a0
; RV64-NEXT: mv a3, a1
; RV64-NEXT: mv a0, s1
; RV64-NEXT: mv a1, s0
; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 32
; RV64-NEXT: tail callee_musttail_two_indirect
%sum = fadd fp128 %a, %b
%r = musttail call i32 @callee_musttail_two_indirect(fp128 %a, fp128 %sum)
ret i32 %r
}
; Test musttail with both args computed. Neither can be zero-copy forwarded.
; In the RV32 output both incoming pointers are preserved (s1 for position 0,
; s0 for position 1), both operands are loaded into callee-saved registers
; before the first libcall, and only after __addtf3 and __subtf3 have both
; returned are the sum and difference written through s1 and s0 respectively.
define i32 @caller_musttail_both_computed(fp128 %a, fp128 %b) nounwind {
; RV32-LABEL: caller_musttail_both_computed:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -160
; RV32-NEXT: sw ra, 156(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 152(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s1, 148(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s2, 144(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s3, 140(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s4, 136(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s5, 132(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s6, 128(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s7, 124(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s8, 120(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s9, 116(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s10, 112(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s11, 108(sp) # 4-byte Folded Spill
; RV32-NEXT: mv s0, a1
; RV32-NEXT: mv s1, a0
; RV32-NEXT: lw s5, 0(a1)
; RV32-NEXT: lw s2, 4(a1)
; RV32-NEXT: lw s3, 8(a1)
; RV32-NEXT: lw s4, 12(a1)
; RV32-NEXT: lw s6, 0(a0)
; RV32-NEXT: lw s7, 4(a0)
; RV32-NEXT: lw s8, 8(a0)
; RV32-NEXT: lw s9, 12(a0)
; RV32-NEXT: sw s6, 72(sp)
; RV32-NEXT: sw s7, 76(sp)
; RV32-NEXT: sw s8, 80(sp)
; RV32-NEXT: sw s9, 84(sp)
; RV32-NEXT: addi a0, sp, 88
; RV32-NEXT: addi a1, sp, 72
; RV32-NEXT: addi a2, sp, 56
; RV32-NEXT: sw s5, 56(sp)
; RV32-NEXT: sw s2, 60(sp)
; RV32-NEXT: sw s3, 64(sp)
; RV32-NEXT: sw s4, 68(sp)
; RV32-NEXT: call __addtf3
; RV32-NEXT: lw s10, 88(sp)
; RV32-NEXT: lw s11, 92(sp)
; RV32-NEXT: lw a0, 96(sp)
; RV32-NEXT: sw a0, 0(sp) # 4-byte Folded Spill
; RV32-NEXT: lw a0, 100(sp)
; RV32-NEXT: sw a0, 4(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s6, 24(sp)
; RV32-NEXT: sw s7, 28(sp)
; RV32-NEXT: sw s8, 32(sp)
; RV32-NEXT: sw s9, 36(sp)
; RV32-NEXT: addi a0, sp, 40
; RV32-NEXT: addi a1, sp, 24
; RV32-NEXT: addi a2, sp, 8
; RV32-NEXT: sw s5, 8(sp)
; RV32-NEXT: sw s2, 12(sp)
; RV32-NEXT: sw s3, 16(sp)
; RV32-NEXT: sw s4, 20(sp)
; RV32-NEXT: call __subtf3
; RV32-NEXT: lw a0, 40(sp)
; RV32-NEXT: lw a1, 44(sp)
; RV32-NEXT: lw a2, 48(sp)
; RV32-NEXT: lw a3, 52(sp)
; RV32-NEXT: sw s10, 0(s1)
; RV32-NEXT: sw s11, 4(s1)
; RV32-NEXT: lw a4, 0(sp) # 4-byte Folded Reload
; RV32-NEXT: sw a4, 8(s1)
; RV32-NEXT: lw a4, 4(sp) # 4-byte Folded Reload
; RV32-NEXT: sw a4, 12(s1)
; RV32-NEXT: sw a0, 0(s0)
; RV32-NEXT: sw a1, 4(s0)
; RV32-NEXT: mv a0, s1
; RV32-NEXT: mv a1, s0
; RV32-NEXT: sw a2, 8(s0)
; RV32-NEXT: sw a3, 12(s0)
; RV32-NEXT: lw ra, 156(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 152(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 148(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s2, 144(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s3, 140(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s4, 136(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s5, 132(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s6, 128(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s7, 124(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s8, 120(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s9, 116(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s10, 112(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s11, 108(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 160
; RV32-NEXT: tail callee_musttail_two_indirect
;
; RV64-LABEL: caller_musttail_both_computed:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -64
; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s3, 24(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s4, 16(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s5, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: mv s0, a3
; RV64-NEXT: mv s1, a2
; RV64-NEXT: mv s2, a1
; RV64-NEXT: mv s3, a0
; RV64-NEXT: call __addtf3
; RV64-NEXT: mv s4, a0
; RV64-NEXT: mv s5, a1
; RV64-NEXT: mv a0, s3
; RV64-NEXT: mv a1, s2
; RV64-NEXT: mv a2, s1
; RV64-NEXT: mv a3, s0
; RV64-NEXT: call __subtf3
; RV64-NEXT: mv a2, a0
; RV64-NEXT: mv a3, a1
; RV64-NEXT: mv a0, s4
; RV64-NEXT: mv a1, s5
; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s3, 24(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s4, 16(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s5, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 64
; RV64-NEXT: tail callee_musttail_two_indirect
%sum = fadd fp128 %a, %b
%diff = fsub fp128 %a, %b
%r = musttail call i32 @callee_musttail_two_indirect(fp128 %sum, fp128 %diff)
ret i32 %r
}
; Test musttail in a non-entry basic block. The indirect pointer must survive
; across basic blocks (the SelectionDAG is cleared between BBs, so the pointer
; must be preserved in a virtual register, not as a raw SDValue).
; In the expected output the %then block is a bare tail call: the argument
; registers are still intact because the branch condition is masked into a
; scratch register (a2 on RV32, a3 on RV64) rather than in place.
declare i32 @callee_musttail_cross_bb(fp128 %a, i1 %c)
define i32 @caller_musttail_cross_bb(fp128 %a, i1 %cond) nounwind {
; RV32-LABEL: caller_musttail_cross_bb:
; RV32: # %bb.0: # %entry
; RV32-NEXT: andi a2, a1, 1
; RV32-NEXT: beqz a2, .LBB19_2
; RV32-NEXT: # %bb.1: # %then
; RV32-NEXT: tail callee_musttail_cross_bb
; RV32-NEXT: .LBB19_2: # %else
; RV32-NEXT: li a0, 0
; RV32-NEXT: ret
;
; RV64-LABEL: caller_musttail_cross_bb:
; RV64: # %bb.0: # %entry
; RV64-NEXT: andi a3, a2, 1
; RV64-NEXT: beqz a3, .LBB19_2
; RV64-NEXT: # %bb.1: # %then
; RV64-NEXT: tail callee_musttail_cross_bb
; RV64-NEXT: .LBB19_2: # %else
; RV64-NEXT: li a0, 0
; RV64-NEXT: ret
entry:
br i1 %cond, label %then, label %else
then:
%r = musttail call i32 @callee_musttail_cross_bb(fp128 %a, i1 %cond)
ret i32 %r
else:
ret i32 0
}
; Test musttail with control flow and a computed indirect arg in a non-entry BB.
; The fadd is performed in %entry; the musttail call that consumes it is in
; %then. In the RV32 output the incoming pointer is held in s0 across the
; __addtf3 libcall and the branch, and the result is copied through s0 only
; on the %then path before the tail call.
declare i32 @callee_musttail_cross_bb_computed(fp128 %a, i1 %c)
define i32 @caller_musttail_cross_bb_computed(fp128 %a, i1 %cond) nounwind {
; RV32-LABEL: caller_musttail_cross_bb_computed:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -64
; RV32-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s1, 52(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s2, 48(sp) # 4-byte Folded Spill
; RV32-NEXT: mv s0, a0
; RV32-NEXT: lw a0, 0(a0)
; RV32-NEXT: lw a3, 4(s0)
; RV32-NEXT: lw a4, 8(s0)
; RV32-NEXT: lw a5, 12(s0)
; RV32-NEXT: mv s1, a1
; RV32-NEXT: andi s2, a1, 1
; RV32-NEXT: sw a0, 0(sp)
; RV32-NEXT: sw a0, 16(sp)
; RV32-NEXT: sw a3, 4(sp)
; RV32-NEXT: sw a4, 8(sp)
; RV32-NEXT: sw a5, 12(sp)
; RV32-NEXT: addi a0, sp, 32
; RV32-NEXT: addi a1, sp, 16
; RV32-NEXT: mv a2, sp
; RV32-NEXT: sw a3, 20(sp)
; RV32-NEXT: sw a4, 24(sp)
; RV32-NEXT: sw a5, 28(sp)
; RV32-NEXT: call __addtf3
; RV32-NEXT: beqz s2, .LBB20_2
; RV32-NEXT: # %bb.1: # %then
; RV32-NEXT: lw a0, 32(sp)
; RV32-NEXT: lw a1, 36(sp)
; RV32-NEXT: lw a2, 40(sp)
; RV32-NEXT: lw a3, 44(sp)
; RV32-NEXT: sw a0, 0(s0)
; RV32-NEXT: sw a1, 4(s0)
; RV32-NEXT: mv a0, s0
; RV32-NEXT: mv a1, s1
; RV32-NEXT: sw a2, 8(s0)
; RV32-NEXT: sw a3, 12(s0)
; RV32-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 52(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s2, 48(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 64
; RV32-NEXT: tail callee_musttail_cross_bb_computed
; RV32-NEXT: .LBB20_2: # %else
; RV32-NEXT: li a0, 0
; RV32-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 52(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s2, 48(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 64
; RV32-NEXT: ret
;
; RV64-LABEL: caller_musttail_cross_bb_computed:
; RV64: # %bb.0: # %entry
; RV64-NEXT: addi sp, sp, -32
; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: mv s0, a2
; RV64-NEXT: andi s1, a2, 1
; RV64-NEXT: mv a2, a0
; RV64-NEXT: mv a3, a1
; RV64-NEXT: call __addtf3
; RV64-NEXT: beqz s1, .LBB20_2
; RV64-NEXT: # %bb.1: # %then
; RV64-NEXT: mv a2, s0
; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 32
; RV64-NEXT: tail callee_musttail_cross_bb_computed
; RV64-NEXT: .LBB20_2: # %else
; RV64-NEXT: li a0, 0
; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 32
; RV64-NEXT: ret
entry:
%sum = fadd fp128 %a, %a
br i1 %cond, label %then, label %else
then:
%r = musttail call i32 @callee_musttail_cross_bb_computed(fp128 %sum, i1 %cond)
ret i32 %r
else:
ret i32 0
}
; Non-indirect args that spill to the stack (exercises the
; isEligibleForTailCallOptimization stack-size bypass for musttail). Both
; RV32 and RV64 use a0..a7 for the first 8 args and spill from the 9th. The
; spilled args live in the caller's incoming stack slots, which musttail can
; re-use because matching prototypes imply a matching layout.
; The expected output reloads the two spilled arguments and stores them back
; to the same two slots (4-byte slots on RV32, 8-byte slots on RV64); this
; function creates no frame of its own.
declare void @callee_musttail_stack_spill(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
define void @caller_musttail_stack_spill(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8, i32 %a9) nounwind {
; RV32-LABEL: caller_musttail_stack_spill:
; RV32: # %bb.0:
; RV32-NEXT: lw t0, 0(sp)
; RV32-NEXT: lw t1, 4(sp)
; RV32-NEXT: sw t0, 0(sp)
; RV32-NEXT: sw t1, 4(sp)
; RV32-NEXT: tail callee_musttail_stack_spill
;
; RV64-LABEL: caller_musttail_stack_spill:
; RV64: # %bb.0:
; RV64-NEXT: ld t0, 0(sp)
; RV64-NEXT: ld t1, 8(sp)
; RV64-NEXT: sd t0, 0(sp)
; RV64-NEXT: sd t1, 8(sp)
; RV64-NEXT: tail callee_musttail_stack_spill
musttail call void @callee_musttail_stack_spill(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8, i32 %a9)
ret void
}
; sret + musttail: the sret pointer is just a regular pointer arg in a0.
; Tail call forwards it unchanged, together with %x, so the expected output
; on both targets is a bare tail call.
%struct.Large = type { i64, i64, i64, i64 }
declare void @callee_musttail_sret(ptr sret(%struct.Large), i32)
define void @caller_musttail_sret(ptr sret(%struct.Large) %out, i32 %x) nounwind {
; RV32-LABEL: caller_musttail_sret:
; RV32: # %bb.0:
; RV32-NEXT: tail callee_musttail_sret
;
; RV64-LABEL: caller_musttail_sret:
; RV64: # %bb.0:
; RV64-NEXT: tail callee_musttail_sret
musttail call void @callee_musttail_sret(ptr sret(%struct.Large) %out, i32 %x)
ret void
}
; Mix of indirect (fp128) and many i32 args spilled to the stack.
; In the RV32 output two i32 arguments are reloaded from and stored back to
; 0(sp) and 4(sp); in the RV64 output three are, at 0/8/16(sp), because the
; directly-passed fp128 takes one more argument register there. Neither
; output creates a frame.
declare void @callee_musttail_indirect_and_spill(fp128, i32, i32, i32, i32, i32, i32, i32, i32, i32)
define void @caller_musttail_indirect_and_spill(fp128 %a, i32 %i0, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8) nounwind {
; RV32-LABEL: caller_musttail_indirect_and_spill:
; RV32: # %bb.0:
; RV32-NEXT: lw t0, 0(sp)
; RV32-NEXT: lw t1, 4(sp)
; RV32-NEXT: sw t0, 0(sp)
; RV32-NEXT: sw t1, 4(sp)
; RV32-NEXT: tail callee_musttail_indirect_and_spill
;
; RV64-LABEL: caller_musttail_indirect_and_spill:
; RV64: # %bb.0:
; RV64-NEXT: ld t0, 0(sp)
; RV64-NEXT: ld t1, 8(sp)
; RV64-NEXT: ld t2, 16(sp)
; RV64-NEXT: sd t0, 0(sp)
; RV64-NEXT: sd t1, 8(sp)
; RV64-NEXT: sd t2, 16(sp)
; RV64-NEXT: tail callee_musttail_indirect_and_spill
musttail call void @callee_musttail_indirect_and_spill(fp128 %a, i32 %i0, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8)
ret void
}
; Note: byval + musttail is intentionally NOT tested here.
; isEligibleForTailCallOptimization rejects byval outright, which causes the
; musttail site to hit reportFatalInternalError. Tail-call support for byval
; was reverted in 501417baa60f (RISC-V/LoongArch) pending a vreg-based
; re-implementation; once that lands, musttail + byval can be tested as
; well.