| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc -mtriple=riscv32 %s -o - | FileCheck %s --check-prefix=RV32 |
| ; RUN: llc -mtriple=riscv64 %s -o - | FileCheck %s --check-prefix=RV64 |
| |
| ; Test that musttail with indirect args (fp128 on RV32) forwards the incoming |
| ; pointer instead of creating a new stack temporary. Without this fix, the |
| ; pointer would dangle after the tail call deallocates the caller's frame. |
| |
| declare i32 @callee_musttail_indirect(fp128 %a) |
| |
| ; fp128 is indirect on RV32 (too large for registers), direct on RV64. |
| ; On RV32, musttail must forward the incoming indirect pointer (a0) directly. |
| ; The expected output on both targets is a single tail instruction: no stack |
| ; frame is created and the fp128 value is not copied. |
| define i32 @caller_musttail_indirect(fp128 %a) nounwind { |
| ; RV32-LABEL: caller_musttail_indirect: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: tail callee_musttail_indirect |
| ; |
| ; RV64-LABEL: caller_musttail_indirect: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: tail callee_musttail_indirect |
| %call = musttail call i32 @callee_musttail_indirect(fp128 %a) |
| ret i32 %call |
| } |
| |
| ; Verify that a non-musttail tail call with an indirect arg does NOT become a |
| ; tail call on RV32 (this is the PR #184972 fix - indirect args are unsafe for |
| ; regular tail calls). The RV32 checks build the fp128 constant in a 16-byte |
| ; temporary at sp and pass its address in a0, so a normal call is emitted and |
| ; the frame is released only afterwards. On RV64 the constant is passed |
| ; directly in a0/a1 and the call is still emitted as a tail call. |
| define void @caller_no_musttail_indirect() nounwind { |
| ; RV32-LABEL: caller_no_musttail_indirect: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: addi sp, sp, -32 |
| ; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill |
| ; RV32-NEXT: lui a1, 262128 |
| ; RV32-NEXT: mv a0, sp |
| ; RV32-NEXT: sw zero, 0(sp) |
| ; RV32-NEXT: sw zero, 4(sp) |
| ; RV32-NEXT: sw zero, 8(sp) |
| ; RV32-NEXT: sw a1, 12(sp) |
| ; RV32-NEXT: call callee_musttail_indirect |
| ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload |
| ; RV32-NEXT: addi sp, sp, 32 |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: caller_no_musttail_indirect: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: lui a1, 16383 |
| ; RV64-NEXT: slli a1, a1, 36 |
| ; RV64-NEXT: li a0, 0 |
| ; RV64-NEXT: tail callee_musttail_indirect |
| %call = tail call i32 @callee_musttail_indirect(fp128 0xL00000000000000003FFF000000000000) |
| ret void |
| } |
| |
| ; Verify that a non-musttail tail call forwarding an indirect arg from the |
| ; caller's own parameters also does NOT tail call on RV32. The RV32 checks |
| ; copy the value from the incoming pointer (a0) into a new 16-byte temporary |
| ; at sp; that temporary lives on the caller's frame, which a tail call would |
| ; deallocate. On RV64 the value is passed in registers and the call is still |
| ; emitted as a tail call. |
| define i32 @caller_no_musttail_forward_indirect(fp128 %a) nounwind { |
| ; RV32-LABEL: caller_no_musttail_forward_indirect: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: addi sp, sp, -32 |
| ; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill |
| ; RV32-NEXT: lw a1, 0(a0) |
| ; RV32-NEXT: lw a2, 4(a0) |
| ; RV32-NEXT: lw a3, 8(a0) |
| ; RV32-NEXT: lw a4, 12(a0) |
| ; RV32-NEXT: mv a0, sp |
| ; RV32-NEXT: sw a1, 0(sp) |
| ; RV32-NEXT: sw a2, 4(sp) |
| ; RV32-NEXT: sw a3, 8(sp) |
| ; RV32-NEXT: sw a4, 12(sp) |
| ; RV32-NEXT: call callee_musttail_indirect |
| ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload |
| ; RV32-NEXT: addi sp, sp, 32 |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: caller_no_musttail_forward_indirect: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: tail callee_musttail_indirect |
| %call = tail call i32 @callee_musttail_indirect(fp128 %a) |
| ret i32 %call |
| } |
| |
| ; Test musttail with two indirect fp128 args on RV32. Both pointers must be |
| ; forwarded. Exercises the DenseMap with two distinct OrigArgIndex values. |
| ; Because the args are passed in their original order, the expected output on |
| ; both targets is a single tail instruction with no register moves. |
| declare i32 @callee_musttail_two_indirect(fp128 %a, fp128 %b) |
| |
| define i32 @caller_musttail_two_indirect(fp128 %a, fp128 %b) nounwind { |
| ; RV32-LABEL: caller_musttail_two_indirect: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: tail callee_musttail_two_indirect |
| ; |
| ; RV64-LABEL: caller_musttail_two_indirect: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: tail callee_musttail_two_indirect |
| %call = musttail call i32 @callee_musttail_two_indirect(fp128 %a, fp128 %b) |
| ret i32 %call |
| } |
| |
| ; Test musttail with mixed direct (i32 in register) + indirect (fp128) args. |
| ; Confirms OrigArgIndex lookup works when not all args are indirect. |
| ; The args keep their original order, so the expected output on both targets |
| ; is a single tail instruction with no register moves. |
| declare i32 @callee_musttail_mixed(i32 %x, fp128 %a) |
| |
| define i32 @caller_musttail_mixed(i32 %x, fp128 %a) nounwind { |
| ; RV32-LABEL: caller_musttail_mixed: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: tail callee_musttail_mixed |
| ; |
| ; RV64-LABEL: caller_musttail_mixed: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: tail callee_musttail_mixed |
| %call = musttail call i32 @callee_musttail_mixed(i32 %x, fp128 %a) |
| ret i32 %call |
| } |
| |
| ; Test musttail with i128 on RV32 (indirect, split into 4 x i32 parts). |
| ; The expected output on both targets is a single tail instruction; the RV32 |
| ; checks contain no loads or stores of the four parts. |
| declare i64 @callee_musttail_i128(i128 %a) |
| |
| define i64 @caller_musttail_i128(i128 %a) nounwind { |
| ; RV32-LABEL: caller_musttail_i128: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: tail callee_musttail_i128 |
| ; |
| ; RV64-LABEL: caller_musttail_i128: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: tail callee_musttail_i128 |
| %call = musttail call i64 @callee_musttail_i128(i128 %a) |
| ret i64 %call |
| } |
| |
| ; Test musttail with i128 (indirect+split on RV32) plus a trailing i32 direct arg. |
| ; Exercises the split-skip logic followed by a normal register arg. |
| ; The args keep their original order, so the expected output on both targets |
| ; is a single tail instruction with no register moves. |
| declare i64 @callee_musttail_i128_and_i32(i128 %a, i32 %x) |
| |
| define i64 @caller_musttail_i128_and_i32(i128 %a, i32 %x) nounwind { |
| ; RV32-LABEL: caller_musttail_i128_and_i32: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: tail callee_musttail_i128_and_i32 |
| ; |
| ; RV64-LABEL: caller_musttail_i128_and_i32: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: tail callee_musttail_i128_and_i32 |
| %call = musttail call i64 @callee_musttail_i128_and_i32(i128 %a, i32 %x) |
| ret i64 %call |
| } |
| |
| ; Test musttail with two indirect args SWAPPED. The pointers must be exchanged |
| ; before the tail call. This exercises the OrigArgIndex -> Argument::getArgNo() |
| ; resolution in LowerCall. |
| ; On RV32 only the two pointers in a0/a1 are exchanged (through a2); on RV64 |
| ; the values themselves are exchanged as the register pairs a0/a1 and a2/a3. |
| define i32 @caller_musttail_two_indirect_swapped(fp128 %a, fp128 %b) nounwind { |
| ; RV32-LABEL: caller_musttail_two_indirect_swapped: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: mv a2, a0 |
| ; RV32-NEXT: mv a0, a1 |
| ; RV32-NEXT: mv a1, a2 |
| ; RV32-NEXT: tail callee_musttail_two_indirect |
| ; |
| ; RV64-LABEL: caller_musttail_two_indirect_swapped: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: mv a4, a1 |
| ; RV64-NEXT: mv a5, a0 |
| ; RV64-NEXT: mv a0, a2 |
| ; RV64-NEXT: mv a1, a3 |
| ; RV64-NEXT: mv a2, a5 |
| ; RV64-NEXT: mv a3, a4 |
| ; RV64-NEXT: tail callee_musttail_two_indirect |
| %call = musttail call i32 @callee_musttail_two_indirect(fp128 %b, fp128 %a) |
| ret i32 %call |
| } |
| |
| ; Test musttail with three indirect args rotated: call @f(%c, %a, %b). |
| ; All three pointers need to be shuffled. |
| ; On RV32 the three pointers in a0-a2 are rotated using a3 as scratch; on RV64 |
| ; the register pairs a0/a1, a2/a3 and a4/a5 are rotated using a6/a7 as scratch. |
| declare i32 @callee_musttail_three_indirect(fp128 %a, fp128 %b, fp128 %c) |
| |
| define i32 @caller_musttail_three_indirect_rotated(fp128 %a, fp128 %b, fp128 %c) nounwind { |
| ; RV32-LABEL: caller_musttail_three_indirect_rotated: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: mv a3, a1 |
| ; RV32-NEXT: mv a1, a0 |
| ; RV32-NEXT: mv a0, a2 |
| ; RV32-NEXT: mv a2, a3 |
| ; RV32-NEXT: tail callee_musttail_three_indirect |
| ; |
| ; RV64-LABEL: caller_musttail_three_indirect_rotated: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: mv a6, a3 |
| ; RV64-NEXT: mv a7, a2 |
| ; RV64-NEXT: mv a3, a1 |
| ; RV64-NEXT: mv a2, a0 |
| ; RV64-NEXT: mv a0, a4 |
| ; RV64-NEXT: mv a1, a5 |
| ; RV64-NEXT: mv a4, a7 |
| ; RV64-NEXT: mv a5, a6 |
| ; RV64-NEXT: tail callee_musttail_three_indirect |
| %call = musttail call i32 @callee_musttail_three_indirect(fp128 %c, fp128 %a, fp128 %b) |
| ret i32 %call |
| } |
| |
| ; Test musttail with mixed direct + indirect args where the indirect args |
| ; are swapped but the direct arg stays in place. |
| ; The checks never write a0 (%x) on either target; only the registers that |
| ; carry %a and %b are exchanged. |
| declare i32 @callee_musttail_mixed_two_indirect(i32 %x, fp128 %a, fp128 %b) |
| |
| define i32 @caller_musttail_mixed_swap_indirect(i32 %x, fp128 %a, fp128 %b) nounwind { |
| ; RV32-LABEL: caller_musttail_mixed_swap_indirect: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: mv a3, a1 |
| ; RV32-NEXT: mv a1, a2 |
| ; RV32-NEXT: mv a2, a3 |
| ; RV32-NEXT: tail callee_musttail_mixed_two_indirect |
| ; |
| ; RV64-LABEL: caller_musttail_mixed_swap_indirect: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: mv a5, a2 |
| ; RV64-NEXT: mv a6, a1 |
| ; RV64-NEXT: mv a1, a3 |
| ; RV64-NEXT: mv a2, a4 |
| ; RV64-NEXT: mv a3, a6 |
| ; RV64-NEXT: mv a4, a5 |
| ; RV64-NEXT: tail callee_musttail_mixed_two_indirect |
| %call = musttail call i32 @callee_musttail_mixed_two_indirect(i32 %x, fp128 %b, fp128 %a) |
| ret i32 %call |
| } |
| |
| ; Test musttail with swapped i128 on RV32 (split indirect args). |
| ; The RV32 checks exchange only the two pointers in a0/a1 and contain no loads |
| ; or stores of the i128 parts; on RV64 the register pairs a0/a1 and a2/a3 are |
| ; exchanged. |
| declare i64 @callee_musttail_two_i128(i128 %a, i128 %b) |
| |
| define i64 @caller_musttail_two_i128_swapped(i128 %a, i128 %b) nounwind { |
| ; RV32-LABEL: caller_musttail_two_i128_swapped: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: mv a2, a0 |
| ; RV32-NEXT: mv a0, a1 |
| ; RV32-NEXT: mv a1, a2 |
| ; RV32-NEXT: tail callee_musttail_two_i128 |
| ; |
| ; RV64-LABEL: caller_musttail_two_i128_swapped: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: mv a4, a1 |
| ; RV64-NEXT: mv a5, a0 |
| ; RV64-NEXT: mv a0, a2 |
| ; RV64-NEXT: mv a1, a3 |
| ; RV64-NEXT: mv a2, a5 |
| ; RV64-NEXT: mv a3, a4 |
| ; RV64-NEXT: tail callee_musttail_two_i128 |
| %call = musttail call i64 @callee_musttail_two_i128(i128 %b, i128 %a) |
| ret i64 %call |
| } |
| |
| ; Test musttail passing the same indirect arg to both positions. |
| ; On RV32 the checks copy the pointer (a1 = a0), so both callee parameters |
| ; refer to the same 16 bytes; on RV64 the value is duplicated into a2/a3. |
| ; NOTE(review): other functions in this file (for example |
| ; caller_musttail_computed_and_forwarded) store a computed value through an |
| ; incoming indirect pointer. If a callee does that with aliased pointers, its |
| ; second by-value parameter changes as well. Confirm that sharing one pointer |
| ; between two argument positions is intended here. |
| define i32 @caller_musttail_two_indirect_dup(fp128 %a, fp128 %b) nounwind { |
| ; RV32-LABEL: caller_musttail_two_indirect_dup: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: mv a1, a0 |
| ; RV32-NEXT: tail callee_musttail_two_indirect |
| ; |
| ; RV64-LABEL: caller_musttail_two_indirect_dup: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: mv a2, a0 |
| ; RV64-NEXT: mv a3, a1 |
| ; RV64-NEXT: tail callee_musttail_two_indirect |
| %call = musttail call i32 @callee_musttail_two_indirect(fp128 %a, fp128 %a) |
| ret i32 %call |
| } |
| |
| ; Test musttail with enough indirect args to spill to the stack (9 fp128 on |
| ; RV32 uses a0-a7 for the first 8 pointers, 9th goes on the stack). |
| ; The RV32 checks reload the ninth pointer from 0(sp) and store it back to the |
| ; same slot without creating a frame. |
| ; NOTE(review): in the RV64 checks the function lowers sp by 32 and spills |
| ; s0-s2 at 8..24(sp). The incoming stack args are then loaded from 32..104(sp), |
| ; but the outgoing copies are stored at 0..72(sp), which overlaps the spill |
| ; slots that are reloaded immediately afterwards. The stores look like they |
| ; are missing the 32-byte frame offset; confirm this output is correct before |
| ; relying on these assertions. |
| declare void @callee_musttail_nine_indirect(fp128, fp128, fp128, fp128, fp128, fp128, fp128, fp128, fp128) |
| |
| define void @caller_musttail_nine_indirect(fp128 %a, fp128 %b, fp128 %c, fp128 %d, fp128 %e, fp128 %f, fp128 %g, fp128 %h, fp128 %i) nounwind { |
| ; RV32-LABEL: caller_musttail_nine_indirect: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: lw t0, 0(sp) |
| ; RV32-NEXT: sw t0, 0(sp) |
| ; RV32-NEXT: tail callee_musttail_nine_indirect |
| ; |
| ; RV64-LABEL: caller_musttail_nine_indirect: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: addi sp, sp, -32 |
| ; RV64-NEXT: sd s0, 24(sp) # 8-byte Folded Spill |
| ; RV64-NEXT: sd s1, 16(sp) # 8-byte Folded Spill |
| ; RV64-NEXT: sd s2, 8(sp) # 8-byte Folded Spill |
| ; RV64-NEXT: ld t0, 32(sp) |
| ; RV64-NEXT: ld t1, 40(sp) |
| ; RV64-NEXT: ld t2, 96(sp) |
| ; RV64-NEXT: ld t3, 104(sp) |
| ; RV64-NEXT: ld t4, 64(sp) |
| ; RV64-NEXT: ld t5, 72(sp) |
| ; RV64-NEXT: ld t6, 80(sp) |
| ; RV64-NEXT: ld s0, 88(sp) |
| ; RV64-NEXT: ld s1, 48(sp) |
| ; RV64-NEXT: ld s2, 56(sp) |
| ; RV64-NEXT: sd t2, 64(sp) |
| ; RV64-NEXT: sd t3, 72(sp) |
| ; RV64-NEXT: sd t4, 32(sp) |
| ; RV64-NEXT: sd t5, 40(sp) |
| ; RV64-NEXT: sd t6, 48(sp) |
| ; RV64-NEXT: sd s0, 56(sp) |
| ; RV64-NEXT: sd t0, 0(sp) |
| ; RV64-NEXT: sd t1, 8(sp) |
| ; RV64-NEXT: sd s1, 16(sp) |
| ; RV64-NEXT: sd s2, 24(sp) |
| ; RV64-NEXT: ld s0, 24(sp) # 8-byte Folded Reload |
| ; RV64-NEXT: ld s1, 16(sp) # 8-byte Folded Reload |
| ; RV64-NEXT: ld s2, 8(sp) # 8-byte Folded Reload |
| ; RV64-NEXT: addi sp, sp, 32 |
| ; RV64-NEXT: tail callee_musttail_nine_indirect |
| musttail call void @callee_musttail_nine_indirect(fp128 %a, fp128 %b, fp128 %c, fp128 %d, fp128 %e, fp128 %f, fp128 %g, fp128 %h, fp128 %i) |
| ret void |
| } |
| |
| ; Test musttail swapping the first (register) and last (stack-spilled) args. |
| ; The RV32 checks load the ninth pointer from 0(sp) into a0 and store the old |
| ; a0 into that slot, without creating a frame. |
| ; NOTE(review): in the RV64 checks the function lowers sp by 32 and spills |
| ; s0-s2 at 8..24(sp). The incoming stack args are loaded from 32..104(sp), but |
| ; the outgoing copies are stored at 0..72(sp), which overlaps the spill slots |
| ; that are reloaded immediately afterwards. The stores look like they are |
| ; missing the 32-byte frame offset; confirm this output is correct before |
| ; relying on these assertions. |
| define void @caller_musttail_nine_indirect_swap_first_last(fp128 %a, fp128 %b, fp128 %c, fp128 %d, fp128 %e, fp128 %f, fp128 %g, fp128 %h, fp128 %i) nounwind { |
| ; RV32-LABEL: caller_musttail_nine_indirect_swap_first_last: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: lw t0, 0(sp) |
| ; RV32-NEXT: mv t1, a0 |
| ; RV32-NEXT: mv a0, t0 |
| ; RV32-NEXT: sw t1, 0(sp) |
| ; RV32-NEXT: tail callee_musttail_nine_indirect |
| ; |
| ; RV64-LABEL: caller_musttail_nine_indirect_swap_first_last: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: addi sp, sp, -32 |
| ; RV64-NEXT: sd s0, 24(sp) # 8-byte Folded Spill |
| ; RV64-NEXT: sd s1, 16(sp) # 8-byte Folded Spill |
| ; RV64-NEXT: sd s2, 8(sp) # 8-byte Folded Spill |
| ; RV64-NEXT: ld t0, 96(sp) |
| ; RV64-NEXT: ld t1, 104(sp) |
| ; RV64-NEXT: ld t2, 32(sp) |
| ; RV64-NEXT: ld t3, 40(sp) |
| ; RV64-NEXT: ld t4, 64(sp) |
| ; RV64-NEXT: ld t5, 72(sp) |
| ; RV64-NEXT: ld t6, 80(sp) |
| ; RV64-NEXT: ld s0, 88(sp) |
| ; RV64-NEXT: ld s1, 48(sp) |
| ; RV64-NEXT: ld s2, 56(sp) |
| ; RV64-NEXT: sd a0, 64(sp) |
| ; RV64-NEXT: sd a1, 72(sp) |
| ; RV64-NEXT: sd t4, 32(sp) |
| ; RV64-NEXT: sd t5, 40(sp) |
| ; RV64-NEXT: sd t6, 48(sp) |
| ; RV64-NEXT: sd s0, 56(sp) |
| ; RV64-NEXT: sd t2, 0(sp) |
| ; RV64-NEXT: sd t3, 8(sp) |
| ; RV64-NEXT: mv a0, t0 |
| ; RV64-NEXT: mv a1, t1 |
| ; RV64-NEXT: sd s1, 16(sp) |
| ; RV64-NEXT: sd s2, 24(sp) |
| ; RV64-NEXT: ld s0, 24(sp) # 8-byte Folded Reload |
| ; RV64-NEXT: ld s1, 16(sp) # 8-byte Folded Reload |
| ; RV64-NEXT: ld s2, 8(sp) # 8-byte Folded Reload |
| ; RV64-NEXT: addi sp, sp, 32 |
| ; RV64-NEXT: tail callee_musttail_nine_indirect |
| musttail call void @callee_musttail_nine_indirect(fp128 %i, fp128 %b, fp128 %c, fp128 %d, fp128 %e, fp128 %f, fp128 %g, fp128 %h, fp128 %a) |
| ret void |
| } |
| |
| ; Test musttail where the indirect arg is a computed value, not a forwarded |
| ; formal parameter. The computed value must be stored into the incoming |
| ; indirect pointer before tail calling. |
| ; In the RV32 checks the incoming pointer is kept in s0 across the __addtf3 |
| ; libcall; the libcall operands and result use temporaries in the local frame, |
| ; and the result is then copied through s0, which is also passed on in a0. |
| ; On RV64 the libcall result is already in a0/a1 for the tail call. |
| define i32 @caller_musttail_computed(fp128 %a) nounwind { |
| ; RV32-LABEL: caller_musttail_computed: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: addi sp, sp, -64 |
| ; RV32-NEXT: sw ra, 60(sp) # 4-byte Folded Spill |
| ; RV32-NEXT: sw s0, 56(sp) # 4-byte Folded Spill |
| ; RV32-NEXT: mv s0, a0 |
| ; RV32-NEXT: lw a0, 0(a0) |
| ; RV32-NEXT: lw a3, 4(s0) |
| ; RV32-NEXT: lw a4, 8(s0) |
| ; RV32-NEXT: lw a5, 12(s0) |
| ; RV32-NEXT: sw a0, 8(sp) |
| ; RV32-NEXT: sw a0, 24(sp) |
| ; RV32-NEXT: sw a3, 12(sp) |
| ; RV32-NEXT: sw a4, 16(sp) |
| ; RV32-NEXT: sw a5, 20(sp) |
| ; RV32-NEXT: addi a0, sp, 40 |
| ; RV32-NEXT: addi a1, sp, 24 |
| ; RV32-NEXT: addi a2, sp, 8 |
| ; RV32-NEXT: sw a3, 28(sp) |
| ; RV32-NEXT: sw a4, 32(sp) |
| ; RV32-NEXT: sw a5, 36(sp) |
| ; RV32-NEXT: call __addtf3 |
| ; RV32-NEXT: lw a0, 40(sp) |
| ; RV32-NEXT: lw a1, 44(sp) |
| ; RV32-NEXT: lw a2, 48(sp) |
| ; RV32-NEXT: lw a3, 52(sp) |
| ; RV32-NEXT: sw a0, 0(s0) |
| ; RV32-NEXT: sw a1, 4(s0) |
| ; RV32-NEXT: mv a0, s0 |
| ; RV32-NEXT: sw a2, 8(s0) |
| ; RV32-NEXT: sw a3, 12(s0) |
| ; RV32-NEXT: lw ra, 60(sp) # 4-byte Folded Reload |
| ; RV32-NEXT: lw s0, 56(sp) # 4-byte Folded Reload |
| ; RV32-NEXT: addi sp, sp, 64 |
| ; RV32-NEXT: tail callee_musttail_indirect |
| ; |
| ; RV64-LABEL: caller_musttail_computed: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: addi sp, sp, -16 |
| ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill |
| ; RV64-NEXT: mv a2, a0 |
| ; RV64-NEXT: mv a3, a1 |
| ; RV64-NEXT: call __addtf3 |
| ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload |
| ; RV64-NEXT: addi sp, sp, 16 |
| ; RV64-NEXT: tail callee_musttail_indirect |
| %sum = fadd fp128 %a, %a |
| %r = musttail call i32 @callee_musttail_indirect(fp128 %sum) |
| ret i32 %r |
| } |
| |
| ; Test musttail with a computed i128 on RV32 (split indirect). The add result |
| ; must be stored back into the incoming pointer. |
| ; The RV32 checks load the four words through a0, add 1 with carry |
| ; propagation, and store the four result words back through a0, which is left |
| ; unchanged for the tail call. No stack frame is created on either target. |
| define i64 @caller_musttail_computed_i128(i128 %a) nounwind { |
| ; RV32-LABEL: caller_musttail_computed_i128: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: lw a1, 0(a0) |
| ; RV32-NEXT: lw a2, 4(a0) |
| ; RV32-NEXT: lw a3, 8(a0) |
| ; RV32-NEXT: lw a4, 12(a0) |
| ; RV32-NEXT: addi a1, a1, 1 |
| ; RV32-NEXT: seqz a5, a1 |
| ; RV32-NEXT: add a2, a2, a5 |
| ; RV32-NEXT: or a5, a1, a2 |
| ; RV32-NEXT: seqz a5, a5 |
| ; RV32-NEXT: add a5, a3, a5 |
| ; RV32-NEXT: sltu a3, a5, a3 |
| ; RV32-NEXT: add a3, a4, a3 |
| ; RV32-NEXT: sw a1, 0(a0) |
| ; RV32-NEXT: sw a2, 4(a0) |
| ; RV32-NEXT: sw a5, 8(a0) |
| ; RV32-NEXT: sw a3, 12(a0) |
| ; RV32-NEXT: tail callee_musttail_i128 |
| ; |
| ; RV64-LABEL: caller_musttail_computed_i128: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: addi a0, a0, 1 |
| ; RV64-NEXT: seqz a2, a0 |
| ; RV64-NEXT: add a1, a1, a2 |
| ; RV64-NEXT: tail callee_musttail_i128 |
| %sum = add i128 %a, 1 |
| %r = musttail call i64 @callee_musttail_i128(i128 %sum) |
| ret i64 %r |
| } |
| |
| ; Test musttail with one computed and one forwarded indirect arg. |
| ; Position 0 gets the fadd result (stored into %a's incoming pointer), |
| ; position 1 gets %b's incoming pointer forwarded directly. |
| ; In the RV32 checks s1 holds %a's pointer and s0 holds %b's pointer across |
| ; the __addtf3 libcall; the result is stored through s1 and the pointers are |
| ; passed on as a0 = s1, a1 = s0. |
| define i32 @caller_musttail_computed_and_forwarded(fp128 %a, fp128 %b) nounwind { |
| ; RV32-LABEL: caller_musttail_computed_and_forwarded: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: addi sp, sp, -64 |
| ; RV32-NEXT: sw ra, 60(sp) # 4-byte Folded Spill |
| ; RV32-NEXT: sw s0, 56(sp) # 4-byte Folded Spill |
| ; RV32-NEXT: sw s1, 52(sp) # 4-byte Folded Spill |
| ; RV32-NEXT: mv s0, a1 |
| ; RV32-NEXT: mv s1, a0 |
| ; RV32-NEXT: lw a3, 0(a1) |
| ; RV32-NEXT: lw a4, 4(a1) |
| ; RV32-NEXT: lw a5, 8(a1) |
| ; RV32-NEXT: lw a6, 12(a1) |
| ; RV32-NEXT: lw a0, 0(a0) |
| ; RV32-NEXT: lw a1, 4(s1) |
| ; RV32-NEXT: lw a2, 8(s1) |
| ; RV32-NEXT: lw a7, 12(s1) |
| ; RV32-NEXT: sw a0, 16(sp) |
| ; RV32-NEXT: sw a1, 20(sp) |
| ; RV32-NEXT: sw a2, 24(sp) |
| ; RV32-NEXT: sw a7, 28(sp) |
| ; RV32-NEXT: addi a0, sp, 32 |
| ; RV32-NEXT: addi a1, sp, 16 |
| ; RV32-NEXT: mv a2, sp |
| ; RV32-NEXT: sw a3, 0(sp) |
| ; RV32-NEXT: sw a4, 4(sp) |
| ; RV32-NEXT: sw a5, 8(sp) |
| ; RV32-NEXT: sw a6, 12(sp) |
| ; RV32-NEXT: call __addtf3 |
| ; RV32-NEXT: lw a0, 32(sp) |
| ; RV32-NEXT: lw a1, 36(sp) |
| ; RV32-NEXT: lw a2, 40(sp) |
| ; RV32-NEXT: lw a3, 44(sp) |
| ; RV32-NEXT: sw a0, 0(s1) |
| ; RV32-NEXT: sw a1, 4(s1) |
| ; RV32-NEXT: mv a0, s1 |
| ; RV32-NEXT: mv a1, s0 |
| ; RV32-NEXT: sw a2, 8(s1) |
| ; RV32-NEXT: sw a3, 12(s1) |
| ; RV32-NEXT: lw ra, 60(sp) # 4-byte Folded Reload |
| ; RV32-NEXT: lw s0, 56(sp) # 4-byte Folded Reload |
| ; RV32-NEXT: lw s1, 52(sp) # 4-byte Folded Reload |
| ; RV32-NEXT: addi sp, sp, 64 |
| ; RV32-NEXT: tail callee_musttail_two_indirect |
| ; |
| ; RV64-LABEL: caller_musttail_computed_and_forwarded: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: addi sp, sp, -32 |
| ; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill |
| ; RV64-NEXT: sd s0, 16(sp) # 8-byte Folded Spill |
| ; RV64-NEXT: sd s1, 8(sp) # 8-byte Folded Spill |
| ; RV64-NEXT: mv s0, a3 |
| ; RV64-NEXT: mv s1, a2 |
| ; RV64-NEXT: call __addtf3 |
| ; RV64-NEXT: mv a2, s1 |
| ; RV64-NEXT: mv a3, s0 |
| ; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload |
| ; RV64-NEXT: ld s0, 16(sp) # 8-byte Folded Reload |
| ; RV64-NEXT: ld s1, 8(sp) # 8-byte Folded Reload |
| ; RV64-NEXT: addi sp, sp, 32 |
| ; RV64-NEXT: tail callee_musttail_two_indirect |
| %sum = fadd fp128 %a, %b |
| %r = musttail call i32 @callee_musttail_two_indirect(fp128 %sum, fp128 %b) |
| ret i32 %r |
| } |
| |
| ; Test musttail with one forwarded and one computed indirect arg (reversed). |
| ; Position 0 forwards %a, position 1 gets the computed value. |
| ; In the RV32 checks s1 holds %a's pointer and s0 holds %b's pointer across |
| ; the __addtf3 libcall; the result is stored through s0 (%b's incoming |
| ; pointer) and the pointers are passed on as a0 = s1, a1 = s0. |
| define i32 @caller_musttail_forwarded_and_computed(fp128 %a, fp128 %b) nounwind { |
| ; RV32-LABEL: caller_musttail_forwarded_and_computed: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: addi sp, sp, -64 |
| ; RV32-NEXT: sw ra, 60(sp) # 4-byte Folded Spill |
| ; RV32-NEXT: sw s0, 56(sp) # 4-byte Folded Spill |
| ; RV32-NEXT: sw s1, 52(sp) # 4-byte Folded Spill |
| ; RV32-NEXT: mv s0, a1 |
| ; RV32-NEXT: mv s1, a0 |
| ; RV32-NEXT: lw a3, 0(a1) |
| ; RV32-NEXT: lw a4, 4(a1) |
| ; RV32-NEXT: lw a5, 8(a1) |
| ; RV32-NEXT: lw a6, 12(a1) |
| ; RV32-NEXT: lw a0, 0(a0) |
| ; RV32-NEXT: lw a1, 4(s1) |
| ; RV32-NEXT: lw a2, 8(s1) |
| ; RV32-NEXT: lw a7, 12(s1) |
| ; RV32-NEXT: sw a0, 16(sp) |
| ; RV32-NEXT: sw a1, 20(sp) |
| ; RV32-NEXT: sw a2, 24(sp) |
| ; RV32-NEXT: sw a7, 28(sp) |
| ; RV32-NEXT: addi a0, sp, 32 |
| ; RV32-NEXT: addi a1, sp, 16 |
| ; RV32-NEXT: mv a2, sp |
| ; RV32-NEXT: sw a3, 0(sp) |
| ; RV32-NEXT: sw a4, 4(sp) |
| ; RV32-NEXT: sw a5, 8(sp) |
| ; RV32-NEXT: sw a6, 12(sp) |
| ; RV32-NEXT: call __addtf3 |
| ; RV32-NEXT: lw a0, 32(sp) |
| ; RV32-NEXT: lw a1, 36(sp) |
| ; RV32-NEXT: lw a2, 40(sp) |
| ; RV32-NEXT: lw a3, 44(sp) |
| ; RV32-NEXT: sw a0, 0(s0) |
| ; RV32-NEXT: sw a1, 4(s0) |
| ; RV32-NEXT: mv a0, s1 |
| ; RV32-NEXT: mv a1, s0 |
| ; RV32-NEXT: sw a2, 8(s0) |
| ; RV32-NEXT: sw a3, 12(s0) |
| ; RV32-NEXT: lw ra, 60(sp) # 4-byte Folded Reload |
| ; RV32-NEXT: lw s0, 56(sp) # 4-byte Folded Reload |
| ; RV32-NEXT: lw s1, 52(sp) # 4-byte Folded Reload |
| ; RV32-NEXT: addi sp, sp, 64 |
| ; RV32-NEXT: tail callee_musttail_two_indirect |
| ; |
| ; RV64-LABEL: caller_musttail_forwarded_and_computed: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: addi sp, sp, -32 |
| ; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill |
| ; RV64-NEXT: sd s0, 16(sp) # 8-byte Folded Spill |
| ; RV64-NEXT: sd s1, 8(sp) # 8-byte Folded Spill |
| ; RV64-NEXT: mv s0, a1 |
| ; RV64-NEXT: mv s1, a0 |
| ; RV64-NEXT: call __addtf3 |
| ; RV64-NEXT: mv a2, a0 |
| ; RV64-NEXT: mv a3, a1 |
| ; RV64-NEXT: mv a0, s1 |
| ; RV64-NEXT: mv a1, s0 |
| ; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload |
| ; RV64-NEXT: ld s0, 16(sp) # 8-byte Folded Reload |
| ; RV64-NEXT: ld s1, 8(sp) # 8-byte Folded Reload |
| ; RV64-NEXT: addi sp, sp, 32 |
| ; RV64-NEXT: tail callee_musttail_two_indirect |
| %sum = fadd fp128 %a, %b |
| %r = musttail call i32 @callee_musttail_two_indirect(fp128 %a, fp128 %sum) |
| ret i32 %r |
| } |
| |
| ; Test musttail with both args computed. Neither can be zero-copy forwarded. |
| ; In the RV32 checks the original words of %a and %b are loaded into s2-s9 |
| ; before the first libcall, so both __addtf3 and __subtf3 operate on the |
| ; original values. The fadd result is held in s10/s11 plus two spill slots |
| ; across __subtf3, and both results are stored through the incoming pointers |
| ; (s1 for position 0, s0 for position 1) only after the second libcall. |
| define i32 @caller_musttail_both_computed(fp128 %a, fp128 %b) nounwind { |
| ; RV32-LABEL: caller_musttail_both_computed: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: addi sp, sp, -160 |
| ; RV32-NEXT: sw ra, 156(sp) # 4-byte Folded Spill |
| ; RV32-NEXT: sw s0, 152(sp) # 4-byte Folded Spill |
| ; RV32-NEXT: sw s1, 148(sp) # 4-byte Folded Spill |
| ; RV32-NEXT: sw s2, 144(sp) # 4-byte Folded Spill |
| ; RV32-NEXT: sw s3, 140(sp) # 4-byte Folded Spill |
| ; RV32-NEXT: sw s4, 136(sp) # 4-byte Folded Spill |
| ; RV32-NEXT: sw s5, 132(sp) # 4-byte Folded Spill |
| ; RV32-NEXT: sw s6, 128(sp) # 4-byte Folded Spill |
| ; RV32-NEXT: sw s7, 124(sp) # 4-byte Folded Spill |
| ; RV32-NEXT: sw s8, 120(sp) # 4-byte Folded Spill |
| ; RV32-NEXT: sw s9, 116(sp) # 4-byte Folded Spill |
| ; RV32-NEXT: sw s10, 112(sp) # 4-byte Folded Spill |
| ; RV32-NEXT: sw s11, 108(sp) # 4-byte Folded Spill |
| ; RV32-NEXT: mv s0, a1 |
| ; RV32-NEXT: mv s1, a0 |
| ; RV32-NEXT: lw s5, 0(a1) |
| ; RV32-NEXT: lw s2, 4(a1) |
| ; RV32-NEXT: lw s3, 8(a1) |
| ; RV32-NEXT: lw s4, 12(a1) |
| ; RV32-NEXT: lw s6, 0(a0) |
| ; RV32-NEXT: lw s7, 4(a0) |
| ; RV32-NEXT: lw s8, 8(a0) |
| ; RV32-NEXT: lw s9, 12(a0) |
| ; RV32-NEXT: sw s6, 72(sp) |
| ; RV32-NEXT: sw s7, 76(sp) |
| ; RV32-NEXT: sw s8, 80(sp) |
| ; RV32-NEXT: sw s9, 84(sp) |
| ; RV32-NEXT: addi a0, sp, 88 |
| ; RV32-NEXT: addi a1, sp, 72 |
| ; RV32-NEXT: addi a2, sp, 56 |
| ; RV32-NEXT: sw s5, 56(sp) |
| ; RV32-NEXT: sw s2, 60(sp) |
| ; RV32-NEXT: sw s3, 64(sp) |
| ; RV32-NEXT: sw s4, 68(sp) |
| ; RV32-NEXT: call __addtf3 |
| ; RV32-NEXT: lw s10, 88(sp) |
| ; RV32-NEXT: lw s11, 92(sp) |
| ; RV32-NEXT: lw a0, 96(sp) |
| ; RV32-NEXT: sw a0, 0(sp) # 4-byte Folded Spill |
| ; RV32-NEXT: lw a0, 100(sp) |
| ; RV32-NEXT: sw a0, 4(sp) # 4-byte Folded Spill |
| ; RV32-NEXT: sw s6, 24(sp) |
| ; RV32-NEXT: sw s7, 28(sp) |
| ; RV32-NEXT: sw s8, 32(sp) |
| ; RV32-NEXT: sw s9, 36(sp) |
| ; RV32-NEXT: addi a0, sp, 40 |
| ; RV32-NEXT: addi a1, sp, 24 |
| ; RV32-NEXT: addi a2, sp, 8 |
| ; RV32-NEXT: sw s5, 8(sp) |
| ; RV32-NEXT: sw s2, 12(sp) |
| ; RV32-NEXT: sw s3, 16(sp) |
| ; RV32-NEXT: sw s4, 20(sp) |
| ; RV32-NEXT: call __subtf3 |
| ; RV32-NEXT: lw a0, 40(sp) |
| ; RV32-NEXT: lw a1, 44(sp) |
| ; RV32-NEXT: lw a2, 48(sp) |
| ; RV32-NEXT: lw a3, 52(sp) |
| ; RV32-NEXT: sw s10, 0(s1) |
| ; RV32-NEXT: sw s11, 4(s1) |
| ; RV32-NEXT: lw a4, 0(sp) # 4-byte Folded Reload |
| ; RV32-NEXT: sw a4, 8(s1) |
| ; RV32-NEXT: lw a4, 4(sp) # 4-byte Folded Reload |
| ; RV32-NEXT: sw a4, 12(s1) |
| ; RV32-NEXT: sw a0, 0(s0) |
| ; RV32-NEXT: sw a1, 4(s0) |
| ; RV32-NEXT: mv a0, s1 |
| ; RV32-NEXT: mv a1, s0 |
| ; RV32-NEXT: sw a2, 8(s0) |
| ; RV32-NEXT: sw a3, 12(s0) |
| ; RV32-NEXT: lw ra, 156(sp) # 4-byte Folded Reload |
| ; RV32-NEXT: lw s0, 152(sp) # 4-byte Folded Reload |
| ; RV32-NEXT: lw s1, 148(sp) # 4-byte Folded Reload |
| ; RV32-NEXT: lw s2, 144(sp) # 4-byte Folded Reload |
| ; RV32-NEXT: lw s3, 140(sp) # 4-byte Folded Reload |
| ; RV32-NEXT: lw s4, 136(sp) # 4-byte Folded Reload |
| ; RV32-NEXT: lw s5, 132(sp) # 4-byte Folded Reload |
| ; RV32-NEXT: lw s6, 128(sp) # 4-byte Folded Reload |
| ; RV32-NEXT: lw s7, 124(sp) # 4-byte Folded Reload |
| ; RV32-NEXT: lw s8, 120(sp) # 4-byte Folded Reload |
| ; RV32-NEXT: lw s9, 116(sp) # 4-byte Folded Reload |
| ; RV32-NEXT: lw s10, 112(sp) # 4-byte Folded Reload |
| ; RV32-NEXT: lw s11, 108(sp) # 4-byte Folded Reload |
| ; RV32-NEXT: addi sp, sp, 160 |
| ; RV32-NEXT: tail callee_musttail_two_indirect |
| ; |
| ; RV64-LABEL: caller_musttail_both_computed: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: addi sp, sp, -64 |
| ; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill |
| ; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill |
| ; RV64-NEXT: sd s1, 40(sp) # 8-byte Folded Spill |
| ; RV64-NEXT: sd s2, 32(sp) # 8-byte Folded Spill |
| ; RV64-NEXT: sd s3, 24(sp) # 8-byte Folded Spill |
| ; RV64-NEXT: sd s4, 16(sp) # 8-byte Folded Spill |
| ; RV64-NEXT: sd s5, 8(sp) # 8-byte Folded Spill |
| ; RV64-NEXT: mv s0, a3 |
| ; RV64-NEXT: mv s1, a2 |
| ; RV64-NEXT: mv s2, a1 |
| ; RV64-NEXT: mv s3, a0 |
| ; RV64-NEXT: call __addtf3 |
| ; RV64-NEXT: mv s4, a0 |
| ; RV64-NEXT: mv s5, a1 |
| ; RV64-NEXT: mv a0, s3 |
| ; RV64-NEXT: mv a1, s2 |
| ; RV64-NEXT: mv a2, s1 |
| ; RV64-NEXT: mv a3, s0 |
| ; RV64-NEXT: call __subtf3 |
| ; RV64-NEXT: mv a2, a0 |
| ; RV64-NEXT: mv a3, a1 |
| ; RV64-NEXT: mv a0, s4 |
| ; RV64-NEXT: mv a1, s5 |
| ; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload |
| ; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload |
| ; RV64-NEXT: ld s1, 40(sp) # 8-byte Folded Reload |
| ; RV64-NEXT: ld s2, 32(sp) # 8-byte Folded Reload |
| ; RV64-NEXT: ld s3, 24(sp) # 8-byte Folded Reload |
| ; RV64-NEXT: ld s4, 16(sp) # 8-byte Folded Reload |
| ; RV64-NEXT: ld s5, 8(sp) # 8-byte Folded Reload |
| ; RV64-NEXT: addi sp, sp, 64 |
| ; RV64-NEXT: tail callee_musttail_two_indirect |
| %sum = fadd fp128 %a, %b |
| %diff = fsub fp128 %a, %b |
| %r = musttail call i32 @callee_musttail_two_indirect(fp128 %sum, fp128 %diff) |
| ret i32 %r |
| } |
| |
| ; Test musttail in a non-entry basic block. The indirect pointer must survive |
| ; across basic blocks (the SelectionDAG is cleared between BBs, so the pointer |
| ; must be preserved in a virtual register, not as a raw SDValue). |
| ; The checks test bit 0 of %cond in a scratch register and leave the incoming |
| ; argument registers unmodified, so the then-block is a bare tail call. |
| declare i32 @callee_musttail_cross_bb(fp128 %a, i1 %c) |
| |
| define i32 @caller_musttail_cross_bb(fp128 %a, i1 %cond) nounwind { |
| ; RV32-LABEL: caller_musttail_cross_bb: |
| ; RV32: # %bb.0: # %entry |
| ; RV32-NEXT: andi a2, a1, 1 |
| ; RV32-NEXT: beqz a2, .LBB19_2 |
| ; RV32-NEXT: # %bb.1: # %then |
| ; RV32-NEXT: tail callee_musttail_cross_bb |
| ; RV32-NEXT: .LBB19_2: # %else |
| ; RV32-NEXT: li a0, 0 |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: caller_musttail_cross_bb: |
| ; RV64: # %bb.0: # %entry |
| ; RV64-NEXT: andi a3, a2, 1 |
| ; RV64-NEXT: beqz a3, .LBB19_2 |
| ; RV64-NEXT: # %bb.1: # %then |
| ; RV64-NEXT: tail callee_musttail_cross_bb |
| ; RV64-NEXT: .LBB19_2: # %else |
| ; RV64-NEXT: li a0, 0 |
| ; RV64-NEXT: ret |
| entry: |
| br i1 %cond, label %then, label %else |
| then: |
| %r = musttail call i32 @callee_musttail_cross_bb(fp128 %a, i1 %cond) |
| ret i32 %r |
| else: |
| ret i32 0 |
| } |
| |
| ; Test musttail with control flow and a computed indirect arg in a non-entry BB. |
| ; The fadd is in the entry block, so the __addtf3 libcall is emitted before |
| ; the branch on both targets. In the RV32 checks the incoming pointer (s0) and |
| ; %cond (s1) are kept in callee-saved registers across the libcall, and the |
| ; result is copied through s0 only in the then-block, just before the tail |
| ; call. |
| declare i32 @callee_musttail_cross_bb_computed(fp128 %a, i1 %c) |
| |
| define i32 @caller_musttail_cross_bb_computed(fp128 %a, i1 %cond) nounwind { |
| ; RV32-LABEL: caller_musttail_cross_bb_computed: |
| ; RV32: # %bb.0: # %entry |
| ; RV32-NEXT: addi sp, sp, -64 |
| ; RV32-NEXT: sw ra, 60(sp) # 4-byte Folded Spill |
| ; RV32-NEXT: sw s0, 56(sp) # 4-byte Folded Spill |
| ; RV32-NEXT: sw s1, 52(sp) # 4-byte Folded Spill |
| ; RV32-NEXT: sw s2, 48(sp) # 4-byte Folded Spill |
| ; RV32-NEXT: mv s0, a0 |
| ; RV32-NEXT: lw a0, 0(a0) |
| ; RV32-NEXT: lw a3, 4(s0) |
| ; RV32-NEXT: lw a4, 8(s0) |
| ; RV32-NEXT: lw a5, 12(s0) |
| ; RV32-NEXT: mv s1, a1 |
| ; RV32-NEXT: andi s2, a1, 1 |
| ; RV32-NEXT: sw a0, 0(sp) |
| ; RV32-NEXT: sw a0, 16(sp) |
| ; RV32-NEXT: sw a3, 4(sp) |
| ; RV32-NEXT: sw a4, 8(sp) |
| ; RV32-NEXT: sw a5, 12(sp) |
| ; RV32-NEXT: addi a0, sp, 32 |
| ; RV32-NEXT: addi a1, sp, 16 |
| ; RV32-NEXT: mv a2, sp |
| ; RV32-NEXT: sw a3, 20(sp) |
| ; RV32-NEXT: sw a4, 24(sp) |
| ; RV32-NEXT: sw a5, 28(sp) |
| ; RV32-NEXT: call __addtf3 |
| ; RV32-NEXT: beqz s2, .LBB20_2 |
| ; RV32-NEXT: # %bb.1: # %then |
| ; RV32-NEXT: lw a0, 32(sp) |
| ; RV32-NEXT: lw a1, 36(sp) |
| ; RV32-NEXT: lw a2, 40(sp) |
| ; RV32-NEXT: lw a3, 44(sp) |
| ; RV32-NEXT: sw a0, 0(s0) |
| ; RV32-NEXT: sw a1, 4(s0) |
| ; RV32-NEXT: mv a0, s0 |
| ; RV32-NEXT: mv a1, s1 |
| ; RV32-NEXT: sw a2, 8(s0) |
| ; RV32-NEXT: sw a3, 12(s0) |
| ; RV32-NEXT: lw ra, 60(sp) # 4-byte Folded Reload |
| ; RV32-NEXT: lw s0, 56(sp) # 4-byte Folded Reload |
| ; RV32-NEXT: lw s1, 52(sp) # 4-byte Folded Reload |
| ; RV32-NEXT: lw s2, 48(sp) # 4-byte Folded Reload |
| ; RV32-NEXT: addi sp, sp, 64 |
| ; RV32-NEXT: tail callee_musttail_cross_bb_computed |
| ; RV32-NEXT: .LBB20_2: # %else |
| ; RV32-NEXT: li a0, 0 |
| ; RV32-NEXT: lw ra, 60(sp) # 4-byte Folded Reload |
| ; RV32-NEXT: lw s0, 56(sp) # 4-byte Folded Reload |
| ; RV32-NEXT: lw s1, 52(sp) # 4-byte Folded Reload |
| ; RV32-NEXT: lw s2, 48(sp) # 4-byte Folded Reload |
| ; RV32-NEXT: addi sp, sp, 64 |
| ; RV32-NEXT: ret |
| ; |
| ; RV64-LABEL: caller_musttail_cross_bb_computed: |
| ; RV64: # %bb.0: # %entry |
| ; RV64-NEXT: addi sp, sp, -32 |
| ; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill |
| ; RV64-NEXT: sd s0, 16(sp) # 8-byte Folded Spill |
| ; RV64-NEXT: sd s1, 8(sp) # 8-byte Folded Spill |
| ; RV64-NEXT: mv s0, a2 |
| ; RV64-NEXT: andi s1, a2, 1 |
| ; RV64-NEXT: mv a2, a0 |
| ; RV64-NEXT: mv a3, a1 |
| ; RV64-NEXT: call __addtf3 |
| ; RV64-NEXT: beqz s1, .LBB20_2 |
| ; RV64-NEXT: # %bb.1: # %then |
| ; RV64-NEXT: mv a2, s0 |
| ; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload |
| ; RV64-NEXT: ld s0, 16(sp) # 8-byte Folded Reload |
| ; RV64-NEXT: ld s1, 8(sp) # 8-byte Folded Reload |
| ; RV64-NEXT: addi sp, sp, 32 |
| ; RV64-NEXT: tail callee_musttail_cross_bb_computed |
| ; RV64-NEXT: .LBB20_2: # %else |
| ; RV64-NEXT: li a0, 0 |
| ; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload |
| ; RV64-NEXT: ld s0, 16(sp) # 8-byte Folded Reload |
| ; RV64-NEXT: ld s1, 8(sp) # 8-byte Folded Reload |
| ; RV64-NEXT: addi sp, sp, 32 |
| ; RV64-NEXT: ret |
| entry: |
| %sum = fadd fp128 %a, %a |
| br i1 %cond, label %then, label %else |
| then: |
| %r = musttail call i32 @callee_musttail_cross_bb_computed(fp128 %sum, i1 %cond) |
| ret i32 %r |
| else: |
| ret i32 0 |
| } |
| |
| ; Non-indirect args that spill to the stack (exercises the |
| ; isEligibleForTailCallOptimization stack-size bypass for musttail). Both |
| ; RV32 and RV64 use a0..a7 for the first 8 args and spill from the 9th. The |
| ; spilled args live in the caller's incoming stack slots, which musttail can |
| ; re-use because matching prototypes imply a matching layout. |
| ; The checks reload the two spilled args and store them back to the same two |
| ; slots before the tail call; no frame is created on either target. |
| declare void @callee_musttail_stack_spill(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) |
| |
| define void @caller_musttail_stack_spill(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8, i32 %a9) nounwind { |
| ; RV32-LABEL: caller_musttail_stack_spill: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: lw t0, 0(sp) |
| ; RV32-NEXT: lw t1, 4(sp) |
| ; RV32-NEXT: sw t0, 0(sp) |
| ; RV32-NEXT: sw t1, 4(sp) |
| ; RV32-NEXT: tail callee_musttail_stack_spill |
| ; |
| ; RV64-LABEL: caller_musttail_stack_spill: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: ld t0, 0(sp) |
| ; RV64-NEXT: ld t1, 8(sp) |
| ; RV64-NEXT: sd t0, 0(sp) |
| ; RV64-NEXT: sd t1, 8(sp) |
| ; RV64-NEXT: tail callee_musttail_stack_spill |
| musttail call void @callee_musttail_stack_spill(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8, i32 %a9) |
| ret void |
| } |
| |
| ; sret + musttail: the sret pointer is just a regular pointer arg in a0. |
| ; Tail call forwards it unchanged. |
| ; The expected output on both targets is a single tail instruction with no |
| ; register moves. |
| %struct.Large = type { i64, i64, i64, i64 } |
| declare void @callee_musttail_sret(ptr sret(%struct.Large), i32) |
| |
| define void @caller_musttail_sret(ptr sret(%struct.Large) %out, i32 %x) nounwind { |
| ; RV32-LABEL: caller_musttail_sret: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: tail callee_musttail_sret |
| ; |
| ; RV64-LABEL: caller_musttail_sret: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: tail callee_musttail_sret |
| musttail call void @callee_musttail_sret(ptr sret(%struct.Large) %out, i32 %x) |
| ret void |
| } |
| |
| ; Mix of indirect (fp128) and many i32 args spilled to the stack. |
| ; The RV32 checks reload and re-store two stack slots; the RV64 checks do the |
| ; same for three stack slots. Neither target creates a frame, and each value |
| ; is stored back to the slot it was loaded from. |
| declare void @callee_musttail_indirect_and_spill(fp128, i32, i32, i32, i32, i32, i32, i32, i32, i32) |
| |
| define void @caller_musttail_indirect_and_spill(fp128 %a, i32 %i0, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8) nounwind { |
| ; RV32-LABEL: caller_musttail_indirect_and_spill: |
| ; RV32: # %bb.0: |
| ; RV32-NEXT: lw t0, 0(sp) |
| ; RV32-NEXT: lw t1, 4(sp) |
| ; RV32-NEXT: sw t0, 0(sp) |
| ; RV32-NEXT: sw t1, 4(sp) |
| ; RV32-NEXT: tail callee_musttail_indirect_and_spill |
| ; |
| ; RV64-LABEL: caller_musttail_indirect_and_spill: |
| ; RV64: # %bb.0: |
| ; RV64-NEXT: ld t0, 0(sp) |
| ; RV64-NEXT: ld t1, 8(sp) |
| ; RV64-NEXT: ld t2, 16(sp) |
| ; RV64-NEXT: sd t0, 0(sp) |
| ; RV64-NEXT: sd t1, 8(sp) |
| ; RV64-NEXT: sd t2, 16(sp) |
| ; RV64-NEXT: tail callee_musttail_indirect_and_spill |
| musttail call void @callee_musttail_indirect_and_spill(fp128 %a, i32 %i0, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8) |
| ret void |
| } |
| |
| ; Note: byval + musttail is intentionally NOT tested here. |
| ; isEligibleForTailCallOptimization rejects byval outright, which causes the |
| ; musttail site to hit reportFatalInternalError. Tail-call support for byval |
| ; was reverted in 501417baa60f (RISC-V/LoongArch) pending a vreg-based |
| ; re-implementation; once that lands, musttail + byval can be tested as |
| ; well. |