| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=riscv64 -mattr=+m,+v -O2 < %s \ |
| ; RUN: | FileCheck %s -check-prefix=RV64I |
| ; RUN: llc -mtriple=riscv32 -mattr=+m,+v -O2 < %s \ |
| ; RUN: | FileCheck %s -check-prefix=RV32I |
| |
| ; These tests were copied from the equivalent AArch64 stack-probing tests. |
| |
| ; A dynamically-sized allocation needs a loop which can handle any size at |
| ; runtime. The final iteration of the loop will temporarily put SP below the |
| ; target address, but this doesn't break any of the ABI constraints on the |
| ; stack, and also doesn't probe below the target SP value. |
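| ; |
| ; A sketch of the probing loop (illustrative pseudocode, assuming the default |
| ; 4 KiB guard size that the checks below materialize with `lui ..., 1`): |
| ;   target = sp - round_up(size, 16) |
| ;   do { |
| ;     sp -= 4096;            // step down by one guard-size page |
| ;     *(uintptr_t *)sp = 0;  // probe the newly exposed page |
| ;   } while (target < sp) |
| ;   sp = target;             // the last step may briefly overshoot target |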
| define void @dynamic(i64 %size, ptr %out) #0 { |
| ; RV64I-LABEL: dynamic: |
| ; RV64I: # %bb.0: |
| ; RV64I-NEXT: addi sp, sp, -16 |
| ; RV64I-NEXT: .cfi_def_cfa_offset 16 |
| ; RV64I-NEXT: sd zero, 0(sp) |
| ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill |
| ; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill |
| ; RV64I-NEXT: .cfi_offset ra, -8 |
| ; RV64I-NEXT: .cfi_offset s0, -16 |
| ; RV64I-NEXT: addi s0, sp, 16 |
| ; RV64I-NEXT: .cfi_def_cfa s0, 0 |
| ; RV64I-NEXT: addi a0, a0, 15 |
| ; RV64I-NEXT: andi a0, a0, -16 |
| ; RV64I-NEXT: sub a0, sp, a0 |
| ; RV64I-NEXT: lui a2, 1 |
| ; RV64I-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 |
| ; RV64I-NEXT: sub sp, sp, a2 |
| ; RV64I-NEXT: sd zero, 0(sp) |
| ; RV64I-NEXT: blt a0, sp, .LBB0_1 |
| ; RV64I-NEXT: # %bb.2: |
| ; RV64I-NEXT: mv sp, a0 |
| ; RV64I-NEXT: sd a0, 0(a1) |
| ; RV64I-NEXT: addi sp, s0, -16 |
| ; RV64I-NEXT: .cfi_def_cfa sp, 16 |
| ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload |
| ; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload |
| ; RV64I-NEXT: .cfi_restore ra |
| ; RV64I-NEXT: .cfi_restore s0 |
| ; RV64I-NEXT: addi sp, sp, 16 |
| ; RV64I-NEXT: .cfi_def_cfa_offset 0 |
| ; RV64I-NEXT: ret |
| ; |
| ; RV32I-LABEL: dynamic: |
| ; RV32I: # %bb.0: |
| ; RV32I-NEXT: addi sp, sp, -16 |
| ; RV32I-NEXT: .cfi_def_cfa_offset 16 |
| ; RV32I-NEXT: sw zero, 0(sp) |
| ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill |
| ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill |
| ; RV32I-NEXT: .cfi_offset ra, -4 |
| ; RV32I-NEXT: .cfi_offset s0, -8 |
| ; RV32I-NEXT: addi s0, sp, 16 |
| ; RV32I-NEXT: .cfi_def_cfa s0, 0 |
| ; RV32I-NEXT: addi a0, a0, 15 |
| ; RV32I-NEXT: andi a0, a0, -16 |
| ; RV32I-NEXT: sub a0, sp, a0 |
| ; RV32I-NEXT: lui a1, 1 |
| ; RV32I-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 |
| ; RV32I-NEXT: sub sp, sp, a1 |
| ; RV32I-NEXT: sw zero, 0(sp) |
| ; RV32I-NEXT: blt a0, sp, .LBB0_1 |
| ; RV32I-NEXT: # %bb.2: |
| ; RV32I-NEXT: mv sp, a0 |
| ; RV32I-NEXT: sw a0, 0(a2) |
| ; RV32I-NEXT: addi sp, s0, -16 |
| ; RV32I-NEXT: .cfi_def_cfa sp, 16 |
| ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload |
| ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload |
| ; RV32I-NEXT: .cfi_restore ra |
| ; RV32I-NEXT: .cfi_restore s0 |
| ; RV32I-NEXT: addi sp, sp, 16 |
| ; RV32I-NEXT: .cfi_def_cfa_offset 0 |
| ; RV32I-NEXT: ret |
| %v = alloca i8, i64 %size, align 1 |
| store ptr %v, ptr %out, align 8 |
| ret void |
| } |
| |
| ; This function has a fixed-size stack slot and a dynamic one. The fixed-size |
| ; slot isn't large enough that we would normally probe it, but we need to do |
| ; so here; otherwise, when the size of the dynamic allocation is close to the |
| ; guard size, the gap between the CSR save and the first probe of the dynamic |
| ; allocation could exceed the guard size. |
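| ; |
| ; In the output below, the whole 80-byte fixed frame (the 64-byte slot plus |
| ; the CSR save area and padding) is allocated in one step and probed |
| ; immediately with a zero store at 0(sp), so the unprobed region before the |
| ; loop's first probe stays within one guard-size page. |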
| define void @dynamic_fixed(i64 %size, ptr %out1, ptr %out2) #0 { |
| ; RV64I-LABEL: dynamic_fixed: |
| ; RV64I: # %bb.0: |
| ; RV64I-NEXT: addi sp, sp, -80 |
| ; RV64I-NEXT: .cfi_def_cfa_offset 80 |
| ; RV64I-NEXT: sd zero, 0(sp) |
| ; RV64I-NEXT: sd ra, 72(sp) # 8-byte Folded Spill |
| ; RV64I-NEXT: sd s0, 64(sp) # 8-byte Folded Spill |
| ; RV64I-NEXT: .cfi_offset ra, -8 |
| ; RV64I-NEXT: .cfi_offset s0, -16 |
| ; RV64I-NEXT: addi s0, sp, 80 |
| ; RV64I-NEXT: .cfi_def_cfa s0, 0 |
| ; RV64I-NEXT: addi a3, s0, -80 |
| ; RV64I-NEXT: addi a0, a0, 15 |
| ; RV64I-NEXT: sd a3, 0(a1) |
| ; RV64I-NEXT: andi a0, a0, -16 |
| ; RV64I-NEXT: sub a0, sp, a0 |
| ; RV64I-NEXT: lui a1, 1 |
| ; RV64I-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 |
| ; RV64I-NEXT: sub sp, sp, a1 |
| ; RV64I-NEXT: sd zero, 0(sp) |
| ; RV64I-NEXT: blt a0, sp, .LBB1_1 |
| ; RV64I-NEXT: # %bb.2: |
| ; RV64I-NEXT: mv sp, a0 |
| ; RV64I-NEXT: sd a0, 0(a2) |
| ; RV64I-NEXT: addi sp, s0, -80 |
| ; RV64I-NEXT: .cfi_def_cfa sp, 80 |
| ; RV64I-NEXT: ld ra, 72(sp) # 8-byte Folded Reload |
| ; RV64I-NEXT: ld s0, 64(sp) # 8-byte Folded Reload |
| ; RV64I-NEXT: .cfi_restore ra |
| ; RV64I-NEXT: .cfi_restore s0 |
| ; RV64I-NEXT: addi sp, sp, 80 |
| ; RV64I-NEXT: .cfi_def_cfa_offset 0 |
| ; RV64I-NEXT: ret |
| ; |
| ; RV32I-LABEL: dynamic_fixed: |
| ; RV32I: # %bb.0: |
| ; RV32I-NEXT: addi sp, sp, -80 |
| ; RV32I-NEXT: .cfi_def_cfa_offset 80 |
| ; RV32I-NEXT: sw zero, 0(sp) |
| ; RV32I-NEXT: sw ra, 76(sp) # 4-byte Folded Spill |
| ; RV32I-NEXT: sw s0, 72(sp) # 4-byte Folded Spill |
| ; RV32I-NEXT: .cfi_offset ra, -4 |
| ; RV32I-NEXT: .cfi_offset s0, -8 |
| ; RV32I-NEXT: addi s0, sp, 80 |
| ; RV32I-NEXT: .cfi_def_cfa s0, 0 |
| ; RV32I-NEXT: addi a1, s0, -72 |
| ; RV32I-NEXT: addi a0, a0, 15 |
| ; RV32I-NEXT: sw a1, 0(a2) |
| ; RV32I-NEXT: andi a0, a0, -16 |
| ; RV32I-NEXT: sub a0, sp, a0 |
| ; RV32I-NEXT: lui a1, 1 |
| ; RV32I-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 |
| ; RV32I-NEXT: sub sp, sp, a1 |
| ; RV32I-NEXT: sw zero, 0(sp) |
| ; RV32I-NEXT: blt a0, sp, .LBB1_1 |
| ; RV32I-NEXT: # %bb.2: |
| ; RV32I-NEXT: mv sp, a0 |
| ; RV32I-NEXT: sw a0, 0(a3) |
| ; RV32I-NEXT: addi sp, s0, -80 |
| ; RV32I-NEXT: .cfi_def_cfa sp, 80 |
| ; RV32I-NEXT: lw ra, 76(sp) # 4-byte Folded Reload |
| ; RV32I-NEXT: lw s0, 72(sp) # 4-byte Folded Reload |
| ; RV32I-NEXT: .cfi_restore ra |
| ; RV32I-NEXT: .cfi_restore s0 |
| ; RV32I-NEXT: addi sp, sp, 80 |
| ; RV32I-NEXT: .cfi_def_cfa_offset 0 |
| ; RV32I-NEXT: ret |
| %v1 = alloca i8, i64 64, align 1 |
| store ptr %v1, ptr %out1, align 8 |
| %v2 = alloca i8, i64 %size, align 1 |
| store ptr %v2, ptr %out2, align 8 |
| ret void |
| } |
| |
| ; Dynamic allocation, with an alignment requirement greater than the alignment |
| ; of SP. This is done by ANDing the target SP with a constant to align it down, |
| ; then running the loop as normal. Note that we also re-align the stack in the |
| ; prologue; this isn't actually needed because the only aligned allocations are |
| ; dynamic, and it is done even without stack probing. |
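| ; |
| ; Target computation with the extra alignment (illustrative pseudocode, |
| ; matching the `andi a0, a0, -64` in the checks below): |
| ;   target = (sp - round_up(size, 16)) & ~63;  // align down to 64 bytes |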
| define void @dynamic_align_64(i64 %size, ptr %out) #0 { |
| ; RV64I-LABEL: dynamic_align_64: |
| ; RV64I: # %bb.0: |
| ; RV64I-NEXT: addi sp, sp, -64 |
| ; RV64I-NEXT: .cfi_def_cfa_offset 64 |
| ; RV64I-NEXT: sd zero, 0(sp) |
| ; RV64I-NEXT: sd ra, 56(sp) # 8-byte Folded Spill |
| ; RV64I-NEXT: sd s0, 48(sp) # 8-byte Folded Spill |
| ; RV64I-NEXT: sd s1, 40(sp) # 8-byte Folded Spill |
| ; RV64I-NEXT: .cfi_offset ra, -8 |
| ; RV64I-NEXT: .cfi_offset s0, -16 |
| ; RV64I-NEXT: .cfi_offset s1, -24 |
| ; RV64I-NEXT: addi s0, sp, 64 |
| ; RV64I-NEXT: .cfi_def_cfa s0, 0 |
| ; RV64I-NEXT: andi sp, sp, -64 |
| ; RV64I-NEXT: mv s1, sp |
| ; RV64I-NEXT: addi a0, a0, 15 |
| ; RV64I-NEXT: andi a0, a0, -16 |
| ; RV64I-NEXT: sub a0, sp, a0 |
| ; RV64I-NEXT: andi a0, a0, -64 |
| ; RV64I-NEXT: lui a2, 1 |
| ; RV64I-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 |
| ; RV64I-NEXT: sub sp, sp, a2 |
| ; RV64I-NEXT: sd zero, 0(sp) |
| ; RV64I-NEXT: blt a0, sp, .LBB2_1 |
| ; RV64I-NEXT: # %bb.2: |
| ; RV64I-NEXT: mv sp, a0 |
| ; RV64I-NEXT: sd a0, 0(a1) |
| ; RV64I-NEXT: addi sp, s0, -64 |
| ; RV64I-NEXT: .cfi_def_cfa sp, 64 |
| ; RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload |
| ; RV64I-NEXT: ld s0, 48(sp) # 8-byte Folded Reload |
| ; RV64I-NEXT: ld s1, 40(sp) # 8-byte Folded Reload |
| ; RV64I-NEXT: .cfi_restore ra |
| ; RV64I-NEXT: .cfi_restore s0 |
| ; RV64I-NEXT: .cfi_restore s1 |
| ; RV64I-NEXT: addi sp, sp, 64 |
| ; RV64I-NEXT: .cfi_def_cfa_offset 0 |
| ; RV64I-NEXT: ret |
| ; |
| ; RV32I-LABEL: dynamic_align_64: |
| ; RV32I: # %bb.0: |
| ; RV32I-NEXT: addi sp, sp, -64 |
| ; RV32I-NEXT: .cfi_def_cfa_offset 64 |
| ; RV32I-NEXT: sw zero, 0(sp) |
| ; RV32I-NEXT: sw ra, 60(sp) # 4-byte Folded Spill |
| ; RV32I-NEXT: sw s0, 56(sp) # 4-byte Folded Spill |
| ; RV32I-NEXT: sw s1, 52(sp) # 4-byte Folded Spill |
| ; RV32I-NEXT: .cfi_offset ra, -4 |
| ; RV32I-NEXT: .cfi_offset s0, -8 |
| ; RV32I-NEXT: .cfi_offset s1, -12 |
| ; RV32I-NEXT: addi s0, sp, 64 |
| ; RV32I-NEXT: .cfi_def_cfa s0, 0 |
| ; RV32I-NEXT: andi sp, sp, -64 |
| ; RV32I-NEXT: mv s1, sp |
| ; RV32I-NEXT: addi a0, a0, 15 |
| ; RV32I-NEXT: andi a0, a0, -16 |
| ; RV32I-NEXT: sub a0, sp, a0 |
| ; RV32I-NEXT: andi a0, a0, -64 |
| ; RV32I-NEXT: lui a1, 1 |
| ; RV32I-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 |
| ; RV32I-NEXT: sub sp, sp, a1 |
| ; RV32I-NEXT: sw zero, 0(sp) |
| ; RV32I-NEXT: blt a0, sp, .LBB2_1 |
| ; RV32I-NEXT: # %bb.2: |
| ; RV32I-NEXT: mv sp, a0 |
| ; RV32I-NEXT: sw a0, 0(a2) |
| ; RV32I-NEXT: addi sp, s0, -64 |
| ; RV32I-NEXT: .cfi_def_cfa sp, 64 |
| ; RV32I-NEXT: lw ra, 60(sp) # 4-byte Folded Reload |
| ; RV32I-NEXT: lw s0, 56(sp) # 4-byte Folded Reload |
| ; RV32I-NEXT: lw s1, 52(sp) # 4-byte Folded Reload |
| ; RV32I-NEXT: .cfi_restore ra |
| ; RV32I-NEXT: .cfi_restore s0 |
| ; RV32I-NEXT: .cfi_restore s1 |
| ; RV32I-NEXT: addi sp, sp, 64 |
| ; RV32I-NEXT: .cfi_def_cfa_offset 0 |
| ; RV32I-NEXT: ret |
| %v = alloca i8, i64 %size, align 64 |
| store ptr %v, ptr %out, align 8 |
| ret void |
| } |
| |
| ; Dynamic allocation, with an alignment greater than the stack guard size. The |
| ; only difference from the plain dynamic allocation is the constant used for |
| ; aligning the target SP; the loop probes the whole allocation without needing |
| ; to know about the alignment padding. |
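| ; |
| ; The alignment mask is materialized with `lui ..., 1048574`, i.e. 0xffffe000 |
| ; sign-extended, which equals ~(8192 - 1), so the following `and` aligns the |
| ; target SP down to 8192 bytes: |
| ;   target = (sp - round_up(size, 16)) & ~(8192 - 1); |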
| define void @dynamic_align_8192(i64 %size, ptr %out) #0 { |
| ; RV64I-LABEL: dynamic_align_8192: |
| ; RV64I: # %bb.0: |
| ; RV64I-NEXT: addi sp, sp, -2032 |
| ; RV64I-NEXT: .cfi_def_cfa_offset 2032 |
| ; RV64I-NEXT: sd zero, 0(sp) |
| ; RV64I-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill |
| ; RV64I-NEXT: sd s0, 2016(sp) # 8-byte Folded Spill |
| ; RV64I-NEXT: sd s1, 2008(sp) # 8-byte Folded Spill |
| ; RV64I-NEXT: .cfi_offset ra, -8 |
| ; RV64I-NEXT: .cfi_offset s0, -16 |
| ; RV64I-NEXT: .cfi_offset s1, -24 |
| ; RV64I-NEXT: addi s0, sp, 2032 |
| ; RV64I-NEXT: .cfi_def_cfa s0, 0 |
| ; RV64I-NEXT: lui a2, 1 |
| ; RV64I-NEXT: sub sp, sp, a2 |
| ; RV64I-NEXT: sd zero, 0(sp) |
| ; RV64I-NEXT: sub sp, sp, a2 |
| ; RV64I-NEXT: sd zero, 0(sp) |
| ; RV64I-NEXT: sub sp, sp, a2 |
| ; RV64I-NEXT: sd zero, 0(sp) |
| ; RV64I-NEXT: addi sp, sp, -2048 |
| ; RV64I-NEXT: addi sp, sp, -16 |
| ; RV64I-NEXT: sd zero, 0(sp) |
| ; RV64I-NEXT: srli a2, sp, 13 |
| ; RV64I-NEXT: slli sp, a2, 13 |
| ; RV64I-NEXT: mv s1, sp |
| ; RV64I-NEXT: addi a0, a0, 15 |
| ; RV64I-NEXT: lui a2, 1048574 |
| ; RV64I-NEXT: andi a0, a0, -16 |
| ; RV64I-NEXT: sub a0, sp, a0 |
| ; RV64I-NEXT: and a0, a0, a2 |
| ; RV64I-NEXT: lui a2, 1 |
| ; RV64I-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 |
| ; RV64I-NEXT: sub sp, sp, a2 |
| ; RV64I-NEXT: sd zero, 0(sp) |
| ; RV64I-NEXT: blt a0, sp, .LBB3_1 |
| ; RV64I-NEXT: # %bb.2: |
| ; RV64I-NEXT: mv sp, a0 |
| ; RV64I-NEXT: sd a0, 0(a1) |
| ; RV64I-NEXT: addi sp, s0, -2032 |
| ; RV64I-NEXT: .cfi_def_cfa sp, 2032 |
| ; RV64I-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload |
| ; RV64I-NEXT: ld s0, 2016(sp) # 8-byte Folded Reload |
| ; RV64I-NEXT: ld s1, 2008(sp) # 8-byte Folded Reload |
| ; RV64I-NEXT: .cfi_restore ra |
| ; RV64I-NEXT: .cfi_restore s0 |
| ; RV64I-NEXT: .cfi_restore s1 |
| ; RV64I-NEXT: addi sp, sp, 2032 |
| ; RV64I-NEXT: .cfi_def_cfa_offset 0 |
| ; RV64I-NEXT: ret |
| ; |
| ; RV32I-LABEL: dynamic_align_8192: |
| ; RV32I: # %bb.0: |
| ; RV32I-NEXT: addi sp, sp, -2032 |
| ; RV32I-NEXT: .cfi_def_cfa_offset 2032 |
| ; RV32I-NEXT: sw zero, 0(sp) |
| ; RV32I-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill |
| ; RV32I-NEXT: sw s0, 2024(sp) # 4-byte Folded Spill |
| ; RV32I-NEXT: sw s1, 2020(sp) # 4-byte Folded Spill |
| ; RV32I-NEXT: .cfi_offset ra, -4 |
| ; RV32I-NEXT: .cfi_offset s0, -8 |
| ; RV32I-NEXT: .cfi_offset s1, -12 |
| ; RV32I-NEXT: addi s0, sp, 2032 |
| ; RV32I-NEXT: .cfi_def_cfa s0, 0 |
| ; RV32I-NEXT: lui a1, 1 |
| ; RV32I-NEXT: sub sp, sp, a1 |
| ; RV32I-NEXT: sw zero, 0(sp) |
| ; RV32I-NEXT: sub sp, sp, a1 |
| ; RV32I-NEXT: sw zero, 0(sp) |
| ; RV32I-NEXT: sub sp, sp, a1 |
| ; RV32I-NEXT: sw zero, 0(sp) |
| ; RV32I-NEXT: addi sp, sp, -2048 |
| ; RV32I-NEXT: addi sp, sp, -16 |
| ; RV32I-NEXT: sw zero, 0(sp) |
| ; RV32I-NEXT: srli a1, sp, 13 |
| ; RV32I-NEXT: slli sp, a1, 13 |
| ; RV32I-NEXT: mv s1, sp |
| ; RV32I-NEXT: addi a0, a0, 15 |
| ; RV32I-NEXT: lui a1, 1048574 |
| ; RV32I-NEXT: andi a0, a0, -16 |
| ; RV32I-NEXT: sub a0, sp, a0 |
| ; RV32I-NEXT: and a0, a0, a1 |
| ; RV32I-NEXT: lui a1, 1 |
| ; RV32I-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 |
| ; RV32I-NEXT: sub sp, sp, a1 |
| ; RV32I-NEXT: sw zero, 0(sp) |
| ; RV32I-NEXT: blt a0, sp, .LBB3_1 |
| ; RV32I-NEXT: # %bb.2: |
| ; RV32I-NEXT: mv sp, a0 |
| ; RV32I-NEXT: sw a0, 0(a2) |
| ; RV32I-NEXT: addi sp, s0, -2032 |
| ; RV32I-NEXT: .cfi_def_cfa sp, 2032 |
| ; RV32I-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload |
| ; RV32I-NEXT: lw s0, 2024(sp) # 4-byte Folded Reload |
| ; RV32I-NEXT: lw s1, 2020(sp) # 4-byte Folded Reload |
| ; RV32I-NEXT: .cfi_restore ra |
| ; RV32I-NEXT: .cfi_restore s0 |
| ; RV32I-NEXT: .cfi_restore s1 |
| ; RV32I-NEXT: addi sp, sp, 2032 |
| ; RV32I-NEXT: .cfi_def_cfa_offset 0 |
| ; RV32I-NEXT: ret |
| %v = alloca i8, i64 %size, align 8192 |
| store ptr %v, ptr %out, align 8 |
| ret void |
| } |
| |
| ; If a function has variable-sized stack objects, then any call that needs to |
| ; pass arguments on the stack must allocate the space for them dynamically, to |
| ; ensure the argument area ends up at the bottom of the frame. |
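| ; |
| ; Rough argument-area arithmetic (assuming the standard calling convention): |
| ; on RV64, a0 carries the pointer and a1-a7 carry 7 of the 518 i64s, leaving |
| ; 511 * 8 = 4088 bytes (rounded up to 4096) to be passed on the stack; that is |
| ; the `lui a1, 1` worth of space allocated and probed after the loop. On RV32 |
| ; the i64s go in aligned register pairs, so slightly more spills to the stack. |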
| define void @no_reserved_call_frame(i64 %n) #0 { |
| ; RV64I-LABEL: no_reserved_call_frame: |
| ; RV64I: # %bb.0: # %entry |
| ; RV64I-NEXT: addi sp, sp, -16 |
| ; RV64I-NEXT: .cfi_def_cfa_offset 16 |
| ; RV64I-NEXT: sd zero, 0(sp) |
| ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill |
| ; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill |
| ; RV64I-NEXT: .cfi_offset ra, -8 |
| ; RV64I-NEXT: .cfi_offset s0, -16 |
| ; RV64I-NEXT: addi s0, sp, 16 |
| ; RV64I-NEXT: .cfi_def_cfa s0, 0 |
| ; RV64I-NEXT: slli a0, a0, 2 |
| ; RV64I-NEXT: addi a0, a0, 15 |
| ; RV64I-NEXT: andi a0, a0, -16 |
| ; RV64I-NEXT: sub a0, sp, a0 |
| ; RV64I-NEXT: lui a1, 1 |
| ; RV64I-NEXT: .LBB4_1: # %entry |
| ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 |
| ; RV64I-NEXT: sub sp, sp, a1 |
| ; RV64I-NEXT: sd zero, 0(sp) |
| ; RV64I-NEXT: blt a0, sp, .LBB4_1 |
| ; RV64I-NEXT: # %bb.2: # %entry |
| ; RV64I-NEXT: mv sp, a0 |
| ; RV64I-NEXT: lui a1, 1 |
| ; RV64I-NEXT: sub sp, sp, a1 |
| ; RV64I-NEXT: sd zero, 0(sp) |
| ; RV64I-NEXT: call callee_stack_args |
| ; RV64I-NEXT: lui a0, 1 |
| ; RV64I-NEXT: add sp, sp, a0 |
| ; RV64I-NEXT: addi sp, s0, -16 |
| ; RV64I-NEXT: .cfi_def_cfa sp, 16 |
| ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload |
| ; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload |
| ; RV64I-NEXT: .cfi_restore ra |
| ; RV64I-NEXT: .cfi_restore s0 |
| ; RV64I-NEXT: addi sp, sp, 16 |
| ; RV64I-NEXT: .cfi_def_cfa_offset 0 |
| ; RV64I-NEXT: ret |
| ; |
| ; RV32I-LABEL: no_reserved_call_frame: |
| ; RV32I: # %bb.0: # %entry |
| ; RV32I-NEXT: addi sp, sp, -16 |
| ; RV32I-NEXT: .cfi_def_cfa_offset 16 |
| ; RV32I-NEXT: sw zero, 0(sp) |
| ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill |
| ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill |
| ; RV32I-NEXT: .cfi_offset ra, -4 |
| ; RV32I-NEXT: .cfi_offset s0, -8 |
| ; RV32I-NEXT: addi s0, sp, 16 |
| ; RV32I-NEXT: .cfi_def_cfa s0, 0 |
| ; RV32I-NEXT: slli a0, a0, 2 |
| ; RV32I-NEXT: addi a0, a0, 15 |
| ; RV32I-NEXT: andi a0, a0, -16 |
| ; RV32I-NEXT: sub a0, sp, a0 |
| ; RV32I-NEXT: lui a1, 1 |
| ; RV32I-NEXT: .LBB4_1: # %entry |
| ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 |
| ; RV32I-NEXT: sub sp, sp, a1 |
| ; RV32I-NEXT: sw zero, 0(sp) |
| ; RV32I-NEXT: blt a0, sp, .LBB4_1 |
| ; RV32I-NEXT: # %bb.2: # %entry |
| ; RV32I-NEXT: mv sp, a0 |
| ; RV32I-NEXT: lui a1, 1 |
| ; RV32I-NEXT: sub sp, sp, a1 |
| ; RV32I-NEXT: sw zero, 0(sp) |
| ; RV32I-NEXT: addi sp, sp, -32 |
| ; RV32I-NEXT: sw zero, 0(sp) |
| ; RV32I-NEXT: call callee_stack_args |
| ; RV32I-NEXT: lui a0, 1 |
| ; RV32I-NEXT: addi a0, a0, 32 |
| ; RV32I-NEXT: add sp, sp, a0 |
| ; RV32I-NEXT: addi sp, s0, -16 |
| ; RV32I-NEXT: .cfi_def_cfa sp, 16 |
| ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload |
| ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload |
| ; RV32I-NEXT: .cfi_restore ra |
| ; RV32I-NEXT: .cfi_restore s0 |
| ; RV32I-NEXT: addi sp, sp, 16 |
| ; RV32I-NEXT: .cfi_def_cfa_offset 0 |
| ; RV32I-NEXT: ret |
| entry: |
| %v = alloca i32, i64 %n |
| call void @callee_stack_args(ptr %v, [518 x i64] poison) |
| ret void |
| } |
| |
| ; Same as above but without a variable-sized allocation, so the reserved call |
| ; frame can be folded into the fixed-size allocation in the prologue. |
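| ; |
| ; Here both the 518 * 4 = 2072-byte local and the outgoing-argument area are |
| ; known at compile time, so the prologue allocates and probes everything up |
| ; front and the call site itself needs no extra SP adjustment. |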
| define void @reserved_call_frame(i64 %n) #0 { |
| ; RV64I-LABEL: reserved_call_frame: |
| ; RV64I: # %bb.0: # %entry |
| ; RV64I-NEXT: addi sp, sp, -2032 |
| ; RV64I-NEXT: .cfi_def_cfa_offset 2032 |
| ; RV64I-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill |
| ; RV64I-NEXT: .cfi_offset ra, -8 |
| ; RV64I-NEXT: lui a0, 1 |
| ; RV64I-NEXT: sub sp, sp, a0 |
| ; RV64I-NEXT: sd zero, 0(sp) |
| ; RV64I-NEXT: .cfi_def_cfa_offset 4096 |
| ; RV64I-NEXT: addi sp, sp, -48 |
| ; RV64I-NEXT: .cfi_def_cfa_offset 4144 |
| ; RV64I-NEXT: lui a0, 1 |
| ; RV64I-NEXT: add a0, sp, a0 |
| ; RV64I-NEXT: call callee_stack_args |
| ; RV64I-NEXT: lui a0, 1 |
| ; RV64I-NEXT: addiw a0, a0, 48 |
| ; RV64I-NEXT: add sp, sp, a0 |
| ; RV64I-NEXT: .cfi_def_cfa_offset 2032 |
| ; RV64I-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload |
| ; RV64I-NEXT: .cfi_restore ra |
| ; RV64I-NEXT: addi sp, sp, 2032 |
| ; RV64I-NEXT: .cfi_def_cfa_offset 0 |
| ; RV64I-NEXT: ret |
| ; |
| ; RV32I-LABEL: reserved_call_frame: |
| ; RV32I: # %bb.0: # %entry |
| ; RV32I-NEXT: addi sp, sp, -2032 |
| ; RV32I-NEXT: .cfi_def_cfa_offset 2032 |
| ; RV32I-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill |
| ; RV32I-NEXT: .cfi_offset ra, -4 |
| ; RV32I-NEXT: lui a0, 1 |
| ; RV32I-NEXT: sub sp, sp, a0 |
| ; RV32I-NEXT: sw zero, 0(sp) |
| ; RV32I-NEXT: .cfi_def_cfa_offset 4096 |
| ; RV32I-NEXT: addi sp, sp, -80 |
| ; RV32I-NEXT: .cfi_def_cfa_offset 4176 |
| ; RV32I-NEXT: lui a0, 1 |
| ; RV32I-NEXT: addi a0, a0, 36 |
| ; RV32I-NEXT: add a0, sp, a0 |
| ; RV32I-NEXT: call callee_stack_args |
| ; RV32I-NEXT: lui a0, 1 |
| ; RV32I-NEXT: addi a0, a0, 80 |
| ; RV32I-NEXT: add sp, sp, a0 |
| ; RV32I-NEXT: .cfi_def_cfa_offset 2032 |
| ; RV32I-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload |
| ; RV32I-NEXT: .cfi_restore ra |
| ; RV32I-NEXT: addi sp, sp, 2032 |
| ; RV32I-NEXT: .cfi_def_cfa_offset 0 |
| ; RV32I-NEXT: ret |
| entry: |
| %v = alloca i32, i64 518 |
| call void @callee_stack_args(ptr %v, [518 x i64] poison) |
| ret void |
| } |
| |
| declare void @callee_stack_args(ptr, [518 x i64]) |
| |
| ; Dynamic allocation of scalable vectors, whose total size is only known at |
| ; runtime. |
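| ; |
| ; Size computation, grounded in the checks below: each <vscale x 4 x float> |
| ; occupies vscale * 16 = 2 * vlenb bytes, so the target offset is computed as |
| ; (size * vlenb) << 1 before the usual probing loop runs. |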
| define void @dynamic_vector(i64 %size, ptr %out) #0 { |
| ; RV64I-LABEL: dynamic_vector: |
| ; RV64I: # %bb.0: |
| ; RV64I-NEXT: addi sp, sp, -16 |
| ; RV64I-NEXT: .cfi_def_cfa_offset 16 |
| ; RV64I-NEXT: sd zero, 0(sp) |
| ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill |
| ; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill |
| ; RV64I-NEXT: .cfi_offset ra, -8 |
| ; RV64I-NEXT: .cfi_offset s0, -16 |
| ; RV64I-NEXT: addi s0, sp, 16 |
| ; RV64I-NEXT: .cfi_def_cfa s0, 0 |
| ; RV64I-NEXT: csrr a2, vlenb |
| ; RV64I-NEXT: mul a0, a2, a0 |
| ; RV64I-NEXT: slli a0, a0, 1 |
| ; RV64I-NEXT: sub a0, sp, a0 |
| ; RV64I-NEXT: lui a2, 1 |
| ; RV64I-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 |
| ; RV64I-NEXT: sub sp, sp, a2 |
| ; RV64I-NEXT: sd zero, 0(sp) |
| ; RV64I-NEXT: blt a0, sp, .LBB6_1 |
| ; RV64I-NEXT: # %bb.2: |
| ; RV64I-NEXT: mv sp, a0 |
| ; RV64I-NEXT: sd a0, 0(a1) |
| ; RV64I-NEXT: addi sp, s0, -16 |
| ; RV64I-NEXT: .cfi_def_cfa sp, 16 |
| ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload |
| ; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload |
| ; RV64I-NEXT: .cfi_restore ra |
| ; RV64I-NEXT: .cfi_restore s0 |
| ; RV64I-NEXT: addi sp, sp, 16 |
| ; RV64I-NEXT: .cfi_def_cfa_offset 0 |
| ; RV64I-NEXT: ret |
| ; |
| ; RV32I-LABEL: dynamic_vector: |
| ; RV32I: # %bb.0: |
| ; RV32I-NEXT: addi sp, sp, -16 |
| ; RV32I-NEXT: .cfi_def_cfa_offset 16 |
| ; RV32I-NEXT: sw zero, 0(sp) |
| ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill |
| ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill |
| ; RV32I-NEXT: .cfi_offset ra, -4 |
| ; RV32I-NEXT: .cfi_offset s0, -8 |
| ; RV32I-NEXT: addi s0, sp, 16 |
| ; RV32I-NEXT: .cfi_def_cfa s0, 0 |
| ; RV32I-NEXT: csrr a1, vlenb |
| ; RV32I-NEXT: mul a0, a1, a0 |
| ; RV32I-NEXT: slli a0, a0, 1 |
| ; RV32I-NEXT: sub a0, sp, a0 |
| ; RV32I-NEXT: lui a1, 1 |
| ; RV32I-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 |
| ; RV32I-NEXT: sub sp, sp, a1 |
| ; RV32I-NEXT: sw zero, 0(sp) |
| ; RV32I-NEXT: blt a0, sp, .LBB6_1 |
| ; RV32I-NEXT: # %bb.2: |
| ; RV32I-NEXT: mv sp, a0 |
| ; RV32I-NEXT: sw a0, 0(a2) |
| ; RV32I-NEXT: addi sp, s0, -16 |
| ; RV32I-NEXT: .cfi_def_cfa sp, 16 |
| ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload |
| ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload |
| ; RV32I-NEXT: .cfi_restore ra |
| ; RV32I-NEXT: .cfi_restore s0 |
| ; RV32I-NEXT: addi sp, sp, 16 |
| ; RV32I-NEXT: .cfi_def_cfa_offset 0 |
| ; RV32I-NEXT: ret |
| %v = alloca <vscale x 4 x float>, i64 %size, align 16 |
| store ptr %v, ptr %out, align 8 |
| ret void |
| } |
| |
| attributes #0 = { uwtable(async) "probe-stack"="inline-asm" "frame-pointer"="none" } |