| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=loongarch64 -O2 < %s | FileCheck %s -check-prefix=LA64 |
| ; RUN: llc -mtriple=loongarch32 -O2 < %s | FileCheck %s -check-prefix=LA32 |
| |
| ;; From llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll |
| |
| ;; Dynamically-sized allocation, needs a loop which can handle any size at |
| ;; runtime. The final iteration of the loop will temporarily put SP below the |
| ;; target address, but this doesn't break any of the ABI constraints on the |
| ;; stack, and also doesn't probe below the target SP value. |
| define void @dynamic(i64 %size, ptr %out) #0 { |
| ; |
| ; LA64-LABEL: dynamic: |
| ; LA64: # %bb.0: |
| ; LA64-NEXT: addi.d $sp, $sp, -16 |
| ; LA64-NEXT: .cfi_def_cfa_offset 16 |
| ; LA64-NEXT: st.d $zero, $sp, 0 |
| ; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill |
| ; LA64-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill |
| ; LA64-NEXT: .cfi_offset 1, -8 |
| ; LA64-NEXT: .cfi_offset 22, -16 |
| ; LA64-NEXT: addi.d $fp, $sp, 16 |
| ; LA64-NEXT: .cfi_def_cfa 22, 0 |
| ; LA64-NEXT: addi.d $a0, $a0, 15 |
| ; LA64-NEXT: bstrins.d $a0, $zero, 3, 0 |
| ; LA64-NEXT: sub.d $a0, $sp, $a0 |
| ; LA64-NEXT: lu12i.w $a2, 1 |
| ; LA64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 |
| ; LA64-NEXT: sub.d $sp, $sp, $a2 |
| ; LA64-NEXT: st.d $zero, $sp, 0 |
| ; LA64-NEXT: bltu $a0, $sp, .LBB0_1 |
| ; LA64-NEXT: # %bb.2: |
| ; LA64-NEXT: move $sp, $a0 |
| ; LA64-NEXT: st.d $a0, $a1, 0 |
| ; LA64-NEXT: addi.d $sp, $fp, -16 |
| ; LA64-NEXT: ld.d $fp, $sp, 0 # 8-byte Folded Reload |
| ; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload |
| ; LA64-NEXT: addi.d $sp, $sp, 16 |
| ; LA64-NEXT: ret |
| ; |
| ; LA32-LABEL: dynamic: |
| ; LA32: # %bb.0: |
| ; LA32-NEXT: addi.w $sp, $sp, -16 |
| ; LA32-NEXT: .cfi_def_cfa_offset 16 |
| ; LA32-NEXT: st.w $zero, $sp, 0 |
| ; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill |
| ; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill |
| ; LA32-NEXT: .cfi_offset 1, -4 |
| ; LA32-NEXT: .cfi_offset 22, -8 |
| ; LA32-NEXT: addi.w $fp, $sp, 16 |
| ; LA32-NEXT: .cfi_def_cfa 22, 0 |
| ; LA32-NEXT: addi.w $a0, $a0, 15 |
| ; LA32-NEXT: addi.w $a1, $zero, -16 |
| ; LA32-NEXT: and $a0, $a0, $a1 |
| ; LA32-NEXT: sub.w $a0, $sp, $a0 |
| ; LA32-NEXT: lu12i.w $a1, 1 |
| ; LA32-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 |
| ; LA32-NEXT: sub.w $sp, $sp, $a1 |
| ; LA32-NEXT: st.w $zero, $sp, 0 |
| ; LA32-NEXT: bltu $a0, $sp, .LBB0_1 |
| ; LA32-NEXT: # %bb.2: |
| ; LA32-NEXT: move $sp, $a0 |
| ; LA32-NEXT: st.w $a0, $a2, 0 |
| ; LA32-NEXT: addi.w $sp, $fp, -16 |
| ; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload |
| ; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload |
| ; LA32-NEXT: addi.w $sp, $sp, 16 |
| ; LA32-NEXT: ret |
| ;; IR under test: a runtime-sized, byte-aligned alloca whose address is stored |
| ;; through %out. |
| ;; In the expected output above, the requested size is rounded up to a multiple |
| ;; of 16 and subtracted from $sp to form the target address in $a0. lu12i.w |
| ;; materializes 4096 (1 << 12) as the probe step; the loop lowers $sp by that |
| ;; step, stores zero at the new $sp, and repeats while $a0 is still below $sp. |
| ;; After the loop, $sp is set to the exact target with "move $sp, $a0". |
| %v = alloca i8, i64 %size, align 1 |
| store ptr %v, ptr %out, align 8 |
| ret void |
| } |
| |
| ;; This function has a fixed-size stack slot and a dynamic one. The fixed-size |
| ;; slot isn't large enough that we would normally probe it, but we need to do so |
| ;; here, otherwise the gap between the CSR save and the first probe of the |
| ;; dynamic allocation could be too large when the size of the dynamic |
| ;; allocation is close to the guard size. |
| define void @dynamic_fixed(i64 %size, ptr %out1, ptr %out2) #0 { |
| ; |
| ; LA64-LABEL: dynamic_fixed: |
| ; LA64: # %bb.0: |
| ; LA64-NEXT: addi.d $sp, $sp, -96 |
| ; LA64-NEXT: .cfi_def_cfa_offset 96 |
| ; LA64-NEXT: st.d $zero, $sp, 0 |
| ; LA64-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill |
| ; LA64-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill |
| ; LA64-NEXT: .cfi_offset 1, -8 |
| ; LA64-NEXT: .cfi_offset 22, -16 |
| ; LA64-NEXT: addi.d $fp, $sp, 96 |
| ; LA64-NEXT: .cfi_def_cfa 22, 0 |
| ; LA64-NEXT: addi.d $a3, $fp, -88 |
| ; LA64-NEXT: st.d $a3, $a1, 0 |
| ; LA64-NEXT: addi.d $a0, $a0, 15 |
| ; LA64-NEXT: bstrins.d $a0, $zero, 3, 0 |
| ; LA64-NEXT: sub.d $a0, $sp, $a0 |
| ; LA64-NEXT: lu12i.w $a1, 1 |
| ; LA64-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 |
| ; LA64-NEXT: sub.d $sp, $sp, $a1 |
| ; LA64-NEXT: st.d $zero, $sp, 0 |
| ; LA64-NEXT: bltu $a0, $sp, .LBB1_1 |
| ; LA64-NEXT: # %bb.2: |
| ; LA64-NEXT: move $sp, $a0 |
| ; LA64-NEXT: st.d $a0, $a2, 0 |
| ; LA64-NEXT: addi.d $sp, $fp, -96 |
| ; LA64-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload |
| ; LA64-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload |
| ; LA64-NEXT: addi.d $sp, $sp, 96 |
| ; LA64-NEXT: ret |
| ; |
| ; LA32-LABEL: dynamic_fixed: |
| ; LA32: # %bb.0: |
| ; LA32-NEXT: addi.w $sp, $sp, -80 |
| ; LA32-NEXT: .cfi_def_cfa_offset 80 |
| ; LA32-NEXT: st.w $zero, $sp, 0 |
| ; LA32-NEXT: st.w $ra, $sp, 76 # 4-byte Folded Spill |
| ; LA32-NEXT: st.w $fp, $sp, 72 # 4-byte Folded Spill |
| ; LA32-NEXT: .cfi_offset 1, -4 |
| ; LA32-NEXT: .cfi_offset 22, -8 |
| ; LA32-NEXT: addi.w $fp, $sp, 80 |
| ; LA32-NEXT: .cfi_def_cfa 22, 0 |
| ; LA32-NEXT: addi.w $a1, $fp, -72 |
| ; LA32-NEXT: st.w $a1, $a2, 0 |
| ; LA32-NEXT: addi.w $a0, $a0, 15 |
| ; LA32-NEXT: addi.w $a1, $zero, -16 |
| ; LA32-NEXT: and $a0, $a0, $a1 |
| ; LA32-NEXT: sub.w $a0, $sp, $a0 |
| ; LA32-NEXT: lu12i.w $a1, 1 |
| ; LA32-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 |
| ; LA32-NEXT: sub.w $sp, $sp, $a1 |
| ; LA32-NEXT: st.w $zero, $sp, 0 |
| ; LA32-NEXT: bltu $a0, $sp, .LBB1_1 |
| ; LA32-NEXT: # %bb.2: |
| ; LA32-NEXT: move $sp, $a0 |
| ; LA32-NEXT: st.w $a0, $a3, 0 |
| ; LA32-NEXT: addi.w $sp, $fp, -80 |
| ; LA32-NEXT: ld.w $fp, $sp, 72 # 4-byte Folded Reload |
| ; LA32-NEXT: ld.w $ra, $sp, 76 # 4-byte Folded Reload |
| ; LA32-NEXT: addi.w $sp, $sp, 80 |
| ; LA32-NEXT: ret |
| ;; IR under test: %v1 is a fixed 64-byte alloca and %v2 a runtime-sized one; |
| ;; their addresses are stored through %out1 and %out2 respectively. |
| ;; In the expected output above, the fixed frame (96 bytes on the 64-bit |
| ;; target, 80 on the 32-bit target) is allocated in the prologue and a zero is |
| ;; stored at the new $sp immediately afterwards; the dynamic part is then |
| ;; handled by the loop that lowers $sp in 4096-byte steps with a zero store |
| ;; after each step. |
| %v1 = alloca i8, i64 64, align 1 |
| store ptr %v1, ptr %out1, align 8 |
| %v2 = alloca i8, i64 %size, align 1 |
| store ptr %v2, ptr %out2, align 8 |
| ret void |
| } |
| |
| ;; Dynamic allocation, with an alignment requirement greater than the alignment |
| ;; of SP. Done by ANDing the target SP with a constant to align it down, then |
| ;; doing the loop as normal. Note that we also re-align the stack in the prolog, |
| ;; which isn't actually needed because the only aligned allocations are dynamic; |
| ;; this is done even without stack probing. |
| define void @dynamic_align_64(i64 %size, ptr %out) #0 { |
| ; |
| ; LA64-LABEL: dynamic_align_64: |
| ; LA64: # %bb.0: |
| ; LA64-NEXT: addi.d $sp, $sp, -128 |
| ; LA64-NEXT: .cfi_def_cfa_offset 128 |
| ; LA64-NEXT: st.d $zero, $sp, 0 |
| ; LA64-NEXT: st.d $ra, $sp, 120 # 8-byte Folded Spill |
| ; LA64-NEXT: st.d $fp, $sp, 112 # 8-byte Folded Spill |
| ; LA64-NEXT: st.d $s8, $sp, 104 # 8-byte Folded Spill |
| ; LA64-NEXT: .cfi_offset 1, -8 |
| ; LA64-NEXT: .cfi_offset 22, -16 |
| ; LA64-NEXT: .cfi_offset 31, -24 |
| ; LA64-NEXT: addi.d $fp, $sp, 128 |
| ; LA64-NEXT: .cfi_def_cfa 22, 0 |
| ; LA64-NEXT: bstrins.d $sp, $zero, 5, 0 |
| ; LA64-NEXT: move $s8, $sp |
| ; LA64-NEXT: addi.d $a0, $a0, 15 |
| ; LA64-NEXT: bstrins.d $a0, $zero, 3, 0 |
| ; LA64-NEXT: sub.d $a0, $sp, $a0 |
| ; LA64-NEXT: bstrins.d $a0, $zero, 5, 0 |
| ; LA64-NEXT: lu12i.w $a2, 1 |
| ; LA64-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 |
| ; LA64-NEXT: sub.d $sp, $sp, $a2 |
| ; LA64-NEXT: st.d $zero, $sp, 0 |
| ; LA64-NEXT: bltu $a0, $sp, .LBB2_1 |
| ; LA64-NEXT: # %bb.2: |
| ; LA64-NEXT: move $sp, $a0 |
| ; LA64-NEXT: st.d $a0, $a1, 0 |
| ; LA64-NEXT: addi.d $sp, $fp, -128 |
| ; LA64-NEXT: ld.d $s8, $sp, 104 # 8-byte Folded Reload |
| ; LA64-NEXT: ld.d $fp, $sp, 112 # 8-byte Folded Reload |
| ; LA64-NEXT: ld.d $ra, $sp, 120 # 8-byte Folded Reload |
| ; LA64-NEXT: addi.d $sp, $sp, 128 |
| ; LA64-NEXT: ret |
| ; |
| ; LA32-LABEL: dynamic_align_64: |
| ; LA32: # %bb.0: |
| ; LA32-NEXT: addi.w $sp, $sp, -64 |
| ; LA32-NEXT: .cfi_def_cfa_offset 64 |
| ; LA32-NEXT: st.w $zero, $sp, 0 |
| ; LA32-NEXT: st.w $ra, $sp, 60 # 4-byte Folded Spill |
| ; LA32-NEXT: st.w $fp, $sp, 56 # 4-byte Folded Spill |
| ; LA32-NEXT: st.w $s8, $sp, 52 # 4-byte Folded Spill |
| ; LA32-NEXT: .cfi_offset 1, -4 |
| ; LA32-NEXT: .cfi_offset 22, -8 |
| ; LA32-NEXT: .cfi_offset 31, -12 |
| ; LA32-NEXT: addi.w $fp, $sp, 64 |
| ; LA32-NEXT: .cfi_def_cfa 22, 0 |
| ; LA32-NEXT: bstrins.w $sp, $zero, 5, 0 |
| ; LA32-NEXT: move $s8, $sp |
| ; LA32-NEXT: addi.w $a0, $a0, 15 |
| ; LA32-NEXT: addi.w $a1, $zero, -16 |
| ; LA32-NEXT: and $a0, $a0, $a1 |
| ; LA32-NEXT: sub.w $a0, $sp, $a0 |
| ; LA32-NEXT: addi.w $a1, $zero, -64 |
| ; LA32-NEXT: and $a0, $a0, $a1 |
| ; LA32-NEXT: lu12i.w $a1, 1 |
| ; LA32-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 |
| ; LA32-NEXT: sub.w $sp, $sp, $a1 |
| ; LA32-NEXT: st.w $zero, $sp, 0 |
| ; LA32-NEXT: bltu $a0, $sp, .LBB2_1 |
| ; LA32-NEXT: # %bb.2: |
| ; LA32-NEXT: move $sp, $a0 |
| ; LA32-NEXT: st.w $a0, $a2, 0 |
| ; LA32-NEXT: addi.w $sp, $fp, -64 |
| ; LA32-NEXT: ld.w $s8, $sp, 52 # 4-byte Folded Reload |
| ; LA32-NEXT: ld.w $fp, $sp, 56 # 4-byte Folded Reload |
| ; LA32-NEXT: ld.w $ra, $sp, 60 # 4-byte Folded Reload |
| ; LA32-NEXT: addi.w $sp, $sp, 64 |
| ; LA32-NEXT: ret |
| ;; IR under test: a runtime-sized alloca with 64-byte alignment; its address is |
| ;; stored through %out. |
| ;; In the expected output above, the target address in $a0 is aligned down by |
| ;; clearing its low 6 bits (bstrins.d with bit range 5..0 on the 64-bit target, |
| ;; an AND with -64 on the 32-bit target) before the probing loop runs. The |
| ;; prologue applies the same low-bit clearing to $sp itself and copies the |
| ;; result into $s8, which is saved and restored as an extra callee-saved |
| ;; register. |
| %v = alloca i8, i64 %size, align 64 |
| store ptr %v, ptr %out, align 8 |
| ret void |
| } |
| |
| ;; Dynamic allocation, with an alignment greater than the stack guard size. The |
| ;; only difference to the dynamic allocation is the constant used for aligning |
| ;; the target SP; the loop will probe the whole allocation without needing to |
| ;; know about the alignment padding. |
| define void @dynamic_align_8192(i64 %size, ptr %out) #0 { |
| ; |
| ; LA64-LABEL: dynamic_align_8192: |
| ; LA64: # %bb.0: |
| ; LA64-NEXT: addi.d $sp, $sp, -2032 |
| ; LA64-NEXT: .cfi_def_cfa_offset 2032 |
| ; LA64-NEXT: st.d $zero, $sp, 0 |
| ; LA64-NEXT: st.d $ra, $sp, 2024 # 8-byte Folded Spill |
| ; LA64-NEXT: st.d $fp, $sp, 2016 # 8-byte Folded Spill |
| ; LA64-NEXT: st.d $s8, $sp, 2008 # 8-byte Folded Spill |
| ; LA64-NEXT: .cfi_offset 1, -8 |
| ; LA64-NEXT: .cfi_offset 22, -16 |
| ; LA64-NEXT: .cfi_offset 31, -24 |
| ; LA64-NEXT: addi.d $fp, $sp, 2032 |
| ; LA64-NEXT: .cfi_def_cfa 22, 0 |
| ; LA64-NEXT: lu12i.w $a2, 1 |
| ; LA64-NEXT: sub.d $sp, $sp, $a2 |
| ; LA64-NEXT: st.d $zero, $sp, 0 |
| ; LA64-NEXT: sub.d $sp, $sp, $a2 |
| ; LA64-NEXT: st.d $zero, $sp, 0 |
| ; LA64-NEXT: sub.d $sp, $sp, $a2 |
| ; LA64-NEXT: st.d $zero, $sp, 0 |
| ; LA64-NEXT: addi.d $sp, $sp, -2048 |
| ; LA64-NEXT: addi.d $sp, $sp, -16 |
| ; LA64-NEXT: st.d $zero, $sp, 0 |
| ; LA64-NEXT: bstrins.d $sp, $zero, 12, 0 |
| ; LA64-NEXT: move $s8, $sp |
| ; LA64-NEXT: addi.d $a0, $a0, 15 |
| ; LA64-NEXT: bstrins.d $a0, $zero, 3, 0 |
| ; LA64-NEXT: sub.d $a0, $sp, $a0 |
| ; LA64-NEXT: bstrins.d $a0, $zero, 12, 0 |
| ; LA64-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 |
| ; LA64-NEXT: sub.d $sp, $sp, $a2 |
| ; LA64-NEXT: st.d $zero, $sp, 0 |
| ; LA64-NEXT: bltu $a0, $sp, .LBB3_1 |
| ; LA64-NEXT: # %bb.2: |
| ; LA64-NEXT: move $sp, $a0 |
| ; LA64-NEXT: st.d $a0, $a1, 0 |
| ; LA64-NEXT: lu12i.w $a0, 4 |
| ; LA64-NEXT: sub.d $sp, $fp, $a0 |
| ; LA64-NEXT: lu12i.w $a0, 3 |
| ; LA64-NEXT: ori $a0, $a0, 2064 |
| ; LA64-NEXT: add.d $sp, $sp, $a0 |
| ; LA64-NEXT: ld.d $s8, $sp, 2008 # 8-byte Folded Reload |
| ; LA64-NEXT: ld.d $fp, $sp, 2016 # 8-byte Folded Reload |
| ; LA64-NEXT: ld.d $ra, $sp, 2024 # 8-byte Folded Reload |
| ; LA64-NEXT: addi.d $sp, $sp, 2032 |
| ; LA64-NEXT: ret |
| ; |
| ; LA32-LABEL: dynamic_align_8192: |
| ; LA32: # %bb.0: |
| ; LA32-NEXT: addi.w $sp, $sp, -2032 |
| ; LA32-NEXT: .cfi_def_cfa_offset 2032 |
| ; LA32-NEXT: st.w $zero, $sp, 0 |
| ; LA32-NEXT: st.w $ra, $sp, 2028 # 4-byte Folded Spill |
| ; LA32-NEXT: st.w $fp, $sp, 2024 # 4-byte Folded Spill |
| ; LA32-NEXT: st.w $s8, $sp, 2020 # 4-byte Folded Spill |
| ; LA32-NEXT: .cfi_offset 1, -4 |
| ; LA32-NEXT: .cfi_offset 22, -8 |
| ; LA32-NEXT: .cfi_offset 31, -12 |
| ; LA32-NEXT: addi.w $fp, $sp, 2032 |
| ; LA32-NEXT: .cfi_def_cfa 22, 0 |
| ; LA32-NEXT: lu12i.w $a1, 1 |
| ; LA32-NEXT: sub.w $sp, $sp, $a1 |
| ; LA32-NEXT: st.w $zero, $sp, 0 |
| ; LA32-NEXT: sub.w $sp, $sp, $a1 |
| ; LA32-NEXT: st.w $zero, $sp, 0 |
| ; LA32-NEXT: sub.w $sp, $sp, $a1 |
| ; LA32-NEXT: st.w $zero, $sp, 0 |
| ; LA32-NEXT: addi.w $sp, $sp, -2048 |
| ; LA32-NEXT: addi.w $sp, $sp, -16 |
| ; LA32-NEXT: st.w $zero, $sp, 0 |
| ; LA32-NEXT: bstrins.w $sp, $zero, 12, 0 |
| ; LA32-NEXT: move $s8, $sp |
| ; LA32-NEXT: addi.w $a0, $a0, 15 |
| ; LA32-NEXT: addi.w $a1, $zero, -16 |
| ; LA32-NEXT: and $a0, $a0, $a1 |
| ; LA32-NEXT: sub.w $a0, $sp, $a0 |
| ; LA32-NEXT: lu12i.w $a1, -2 |
| ; LA32-NEXT: and $a0, $a0, $a1 |
| ; LA32-NEXT: lu12i.w $a1, 1 |
| ; LA32-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 |
| ; LA32-NEXT: sub.w $sp, $sp, $a1 |
| ; LA32-NEXT: st.w $zero, $sp, 0 |
| ; LA32-NEXT: bltu $a0, $sp, .LBB3_1 |
| ; LA32-NEXT: # %bb.2: |
| ; LA32-NEXT: move $sp, $a0 |
| ; LA32-NEXT: st.w $a0, $a2, 0 |
| ; LA32-NEXT: lu12i.w $a0, 4 |
| ; LA32-NEXT: sub.w $sp, $fp, $a0 |
| ; LA32-NEXT: lu12i.w $a0, 3 |
| ; LA32-NEXT: ori $a0, $a0, 2064 |
| ; LA32-NEXT: add.w $sp, $sp, $a0 |
| ; LA32-NEXT: ld.w $s8, $sp, 2020 # 4-byte Folded Reload |
| ; LA32-NEXT: ld.w $fp, $sp, 2024 # 4-byte Folded Reload |
| ; LA32-NEXT: ld.w $ra, $sp, 2028 # 4-byte Folded Reload |
| ; LA32-NEXT: addi.w $sp, $sp, 2032 |
| ; LA32-NEXT: ret |
| ;; IR under test: a runtime-sized alloca with 8192-byte alignment; its address |
| ;; is stored through %out. |
| ;; In the expected output above, after the initial 2032-byte adjustment the |
| ;; prologue lowers $sp three times by 4096 with a zero store after each step, |
| ;; then by a further 2048 + 16 bytes with one more zero store, and only then |
| ;; clears bits 12..0 of $sp. The target address in $a0 gets the same low-bit |
| ;; clearing (bstrins.d with bit range 12..0 on the 64-bit target; an AND with |
| ;; the mask built by "lu12i.w ..., -2", i.e. -8192, on the 32-bit target) |
| ;; before the usual probing loop. |
| %v = alloca i8, i64 %size, align 8192 |
| store ptr %v, ptr %out, align 8 |
| ret void |
| } |
| |
| ;; If a function has variable-sized stack objects, then any function calls which |
| ;; need to pass arguments on the stack must allocate the stack space for them |
| ;; dynamically, to ensure they are at the bottom of the frame. |
| define void @no_reserved_call_frame(i64 %n) #0 { |
| ; |
| ; LA64-LABEL: no_reserved_call_frame: |
| ; LA64: # %bb.0: # %entry |
| ; LA64-NEXT: addi.d $sp, $sp, -16 |
| ; LA64-NEXT: .cfi_def_cfa_offset 16 |
| ; LA64-NEXT: st.d $zero, $sp, 0 |
| ; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill |
| ; LA64-NEXT: st.d $fp, $sp, 0 # 8-byte Folded Spill |
| ; LA64-NEXT: .cfi_offset 1, -8 |
| ; LA64-NEXT: .cfi_offset 22, -16 |
| ; LA64-NEXT: addi.d $fp, $sp, 16 |
| ; LA64-NEXT: .cfi_def_cfa 22, 0 |
| ; LA64-NEXT: slli.d $a0, $a0, 2 |
| ; LA64-NEXT: addi.d $a0, $a0, 15 |
| ; LA64-NEXT: bstrins.d $a0, $zero, 3, 0 |
| ; LA64-NEXT: sub.d $a0, $sp, $a0 |
| ; LA64-NEXT: lu12i.w $a1, 1 |
| ; LA64-NEXT: .LBB4_1: # %entry |
| ; LA64-NEXT: # =>This Inner Loop Header: Depth=1 |
| ; LA64-NEXT: sub.d $sp, $sp, $a1 |
| ; LA64-NEXT: st.d $zero, $sp, 0 |
| ; LA64-NEXT: bltu $a0, $sp, .LBB4_1 |
| ; LA64-NEXT: # %bb.2: # %entry |
| ; LA64-NEXT: move $sp, $a0 |
| ; LA64-NEXT: lu12i.w $a1, 1 |
| ; LA64-NEXT: sub.d $sp, $sp, $a1 |
| ; LA64-NEXT: st.d $zero, $sp, 0 |
| ; LA64-NEXT: pcaddu18i $ra, %call36(callee_stack_args) |
| ; LA64-NEXT: jirl $ra, $ra, 0 |
| ; LA64-NEXT: lu12i.w $a0, 1 |
| ; LA64-NEXT: add.d $sp, $sp, $a0 |
| ; LA64-NEXT: addi.d $sp, $fp, -16 |
| ; LA64-NEXT: ld.d $fp, $sp, 0 # 8-byte Folded Reload |
| ; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload |
| ; LA64-NEXT: addi.d $sp, $sp, 16 |
| ; LA64-NEXT: ret |
| ; |
| ; LA32-LABEL: no_reserved_call_frame: |
| ; LA32: # %bb.0: # %entry |
| ; LA32-NEXT: addi.w $sp, $sp, -16 |
| ; LA32-NEXT: .cfi_def_cfa_offset 16 |
| ; LA32-NEXT: st.w $zero, $sp, 0 |
| ; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill |
| ; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill |
| ; LA32-NEXT: .cfi_offset 1, -4 |
| ; LA32-NEXT: .cfi_offset 22, -8 |
| ; LA32-NEXT: addi.w $fp, $sp, 16 |
| ; LA32-NEXT: .cfi_def_cfa 22, 0 |
| ; LA32-NEXT: slli.w $a0, $a0, 2 |
| ; LA32-NEXT: addi.w $a0, $a0, 15 |
| ; LA32-NEXT: addi.w $a1, $zero, -16 |
| ; LA32-NEXT: and $a0, $a0, $a1 |
| ; LA32-NEXT: sub.w $a0, $sp, $a0 |
| ; LA32-NEXT: lu12i.w $a1, 1 |
| ; LA32-NEXT: .LBB4_1: # %entry |
| ; LA32-NEXT: # =>This Inner Loop Header: Depth=1 |
| ; LA32-NEXT: sub.w $sp, $sp, $a1 |
| ; LA32-NEXT: st.w $zero, $sp, 0 |
| ; LA32-NEXT: bltu $a0, $sp, .LBB4_1 |
| ; LA32-NEXT: # %bb.2: # %entry |
| ; LA32-NEXT: move $sp, $a0 |
| ; LA32-NEXT: lu12i.w $a1, 1 |
| ; LA32-NEXT: sub.w $sp, $sp, $a1 |
| ; LA32-NEXT: st.w $zero, $sp, 0 |
| ; LA32-NEXT: addi.w $sp, $sp, -32 |
| ; LA32-NEXT: st.w $zero, $sp, 0 |
| ; LA32-NEXT: bl callee_stack_args |
| ; LA32-NEXT: lu12i.w $a0, 1 |
| ; LA32-NEXT: ori $a0, $a0, 32 |
| ; LA32-NEXT: add.w $sp, $sp, $a0 |
| ; LA32-NEXT: addi.w $sp, $fp, -16 |
| ; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload |
| ; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload |
| ; LA32-NEXT: addi.w $sp, $sp, 16 |
| ; LA32-NEXT: ret |
| ;; IR under test: %v is a runtime-sized array of %n i32 elements (hence the |
| ;; shift-left-by-2 of the count in the expected output), followed by a call |
| ;; that passes %v and a [518 x i64] aggregate. |
| ;; In the expected output above, after the probing loop for %v, $sp is lowered |
| ;; again just before the call (by 4096 on the 64-bit target; by 4096 and then |
| ;; 32 on the 32-bit target), with a zero stored at the new $sp after each |
| ;; adjustment, and raised by the same total right after the call returns. |
| entry: |
| %v = alloca i32, i64 %n |
| call void @callee_stack_args(ptr %v, [518 x i64] poison) |
| ret void |
| } |
| |
| ;; Same as above but without a variable-sized allocation, so the reserved call |
| ;; frame can be folded into the fixed-size allocation in the prologue. |
| define void @reserved_call_frame(i64 %n) #0 { |
| ; |
| ; LA64-LABEL: reserved_call_frame: |
| ; LA64: # %bb.0: # %entry |
| ; LA64-NEXT: addi.d $sp, $sp, -2032 |
| ; LA64-NEXT: .cfi_def_cfa_offset 2032 |
| ; LA64-NEXT: st.d $ra, $sp, 2024 # 8-byte Folded Spill |
| ; LA64-NEXT: .cfi_offset 1, -8 |
| ; LA64-NEXT: lu12i.w $a0, 1 |
| ; LA64-NEXT: sub.d $sp, $sp, $a0 |
| ; LA64-NEXT: st.d $zero, $sp, 0 |
| ; LA64-NEXT: .cfi_def_cfa_offset 6128 |
| ; LA64-NEXT: addi.d $sp, $sp, -48 |
| ; LA64-NEXT: .cfi_def_cfa_offset 6176 |
| ; LA64-NEXT: lu12i.w $a0, 1 |
| ; LA64-NEXT: add.d $a0, $sp, $a0 |
| ; LA64-NEXT: pcaddu18i $ra, %call36(callee_stack_args) |
| ; LA64-NEXT: jirl $ra, $ra, 0 |
| ; LA64-NEXT: lu12i.w $a0, 1 |
| ; LA64-NEXT: ori $a0, $a0, 48 |
| ; LA64-NEXT: add.d $sp, $sp, $a0 |
| ; LA64-NEXT: ld.d $ra, $sp, 2024 # 8-byte Folded Reload |
| ; LA64-NEXT: addi.d $sp, $sp, 2032 |
| ; LA64-NEXT: ret |
| ; |
| ; LA32-LABEL: reserved_call_frame: |
| ; LA32: # %bb.0: # %entry |
| ; LA32-NEXT: addi.w $sp, $sp, -2032 |
| ; LA32-NEXT: .cfi_def_cfa_offset 2032 |
| ; LA32-NEXT: st.w $ra, $sp, 2028 # 4-byte Folded Spill |
| ; LA32-NEXT: .cfi_offset 1, -4 |
| ; LA32-NEXT: lu12i.w $a0, 1 |
| ; LA32-NEXT: sub.w $sp, $sp, $a0 |
| ; LA32-NEXT: st.w $zero, $sp, 0 |
| ; LA32-NEXT: .cfi_def_cfa_offset 6128 |
| ; LA32-NEXT: addi.w $sp, $sp, -80 |
| ; LA32-NEXT: .cfi_def_cfa_offset 6208 |
| ; LA32-NEXT: lu12i.w $a0, 1 |
| ; LA32-NEXT: ori $a0, $a0, 36 |
| ; LA32-NEXT: add.w $a0, $sp, $a0 |
| ; LA32-NEXT: bl callee_stack_args |
| ; LA32-NEXT: lu12i.w $a0, 1 |
| ; LA32-NEXT: ori $a0, $a0, 80 |
| ; LA32-NEXT: add.w $sp, $sp, $a0 |
| ; LA32-NEXT: ld.w $ra, $sp, 2028 # 4-byte Folded Reload |
| ; LA32-NEXT: addi.w $sp, $sp, 2032 |
| ; LA32-NEXT: ret |
| ;; IR under test: %v is a fixed-size array of 518 i32 elements, followed by a |
| ;; call that passes %v and a [518 x i64] aggregate. %n is unused. |
| ;; The expected output above contains no probing loop and no $fp setup: the |
| ;; whole frame is allocated in the prologue (2032 bytes, then 4096 with a zero |
| ;; store at the new $sp, then 48 bytes on the 64-bit target or 80 on the |
| ;; 32-bit target) and released in one step after the call. |
| entry: |
| %v = alloca i32, i64 518 |
| call void @callee_stack_args(ptr %v, [518 x i64] poison) |
| ret void |
| } |
| |
| ;; External callee for the two call-frame tests; it is only declared here. It |
| ;; takes a pointer plus a [518 x i64] aggregate. |
| declare void @callee_stack_args(ptr, [518 x i64]) |
| |
| |
| ;; Attribute group #0, attached to the function definitions in this file: |
| ;; async unwind tables, inline stack probing ("probe-stack"="inline-asm"), and |
| ;; "frame-pointer"="none". |
| attributes #0 = { uwtable(async) "probe-stack"="inline-asm" "frame-pointer"="none" } |