|  | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | 
|  | ; RUN: llc -mtriple=aarch64 < %s -verify-machineinstrs                                   | FileCheck %s | 
|  | ; RUN: llc -mtriple=aarch64 < %s -verify-machineinstrs -global-isel -global-isel-abort=2 | FileCheck %s | 
|  |  | 
|  | ; Dynamically-sized allocation: the size is only known at runtime, so the | 
|  | ; allocation is probed by a loop which can handle any size. The final | 
|  | ; iteration of the loop will temporarily put SP below the target address, but | 
|  | ; this doesn't break any of the ABI constraints on the stack, and also doesn't | 
|  | ; probe below the target SP value. | 
|  | ; | 
|  | ; Expected sequence: the size in x0 is rounded up to a multiple of 16 and | 
|  | ; subtracted from SP to form the target SP in x8. SP is then moved down 4096 | 
|  | ; bytes at a time, storing xzr at each new SP, until SP is at or below the | 
|  | ; target (signed compare); SP is then set to the target and probed once more | 
|  | ; with a load. | 
|  | define void @dynamic(i64 %size, ptr %out) #0 { | 
|  | ; CHECK-LABEL: dynamic: | 
|  | ; CHECK:       // %bb.0: | 
|  | ; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill | 
|  | ; CHECK-NEXT:    .cfi_def_cfa_offset 16 | 
|  | ; CHECK-NEXT:    mov x29, sp | 
|  | ; CHECK-NEXT:    .cfi_def_cfa w29, 16 | 
|  | ; CHECK-NEXT:    .cfi_offset w30, -8 | 
|  | ; CHECK-NEXT:    .cfi_offset w29, -16 | 
|  | ; CHECK-NEXT:    add x9, x0, #15 | 
|  | ; CHECK-NEXT:    mov x8, sp | 
|  | ; CHECK-NEXT:    and x9, x9, #0xfffffffffffffff0 | 
|  | ; CHECK-NEXT:    sub x8, x8, x9 | 
|  | ; CHECK-NEXT:  .LBB0_1: // =>This Inner Loop Header: Depth=1 | 
|  | ; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096 | 
|  | ; CHECK-NEXT:    cmp sp, x8 | 
|  | ; CHECK-NEXT:    b.le .LBB0_3 | 
|  | ; CHECK-NEXT:  // %bb.2: // in Loop: Header=BB0_1 Depth=1 | 
|  | ; CHECK-NEXT:    str xzr, [sp] | 
|  | ; CHECK-NEXT:    b .LBB0_1 | 
|  | ; CHECK-NEXT:  .LBB0_3: | 
|  | ; CHECK-NEXT:    mov sp, x8 | 
|  | ; CHECK-NEXT:    ldr xzr, [sp] | 
|  | ; CHECK-NEXT:    str x8, [x1] | 
|  | ; CHECK-NEXT:    mov sp, x29 | 
|  | ; CHECK-NEXT:    .cfi_def_cfa wsp, 16 | 
|  | ; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload | 
|  | ; CHECK-NEXT:    .cfi_def_cfa_offset 0 | 
|  | ; CHECK-NEXT:    .cfi_restore w30 | 
|  | ; CHECK-NEXT:    .cfi_restore w29 | 
|  | ; CHECK-NEXT:    ret | 
|  | ; %size bytes of stack; the resulting address is stored through %out. | 
|  | %v = alloca i8, i64 %size, align 1 | 
|  | store ptr %v, ptr %out, align 8 | 
|  | ret void | 
|  | } | 
|  |  | 
|  | ; This function has a fixed-size stack slot and a dynamic one. The fixed-size | 
|  | ; slot isn't large enough that we would normally probe it, but we need to do so | 
|  | ; here, otherwise the gap between the CSR save and the first probe of the | 
|  | ; dynamic allocation could be too large when the size of the dynamic | 
|  | ; allocation is close to the guard size. | 
|  | ; | 
|  | ; The 64-byte fixed slot is allocated and probed by a single pre-indexed store | 
|  | ; of xzr at [sp, #-64]. Its address (x29 - 64) is stored through %out1 (x1), | 
|  | ; and the address of the dynamic allocation (x8) through %out2 (x2). | 
|  | ; | 
|  | ; The order-independent checks below are split into two groups by a negative | 
|  | ; check on a string that never appears in the output, so the first group must | 
|  | ; match before the second; presumably the two llc invocations (SelectionDAG | 
|  | ; and GlobalISel) schedule these instructions in different orders. | 
|  | define void @dynamic_fixed(i64 %size, ptr %out1, ptr %out2) #0 { | 
|  | ; CHECK-LABEL: dynamic_fixed: | 
|  | ; CHECK:       // %bb.0: | 
|  | ; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill | 
|  | ; CHECK-NEXT:    .cfi_def_cfa_offset 16 | 
|  | ; CHECK-NEXT:    mov x29, sp | 
|  | ; CHECK-NEXT:    .cfi_def_cfa w29, 16 | 
|  | ; CHECK-NEXT:    .cfi_offset w30, -8 | 
|  | ; CHECK-NEXT:    .cfi_offset w29, -16 | 
|  | ; CHECK-NEXT:    str xzr, [sp, #-64]! | 
|  | ; CHECK-NEXT:    add x9, x0, #15 | 
|  | ; CHECK-NEXT:    mov x8, sp | 
|  | ; CHECK-DAG:     sub x10, x29, #64 | 
|  | ; CHECK-DAG:     and x9, x9, #0xfffffffffffffff0 | 
|  | ; CHECK-NOT:     INVALID_TO_BREAK_UP_CHECK_DAG | 
|  | ; CHECK-DAG:     str x10, [x1] | 
|  | ; CHECK-DAG:     sub x8, x8, x9 | 
|  | ; CHECK-NEXT:  .LBB1_1: // =>This Inner Loop Header: Depth=1 | 
|  | ; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096 | 
|  | ; CHECK-NEXT:    cmp sp, x8 | 
|  | ; CHECK-NEXT:    b.le .LBB1_3 | 
|  | ; CHECK-NEXT:  // %bb.2: // in Loop: Header=BB1_1 Depth=1 | 
|  | ; CHECK-NEXT:    str xzr, [sp] | 
|  | ; CHECK-NEXT:    b .LBB1_1 | 
|  | ; CHECK-NEXT:  .LBB1_3: | 
|  | ; CHECK-NEXT:    mov sp, x8 | 
|  | ; CHECK-NEXT:    ldr xzr, [sp] | 
|  | ; CHECK-NEXT:    str x8, [x2] | 
|  | ; CHECK-NEXT:    mov sp, x29 | 
|  | ; CHECK-NEXT:    .cfi_def_cfa wsp, 16 | 
|  | ; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload | 
|  | ; CHECK-NEXT:    .cfi_def_cfa_offset 0 | 
|  | ; CHECK-NEXT:    .cfi_restore w30 | 
|  | ; CHECK-NEXT:    .cfi_restore w29 | 
|  | ; CHECK-NEXT:    ret | 
|  | ; %v1 is the fixed 64-byte slot, %v2 the runtime-sized one. | 
|  | %v1 = alloca i8, i64 64, align 1 | 
|  | store ptr %v1, ptr %out1, align 8 | 
|  | %v2 = alloca i8, i64 %size, align 1 | 
|  | store ptr %v2, ptr %out2, align 8 | 
|  | ret void | 
|  | } | 
|  |  | 
|  | ; Dynamic allocation, with an alignment requirement greater than the alignment | 
|  | ; of SP. Done by ANDing the target SP with a constant to align it down, then | 
|  | ; doing the loop as normal. Note that we also re-align the stack in the prolog, | 
|  | ; which isn't actually needed because the only aligned allocations are dynamic; | 
|  | ; this is done even without stack probing. | 
|  | ; | 
|  | ; The prologue realignment (sub 32, then AND with ~63) is followed by a store | 
|  | ; probe at the new SP, and x19 is set to SP afterwards (presumably as the base | 
|  | ; pointer, since SP changes dynamically). The target SP in x8 gets the same | 
|  | ; ~63 mask before the 4096-byte probing loop runs. | 
|  | ; | 
|  | ; The order-independent checks below are split into two groups by a negative | 
|  | ; check on a string that never appears in the output, so the first group must | 
|  | ; match before the second. | 
|  | define void @dynamic_align_64(i64 %size, ptr %out) #0 { | 
|  | ; CHECK-LABEL: dynamic_align_64: | 
|  | ; CHECK:       // %bb.0: | 
|  | ; CHECK-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill | 
|  | ; CHECK-NEXT:    .cfi_def_cfa_offset 32 | 
|  | ; CHECK-NEXT:    str x19, [sp, #16] // 8-byte Folded Spill | 
|  | ; CHECK-NEXT:    mov x29, sp | 
|  | ; CHECK-NEXT:    .cfi_def_cfa w29, 32 | 
|  | ; CHECK-NEXT:    .cfi_offset w19, -16 | 
|  | ; CHECK-NEXT:    .cfi_offset w30, -24 | 
|  | ; CHECK-NEXT:    .cfi_offset w29, -32 | 
|  | ; CHECK-NEXT:    sub x9, sp, #32 | 
|  | ; CHECK-NEXT:    and sp, x9, #0xffffffffffffffc0 | 
|  | ; CHECK-NEXT:    add x9, x0, #15 | 
|  | ; CHECK-NEXT:    mov x8, sp | 
|  | ; CHECK-DAG:     str xzr, [sp] | 
|  | ; CHECK-DAG:     and x9, x9, #0xfffffffffffffff0 | 
|  | ; CHECK-NOT:     INVALID_TO_BREAK_UP_CHECK_DAG | 
|  | ; CHECK-DAG:     mov x19, sp | 
|  | ; CHECK-DAG:     sub x8, x8, x9 | 
|  | ; CHECK-NEXT:    and x8, x8, #0xffffffffffffffc0 | 
|  | ; CHECK-NEXT:  .LBB2_1: // =>This Inner Loop Header: Depth=1 | 
|  | ; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096 | 
|  | ; CHECK-NEXT:    cmp sp, x8 | 
|  | ; CHECK-NEXT:    b.le .LBB2_3 | 
|  | ; CHECK-NEXT:  // %bb.2: // in Loop: Header=BB2_1 Depth=1 | 
|  | ; CHECK-NEXT:    str xzr, [sp] | 
|  | ; CHECK-NEXT:    b .LBB2_1 | 
|  | ; CHECK-NEXT:  .LBB2_3: | 
|  | ; CHECK-NEXT:    mov sp, x8 | 
|  | ; CHECK-NEXT:    ldr xzr, [sp] | 
|  | ; CHECK-NEXT:    str x8, [x1] | 
|  | ; CHECK-NEXT:    mov sp, x29 | 
|  | ; CHECK-NEXT:    .cfi_def_cfa wsp, 32 | 
|  | ; CHECK-NEXT:    ldr x19, [sp, #16] // 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload | 
|  | ; CHECK-NEXT:    .cfi_def_cfa_offset 0 | 
|  | ; CHECK-NEXT:    .cfi_restore w19 | 
|  | ; CHECK-NEXT:    .cfi_restore w30 | 
|  | ; CHECK-NEXT:    .cfi_restore w29 | 
|  | ; CHECK-NEXT:    ret | 
|  | ; %size bytes of stack aligned to 64; the address is stored through %out. | 
|  | %v = alloca i8, i64 %size, align 64 | 
|  | store ptr %v, ptr %out, align 8 | 
|  | ret void | 
|  | } | 
|  |  | 
|  | ; Dynamic allocation, with an alignment greater than the stack guard size. For | 
|  | ; the dynamic allocation itself, the only difference from the 64-byte-aligned | 
|  | ; case is the constant used for aligning the target SP; the loop will probe | 
|  | ; the whole allocation without needing to know about the alignment padding. | 
|  | ; | 
|  | ; Note that the prologue's own realignment of SP is also done with a probing | 
|  | ; loop (.LBB3_1 below, target in x9), presumably because aligning down to 8192 | 
|  | ; can move SP by more than the 4096-byte probe interval. The second loop | 
|  | ; (.LBB3_4, target in x8) probes the dynamic allocation. | 
|  | ; | 
|  | ; The order-independent checks below are split into two groups by a negative | 
|  | ; check on a string that never appears in the output, so the first group must | 
|  | ; match before the second. | 
|  | define void @dynamic_align_8192(i64 %size, ptr %out) #0 { | 
|  | ; CHECK-LABEL: dynamic_align_8192: | 
|  | ; CHECK:       // %bb.0: | 
|  | ; CHECK-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill | 
|  | ; CHECK-NEXT:    .cfi_def_cfa_offset 32 | 
|  | ; CHECK-NEXT:    str x19, [sp, #16] // 8-byte Folded Spill | 
|  | ; CHECK-NEXT:    mov x29, sp | 
|  | ; CHECK-NEXT:    .cfi_def_cfa w29, 32 | 
|  | ; CHECK-NEXT:    .cfi_offset w19, -16 | 
|  | ; CHECK-NEXT:    .cfi_offset w30, -24 | 
|  | ; CHECK-NEXT:    .cfi_offset w29, -32 | 
|  | ; CHECK-NEXT:    sub x9, sp, #1, lsl #12 // =4096 | 
|  | ; CHECK-NEXT:    sub x9, x9, #4064 | 
|  | ; CHECK-NEXT:    and x9, x9, #0xffffffffffffe000 | 
|  | ; CHECK-NEXT:  .LBB3_1: // =>This Inner Loop Header: Depth=1 | 
|  | ; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096 | 
|  | ; CHECK-NEXT:    cmp sp, x9 | 
|  | ; CHECK-NEXT:    b.le .LBB3_3 | 
|  | ; CHECK-NEXT:  // %bb.2: // in Loop: Header=BB3_1 Depth=1 | 
|  | ; CHECK-NEXT:    str xzr, [sp] | 
|  | ; CHECK-NEXT:    b .LBB3_1 | 
|  | ; CHECK-NEXT:  .LBB3_3: | 
|  | ; CHECK-NEXT:    mov sp, x9 | 
|  | ; CHECK-NEXT:    add x9, x0, #15 | 
|  | ; CHECK-NEXT:    mov x8, sp | 
|  | ; CHECK-DAG:     ldr xzr, [sp] | 
|  | ; CHECK-DAG:     and x9, x9, #0xfffffffffffffff0 | 
|  | ; CHECK-NOT:     INVALID_TO_BREAK_UP_CHECK_DAG | 
|  | ; CHECK-DAG:     mov x19, sp | 
|  | ; CHECK-DAG:     sub x8, x8, x9 | 
|  | ; CHECK-NEXT:    and x8, x8, #0xffffffffffffe000 | 
|  | ; CHECK-NEXT:  .LBB3_4: // =>This Inner Loop Header: Depth=1 | 
|  | ; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096 | 
|  | ; CHECK-NEXT:    cmp sp, x8 | 
|  | ; CHECK-NEXT:    b.le .LBB3_6 | 
|  | ; CHECK-NEXT:  // %bb.5: // in Loop: Header=BB3_4 Depth=1 | 
|  | ; CHECK-NEXT:    str xzr, [sp] | 
|  | ; CHECK-NEXT:    b .LBB3_4 | 
|  | ; CHECK-NEXT:  .LBB3_6: | 
|  | ; CHECK-NEXT:    mov sp, x8 | 
|  | ; CHECK-NEXT:    ldr xzr, [sp] | 
|  | ; CHECK-NEXT:    str x8, [x1] | 
|  | ; CHECK-NEXT:    mov sp, x29 | 
|  | ; CHECK-NEXT:    .cfi_def_cfa wsp, 32 | 
|  | ; CHECK-NEXT:    ldr x19, [sp, #16] // 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload | 
|  | ; CHECK-NEXT:    .cfi_def_cfa_offset 0 | 
|  | ; CHECK-NEXT:    .cfi_restore w19 | 
|  | ; CHECK-NEXT:    .cfi_restore w30 | 
|  | ; CHECK-NEXT:    .cfi_restore w29 | 
|  | ; CHECK-NEXT:    ret | 
|  | ; %size bytes of stack aligned to 8192; the address is stored through %out. | 
|  | %v = alloca i8, i64 %size, align 8192 | 
|  | store ptr %v, ptr %out, align 8 | 
|  | ret void | 
|  | } | 
|  |  | 
|  | ; For 64k guard pages, the only difference is the constant subtracted from SP | 
|  | ; in the loop: the function carries "stack-probe-size"="65536", and the loop | 
|  | ; steps SP down by 65536 bytes per iteration instead of 4096. | 
|  | define void @dynamic_64k_guard(i64 %size, ptr %out) #0 "stack-probe-size"="65536" { | 
|  | ; CHECK-LABEL: dynamic_64k_guard: | 
|  | ; CHECK:       // %bb.0: | 
|  | ; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill | 
|  | ; CHECK-NEXT:    .cfi_def_cfa_offset 16 | 
|  | ; CHECK-NEXT:    mov x29, sp | 
|  | ; CHECK-NEXT:    .cfi_def_cfa w29, 16 | 
|  | ; CHECK-NEXT:    .cfi_offset w30, -8 | 
|  | ; CHECK-NEXT:    .cfi_offset w29, -16 | 
|  | ; CHECK-NEXT:    add x9, x0, #15 | 
|  | ; CHECK-NEXT:    mov x8, sp | 
|  | ; CHECK-NEXT:    and x9, x9, #0xfffffffffffffff0 | 
|  | ; CHECK-NEXT:    sub x8, x8, x9 | 
|  | ; CHECK-NEXT:  .LBB4_1: // =>This Inner Loop Header: Depth=1 | 
|  | ; CHECK-NEXT:    sub sp, sp, #16, lsl #12 // =65536 | 
|  | ; CHECK-NEXT:    cmp sp, x8 | 
|  | ; CHECK-NEXT:    b.le .LBB4_3 | 
|  | ; CHECK-NEXT:  // %bb.2: // in Loop: Header=BB4_1 Depth=1 | 
|  | ; CHECK-NEXT:    str xzr, [sp] | 
|  | ; CHECK-NEXT:    b .LBB4_1 | 
|  | ; CHECK-NEXT:  .LBB4_3: | 
|  | ; CHECK-NEXT:    mov sp, x8 | 
|  | ; CHECK-NEXT:    ldr xzr, [sp] | 
|  | ; CHECK-NEXT:    str x8, [x1] | 
|  | ; CHECK-NEXT:    mov sp, x29 | 
|  | ; CHECK-NEXT:    .cfi_def_cfa wsp, 16 | 
|  | ; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload | 
|  | ; CHECK-NEXT:    .cfi_def_cfa_offset 0 | 
|  | ; CHECK-NEXT:    .cfi_restore w30 | 
|  | ; CHECK-NEXT:    .cfi_restore w29 | 
|  | ; CHECK-NEXT:    ret | 
|  | ; %size bytes of stack; the resulting address is stored through %out. | 
|  | %v = alloca i8, i64 %size, align 1 | 
|  | store ptr %v, ptr %out, align 8 | 
|  | ret void | 
|  | } | 
|  |  | 
|  | ; If a function has variable-sized stack objects, then any function calls which | 
|  | ; need to pass arguments on the stack must allocate the stack space for them | 
|  | ; dynamically, to ensure they are at the bottom of the frame. We need to probe | 
|  | ; that space when it is larger than the unprobed space allowed by the ABI (1024 | 
|  | ; bytes), so this needs a very large number of arguments. | 
|  | ; | 
|  | ; Here the [138 x i64] argument takes 138 * 8 = 1104 bytes of outgoing stack | 
|  | ; arguments. After the probing loop for the variable-sized alloca (n * 4 bytes, | 
|  | ; rounded up to 16), SP is lowered by 1104 around the call and that space is | 
|  | ; probed with a single store of xzr. | 
|  | define void @no_reserved_call_frame(i64 %n) #0 { | 
|  | ; CHECK-LABEL: no_reserved_call_frame: | 
|  | ; CHECK:       // %bb.0: // %entry | 
|  | ; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill | 
|  | ; CHECK-NEXT:    .cfi_def_cfa_offset 16 | 
|  | ; CHECK-NEXT:    mov x29, sp | 
|  | ; CHECK-NEXT:    .cfi_def_cfa w29, 16 | 
|  | ; CHECK-NEXT:    .cfi_offset w30, -8 | 
|  | ; CHECK-NEXT:    .cfi_offset w29, -16 | 
|  | ; CHECK-NEXT:    lsl x9, x0, #2 | 
|  | ; CHECK-NEXT:    mov x8, sp | 
|  | ; CHECK-NEXT:    add x9, x9, #15 | 
|  | ; CHECK-NEXT:    and x9, x9, #0xfffffffffffffff0 | 
|  | ; CHECK-NEXT:    sub x0, x8, x9 | 
|  | ; CHECK-NEXT:  .LBB5_1: // %entry | 
|  | ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1 | 
|  | ; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096 | 
|  | ; CHECK-NEXT:    cmp sp, x0 | 
|  | ; CHECK-NEXT:    b.le .LBB5_3 | 
|  | ; CHECK-NEXT:  // %bb.2: // %entry | 
|  | ; CHECK-NEXT:    // in Loop: Header=BB5_1 Depth=1 | 
|  | ; CHECK-NEXT:    str xzr, [sp] | 
|  | ; CHECK-NEXT:    b .LBB5_1 | 
|  | ; CHECK-NEXT:  .LBB5_3: // %entry | 
|  | ; CHECK-NEXT:    mov sp, x0 | 
|  | ; CHECK-NEXT:    ldr xzr, [sp] | 
|  | ; CHECK-NEXT:    sub sp, sp, #1104 | 
|  | ; CHECK-NEXT:    str xzr, [sp] | 
|  | ; CHECK-NEXT:    bl callee_stack_args | 
|  | ; CHECK-NEXT:    add sp, sp, #1104 | 
|  | ; CHECK-NEXT:    mov sp, x29 | 
|  | ; CHECK-NEXT:    .cfi_def_cfa wsp, 16 | 
|  | ; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload | 
|  | ; CHECK-NEXT:    .cfi_def_cfa_offset 0 | 
|  | ; CHECK-NEXT:    .cfi_restore w30 | 
|  | ; CHECK-NEXT:    .cfi_restore w29 | 
|  | ; CHECK-NEXT:    ret | 
|  | entry: | 
|  | ; %v is a variable-sized array of %n i32 values; the array argument's contents | 
|  | ; are undef, only its size matters for the call frame. | 
|  | %v = alloca i32, i64 %n | 
|  | call void @callee_stack_args(ptr %v, [138 x i64] undef) | 
|  | ret void | 
|  | } | 
|  |  | 
|  | ; Same call with 1104 bytes of stack arguments, but without a variable-sized | 
|  | ; allocation, so the reserved call frame can be folded into the fixed-size | 
|  | ; allocation in the prologue. The 400-byte alloca (100 x i32) and the 1104-byte | 
|  | ; call frame are allocated by one 1504-byte SP adjustment, with %v at sp + 1104 | 
|  | ; and a single store probe at the new SP. | 
|  | define void @reserved_call_frame(i64 %n) #0 { | 
|  | ; CHECK-LABEL: reserved_call_frame: | 
|  | ; CHECK:       // %bb.0: // %entry | 
|  | ; CHECK-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill | 
|  | ; CHECK-NEXT:    .cfi_def_cfa_offset 32 | 
|  | ; CHECK-NEXT:    str x28, [sp, #16] // 8-byte Folded Spill | 
|  | ; CHECK-NEXT:    mov x29, sp | 
|  | ; CHECK-NEXT:    .cfi_def_cfa w29, 32 | 
|  | ; CHECK-NEXT:    .cfi_offset w28, -16 | 
|  | ; CHECK-NEXT:    .cfi_offset w30, -24 | 
|  | ; CHECK-NEXT:    .cfi_offset w29, -32 | 
|  | ; CHECK-NEXT:    sub sp, sp, #1504 | 
|  | ; CHECK-NEXT:    add x0, sp, #1104 | 
|  | ; CHECK-NEXT:    str xzr, [sp] | 
|  | ; CHECK-NEXT:    bl callee_stack_args | 
|  | ; CHECK-NEXT:    add sp, sp, #1504 | 
|  | ; CHECK-NEXT:    .cfi_def_cfa wsp, 32 | 
|  | ; CHECK-NEXT:    ldr x28, [sp, #16] // 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload | 
|  | ; CHECK-NEXT:    .cfi_def_cfa_offset 0 | 
|  | ; CHECK-NEXT:    .cfi_restore w28 | 
|  | ; CHECK-NEXT:    .cfi_restore w30 | 
|  | ; CHECK-NEXT:    .cfi_restore w29 | 
|  | ; CHECK-NEXT:    ret | 
|  | entry: | 
|  | ; Constant-sized alloca (100 x i32); the %n parameter is unused. | 
|  | %v = alloca i32, i64 100 | 
|  | call void @callee_stack_args(ptr %v, [138 x i64] undef) | 
|  | ret void | 
|  | } | 
|  |  | 
|  | ; External callee taking a pointer plus a [138 x i64] array by value; the | 
|  | ; callers in this file allocate 1104 bytes (138 * 8) of stack for the call. | 
|  | declare void @callee_stack_args(ptr, [138 x i64]) | 
|  |  | 
|  | ; Dynamic allocation of SVE vectors. The element type is a scalable vector, so | 
|  | ; the byte size is computed at runtime: rdvl reads the vector length in bytes, | 
|  | ; and madd forms size * VL + 15 before rounding down to a multiple of 16. The | 
|  | ; probing loop that follows steps SP down by 4096 bytes per iteration towards | 
|  | ; the target SP in x8. | 
|  | define void @dynamic_sve(i64 %size, ptr %out) #0 "target-features"="+sve" { | 
|  | ; CHECK-LABEL: dynamic_sve: | 
|  | ; CHECK:       // %bb.0: | 
|  | ; CHECK-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill | 
|  | ; CHECK-NEXT:    .cfi_def_cfa_offset 32 | 
|  | ; CHECK-NEXT:    str x19, [sp, #16] // 8-byte Folded Spill | 
|  | ; CHECK-NEXT:    mov x29, sp | 
|  | ; CHECK-NEXT:    .cfi_def_cfa w29, 32 | 
|  | ; CHECK-NEXT:    .cfi_offset w19, -16 | 
|  | ; CHECK-NEXT:    .cfi_offset w30, -24 | 
|  | ; CHECK-NEXT:    .cfi_offset w29, -32 | 
|  | ; CHECK-NEXT:    rdvl x9, #1 | 
|  | ; CHECK-NEXT:    mov x10, #15 // =0xf | 
|  | ; CHECK-DAG:     mov x8, sp | 
|  | ; CHECK-DAG:     madd x9, x0, x9, x10 | 
|  | ; CHECK-NEXT:    and x9, x9, #0xfffffffffffffff0 | 
|  | ; CHECK-NEXT:    sub x8, x8, x9 | 
|  | ; CHECK-NEXT:  .LBB7_1: // =>This Inner Loop Header: Depth=1 | 
|  | ; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096 | 
|  | ; CHECK-NEXT:    cmp sp, x8 | 
|  | ; CHECK-NEXT:    b.le .LBB7_3 | 
|  | ; CHECK-NEXT:  // %bb.2: // in Loop: Header=BB7_1 Depth=1 | 
|  | ; CHECK-NEXT:    str xzr, [sp] | 
|  | ; CHECK-NEXT:    b .LBB7_1 | 
|  | ; CHECK-NEXT:  .LBB7_3: | 
|  | ; CHECK-NEXT:    mov sp, x8 | 
|  | ; CHECK-NEXT:    ldr xzr, [sp] | 
|  | ; CHECK-NEXT:    str x8, [x1] | 
|  | ; CHECK-NEXT:    mov sp, x29 | 
|  | ; CHECK-NEXT:    .cfi_def_cfa wsp, 32 | 
|  | ; CHECK-NEXT:    ldr x19, [sp, #16] // 8-byte Folded Reload | 
|  | ; CHECK-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload | 
|  | ; CHECK-NEXT:    .cfi_def_cfa_offset 0 | 
|  | ; CHECK-NEXT:    .cfi_restore w19 | 
|  | ; CHECK-NEXT:    .cfi_restore w30 | 
|  | ; CHECK-NEXT:    .cfi_restore w29 | 
|  | ; CHECK-NEXT:    ret | 
|  | ; %size scalable vectors of 4 x float; the address is stored through %out. | 
|  | %v = alloca <vscale x 4 x float>, i64 %size, align 16 | 
|  | store ptr %v, ptr %out, align 8 | 
|  | ret void | 
|  | } | 
|  |  | 
|  | ; Attributes shared by every test function: asynchronous unwind tables | 
|  | ; (presumably why the epilogues carry .cfi directives), inline stack probing, | 
|  | ; and no explicitly requested frame pointer. | 
|  | attributes #0 = { uwtable(async) "probe-stack"="inline-asm" "frame-pointer"="none" } | 
|  |  |