; blob: 0504959bcfa2b0f661e10d27eb16eaed78a481bb [file] [log] [blame] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mattr=+sme2 < %s | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK
; RUN: llc -mattr=+sme2 < %s -aarch64-new-sme-abi | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-NEWLOWERING
; Every function in this file is compiled for the "aarch64" triple; SME2 itself
; is enabled through the -mattr=+sme2 option on the llc command lines.
target triple = "aarch64"
; External callees declared without any ZA-related attribute. They are the
; callees of the "-> private-ZA" tests in this file.
declare i64 @private_za_decl(i64)
declare void @private_za()
; External callee that carries the "aarch64_za_state_agnostic" attribute.
declare i64 @agnostic_decl(i64) "aarch64_za_state_agnostic"
; No calls. Test that no buffer is allocated.
;
; The body only loads an i64 through %ptr and returns it, so both llc
; invocations are expected to emit nothing but the load and the return: there
; is no call to __arm_sme_state_size, __arm_sme_save or __arm_sme_restore.
define i64 @agnostic_caller_no_callees(ptr %ptr) nounwind "aarch64_za_state_agnostic" {
; CHECK-COMMON-LABEL: agnostic_caller_no_callees:
; CHECK-COMMON: // %bb.0:
; CHECK-COMMON-NEXT: ldr x0, [x0]
; CHECK-COMMON-NEXT: ret
%v = load i64, ptr %ptr
ret i64 %v
}
; agnostic-ZA -> private-ZA
;
; Test that a buffer is allocated and that the appropriate save/restore calls are
; inserted for calls to non-agnostic functions and that the arg/result registers are
; preserved by the register allocator.
;
; The callee is invoked twice, the second call consuming the result of the
; first. In both expected outputs the buffer size comes from a call to
; __arm_sme_state_size, the buffer is carved out of the stack with
; "sub sp, sp, x0", and its address is kept in x19.
;
; The two llc invocations are expected to differ in how often ZA state is
; saved:
;   * default lowering: one __arm_sme_save / __arm_sme_restore pair around
;     each of the two calls;
;   * with -aarch64-new-sme-abi: a single __arm_sme_save before the first call
;     and a single __arm_sme_restore after the second, with the two calls
;     back to back.
define i64 @agnostic_caller_private_za_callee(i64 %v) nounwind "aarch64_za_state_agnostic" {
; CHECK-LABEL: agnostic_caller_private_za_callee:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: mov x8, x0
; CHECK-NEXT: bl __arm_sme_state_size
; CHECK-NEXT: sub sp, sp, x0
; CHECK-NEXT: mov x19, sp
; CHECK-NEXT: mov x0, x19
; CHECK-NEXT: bl __arm_sme_save
; CHECK-NEXT: mov x0, x8
; CHECK-NEXT: bl private_za_decl
; CHECK-NEXT: mov x1, x0
; CHECK-NEXT: mov x0, x19
; CHECK-NEXT: bl __arm_sme_restore
; CHECK-NEXT: mov x0, x19
; CHECK-NEXT: bl __arm_sme_save
; CHECK-NEXT: mov x0, x1
; CHECK-NEXT: bl private_za_decl
; CHECK-NEXT: mov x1, x0
; CHECK-NEXT: mov x0, x19
; CHECK-NEXT: bl __arm_sme_restore
; CHECK-NEXT: mov x0, x1
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: ret
;
; CHECK-NEWLOWERING-LABEL: agnostic_caller_private_za_callee:
; CHECK-NEWLOWERING: // %bb.0:
; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: mov x29, sp
; CHECK-NEWLOWERING-NEXT: mov x8, x0
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_state_size
; CHECK-NEWLOWERING-NEXT: sub sp, sp, x0
; CHECK-NEWLOWERING-NEXT: mov x19, sp
; CHECK-NEWLOWERING-NEXT: mov x0, x19
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_save
; CHECK-NEWLOWERING-NEXT: mov x0, x8
; CHECK-NEWLOWERING-NEXT: bl private_za_decl
; CHECK-NEWLOWERING-NEXT: bl private_za_decl
; CHECK-NEWLOWERING-NEXT: mov x8, x0
; CHECK-NEWLOWERING-NEXT: mov x0, x19
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_restore
; CHECK-NEWLOWERING-NEXT: mov x0, x8
; CHECK-NEWLOWERING-NEXT: mov sp, x29
; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ret
%res = call i64 @private_za_decl(i64 %v)
%res2 = call i64 @private_za_decl(i64 %res)
ret i64 %res2
}
; agnostic-ZA -> agnostic-ZA
;
; Should not result in save/restore code.
;
; Both caller and callee carry "aarch64_za_state_agnostic". The expected output
; is identical for both llc invocations: only the link register is spilled and
; reloaded around the call, and no buffer is allocated.
define i64 @agnostic_caller_agnostic_callee(i64 %v) nounwind "aarch64_za_state_agnostic" {
; CHECK-COMMON-LABEL: agnostic_caller_agnostic_callee:
; CHECK-COMMON: // %bb.0:
; CHECK-COMMON-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-COMMON-NEXT: bl agnostic_decl
; CHECK-COMMON-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-COMMON-NEXT: ret
%res = call i64 @agnostic_decl(i64 %v)
ret i64 %res
}
; shared-ZA -> agnostic-ZA
;
; Should not result in lazy-save or save of ZT0
;
; The caller is marked "aarch64_inout_za" and "aarch64_inout_zt0"; the callee is
; marked "aarch64_za_state_agnostic". The expected output is identical for both
; llc invocations and consists only of the link-register spill/reload around
; the call.
define i64 @shared_caller_agnostic_callee(i64 %v) nounwind "aarch64_inout_za" "aarch64_inout_zt0" {
; CHECK-COMMON-LABEL: shared_caller_agnostic_callee:
; CHECK-COMMON: // %bb.0:
; CHECK-COMMON-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-COMMON-NEXT: bl agnostic_decl
; CHECK-COMMON-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-COMMON-NEXT: ret
%res = call i64 @agnostic_decl(i64 %v)
ret i64 %res
}
; agnostic-ZA + streaming -> private-ZA + non-streaming
;
; The caller is additionally marked "aarch64_pstate_sm_enabled", so each call
; region is bracketed by "smstop sm" / "smstart sm" on top of the ZA state
; save/restore. The callee is invoked twice, the second call consuming the
; result of the first.
;
; Expected ordering per llc invocation:
;   * default lowering: for each call, __arm_sme_save, smstop, the call,
;     smstart, __arm_sme_restore;
;   * with -aarch64-new-sme-abi: one __arm_sme_save and one smstop before the
;     first call, both calls back to back, then one smstart and one
;     __arm_sme_restore.
; In both outputs d8-d15 are spilled in the prologue and the buffer address is
; kept in x20.
define i64 @streaming_agnostic_caller_nonstreaming_private_za_callee(i64 %v) nounwind "aarch64_za_state_agnostic" "aarch64_pstate_sm_enabled" {
; CHECK-LABEL: streaming_agnostic_caller_nonstreaming_private_za_callee:
; CHECK: // %bb.0:
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: mov x8, x0
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: add x29, sp, #64
; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state_size
; CHECK-NEXT: sub sp, sp, x0
; CHECK-NEXT: mov x20, sp
; CHECK-NEXT: mov x0, x20
; CHECK-NEXT: bl __arm_sme_save
; CHECK-NEXT: smstop sm
; CHECK-NEXT: mov x0, x8
; CHECK-NEXT: bl private_za_decl
; CHECK-NEXT: mov x1, x0
; CHECK-NEXT: smstart sm
; CHECK-NEXT: mov x0, x20
; CHECK-NEXT: bl __arm_sme_restore
; CHECK-NEXT: mov x0, x20
; CHECK-NEXT: bl __arm_sme_save
; CHECK-NEXT: smstop sm
; CHECK-NEXT: mov x0, x1
; CHECK-NEXT: bl private_za_decl
; CHECK-NEXT: mov x1, x0
; CHECK-NEXT: smstart sm
; CHECK-NEXT: mov x0, x20
; CHECK-NEXT: bl __arm_sme_restore
; CHECK-NEXT: mov x0, x1
; CHECK-NEXT: sub sp, x29, #64
; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT: ret
;
; CHECK-NEWLOWERING-LABEL: streaming_agnostic_caller_nonstreaming_private_za_callee:
; CHECK-NEWLOWERING: // %bb.0:
; CHECK-NEWLOWERING-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: mov x8, x0
; CHECK-NEWLOWERING-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: add x29, sp, #64
; CHECK-NEWLOWERING-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_state_size
; CHECK-NEWLOWERING-NEXT: sub sp, sp, x0
; CHECK-NEWLOWERING-NEXT: mov x20, sp
; CHECK-NEWLOWERING-NEXT: mov x0, x20
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_save
; CHECK-NEWLOWERING-NEXT: smstop sm
; CHECK-NEWLOWERING-NEXT: mov x0, x8
; CHECK-NEWLOWERING-NEXT: bl private_za_decl
; CHECK-NEWLOWERING-NEXT: bl private_za_decl
; CHECK-NEWLOWERING-NEXT: smstart sm
; CHECK-NEWLOWERING-NEXT: mov x8, x0
; CHECK-NEWLOWERING-NEXT: mov x0, x20
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_restore
; CHECK-NEWLOWERING-NEXT: mov x0, x8
; CHECK-NEWLOWERING-NEXT: sub sp, x29, #64
; CHECK-NEWLOWERING-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ret
%res = call i64 @private_za_decl(i64 %v)
%res2 = call i64 @private_za_decl(i64 %res)
ret i64 %res2
}
; agnostic-ZA + streaming-compatible -> private-ZA + non-streaming
;
; The caller is marked "aarch64_pstate_sm_compatible", so the mode switch is
; conditional: the expected output reads SVCR into x20 once and guards every
; "smstop sm" / "smstart sm" with "tbz w20, #0", skipping the switch when bit 0
; of the value that was read is clear. The callee is invoked twice, the second
; call consuming the result of the first.
;
; Expected differences between the two llc invocations:
;   * default lowering: SVCR is read before the call to __arm_sme_state_size,
;     and each call gets its own save / conditional smstop / call /
;     conditional smstart / restore sequence (four guarded switches in total);
;   * with -aarch64-new-sme-abi: SVCR is read after the buffer address has been
;     placed in x19, and a single save, conditional smstop, conditional smstart
;     and restore enclose both calls.
define i64 @streaming_compatible_agnostic_caller_nonstreaming_private_za_callee(i64 %v) nounwind "aarch64_za_state_agnostic" "aarch64_pstate_sm_compatible" {
; CHECK-LABEL: streaming_compatible_agnostic_caller_nonstreaming_private_za_callee:
; CHECK: // %bb.0:
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: mov x8, x0
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: add x29, sp, #64
; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: mrs x20, SVCR
; CHECK-NEXT: bl __arm_sme_state_size
; CHECK-NEXT: sub sp, sp, x0
; CHECK-NEXT: mov x19, sp
; CHECK-NEXT: mov x0, x19
; CHECK-NEXT: bl __arm_sme_save
; CHECK-NEXT: tbz w20, #0, .LBB5_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB5_2:
; CHECK-NEXT: mov x0, x8
; CHECK-NEXT: bl private_za_decl
; CHECK-NEXT: mov x1, x0
; CHECK-NEXT: tbz w20, #0, .LBB5_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB5_4:
; CHECK-NEXT: mov x0, x19
; CHECK-NEXT: bl __arm_sme_restore
; CHECK-NEXT: mov x0, x19
; CHECK-NEXT: bl __arm_sme_save
; CHECK-NEXT: tbz w20, #0, .LBB5_6
; CHECK-NEXT: // %bb.5:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB5_6:
; CHECK-NEXT: mov x0, x1
; CHECK-NEXT: bl private_za_decl
; CHECK-NEXT: mov x1, x0
; CHECK-NEXT: tbz w20, #0, .LBB5_8
; CHECK-NEXT: // %bb.7:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB5_8:
; CHECK-NEXT: mov x0, x19
; CHECK-NEXT: bl __arm_sme_restore
; CHECK-NEXT: mov x0, x1
; CHECK-NEXT: sub sp, x29, #64
; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT: ret
;
; CHECK-NEWLOWERING-LABEL: streaming_compatible_agnostic_caller_nonstreaming_private_za_callee:
; CHECK-NEWLOWERING: // %bb.0:
; CHECK-NEWLOWERING-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: mov x8, x0
; CHECK-NEWLOWERING-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: add x29, sp, #64
; CHECK-NEWLOWERING-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_state_size
; CHECK-NEWLOWERING-NEXT: sub sp, sp, x0
; CHECK-NEWLOWERING-NEXT: mov x19, sp
; CHECK-NEWLOWERING-NEXT: mrs x20, SVCR
; CHECK-NEWLOWERING-NEXT: mov x0, x19
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_save
; CHECK-NEWLOWERING-NEXT: tbz w20, #0, .LBB5_2
; CHECK-NEWLOWERING-NEXT: // %bb.1:
; CHECK-NEWLOWERING-NEXT: smstop sm
; CHECK-NEWLOWERING-NEXT: .LBB5_2:
; CHECK-NEWLOWERING-NEXT: mov x0, x8
; CHECK-NEWLOWERING-NEXT: bl private_za_decl
; CHECK-NEWLOWERING-NEXT: bl private_za_decl
; CHECK-NEWLOWERING-NEXT: tbz w20, #0, .LBB5_4
; CHECK-NEWLOWERING-NEXT: // %bb.3:
; CHECK-NEWLOWERING-NEXT: smstart sm
; CHECK-NEWLOWERING-NEXT: .LBB5_4:
; CHECK-NEWLOWERING-NEXT: mov x8, x0
; CHECK-NEWLOWERING-NEXT: mov x0, x19
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_restore
; CHECK-NEWLOWERING-NEXT: mov x0, x8
; CHECK-NEWLOWERING-NEXT: sub sp, x29, #64
; CHECK-NEWLOWERING-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ret
%res = call i64 @private_za_decl(i64 %v)
%res2 = call i64 @private_za_decl(i64 %res)
ret i64 %res2
}
; External callee taking ten i64 arguments and carrying no ZA-related
; attribute. It is the callee of @test_many_callee_arguments.
declare i64 @many_args_private_za_callee(
i64, i64, i64, i64, i64, i64, i64, i64, i64, i64)
; In this example some arguments are passed on the stack, which decrements the
; stack pointer before the call -- in this test the call to __arm_sme_save
; should occur _before_ the stack decrement.
;
; The caller forwards its ten i64 arguments unchanged to
; @many_args_private_za_callee. In both expected outputs the 16-byte outgoing
; argument area is pushed with "stp x9, x10, [sp, #-16]!" only after
; "bl __arm_sme_save", and is popped again with "add sp, sp, #16" before the
; call to __arm_sme_restore. The two outputs differ only in the relative order
; of "ldp x9, x10, [x29, #32]" and "mov x19, sp" and in the scratch register
; (x1 vs. x8) that holds the call result across the restore.
define i64 @test_many_callee_arguments(
; CHECK-LABEL: test_many_callee_arguments:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: mov x8, x0
; CHECK-NEXT: bl __arm_sme_state_size
; CHECK-NEXT: sub sp, sp, x0
; CHECK-NEXT: ldp x9, x10, [x29, #32]
; CHECK-NEXT: mov x19, sp
; CHECK-NEXT: mov x0, x19
; CHECK-NEXT: bl __arm_sme_save
; CHECK-NEXT: stp x9, x10, [sp, #-16]!
; CHECK-NEXT: mov x0, x8
; CHECK-NEXT: bl many_args_private_za_callee
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: mov x1, x0
; CHECK-NEXT: mov x0, x19
; CHECK-NEXT: bl __arm_sme_restore
; CHECK-NEXT: mov x0, x1
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: ret
;
; CHECK-NEWLOWERING-LABEL: test_many_callee_arguments:
; CHECK-NEWLOWERING: // %bb.0:
; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: mov x29, sp
; CHECK-NEWLOWERING-NEXT: mov x8, x0
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_state_size
; CHECK-NEWLOWERING-NEXT: sub sp, sp, x0
; CHECK-NEWLOWERING-NEXT: mov x19, sp
; CHECK-NEWLOWERING-NEXT: ldp x9, x10, [x29, #32]
; CHECK-NEWLOWERING-NEXT: mov x0, x19
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_save
; CHECK-NEWLOWERING-NEXT: stp x9, x10, [sp, #-16]!
; CHECK-NEWLOWERING-NEXT: mov x0, x8
; CHECK-NEWLOWERING-NEXT: bl many_args_private_za_callee
; CHECK-NEWLOWERING-NEXT: add sp, sp, #16
; CHECK-NEWLOWERING-NEXT: mov x8, x0
; CHECK-NEWLOWERING-NEXT: mov x0, x19
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_restore
; CHECK-NEWLOWERING-NEXT: mov x0, x8
; CHECK-NEWLOWERING-NEXT: mov sp, x29
; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ret
i64 %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6, i64 %7, i64 %8, i64 %9
) nounwind "aarch64_za_state_agnostic" {
%ret = call i64 @many_args_private_za_callee(
i64 %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6, i64 %7, i64 %8, i64 %9)
ret i64 %ret
}
; agnostic-ZA -> private-ZA, with inline stack probing enabled
;
; The caller requests inline stack probes ("probe-stack"="inline-asm") with a
; probe size of 65536 bytes. Because the buffer size is only known at run time
; (it is the result of __arm_sme_state_size), the expected output allocates the
; buffer with a probing loop: the target address is computed into x19, sp is
; lowered in 65536-byte steps with a store of xzr at each step, and finally sp
; is set to x19 and probed with a load.
;
; In the default-lowering output the __arm_sme_save call follows the probe
; loop, i.e. it happens after sp has been moved down to the buffer address.
;
; NOTE(review): in the -aarch64-new-sme-abi output the "mov x0, x19" /
; "bl __arm_sme_save" pair is emitted before the probe loop and before
; "mov sp, x19", so the save appears to target memory that is still below sp
; and has not been probed yet. Confirm whether this ordering is intended or a
; known limitation being recorded by this test.
define void @agnostic_za_buffer_alloc_with_stack_probes() nounwind "aarch64_za_state_agnostic" "probe-stack"="inline-asm" "stack-probe-size"="65536"{
; CHECK-LABEL: agnostic_za_buffer_alloc_with_stack_probes:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: bl __arm_sme_state_size
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: sub x19, x8, x0
; CHECK-NEXT: .LBB7_1: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEXT: cmp sp, x19
; CHECK-NEXT: b.le .LBB7_3
; CHECK-NEXT: // %bb.2: // in Loop: Header=BB7_1 Depth=1
; CHECK-NEXT: str xzr, [sp]
; CHECK-NEXT: b .LBB7_1
; CHECK-NEXT: .LBB7_3:
; CHECK-NEXT: mov sp, x19
; CHECK-NEXT: ldr xzr, [sp]
; CHECK-NEXT: mov x0, x19
; CHECK-NEXT: bl __arm_sme_save
; CHECK-NEXT: bl private_za
; CHECK-NEXT: mov x0, x19
; CHECK-NEXT: bl __arm_sme_restore
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: ret
;
; CHECK-NEWLOWERING-LABEL: agnostic_za_buffer_alloc_with_stack_probes:
; CHECK-NEWLOWERING: // %bb.0:
; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: mov x29, sp
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_state_size
; CHECK-NEWLOWERING-NEXT: mov x8, sp
; CHECK-NEWLOWERING-NEXT: sub x19, x8, x0
; CHECK-NEWLOWERING-NEXT: mov x0, x19
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_save
; CHECK-NEWLOWERING-NEXT: .LBB7_1: // =>This Inner Loop Header: Depth=1
; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16, lsl #12 // =65536
; CHECK-NEWLOWERING-NEXT: cmp sp, x19
; CHECK-NEWLOWERING-NEXT: b.le .LBB7_3
; CHECK-NEWLOWERING-NEXT: // %bb.2: // in Loop: Header=BB7_1 Depth=1
; CHECK-NEWLOWERING-NEXT: str xzr, [sp]
; CHECK-NEWLOWERING-NEXT: b .LBB7_1
; CHECK-NEWLOWERING-NEXT: .LBB7_3:
; CHECK-NEWLOWERING-NEXT: mov sp, x19
; CHECK-NEWLOWERING-NEXT: ldr xzr, [sp]
; CHECK-NEWLOWERING-NEXT: bl private_za
; CHECK-NEWLOWERING-NEXT: mov x0, x19
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_restore
; CHECK-NEWLOWERING-NEXT: mov sp, x29
; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ret
call void @private_za()
ret void
}