| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc -mattr=+sme2 < %s | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK |
| ; RUN: llc -mattr=+sme2 < %s -aarch64-new-sme-abi | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-NEWLOWERING |
| |
| target triple = "aarch64" |
| |
| declare i64 @private_za_decl(i64) |
| declare void @private_za() |
| declare i64 @agnostic_decl(i64) "aarch64_za_state_agnostic" |
| |
| ; No calls. Test that no buffer is allocated. |
| define i64 @agnostic_caller_no_callees(ptr %ptr) nounwind "aarch64_za_state_agnostic" { |
| ; CHECK-COMMON-LABEL: agnostic_caller_no_callees: |
| ; CHECK-COMMON: // %bb.0: |
| ; CHECK-COMMON-NEXT: ldr x0, [x0] |
| ; CHECK-COMMON-NEXT: ret |
| ; Body is a single load with no calls, so no __arm_sme_state_size buffer |
| ; or save/restore calls are expected (both RUN lines share these checks). |
|   %v = load i64, ptr %ptr |
|   ret i64 %v |
| } |
| |
| ; agnostic-ZA -> private-ZA |
| ; |
| ; Test that a buffer is allocated and that the appropriate save/restore calls are |
| ; inserted for calls to non-agnostic functions and that the arg/result registers are |
| ; preserved by the register allocator. |
| define i64 @agnostic_caller_private_za_callee(i64 %v) nounwind "aarch64_za_state_agnostic" { |
| ; CHECK-LABEL: agnostic_caller_private_za_callee: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill |
| ; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill |
| ; CHECK-NEXT: mov x29, sp |
| ; CHECK-NEXT: mov x8, x0 |
| ; CHECK-NEXT: bl __arm_sme_state_size |
| ; CHECK-NEXT: sub sp, sp, x0 |
| ; CHECK-NEXT: mov x19, sp |
| ; CHECK-NEXT: mov x0, x19 |
| ; CHECK-NEXT: bl __arm_sme_save |
| ; CHECK-NEXT: mov x0, x8 |
| ; CHECK-NEXT: bl private_za_decl |
| ; CHECK-NEXT: mov x1, x0 |
| ; CHECK-NEXT: mov x0, x19 |
| ; CHECK-NEXT: bl __arm_sme_restore |
| ; CHECK-NEXT: mov x0, x19 |
| ; CHECK-NEXT: bl __arm_sme_save |
| ; CHECK-NEXT: mov x0, x1 |
| ; CHECK-NEXT: bl private_za_decl |
| ; CHECK-NEXT: mov x1, x0 |
| ; CHECK-NEXT: mov x0, x19 |
| ; CHECK-NEXT: bl __arm_sme_restore |
| ; CHECK-NEXT: mov x0, x1 |
| ; CHECK-NEXT: mov sp, x29 |
| ; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload |
| ; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-NEWLOWERING-LABEL: agnostic_caller_private_za_callee: |
| ; CHECK-NEWLOWERING: // %bb.0: |
| ; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill |
| ; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill |
| ; CHECK-NEWLOWERING-NEXT: mov x29, sp |
| ; CHECK-NEWLOWERING-NEXT: mov x8, x0 |
| ; CHECK-NEWLOWERING-NEXT: bl __arm_sme_state_size |
| ; CHECK-NEWLOWERING-NEXT: sub sp, sp, x0 |
| ; CHECK-NEWLOWERING-NEXT: mov x19, sp |
| ; CHECK-NEWLOWERING-NEXT: mov x0, x19 |
| ; CHECK-NEWLOWERING-NEXT: bl __arm_sme_save |
| ; CHECK-NEWLOWERING-NEXT: mov x0, x8 |
| ; CHECK-NEWLOWERING-NEXT: bl private_za_decl |
| ; CHECK-NEWLOWERING-NEXT: bl private_za_decl |
| ; CHECK-NEWLOWERING-NEXT: mov x8, x0 |
| ; CHECK-NEWLOWERING-NEXT: mov x0, x19 |
| ; CHECK-NEWLOWERING-NEXT: bl __arm_sme_restore |
| ; CHECK-NEWLOWERING-NEXT: mov x0, x8 |
| ; CHECK-NEWLOWERING-NEXT: mov sp, x29 |
| ; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload |
| ; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload |
| ; CHECK-NEWLOWERING-NEXT: ret |
| ; Per the checks above: both lowerings allocate the buffer returned by |
| ; __arm_sme_state_size on the stack (x19 holds its base). The old lowering |
| ; brackets each private-ZA call with __arm_sme_save/__arm_sme_restore; the |
| ; new lowering emits a single save before the first call and a single |
| ; restore after the second. |
|   %res = call i64 @private_za_decl(i64 %v) |
|   %res2 = call i64 @private_za_decl(i64 %res) |
|   ret i64 %res2 |
| } |
| |
| ; agnostic-ZA -> agnostic-ZA |
| ; |
| ; Should not result in save/restore code. |
| define i64 @agnostic_caller_agnostic_callee(i64 %v) nounwind "aarch64_za_state_agnostic" { |
| ; CHECK-COMMON-LABEL: agnostic_caller_agnostic_callee: |
| ; CHECK-COMMON: // %bb.0: |
| ; CHECK-COMMON-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-COMMON-NEXT: bl agnostic_decl |
| ; CHECK-COMMON-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-COMMON-NEXT: ret |
| ; Plain call to the agnostic-ZA callee: no __arm_sme_save/__arm_sme_restore |
| ; and no state-size buffer appear in the checks. |
|   %res = call i64 @agnostic_decl(i64 %v) |
|   ret i64 %res |
| } |
| |
| ; shared-ZA -> agnostic-ZA |
| ; |
| ; Should not result in a lazy-save or a save of ZT0. |
| define i64 @shared_caller_agnostic_callee(i64 %v) nounwind "aarch64_inout_za" "aarch64_inout_zt0" { |
| ; CHECK-COMMON-LABEL: shared_caller_agnostic_callee: |
| ; CHECK-COMMON: // %bb.0: |
| ; CHECK-COMMON-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-COMMON-NEXT: bl agnostic_decl |
| ; CHECK-COMMON-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-COMMON-NEXT: ret |
| ; Plain call: the checks contain no lazy-save setup and no ZT0 spill |
| ; despite the caller's shared ZA/ZT0 attributes. |
|   %res = call i64 @agnostic_decl(i64 %v) |
|   ret i64 %res |
| } |
| |
| ; agnostic-ZA + streaming -> private-ZA + non-streaming |
| define i64 @streaming_agnostic_caller_nonstreaming_private_za_callee(i64 %v) nounwind "aarch64_za_state_agnostic" "aarch64_pstate_sm_enabled" { |
| ; CHECK-LABEL: streaming_agnostic_caller_nonstreaming_private_za_callee: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill |
| ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill |
| ; CHECK-NEXT: mov x8, x0 |
| ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill |
| ; CHECK-NEXT: add x29, sp, #64 |
| ; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill |
| ; CHECK-NEXT: bl __arm_sme_state_size |
| ; CHECK-NEXT: sub sp, sp, x0 |
| ; CHECK-NEXT: mov x20, sp |
| ; CHECK-NEXT: mov x0, x20 |
| ; CHECK-NEXT: bl __arm_sme_save |
| ; CHECK-NEXT: smstop sm |
| ; CHECK-NEXT: mov x0, x8 |
| ; CHECK-NEXT: bl private_za_decl |
| ; CHECK-NEXT: mov x1, x0 |
| ; CHECK-NEXT: smstart sm |
| ; CHECK-NEXT: mov x0, x20 |
| ; CHECK-NEXT: bl __arm_sme_restore |
| ; CHECK-NEXT: mov x0, x20 |
| ; CHECK-NEXT: bl __arm_sme_save |
| ; CHECK-NEXT: smstop sm |
| ; CHECK-NEXT: mov x0, x1 |
| ; CHECK-NEXT: bl private_za_decl |
| ; CHECK-NEXT: mov x1, x0 |
| ; CHECK-NEXT: smstart sm |
| ; CHECK-NEXT: mov x0, x20 |
| ; CHECK-NEXT: bl __arm_sme_restore |
| ; CHECK-NEXT: mov x0, x1 |
| ; CHECK-NEXT: sub sp, x29, #64 |
| ; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-NEWLOWERING-LABEL: streaming_agnostic_caller_nonstreaming_private_za_callee: |
| ; CHECK-NEWLOWERING: // %bb.0: |
| ; CHECK-NEWLOWERING-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill |
| ; CHECK-NEWLOWERING-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill |
| ; CHECK-NEWLOWERING-NEXT: mov x8, x0 |
| ; CHECK-NEWLOWERING-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill |
| ; CHECK-NEWLOWERING-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill |
| ; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill |
| ; CHECK-NEWLOWERING-NEXT: add x29, sp, #64 |
| ; CHECK-NEWLOWERING-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill |
| ; CHECK-NEWLOWERING-NEXT: bl __arm_sme_state_size |
| ; CHECK-NEWLOWERING-NEXT: sub sp, sp, x0 |
| ; CHECK-NEWLOWERING-NEXT: mov x20, sp |
| ; CHECK-NEWLOWERING-NEXT: mov x0, x20 |
| ; CHECK-NEWLOWERING-NEXT: bl __arm_sme_save |
| ; CHECK-NEWLOWERING-NEXT: smstop sm |
| ; CHECK-NEWLOWERING-NEXT: mov x0, x8 |
| ; CHECK-NEWLOWERING-NEXT: bl private_za_decl |
| ; CHECK-NEWLOWERING-NEXT: bl private_za_decl |
| ; CHECK-NEWLOWERING-NEXT: smstart sm |
| ; CHECK-NEWLOWERING-NEXT: mov x8, x0 |
| ; CHECK-NEWLOWERING-NEXT: mov x0, x20 |
| ; CHECK-NEWLOWERING-NEXT: bl __arm_sme_restore |
| ; CHECK-NEWLOWERING-NEXT: mov x0, x8 |
| ; CHECK-NEWLOWERING-NEXT: sub sp, x29, #64 |
| ; CHECK-NEWLOWERING-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload |
| ; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload |
| ; CHECK-NEWLOWERING-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload |
| ; CHECK-NEWLOWERING-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload |
| ; CHECK-NEWLOWERING-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload |
| ; CHECK-NEWLOWERING-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload |
| ; CHECK-NEWLOWERING-NEXT: ret |
| ; In addition to the ZA save/restore of the previous test, the streaming |
| ; caller wraps the non-streaming callee(s) in smstop sm / smstart sm; the |
| ; new lowering coalesces both the mode switches and the save/restore |
| ; around the pair of calls. |
|   %res = call i64 @private_za_decl(i64 %v) |
|   %res2 = call i64 @private_za_decl(i64 %res) |
|   ret i64 %res2 |
| } |
| |
| ; agnostic-ZA + streaming-compatible -> private-ZA + non-streaming |
| define i64 @streaming_compatible_agnostic_caller_nonstreaming_private_za_callee(i64 %v) nounwind "aarch64_za_state_agnostic" "aarch64_pstate_sm_compatible" { |
| ; CHECK-LABEL: streaming_compatible_agnostic_caller_nonstreaming_private_za_callee: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill |
| ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill |
| ; CHECK-NEXT: mov x8, x0 |
| ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill |
| ; CHECK-NEXT: add x29, sp, #64 |
| ; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill |
| ; CHECK-NEXT: mrs x20, SVCR |
| ; CHECK-NEXT: bl __arm_sme_state_size |
| ; CHECK-NEXT: sub sp, sp, x0 |
| ; CHECK-NEXT: mov x19, sp |
| ; CHECK-NEXT: mov x0, x19 |
| ; CHECK-NEXT: bl __arm_sme_save |
| ; CHECK-NEXT: tbz w20, #0, .LBB5_2 |
| ; CHECK-NEXT: // %bb.1: |
| ; CHECK-NEXT: smstop sm |
| ; CHECK-NEXT: .LBB5_2: |
| ; CHECK-NEXT: mov x0, x8 |
| ; CHECK-NEXT: bl private_za_decl |
| ; CHECK-NEXT: mov x1, x0 |
| ; CHECK-NEXT: tbz w20, #0, .LBB5_4 |
| ; CHECK-NEXT: // %bb.3: |
| ; CHECK-NEXT: smstart sm |
| ; CHECK-NEXT: .LBB5_4: |
| ; CHECK-NEXT: mov x0, x19 |
| ; CHECK-NEXT: bl __arm_sme_restore |
| ; CHECK-NEXT: mov x0, x19 |
| ; CHECK-NEXT: bl __arm_sme_save |
| ; CHECK-NEXT: tbz w20, #0, .LBB5_6 |
| ; CHECK-NEXT: // %bb.5: |
| ; CHECK-NEXT: smstop sm |
| ; CHECK-NEXT: .LBB5_6: |
| ; CHECK-NEXT: mov x0, x1 |
| ; CHECK-NEXT: bl private_za_decl |
| ; CHECK-NEXT: mov x1, x0 |
| ; CHECK-NEXT: tbz w20, #0, .LBB5_8 |
| ; CHECK-NEXT: // %bb.7: |
| ; CHECK-NEXT: smstart sm |
| ; CHECK-NEXT: .LBB5_8: |
| ; CHECK-NEXT: mov x0, x19 |
| ; CHECK-NEXT: bl __arm_sme_restore |
| ; CHECK-NEXT: mov x0, x1 |
| ; CHECK-NEXT: sub sp, x29, #64 |
| ; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-NEWLOWERING-LABEL: streaming_compatible_agnostic_caller_nonstreaming_private_za_callee: |
| ; CHECK-NEWLOWERING: // %bb.0: |
| ; CHECK-NEWLOWERING-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill |
| ; CHECK-NEWLOWERING-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill |
| ; CHECK-NEWLOWERING-NEXT: mov x8, x0 |
| ; CHECK-NEWLOWERING-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill |
| ; CHECK-NEWLOWERING-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill |
| ; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill |
| ; CHECK-NEWLOWERING-NEXT: add x29, sp, #64 |
| ; CHECK-NEWLOWERING-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill |
| ; CHECK-NEWLOWERING-NEXT: bl __arm_sme_state_size |
| ; CHECK-NEWLOWERING-NEXT: sub sp, sp, x0 |
| ; CHECK-NEWLOWERING-NEXT: mov x19, sp |
| ; CHECK-NEWLOWERING-NEXT: mrs x20, SVCR |
| ; CHECK-NEWLOWERING-NEXT: mov x0, x19 |
| ; CHECK-NEWLOWERING-NEXT: bl __arm_sme_save |
| ; CHECK-NEWLOWERING-NEXT: tbz w20, #0, .LBB5_2 |
| ; CHECK-NEWLOWERING-NEXT: // %bb.1: |
| ; CHECK-NEWLOWERING-NEXT: smstop sm |
| ; CHECK-NEWLOWERING-NEXT: .LBB5_2: |
| ; CHECK-NEWLOWERING-NEXT: mov x0, x8 |
| ; CHECK-NEWLOWERING-NEXT: bl private_za_decl |
| ; CHECK-NEWLOWERING-NEXT: bl private_za_decl |
| ; CHECK-NEWLOWERING-NEXT: tbz w20, #0, .LBB5_4 |
| ; CHECK-NEWLOWERING-NEXT: // %bb.3: |
| ; CHECK-NEWLOWERING-NEXT: smstart sm |
| ; CHECK-NEWLOWERING-NEXT: .LBB5_4: |
| ; CHECK-NEWLOWERING-NEXT: mov x8, x0 |
| ; CHECK-NEWLOWERING-NEXT: mov x0, x19 |
| ; CHECK-NEWLOWERING-NEXT: bl __arm_sme_restore |
| ; CHECK-NEWLOWERING-NEXT: mov x0, x8 |
| ; CHECK-NEWLOWERING-NEXT: sub sp, x29, #64 |
| ; CHECK-NEWLOWERING-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload |
| ; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload |
| ; CHECK-NEWLOWERING-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload |
| ; CHECK-NEWLOWERING-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload |
| ; CHECK-NEWLOWERING-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload |
| ; CHECK-NEWLOWERING-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload |
| ; CHECK-NEWLOWERING-NEXT: ret |
| ; Streaming-compatible caller: SVCR is read into x20 and bit 0 guards |
| ; conditional smstop/smstart blocks around the non-streaming callee(s); |
| ; the new lowering emits one guarded smstop/smstart pair and one |
| ; save/restore pair around both calls. |
|   %res = call i64 @private_za_decl(i64 %v) |
|   %res2 = call i64 @private_za_decl(i64 %res) |
|   ret i64 %res2 |
| } |
| |
| declare i64 @many_args_private_za_callee( |
| i64, i64, i64, i64, i64, i64, i64, i64, i64, i64) |
| |
| ; In this example some arguments are passed on the stack, which decrements the |
| ; stack pointer before the call -- in this test the call to __arm_sme_save |
| ; should occur _before_ the stack decrement. |
| define i64 @test_many_callee_arguments( |
| ; CHECK-LABEL: test_many_callee_arguments: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill |
| ; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill |
| ; CHECK-NEXT: mov x29, sp |
| ; CHECK-NEXT: mov x8, x0 |
| ; CHECK-NEXT: bl __arm_sme_state_size |
| ; CHECK-NEXT: sub sp, sp, x0 |
| ; CHECK-NEXT: ldp x9, x10, [x29, #32] |
| ; CHECK-NEXT: mov x19, sp |
| ; CHECK-NEXT: mov x0, x19 |
| ; CHECK-NEXT: bl __arm_sme_save |
| ; CHECK-NEXT: stp x9, x10, [sp, #-16]! |
| ; CHECK-NEXT: mov x0, x8 |
| ; CHECK-NEXT: bl many_args_private_za_callee |
| ; CHECK-NEXT: add sp, sp, #16 |
| ; CHECK-NEXT: mov x1, x0 |
| ; CHECK-NEXT: mov x0, x19 |
| ; CHECK-NEXT: bl __arm_sme_restore |
| ; CHECK-NEXT: mov x0, x1 |
| ; CHECK-NEXT: mov sp, x29 |
| ; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload |
| ; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-NEWLOWERING-LABEL: test_many_callee_arguments: |
| ; CHECK-NEWLOWERING: // %bb.0: |
| ; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill |
| ; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill |
| ; CHECK-NEWLOWERING-NEXT: mov x29, sp |
| ; CHECK-NEWLOWERING-NEXT: mov x8, x0 |
| ; CHECK-NEWLOWERING-NEXT: bl __arm_sme_state_size |
| ; CHECK-NEWLOWERING-NEXT: sub sp, sp, x0 |
| ; CHECK-NEWLOWERING-NEXT: mov x19, sp |
| ; CHECK-NEWLOWERING-NEXT: ldp x9, x10, [x29, #32] |
| ; CHECK-NEWLOWERING-NEXT: mov x0, x19 |
| ; CHECK-NEWLOWERING-NEXT: bl __arm_sme_save |
| ; CHECK-NEWLOWERING-NEXT: stp x9, x10, [sp, #-16]! |
| ; CHECK-NEWLOWERING-NEXT: mov x0, x8 |
| ; CHECK-NEWLOWERING-NEXT: bl many_args_private_za_callee |
| ; CHECK-NEWLOWERING-NEXT: add sp, sp, #16 |
| ; CHECK-NEWLOWERING-NEXT: mov x8, x0 |
| ; CHECK-NEWLOWERING-NEXT: mov x0, x19 |
| ; CHECK-NEWLOWERING-NEXT: bl __arm_sme_restore |
| ; CHECK-NEWLOWERING-NEXT: mov x0, x8 |
| ; CHECK-NEWLOWERING-NEXT: mov sp, x29 |
| ; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload |
| ; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload |
| ; CHECK-NEWLOWERING-NEXT: ret |
| ; In both lowerings above, the store of the stack-passed arguments |
| ; (stp x9, x10, [sp, #-16]!) comes after the bl __arm_sme_save, as the |
| ; header comment requires. |
|   i64 %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6, i64 %7, i64 %8, i64 %9 |
| ) nounwind "aarch64_za_state_agnostic" { |
|   %ret = call i64 @many_args_private_za_callee( |
|     i64 %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6, i64 %7, i64 %8, i64 %9) |
|   ret i64 %ret |
| } |
| |
| define void @agnostic_za_buffer_alloc_with_stack_probes() nounwind "aarch64_za_state_agnostic" "probe-stack"="inline-asm" "stack-probe-size"="65536"{ |
| ; CHECK-LABEL: agnostic_za_buffer_alloc_with_stack_probes: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill |
| ; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill |
| ; CHECK-NEXT: mov x29, sp |
| ; CHECK-NEXT: bl __arm_sme_state_size |
| ; CHECK-NEXT: mov x8, sp |
| ; CHECK-NEXT: sub x19, x8, x0 |
| ; CHECK-NEXT: .LBB7_1: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: sub sp, sp, #16, lsl #12 // =65536 |
| ; CHECK-NEXT: cmp sp, x19 |
| ; CHECK-NEXT: b.le .LBB7_3 |
| ; CHECK-NEXT: // %bb.2: // in Loop: Header=BB7_1 Depth=1 |
| ; CHECK-NEXT: str xzr, [sp] |
| ; CHECK-NEXT: b .LBB7_1 |
| ; CHECK-NEXT: .LBB7_3: |
| ; CHECK-NEXT: mov sp, x19 |
| ; CHECK-NEXT: ldr xzr, [sp] |
| ; CHECK-NEXT: mov x0, x19 |
| ; CHECK-NEXT: bl __arm_sme_save |
| ; CHECK-NEXT: bl private_za |
| ; CHECK-NEXT: mov x0, x19 |
| ; CHECK-NEXT: bl __arm_sme_restore |
| ; CHECK-NEXT: mov sp, x29 |
| ; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload |
| ; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-NEWLOWERING-LABEL: agnostic_za_buffer_alloc_with_stack_probes: |
| ; CHECK-NEWLOWERING: // %bb.0: |
| ; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill |
| ; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill |
| ; CHECK-NEWLOWERING-NEXT: mov x29, sp |
| ; CHECK-NEWLOWERING-NEXT: bl __arm_sme_state_size |
| ; CHECK-NEWLOWERING-NEXT: mov x8, sp |
| ; CHECK-NEWLOWERING-NEXT: sub x19, x8, x0 |
| ; CHECK-NEWLOWERING-NEXT: mov x0, x19 |
| ; CHECK-NEWLOWERING-NEXT: bl __arm_sme_save |
| ; CHECK-NEWLOWERING-NEXT: .LBB7_1: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16, lsl #12 // =65536 |
| ; CHECK-NEWLOWERING-NEXT: cmp sp, x19 |
| ; CHECK-NEWLOWERING-NEXT: b.le .LBB7_3 |
| ; CHECK-NEWLOWERING-NEXT: // %bb.2: // in Loop: Header=BB7_1 Depth=1 |
| ; CHECK-NEWLOWERING-NEXT: str xzr, [sp] |
| ; CHECK-NEWLOWERING-NEXT: b .LBB7_1 |
| ; CHECK-NEWLOWERING-NEXT: .LBB7_3: |
| ; CHECK-NEWLOWERING-NEXT: mov sp, x19 |
| ; CHECK-NEWLOWERING-NEXT: ldr xzr, [sp] |
| ; CHECK-NEWLOWERING-NEXT: bl private_za |
| ; CHECK-NEWLOWERING-NEXT: mov x0, x19 |
| ; CHECK-NEWLOWERING-NEXT: bl __arm_sme_restore |
| ; CHECK-NEWLOWERING-NEXT: mov sp, x29 |
| ; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload |
| ; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload |
| ; CHECK-NEWLOWERING-NEXT: ret |
| ; With inline-asm stack probing, the buffer is allocated via a 64 KiB |
| ; probing loop. Per the checks, the old lowering calls __arm_sme_save |
| ; after the probe loop, while the new lowering saves before probing. |
|   call void @private_za() |
|   ret void |
| } |