| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=aarch64 -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme < %s | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK |
| ; RUN: llc -mtriple=aarch64 -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme -aarch64-new-sme-abi < %s | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-NEWLOWERING |
| |
| ; Callee declared without any ZA attribute. The tests below expect a |
| ; TPIDR2_EL0 lazy save to be set up around calls to it. |
| declare void @private_za_callee() |
| ; Callee declared "aarch64_inout_za". The expected output calls it with a |
| ; plain `bl`, with no TPIDR2_EL0 write before the call. |
| declare void @shared_za_callee() "aarch64_inout_za" |
| ; Callee declared "aarch64_preserves_za". It is called directly after |
| ; shared_za_callee in the mixed test, again with no TPIDR2_EL0 write. |
| declare void @preserves_za_callee() "aarch64_preserves_za" |
| |
| ; Intrinsic that the expected output turns into a `bl cosf` call. |
| declare float @llvm.cos.f32(float) |
| |
| ; Test lazy-save mechanism for a single callee. |
| ; The caller is "aarch64_inout_za" and the callee has no ZA attribute. Both |
| ; run lines expect identical code: reserve rdsvl*rdsvl bytes of stack, store |
| ; the new stack pointer and the rdsvl value at [x29, #-16], and write the |
| ; address of that 16-byte block to TPIDR2_EL0 before the call. After the |
| ; call, `smstart za` is issued and __arm_tpidr2_restore is called only when |
| ; TPIDR2_EL0 reads back as zero; TPIDR2_EL0 is then cleared. |
| define void @test_lazy_save_1_callee() nounwind "aarch64_inout_za" { |
| ; CHECK-COMMON-LABEL: test_lazy_save_1_callee: |
| ; CHECK-COMMON: // %bb.0: |
| ; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill |
| ; CHECK-COMMON-NEXT: str x19, [sp, #16] // 8-byte Folded Spill |
| ; CHECK-COMMON-NEXT: mov x29, sp |
| ; CHECK-COMMON-NEXT: sub sp, sp, #16 |
| ; CHECK-COMMON-NEXT: rdsvl x8, #1 |
| ; CHECK-COMMON-NEXT: mov x9, sp |
| ; CHECK-COMMON-NEXT: msub x9, x8, x8, x9 |
| ; CHECK-COMMON-NEXT: mov sp, x9 |
| ; CHECK-COMMON-NEXT: sub x10, x29, #16 |
| ; CHECK-COMMON-NEXT: stp x9, x8, [x29, #-16] |
| ; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x10 |
| ; CHECK-COMMON-NEXT: bl private_za_callee |
| ; CHECK-COMMON-NEXT: smstart za |
| ; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0 |
| ; CHECK-COMMON-NEXT: sub x0, x29, #16 |
| ; CHECK-COMMON-NEXT: cbnz x8, .LBB0_2 |
| ; CHECK-COMMON-NEXT: // %bb.1: |
| ; CHECK-COMMON-NEXT: bl __arm_tpidr2_restore |
| ; CHECK-COMMON-NEXT: .LBB0_2: |
| ; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr |
| ; CHECK-COMMON-NEXT: mov sp, x29 |
| ; CHECK-COMMON-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload |
| ; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload |
| ; CHECK-COMMON-NEXT: ret |
| call void @private_za_callee() |
| ret void |
| } |
| |
| ; Test lazy-save mechanism for multiple callees. |
| ; The two lowerings are expected to differ here: |
| ;  - Default lowering: each of the two calls gets its own TPIDR2_EL0 write |
| ;    and its own `smstart za` + conditional __arm_tpidr2_restore sequence; |
| ;    the block address is kept in x20 across the calls. |
| ;  - With -aarch64-new-sme-abi: TPIDR2_EL0 is written once, the two calls |
| ;    are emitted back to back, and a single conditional restore follows the |
| ;    second call. |
| define void @test_lazy_save_2_callees() nounwind "aarch64_inout_za" { |
| ; CHECK-LABEL: test_lazy_save_2_callees: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill |
| ; CHECK-NEXT: mov x29, sp |
| ; CHECK-NEXT: sub sp, sp, #16 |
| ; CHECK-NEXT: rdsvl x8, #1 |
| ; CHECK-NEXT: mov x9, sp |
| ; CHECK-NEXT: msub x9, x8, x8, x9 |
| ; CHECK-NEXT: mov sp, x9 |
| ; CHECK-NEXT: sub x20, x29, #16 |
| ; CHECK-NEXT: stp x9, x8, [x29, #-16] |
| ; CHECK-NEXT: msr TPIDR2_EL0, x20 |
| ; CHECK-NEXT: bl private_za_callee |
| ; CHECK-NEXT: smstart za |
| ; CHECK-NEXT: mrs x8, TPIDR2_EL0 |
| ; CHECK-NEXT: sub x0, x29, #16 |
| ; CHECK-NEXT: cbnz x8, .LBB1_2 |
| ; CHECK-NEXT: // %bb.1: |
| ; CHECK-NEXT: bl __arm_tpidr2_restore |
| ; CHECK-NEXT: .LBB1_2: |
| ; CHECK-NEXT: msr TPIDR2_EL0, xzr |
| ; CHECK-NEXT: msr TPIDR2_EL0, x20 |
| ; CHECK-NEXT: bl private_za_callee |
| ; CHECK-NEXT: smstart za |
| ; CHECK-NEXT: mrs x8, TPIDR2_EL0 |
| ; CHECK-NEXT: sub x0, x29, #16 |
| ; CHECK-NEXT: cbnz x8, .LBB1_4 |
| ; CHECK-NEXT: // %bb.3: |
| ; CHECK-NEXT: bl __arm_tpidr2_restore |
| ; CHECK-NEXT: .LBB1_4: |
| ; CHECK-NEXT: msr TPIDR2_EL0, xzr |
| ; CHECK-NEXT: mov sp, x29 |
| ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-NEWLOWERING-LABEL: test_lazy_save_2_callees: |
| ; CHECK-NEWLOWERING: // %bb.0: |
| ; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill |
| ; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill |
| ; CHECK-NEWLOWERING-NEXT: mov x29, sp |
| ; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16 |
| ; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1 |
| ; CHECK-NEWLOWERING-NEXT: mov x9, sp |
| ; CHECK-NEWLOWERING-NEXT: msub x9, x8, x8, x9 |
| ; CHECK-NEWLOWERING-NEXT: mov sp, x9 |
| ; CHECK-NEWLOWERING-NEXT: sub x10, x29, #16 |
| ; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-16] |
| ; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x10 |
| ; CHECK-NEWLOWERING-NEXT: bl private_za_callee |
| ; CHECK-NEWLOWERING-NEXT: bl private_za_callee |
| ; CHECK-NEWLOWERING-NEXT: smstart za |
| ; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0 |
| ; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16 |
| ; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB1_2 |
| ; CHECK-NEWLOWERING-NEXT: // %bb.1: |
| ; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore |
| ; CHECK-NEWLOWERING-NEXT: .LBB1_2: |
| ; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr |
| ; CHECK-NEWLOWERING-NEXT: mov sp, x29 |
| ; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload |
| ; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload |
| ; CHECK-NEWLOWERING-NEXT: ret |
| call void @private_za_callee() |
| call void @private_za_callee() |
| ret void |
| } |
| |
| ; Test a call of an intrinsic that gets expanded to a library call. |
| ; The llvm.cos.f32 call is expected to become `bl cosf`, wrapped in the same |
| ; TPIDR2_EL0 setup and conditional __arm_tpidr2_restore sequence that the |
| ; other tests expect around a call to a function with no ZA attribute. Both |
| ; run lines share the same expectations. |
| define float @test_lazy_save_expanded_intrinsic(float %a) nounwind "aarch64_inout_za" { |
| ; CHECK-COMMON-LABEL: test_lazy_save_expanded_intrinsic: |
| ; CHECK-COMMON: // %bb.0: |
| ; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill |
| ; CHECK-COMMON-NEXT: str x19, [sp, #16] // 8-byte Folded Spill |
| ; CHECK-COMMON-NEXT: mov x29, sp |
| ; CHECK-COMMON-NEXT: sub sp, sp, #16 |
| ; CHECK-COMMON-NEXT: rdsvl x8, #1 |
| ; CHECK-COMMON-NEXT: mov x9, sp |
| ; CHECK-COMMON-NEXT: msub x9, x8, x8, x9 |
| ; CHECK-COMMON-NEXT: mov sp, x9 |
| ; CHECK-COMMON-NEXT: sub x10, x29, #16 |
| ; CHECK-COMMON-NEXT: stp x9, x8, [x29, #-16] |
| ; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x10 |
| ; CHECK-COMMON-NEXT: bl cosf |
| ; CHECK-COMMON-NEXT: smstart za |
| ; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0 |
| ; CHECK-COMMON-NEXT: sub x0, x29, #16 |
| ; CHECK-COMMON-NEXT: cbnz x8, .LBB2_2 |
| ; CHECK-COMMON-NEXT: // %bb.1: |
| ; CHECK-COMMON-NEXT: bl __arm_tpidr2_restore |
| ; CHECK-COMMON-NEXT: .LBB2_2: |
| ; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr |
| ; CHECK-COMMON-NEXT: mov sp, x29 |
| ; CHECK-COMMON-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload |
| ; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload |
| ; CHECK-COMMON-NEXT: ret |
| %res = call float @llvm.cos.f32(float %a) |
| ret float %res |
| } |
| |
| ; Test a combination of streaming-compatible -> normal call with lazy-save. |
| ; The caller is both "aarch64_inout_za" and "aarch64_pstate_sm_compatible". |
| ; The expected code reads SVCR into x20 and, when bit 0 of that value is set, |
| ; brackets the call with `smstop sm` / `smstart sm`. TPIDR2_EL0 is written |
| ; before the conditional `smstop sm`, and the `smstart za` + conditional |
| ; __arm_tpidr2_restore sequence comes after the conditional `smstart sm`. |
| ; The two lowerings differ only in where `mrs x20, SVCR` is placed relative |
| ; to the buffer setup and in the scratch register used for the block address |
| ; (x10 vs x8). |
| define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_inout_za" "aarch64_pstate_sm_compatible" { |
| ; CHECK-LABEL: test_lazy_save_and_conditional_smstart: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill |
| ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill |
| ; CHECK-NEXT: add x29, sp, #64 |
| ; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill |
| ; CHECK-NEXT: sub sp, sp, #16 |
| ; CHECK-NEXT: rdsvl x8, #1 |
| ; CHECK-NEXT: mov x9, sp |
| ; CHECK-NEXT: mrs x20, SVCR |
| ; CHECK-NEXT: msub x9, x8, x8, x9 |
| ; CHECK-NEXT: mov sp, x9 |
| ; CHECK-NEXT: sub x10, x29, #80 |
| ; CHECK-NEXT: stp x9, x8, [x29, #-80] |
| ; CHECK-NEXT: msr TPIDR2_EL0, x10 |
| ; CHECK-NEXT: tbz w20, #0, .LBB3_2 |
| ; CHECK-NEXT: // %bb.1: |
| ; CHECK-NEXT: smstop sm |
| ; CHECK-NEXT: .LBB3_2: |
| ; CHECK-NEXT: bl private_za_callee |
| ; CHECK-NEXT: tbz w20, #0, .LBB3_4 |
| ; CHECK-NEXT: // %bb.3: |
| ; CHECK-NEXT: smstart sm |
| ; CHECK-NEXT: .LBB3_4: |
| ; CHECK-NEXT: smstart za |
| ; CHECK-NEXT: mrs x8, TPIDR2_EL0 |
| ; CHECK-NEXT: sub x0, x29, #80 |
| ; CHECK-NEXT: cbnz x8, .LBB3_6 |
| ; CHECK-NEXT: // %bb.5: |
| ; CHECK-NEXT: bl __arm_tpidr2_restore |
| ; CHECK-NEXT: .LBB3_6: |
| ; CHECK-NEXT: msr TPIDR2_EL0, xzr |
| ; CHECK-NEXT: sub sp, x29, #64 |
| ; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-NEWLOWERING-LABEL: test_lazy_save_and_conditional_smstart: |
| ; CHECK-NEWLOWERING: // %bb.0: |
| ; CHECK-NEWLOWERING-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill |
| ; CHECK-NEWLOWERING-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill |
| ; CHECK-NEWLOWERING-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill |
| ; CHECK-NEWLOWERING-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill |
| ; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill |
| ; CHECK-NEWLOWERING-NEXT: add x29, sp, #64 |
| ; CHECK-NEWLOWERING-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill |
| ; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16 |
| ; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1 |
| ; CHECK-NEWLOWERING-NEXT: mov x9, sp |
| ; CHECK-NEWLOWERING-NEXT: msub x9, x8, x8, x9 |
| ; CHECK-NEWLOWERING-NEXT: mov sp, x9 |
| ; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-80] |
| ; CHECK-NEWLOWERING-NEXT: mrs x20, SVCR |
| ; CHECK-NEWLOWERING-NEXT: sub x8, x29, #80 |
| ; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x8 |
| ; CHECK-NEWLOWERING-NEXT: tbz w20, #0, .LBB3_2 |
| ; CHECK-NEWLOWERING-NEXT: // %bb.1: |
| ; CHECK-NEWLOWERING-NEXT: smstop sm |
| ; CHECK-NEWLOWERING-NEXT: .LBB3_2: |
| ; CHECK-NEWLOWERING-NEXT: bl private_za_callee |
| ; CHECK-NEWLOWERING-NEXT: tbz w20, #0, .LBB3_4 |
| ; CHECK-NEWLOWERING-NEXT: // %bb.3: |
| ; CHECK-NEWLOWERING-NEXT: smstart sm |
| ; CHECK-NEWLOWERING-NEXT: .LBB3_4: |
| ; CHECK-NEWLOWERING-NEXT: smstart za |
| ; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0 |
| ; CHECK-NEWLOWERING-NEXT: sub x0, x29, #80 |
| ; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB3_6 |
| ; CHECK-NEWLOWERING-NEXT: // %bb.5: |
| ; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore |
| ; CHECK-NEWLOWERING-NEXT: .LBB3_6: |
| ; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr |
| ; CHECK-NEWLOWERING-NEXT: sub sp, x29, #64 |
| ; CHECK-NEWLOWERING-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload |
| ; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload |
| ; CHECK-NEWLOWERING-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload |
| ; CHECK-NEWLOWERING-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload |
| ; CHECK-NEWLOWERING-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload |
| ; CHECK-NEWLOWERING-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload |
| ; CHECK-NEWLOWERING-NEXT: ret |
| call void @private_za_callee() |
| ret void |
| } |
| |
| ; Test an "aarch64_new_za" function that calls, in order, a callee with no ZA |
| ; attribute, an "aarch64_inout_za" callee, an "aarch64_preserves_za" callee, |
| ; and the callee with no ZA attribute again. The expected prologue reads |
| ; TPIDR2_EL0 and calls __arm_tpidr2_save when it is non-zero; the expected |
| ; epilogue issues `smstop za`. |
| ; |
| ; Note: For the final @private_za_callee() we set up a lazy save then don't |
| ; restore from it (since ZA is off on return). We could improve this case |
| ; by turning ZA off before the final private ZA call. |
| ; This describes the expectations for the -aarch64-new-sme-abi run, which have |
| ; no restore sequence after the last call; the default-lowering expectations |
| ; still contain the conditional __arm_tpidr2_restore after it. |
| ; |
| ; NOTE(review): in the new-lowering expectations `zero {za}` appears inside |
| ; the conditional save block, before `smstart za`, whereas the default |
| ; lowering emits it unconditionally after `smstart za` -- confirm that this |
| ; placement is intended. |
| define void @test_lazy_save_mixed_shared_and_private_callees() "aarch64_new_za" |
| ; CHECK-LABEL: test_lazy_save_mixed_shared_and_private_callees: |
| ; CHECK: // %bb.0: // %prelude |
| ; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill |
| ; CHECK-NEXT: mov x29, sp |
| ; CHECK-NEXT: sub sp, sp, #16 |
| ; CHECK-NEXT: .cfi_def_cfa w29, 32 |
| ; CHECK-NEXT: .cfi_offset w19, -8 |
| ; CHECK-NEXT: .cfi_offset w20, -16 |
| ; CHECK-NEXT: .cfi_offset w30, -24 |
| ; CHECK-NEXT: .cfi_offset w29, -32 |
| ; CHECK-NEXT: rdsvl x8, #1 |
| ; CHECK-NEXT: mov x9, sp |
| ; CHECK-NEXT: msub x9, x8, x8, x9 |
| ; CHECK-NEXT: mov sp, x9 |
| ; CHECK-NEXT: stp x9, x8, [x29, #-16] |
| ; CHECK-NEXT: mrs x8, TPIDR2_EL0 |
| ; CHECK-NEXT: cbz x8, .LBB4_2 |
| ; CHECK-NEXT: // %bb.1: // %save.za |
| ; CHECK-NEXT: bl __arm_tpidr2_save |
| ; CHECK-NEXT: msr TPIDR2_EL0, xzr |
| ; CHECK-NEXT: .LBB4_2: |
| ; CHECK-NEXT: smstart za |
| ; CHECK-NEXT: sub x20, x29, #16 |
| ; CHECK-NEXT: zero {za} |
| ; CHECK-NEXT: msr TPIDR2_EL0, x20 |
| ; CHECK-NEXT: bl private_za_callee |
| ; CHECK-NEXT: smstart za |
| ; CHECK-NEXT: mrs x8, TPIDR2_EL0 |
| ; CHECK-NEXT: sub x0, x29, #16 |
| ; CHECK-NEXT: cbnz x8, .LBB4_4 |
| ; CHECK-NEXT: // %bb.3: |
| ; CHECK-NEXT: bl __arm_tpidr2_restore |
| ; CHECK-NEXT: .LBB4_4: |
| ; CHECK-NEXT: msr TPIDR2_EL0, xzr |
| ; CHECK-NEXT: bl shared_za_callee |
| ; CHECK-NEXT: bl preserves_za_callee |
| ; CHECK-NEXT: msr TPIDR2_EL0, x20 |
| ; CHECK-NEXT: bl private_za_callee |
| ; CHECK-NEXT: smstart za |
| ; CHECK-NEXT: mrs x8, TPIDR2_EL0 |
| ; CHECK-NEXT: sub x0, x29, #16 |
| ; CHECK-NEXT: cbnz x8, .LBB4_6 |
| ; CHECK-NEXT: // %bb.5: |
| ; CHECK-NEXT: bl __arm_tpidr2_restore |
| ; CHECK-NEXT: .LBB4_6: |
| ; CHECK-NEXT: msr TPIDR2_EL0, xzr |
| ; CHECK-NEXT: smstop za |
| ; CHECK-NEXT: mov sp, x29 |
| ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-NEWLOWERING-LABEL: test_lazy_save_mixed_shared_and_private_callees: |
| ; CHECK-NEWLOWERING: // %bb.0: |
| ; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill |
| ; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill |
| ; CHECK-NEWLOWERING-NEXT: mov x29, sp |
| ; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16 |
| ; CHECK-NEWLOWERING-NEXT: .cfi_def_cfa w29, 32 |
| ; CHECK-NEWLOWERING-NEXT: .cfi_offset w19, -16 |
| ; CHECK-NEWLOWERING-NEXT: .cfi_offset w30, -24 |
| ; CHECK-NEWLOWERING-NEXT: .cfi_offset w29, -32 |
| ; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1 |
| ; CHECK-NEWLOWERING-NEXT: mov x9, sp |
| ; CHECK-NEWLOWERING-NEXT: msub x9, x8, x8, x9 |
| ; CHECK-NEWLOWERING-NEXT: mov sp, x9 |
| ; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-16] |
| ; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0 |
| ; CHECK-NEWLOWERING-NEXT: cbz x8, .LBB4_2 |
| ; CHECK-NEWLOWERING-NEXT: // %bb.1: |
| ; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_save |
| ; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr |
| ; CHECK-NEWLOWERING-NEXT: zero {za} |
| ; CHECK-NEWLOWERING-NEXT: .LBB4_2: |
| ; CHECK-NEWLOWERING-NEXT: smstart za |
| ; CHECK-NEWLOWERING-NEXT: sub x8, x29, #16 |
| ; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x8 |
| ; CHECK-NEWLOWERING-NEXT: bl private_za_callee |
| ; CHECK-NEWLOWERING-NEXT: smstart za |
| ; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0 |
| ; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16 |
| ; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB4_4 |
| ; CHECK-NEWLOWERING-NEXT: // %bb.3: |
| ; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore |
| ; CHECK-NEWLOWERING-NEXT: .LBB4_4: |
| ; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr |
| ; CHECK-NEWLOWERING-NEXT: bl shared_za_callee |
| ; CHECK-NEWLOWERING-NEXT: bl preserves_za_callee |
| ; CHECK-NEWLOWERING-NEXT: sub x8, x29, #16 |
| ; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x8 |
| ; CHECK-NEWLOWERING-NEXT: bl private_za_callee |
| ; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr |
| ; CHECK-NEWLOWERING-NEXT: smstop za |
| ; CHECK-NEWLOWERING-NEXT: mov sp, x29 |
| ; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload |
| ; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload |
| ; CHECK-NEWLOWERING-NEXT: ret |
| { |
| call void @private_za_callee() |
| call void @shared_za_callee() |
| call void @preserves_za_callee() |
| call void @private_za_callee() |
| ret void |
| } |
| |
| ; Test six consecutive calls to a callee with no ZA attribute, with one |
| ; "aarch64_inout_za" call before and one after. This function is not marked |
| ; nounwind, and the expected output includes .cfi directives. |
| ;  - Default lowering: every one of the six calls gets its own TPIDR2_EL0 |
| ;    write and its own `smstart za` + conditional __arm_tpidr2_restore |
| ;    sequence (twelve numbered blocks in total). |
| ;  - With -aarch64-new-sme-abi: TPIDR2_EL0 is written once before the first |
| ;    of the six calls, and a single conditional restore follows the sixth. |
| define void @test_many_back2back_private_za_calls() "aarch64_inout_za" { |
| ; CHECK-LABEL: test_many_back2back_private_za_calls: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill |
| ; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill |
| ; CHECK-NEXT: mov x29, sp |
| ; CHECK-NEXT: sub sp, sp, #16 |
| ; CHECK-NEXT: .cfi_def_cfa w29, 32 |
| ; CHECK-NEXT: .cfi_offset w19, -8 |
| ; CHECK-NEXT: .cfi_offset w20, -16 |
| ; CHECK-NEXT: .cfi_offset w30, -24 |
| ; CHECK-NEXT: .cfi_offset w29, -32 |
| ; CHECK-NEXT: rdsvl x8, #1 |
| ; CHECK-NEXT: mov x9, sp |
| ; CHECK-NEXT: msub x9, x8, x8, x9 |
| ; CHECK-NEXT: mov sp, x9 |
| ; CHECK-NEXT: stp x9, x8, [x29, #-16] |
| ; CHECK-NEXT: bl shared_za_callee |
| ; CHECK-NEXT: sub x20, x29, #16 |
| ; CHECK-NEXT: msr TPIDR2_EL0, x20 |
| ; CHECK-NEXT: bl private_za_callee |
| ; CHECK-NEXT: smstart za |
| ; CHECK-NEXT: mrs x8, TPIDR2_EL0 |
| ; CHECK-NEXT: sub x0, x29, #16 |
| ; CHECK-NEXT: cbnz x8, .LBB5_2 |
| ; CHECK-NEXT: // %bb.1: |
| ; CHECK-NEXT: bl __arm_tpidr2_restore |
| ; CHECK-NEXT: .LBB5_2: |
| ; CHECK-NEXT: msr TPIDR2_EL0, xzr |
| ; CHECK-NEXT: msr TPIDR2_EL0, x20 |
| ; CHECK-NEXT: bl private_za_callee |
| ; CHECK-NEXT: smstart za |
| ; CHECK-NEXT: mrs x8, TPIDR2_EL0 |
| ; CHECK-NEXT: sub x0, x29, #16 |
| ; CHECK-NEXT: cbnz x8, .LBB5_4 |
| ; CHECK-NEXT: // %bb.3: |
| ; CHECK-NEXT: bl __arm_tpidr2_restore |
| ; CHECK-NEXT: .LBB5_4: |
| ; CHECK-NEXT: msr TPIDR2_EL0, xzr |
| ; CHECK-NEXT: msr TPIDR2_EL0, x20 |
| ; CHECK-NEXT: bl private_za_callee |
| ; CHECK-NEXT: smstart za |
| ; CHECK-NEXT: mrs x8, TPIDR2_EL0 |
| ; CHECK-NEXT: sub x0, x29, #16 |
| ; CHECK-NEXT: cbnz x8, .LBB5_6 |
| ; CHECK-NEXT: // %bb.5: |
| ; CHECK-NEXT: bl __arm_tpidr2_restore |
| ; CHECK-NEXT: .LBB5_6: |
| ; CHECK-NEXT: msr TPIDR2_EL0, xzr |
| ; CHECK-NEXT: msr TPIDR2_EL0, x20 |
| ; CHECK-NEXT: bl private_za_callee |
| ; CHECK-NEXT: smstart za |
| ; CHECK-NEXT: mrs x8, TPIDR2_EL0 |
| ; CHECK-NEXT: sub x0, x29, #16 |
| ; CHECK-NEXT: cbnz x8, .LBB5_8 |
| ; CHECK-NEXT: // %bb.7: |
| ; CHECK-NEXT: bl __arm_tpidr2_restore |
| ; CHECK-NEXT: .LBB5_8: |
| ; CHECK-NEXT: msr TPIDR2_EL0, xzr |
| ; CHECK-NEXT: msr TPIDR2_EL0, x20 |
| ; CHECK-NEXT: bl private_za_callee |
| ; CHECK-NEXT: smstart za |
| ; CHECK-NEXT: mrs x8, TPIDR2_EL0 |
| ; CHECK-NEXT: sub x0, x29, #16 |
| ; CHECK-NEXT: cbnz x8, .LBB5_10 |
| ; CHECK-NEXT: // %bb.9: |
| ; CHECK-NEXT: bl __arm_tpidr2_restore |
| ; CHECK-NEXT: .LBB5_10: |
| ; CHECK-NEXT: msr TPIDR2_EL0, xzr |
| ; CHECK-NEXT: msr TPIDR2_EL0, x20 |
| ; CHECK-NEXT: bl private_za_callee |
| ; CHECK-NEXT: smstart za |
| ; CHECK-NEXT: mrs x8, TPIDR2_EL0 |
| ; CHECK-NEXT: sub x0, x29, #16 |
| ; CHECK-NEXT: cbnz x8, .LBB5_12 |
| ; CHECK-NEXT: // %bb.11: |
| ; CHECK-NEXT: bl __arm_tpidr2_restore |
| ; CHECK-NEXT: .LBB5_12: |
| ; CHECK-NEXT: msr TPIDR2_EL0, xzr |
| ; CHECK-NEXT: bl shared_za_callee |
| ; CHECK-NEXT: mov sp, x29 |
| ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload |
| ; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-NEWLOWERING-LABEL: test_many_back2back_private_za_calls: |
| ; CHECK-NEWLOWERING: // %bb.0: |
| ; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill |
| ; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill |
| ; CHECK-NEWLOWERING-NEXT: mov x29, sp |
| ; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16 |
| ; CHECK-NEWLOWERING-NEXT: .cfi_def_cfa w29, 32 |
| ; CHECK-NEWLOWERING-NEXT: .cfi_offset w19, -16 |
| ; CHECK-NEWLOWERING-NEXT: .cfi_offset w30, -24 |
| ; CHECK-NEWLOWERING-NEXT: .cfi_offset w29, -32 |
| ; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1 |
| ; CHECK-NEWLOWERING-NEXT: mov x9, sp |
| ; CHECK-NEWLOWERING-NEXT: msub x9, x8, x8, x9 |
| ; CHECK-NEWLOWERING-NEXT: mov sp, x9 |
| ; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-16] |
| ; CHECK-NEWLOWERING-NEXT: bl shared_za_callee |
| ; CHECK-NEWLOWERING-NEXT: sub x8, x29, #16 |
| ; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x8 |
| ; CHECK-NEWLOWERING-NEXT: bl private_za_callee |
| ; CHECK-NEWLOWERING-NEXT: bl private_za_callee |
| ; CHECK-NEWLOWERING-NEXT: bl private_za_callee |
| ; CHECK-NEWLOWERING-NEXT: bl private_za_callee |
| ; CHECK-NEWLOWERING-NEXT: bl private_za_callee |
| ; CHECK-NEWLOWERING-NEXT: bl private_za_callee |
| ; CHECK-NEWLOWERING-NEXT: smstart za |
| ; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0 |
| ; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16 |
| ; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB5_2 |
| ; CHECK-NEWLOWERING-NEXT: // %bb.1: |
| ; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore |
| ; CHECK-NEWLOWERING-NEXT: .LBB5_2: |
| ; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr |
| ; CHECK-NEWLOWERING-NEXT: bl shared_za_callee |
| ; CHECK-NEWLOWERING-NEXT: mov sp, x29 |
| ; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload |
| ; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload |
| ; CHECK-NEWLOWERING-NEXT: ret |
| call void @shared_za_callee() |
| call void @private_za_callee() |
| call void @private_za_callee() |
| call void @private_za_callee() |
| call void @private_za_callee() |
| call void @private_za_callee() |
| call void @private_za_callee() |
| call void @shared_za_callee() |
| ret void |
| } |
| |
| ; Test the call sequence shared-ZA callee, callee with no ZA attribute, |
| ; shared-ZA callee. Both run lines expect identical code: the two |
| ; shared_za_callee calls are plain `bl`s, and only the private_za_callee call |
| ; is preceded by a TPIDR2_EL0 write and followed by `smstart za` plus the |
| ; conditional __arm_tpidr2_restore. |
| define void @test_shared_private_shared() nounwind "aarch64_inout_za" { |
| ; CHECK-COMMON-LABEL: test_shared_private_shared: |
| ; CHECK-COMMON: // %bb.0: |
| ; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill |
| ; CHECK-COMMON-NEXT: str x19, [sp, #16] // 8-byte Folded Spill |
| ; CHECK-COMMON-NEXT: mov x29, sp |
| ; CHECK-COMMON-NEXT: sub sp, sp, #16 |
| ; CHECK-COMMON-NEXT: rdsvl x8, #1 |
| ; CHECK-COMMON-NEXT: mov x9, sp |
| ; CHECK-COMMON-NEXT: msub x9, x8, x8, x9 |
| ; CHECK-COMMON-NEXT: mov sp, x9 |
| ; CHECK-COMMON-NEXT: stp x9, x8, [x29, #-16] |
| ; CHECK-COMMON-NEXT: bl shared_za_callee |
| ; CHECK-COMMON-NEXT: sub x8, x29, #16 |
| ; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x8 |
| ; CHECK-COMMON-NEXT: bl private_za_callee |
| ; CHECK-COMMON-NEXT: smstart za |
| ; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0 |
| ; CHECK-COMMON-NEXT: sub x0, x29, #16 |
| ; CHECK-COMMON-NEXT: cbnz x8, .LBB6_2 |
| ; CHECK-COMMON-NEXT: // %bb.1: |
| ; CHECK-COMMON-NEXT: bl __arm_tpidr2_restore |
| ; CHECK-COMMON-NEXT: .LBB6_2: |
| ; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr |
| ; CHECK-COMMON-NEXT: bl shared_za_callee |
| ; CHECK-COMMON-NEXT: mov sp, x29 |
| ; CHECK-COMMON-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload |
| ; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload |
| ; CHECK-COMMON-NEXT: ret |
| call void @shared_za_callee() |
| call void @private_za_callee() |
| call void @shared_za_callee() |
| ret void |
| } |
| |
| ; Test a ZA-sharing caller that only calls a shared-ZA callee. Both run lines |
| ; expect no buffer allocation and no TPIDR2_EL0 access at all: just the x30 |
| ; spill, the `bl`, the x30 reload and `ret`. |
| define void @test_only_shared_za() nounwind "aarch64_inout_za" { |
| ; CHECK-COMMON-LABEL: test_only_shared_za: |
| ; CHECK-COMMON: // %bb.0: |
| ; CHECK-COMMON-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-COMMON-NEXT: bl shared_za_callee |
| ; CHECK-COMMON-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-COMMON-NEXT: ret |
| call void @shared_za_callee() |
| ret void |
| } |
| |
| ; i64 -> i64 callees used by test_shared_private_shared_i64: one declared |
| ; "aarch64_inout_za" and one declared with no ZA attribute. |
| declare i64 @shared_za_callee_i64(i64) "aarch64_inout_za" |
| declare i64 @private_za_callee_i64(i64) |
| |
| ; Test the shared / private / shared call sequence with an i64 value threaded |
| ; through all three calls. Both run lines expect identical code. The result |
| ; of private_za_callee_i64 is copied from x0 to x1 before x0 is overwritten |
| ; with the block address (`sub x0, x29, #16`) ahead of the conditional |
| ; __arm_tpidr2_restore call, and is copied back to x0 afterwards so it can be |
| ; passed to the second shared_za_callee_i64 call. |
| define i64 @test_shared_private_shared_i64(i64 %x) nounwind "aarch64_inout_za" { |
| ; CHECK-COMMON-LABEL: test_shared_private_shared_i64: |
| ; CHECK-COMMON: // %bb.0: |
| ; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill |
| ; CHECK-COMMON-NEXT: str x19, [sp, #16] // 8-byte Folded Spill |
| ; CHECK-COMMON-NEXT: mov x29, sp |
| ; CHECK-COMMON-NEXT: sub sp, sp, #16 |
| ; CHECK-COMMON-NEXT: rdsvl x8, #1 |
| ; CHECK-COMMON-NEXT: mov x9, sp |
| ; CHECK-COMMON-NEXT: msub x9, x8, x8, x9 |
| ; CHECK-COMMON-NEXT: mov sp, x9 |
| ; CHECK-COMMON-NEXT: stp x9, x8, [x29, #-16] |
| ; CHECK-COMMON-NEXT: bl shared_za_callee_i64 |
| ; CHECK-COMMON-NEXT: sub x8, x29, #16 |
| ; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x8 |
| ; CHECK-COMMON-NEXT: bl private_za_callee_i64 |
| ; CHECK-COMMON-NEXT: mov x1, x0 |
| ; CHECK-COMMON-NEXT: smstart za |
| ; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0 |
| ; CHECK-COMMON-NEXT: sub x0, x29, #16 |
| ; CHECK-COMMON-NEXT: cbnz x8, .LBB8_2 |
| ; CHECK-COMMON-NEXT: // %bb.1: |
| ; CHECK-COMMON-NEXT: bl __arm_tpidr2_restore |
| ; CHECK-COMMON-NEXT: .LBB8_2: |
| ; CHECK-COMMON-NEXT: mov x0, x1 |
| ; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr |
| ; CHECK-COMMON-NEXT: bl shared_za_callee_i64 |
| ; CHECK-COMMON-NEXT: mov sp, x29 |
| ; CHECK-COMMON-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload |
| ; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload |
| ; CHECK-COMMON-NEXT: ret |
| %a = call i64 @shared_za_callee_i64(i64 %x) |
| %b = call i64 @private_za_callee_i64(i64 %a) |
| %c = call i64 @shared_za_callee_i64(i64 %b) |
| ret i64 %c |
| } |
| |
| ; Callee with no ZA attribute taking ten i64 parameters. In the expected |
| ; output of test_many_callee_arguments, two values are stored to the stack |
| ; just before the call to it. |
| declare i64 @many_args_private_za_callee( |
| i64, i64, i64, i64, i64, i64, i64, i64, i64, i64) |
| |
| ; In this example some arguments are passed on the stack, which decrements the |
| ; stack pointer before the call -- in this test the lazy save should be set up |
| ; before the stack decrement. |
| ; In both sets of expectations `msr TPIDR2_EL0, x12` precedes the |
| ; `stp x10, x11, [sp, #-16]!` that stores the stack arguments, and |
| ; `add sp, sp, #16` follows the call before the restore sequence. The two |
| ; lowerings differ in instruction order during buffer setup and in the |
| ; registers used to hold the call result across the conditional |
| ; __arm_tpidr2_restore (x1 with x8 as scratch, vs x8 with x9 as scratch). |
| define i64 @test_many_callee_arguments( |
| ; CHECK-LABEL: test_many_callee_arguments: |
| ; CHECK: // %bb.0: |
| ; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill |
| ; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill |
| ; CHECK-NEXT: mov x29, sp |
| ; CHECK-NEXT: sub sp, sp, #16 |
| ; CHECK-NEXT: mov x8, sp |
| ; CHECK-NEXT: rdsvl x9, #1 |
| ; CHECK-NEXT: msub x8, x9, x9, x8 |
| ; CHECK-NEXT: mov sp, x8 |
| ; CHECK-NEXT: ldp x10, x11, [x29, #32] |
| ; CHECK-NEXT: sub x12, x29, #16 |
| ; CHECK-NEXT: stp x8, x9, [x29, #-16] |
| ; CHECK-NEXT: msr TPIDR2_EL0, x12 |
| ; CHECK-NEXT: stp x10, x11, [sp, #-16]! |
| ; CHECK-NEXT: bl many_args_private_za_callee |
| ; CHECK-NEXT: add sp, sp, #16 |
| ; CHECK-NEXT: mov x1, x0 |
| ; CHECK-NEXT: smstart za |
| ; CHECK-NEXT: mrs x8, TPIDR2_EL0 |
| ; CHECK-NEXT: sub x0, x29, #16 |
| ; CHECK-NEXT: cbnz x8, .LBB9_2 |
| ; CHECK-NEXT: // %bb.1: |
| ; CHECK-NEXT: bl __arm_tpidr2_restore |
| ; CHECK-NEXT: .LBB9_2: |
| ; CHECK-NEXT: mov x0, x1 |
| ; CHECK-NEXT: msr TPIDR2_EL0, xzr |
| ; CHECK-NEXT: mov sp, x29 |
| ; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload |
| ; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload |
| ; CHECK-NEXT: ret |
| ; |
| ; CHECK-NEWLOWERING-LABEL: test_many_callee_arguments: |
| ; CHECK-NEWLOWERING: // %bb.0: |
| ; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill |
| ; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill |
| ; CHECK-NEWLOWERING-NEXT: mov x29, sp |
| ; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16 |
| ; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1 |
| ; CHECK-NEWLOWERING-NEXT: mov x9, sp |
| ; CHECK-NEWLOWERING-NEXT: msub x9, x8, x8, x9 |
| ; CHECK-NEWLOWERING-NEXT: mov sp, x9 |
| ; CHECK-NEWLOWERING-NEXT: ldp x10, x11, [x29, #32] |
| ; CHECK-NEWLOWERING-NEXT: sub x12, x29, #16 |
| ; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-16] |
| ; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x12 |
| ; CHECK-NEWLOWERING-NEXT: stp x10, x11, [sp, #-16]! |
| ; CHECK-NEWLOWERING-NEXT: bl many_args_private_za_callee |
| ; CHECK-NEWLOWERING-NEXT: add sp, sp, #16 |
| ; CHECK-NEWLOWERING-NEXT: mov x8, x0 |
| ; CHECK-NEWLOWERING-NEXT: smstart za |
| ; CHECK-NEWLOWERING-NEXT: mrs x9, TPIDR2_EL0 |
| ; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16 |
| ; CHECK-NEWLOWERING-NEXT: cbnz x9, .LBB9_2 |
| ; CHECK-NEWLOWERING-NEXT: // %bb.1: |
| ; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore |
| ; CHECK-NEWLOWERING-NEXT: .LBB9_2: |
| ; CHECK-NEWLOWERING-NEXT: mov x0, x8 |
| ; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr |
| ; CHECK-NEWLOWERING-NEXT: mov sp, x29 |
| ; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload |
| ; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload |
| ; CHECK-NEWLOWERING-NEXT: ret |
| i64 %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6, i64 %7, i64 %8, i64 %9 |
| ) nounwind "aarch64_inout_za" { |
| %ret = call i64 @many_args_private_za_callee( |
| i64 %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6, i64 %7, i64 %8, i64 %9) |
| ret i64 %ret |
| } |