; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64 -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme < %s | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK
; RUN: llc -mtriple=aarch64 -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme -aarch64-new-sme-abi < %s | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-NEWLOWERING
declare void @private_za_callee()
declare void @shared_za_callee() "aarch64_inout_za"
declare void @preserves_za_callee() "aarch64_preserves_za"
declare float @llvm.cos.f32(float)
; Test the lazy-save mechanism for a single callee.
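; The checks verify that the caller allocates an SVL.B x SVL.B save buffer
; below its frame, stores the buffer pointer and the SVL.B slice count in a
; TPIDR2 block at [x29, #-16], and points TPIDR2_EL0 at that block before the
; call. After the call it re-enables ZA, calls __arm_tpidr2_restore if
; TPIDR2_EL0 was cleared (i.e. the callee committed the lazy save), and then
; clears TPIDR2_EL0.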
define void @test_lazy_save_1_callee() nounwind "aarch64_inout_za" {
; CHECK-COMMON-LABEL: test_lazy_save_1_callee:
; CHECK-COMMON: // %bb.0:
; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-COMMON-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: mov x29, sp
; CHECK-COMMON-NEXT: sub sp, sp, #16
; CHECK-COMMON-NEXT: rdsvl x8, #1
; CHECK-COMMON-NEXT: mov x9, sp
; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
; CHECK-COMMON-NEXT: mov sp, x9
; CHECK-COMMON-NEXT: sub x10, x29, #16
; CHECK-COMMON-NEXT: stp x9, x8, [x29, #-16]
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x10
; CHECK-COMMON-NEXT: bl private_za_callee
; CHECK-COMMON-NEXT: smstart za
; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
; CHECK-COMMON-NEXT: sub x0, x29, #16
; CHECK-COMMON-NEXT: cbnz x8, .LBB0_2
; CHECK-COMMON-NEXT: // %bb.1:
; CHECK-COMMON-NEXT: bl __arm_tpidr2_restore
; CHECK-COMMON-NEXT: .LBB0_2:
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
; CHECK-COMMON-NEXT: mov sp, x29
; CHECK-COMMON-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ret
call void @private_za_callee()
ret void
}
; Test the lazy-save mechanism for multiple callees.
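; With the default lowering each private-ZA call gets its own TPIDR2_EL0 setup
; and conditional restore; with -aarch64-new-sme-abi the two back-to-back calls
; share a single setup and a single restore.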
define void @test_lazy_save_2_callees() nounwind "aarch64_inout_za" {
; CHECK-LABEL: test_lazy_save_2_callees:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: sub x20, x29, #16
; CHECK-NEXT: stp x9, x8, [x29, #-16]
; CHECK-NEXT: msr TPIDR2_EL0, x20
; CHECK-NEXT: bl private_za_callee
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
; CHECK-NEXT: cbnz x8, .LBB1_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: msr TPIDR2_EL0, x20
; CHECK-NEXT: bl private_za_callee
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
; CHECK-NEXT: cbnz x8, .LBB1_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB1_4:
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: ret
;
; CHECK-NEWLOWERING-LABEL: test_lazy_save_2_callees:
; CHECK-NEWLOWERING: // %bb.0:
; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: mov x29, sp
; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16
; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1
; CHECK-NEWLOWERING-NEXT: mov x9, sp
; CHECK-NEWLOWERING-NEXT: msub x9, x8, x8, x9
; CHECK-NEWLOWERING-NEXT: mov sp, x9
; CHECK-NEWLOWERING-NEXT: sub x10, x29, #16
; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-16]
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x10
; CHECK-NEWLOWERING-NEXT: bl private_za_callee
; CHECK-NEWLOWERING-NEXT: bl private_za_callee
; CHECK-NEWLOWERING-NEXT: smstart za
; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16
; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB1_2
; CHECK-NEWLOWERING-NEXT: // %bb.1:
; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
; CHECK-NEWLOWERING-NEXT: .LBB1_2:
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT: mov sp, x29
; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ret
call void @private_za_callee()
call void @private_za_callee()
ret void
}
; Test a call of an intrinsic that gets expanded to a library call.
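; llvm.cos.f32 is lowered to a call to cosf, which is a private-ZA call and so
; needs the same lazy-save sequence as a direct call.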
define float @test_lazy_save_expanded_intrinsic(float %a) nounwind "aarch64_inout_za" {
; CHECK-COMMON-LABEL: test_lazy_save_expanded_intrinsic:
; CHECK-COMMON: // %bb.0:
; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-COMMON-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: mov x29, sp
; CHECK-COMMON-NEXT: sub sp, sp, #16
; CHECK-COMMON-NEXT: rdsvl x8, #1
; CHECK-COMMON-NEXT: mov x9, sp
; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
; CHECK-COMMON-NEXT: mov sp, x9
; CHECK-COMMON-NEXT: sub x10, x29, #16
; CHECK-COMMON-NEXT: stp x9, x8, [x29, #-16]
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x10
; CHECK-COMMON-NEXT: bl cosf
; CHECK-COMMON-NEXT: smstart za
; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
; CHECK-COMMON-NEXT: sub x0, x29, #16
; CHECK-COMMON-NEXT: cbnz x8, .LBB2_2
; CHECK-COMMON-NEXT: // %bb.1:
; CHECK-COMMON-NEXT: bl __arm_tpidr2_restore
; CHECK-COMMON-NEXT: .LBB2_2:
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
; CHECK-COMMON-NEXT: mov sp, x29
; CHECK-COMMON-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ret
%res = call float @llvm.cos.f32(float %a)
ret float %res
}
; Test a streaming-compatible -> normal (non-streaming) call combined with a lazy save.
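; The current streaming mode is read from SVCR so that SM can be conditionally
; stopped before the call and restarted afterwards, in addition to the usual
; TPIDR2_EL0 setup and conditional restore.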
define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_inout_za" "aarch64_pstate_sm_compatible" {
; CHECK-LABEL: test_lazy_save_and_conditional_smstart:
; CHECK: // %bb.0:
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: add x29, sp, #64
; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: mrs x20, SVCR
; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: sub x10, x29, #80
; CHECK-NEXT: stp x9, x8, [x29, #-80]
; CHECK-NEXT: msr TPIDR2_EL0, x10
; CHECK-NEXT: tbz w20, #0, .LBB3_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB3_2:
; CHECK-NEXT: bl private_za_callee
; CHECK-NEXT: tbz w20, #0, .LBB3_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB3_4:
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #80
; CHECK-NEXT: cbnz x8, .LBB3_6
; CHECK-NEXT: // %bb.5:
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB3_6:
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: sub sp, x29, #64
; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT: ret
;
; CHECK-NEWLOWERING-LABEL: test_lazy_save_and_conditional_smstart:
; CHECK-NEWLOWERING: // %bb.0:
; CHECK-NEWLOWERING-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: add x29, sp, #64
; CHECK-NEWLOWERING-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16
; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1
; CHECK-NEWLOWERING-NEXT: mov x9, sp
; CHECK-NEWLOWERING-NEXT: msub x9, x8, x8, x9
; CHECK-NEWLOWERING-NEXT: mov sp, x9
; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-80]
; CHECK-NEWLOWERING-NEXT: mrs x20, SVCR
; CHECK-NEWLOWERING-NEXT: sub x8, x29, #80
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x8
; CHECK-NEWLOWERING-NEXT: tbz w20, #0, .LBB3_2
; CHECK-NEWLOWERING-NEXT: // %bb.1:
; CHECK-NEWLOWERING-NEXT: smstop sm
; CHECK-NEWLOWERING-NEXT: .LBB3_2:
; CHECK-NEWLOWERING-NEXT: bl private_za_callee
; CHECK-NEWLOWERING-NEXT: tbz w20, #0, .LBB3_4
; CHECK-NEWLOWERING-NEXT: // %bb.3:
; CHECK-NEWLOWERING-NEXT: smstart sm
; CHECK-NEWLOWERING-NEXT: .LBB3_4:
; CHECK-NEWLOWERING-NEXT: smstart za
; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT: sub x0, x29, #80
; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB3_6
; CHECK-NEWLOWERING-NEXT: // %bb.5:
; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
; CHECK-NEWLOWERING-NEXT: .LBB3_6:
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT: sub sp, x29, #64
; CHECK-NEWLOWERING-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ret
call void @private_za_callee()
ret void
}
; Note: For the final @private_za_callee() we set up a lazy save then don't
; restore from it (since ZA is off on return). We could improve this case
; by turning ZA off before the final private-ZA call.
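; The shared-ZA and preserves-ZA callees do not need a lazy save, so
; TPIDR2_EL0 is only set up around the private-ZA calls.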
define void @test_lazy_save_mixed_shared_and_private_callees() "aarch64_new_za"
; CHECK-LABEL: test_lazy_save_mixed_shared_and_private_callees:
; CHECK: // %bb.0: // %prelude
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: .cfi_def_cfa w29, 32
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w30, -24
; CHECK-NEXT: .cfi_offset w29, -32
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: stp x9, x8, [x29, #-16]
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: cbz x8, .LBB4_2
; CHECK-NEXT: // %bb.1: // %save.za
; CHECK-NEXT: bl __arm_tpidr2_save
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: .LBB4_2:
; CHECK-NEXT: smstart za
; CHECK-NEXT: sub x20, x29, #16
; CHECK-NEXT: zero {za}
; CHECK-NEXT: msr TPIDR2_EL0, x20
; CHECK-NEXT: bl private_za_callee
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
; CHECK-NEXT: cbnz x8, .LBB4_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB4_4:
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: bl shared_za_callee
; CHECK-NEXT: bl preserves_za_callee
; CHECK-NEXT: msr TPIDR2_EL0, x20
; CHECK-NEXT: bl private_za_callee
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
; CHECK-NEXT: cbnz x8, .LBB4_6
; CHECK-NEXT: // %bb.5:
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB4_6:
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: smstop za
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: ret
;
; CHECK-NEWLOWERING-LABEL: test_lazy_save_mixed_shared_and_private_callees:
; CHECK-NEWLOWERING: // %bb.0:
; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: mov x29, sp
; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16
; CHECK-NEWLOWERING-NEXT: .cfi_def_cfa w29, 32
; CHECK-NEWLOWERING-NEXT: .cfi_offset w19, -16
; CHECK-NEWLOWERING-NEXT: .cfi_offset w30, -24
; CHECK-NEWLOWERING-NEXT: .cfi_offset w29, -32
; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1
; CHECK-NEWLOWERING-NEXT: mov x9, sp
; CHECK-NEWLOWERING-NEXT: msub x9, x8, x8, x9
; CHECK-NEWLOWERING-NEXT: mov sp, x9
; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-16]
; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT: cbz x8, .LBB4_2
; CHECK-NEWLOWERING-NEXT: // %bb.1:
; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_save
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT: zero {za}
; CHECK-NEWLOWERING-NEXT: .LBB4_2:
; CHECK-NEWLOWERING-NEXT: smstart za
; CHECK-NEWLOWERING-NEXT: sub x8, x29, #16
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x8
; CHECK-NEWLOWERING-NEXT: bl private_za_callee
; CHECK-NEWLOWERING-NEXT: smstart za
; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16
; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB4_4
; CHECK-NEWLOWERING-NEXT: // %bb.3:
; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
; CHECK-NEWLOWERING-NEXT: .LBB4_4:
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT: bl shared_za_callee
; CHECK-NEWLOWERING-NEXT: bl preserves_za_callee
; CHECK-NEWLOWERING-NEXT: sub x8, x29, #16
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x8
; CHECK-NEWLOWERING-NEXT: bl private_za_callee
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT: smstop za
; CHECK-NEWLOWERING-NEXT: mov sp, x29
; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ret
{
call void @private_za_callee()
call void @shared_za_callee()
call void @preserves_za_callee()
call void @private_za_callee()
ret void
}
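; Test many back-to-back private-ZA calls between two shared-ZA calls. With
; -aarch64-new-sme-abi a single lazy-save setup and restore covers all six
; private-ZA calls, whereas the default lowering repeats the sequence around
; each one.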
define void @test_many_back2back_private_za_calls() "aarch64_inout_za" {
; CHECK-LABEL: test_many_back2back_private_za_calls:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: .cfi_def_cfa w29, 32
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w30, -24
; CHECK-NEXT: .cfi_offset w29, -32
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: stp x9, x8, [x29, #-16]
; CHECK-NEXT: bl shared_za_callee
; CHECK-NEXT: sub x20, x29, #16
; CHECK-NEXT: msr TPIDR2_EL0, x20
; CHECK-NEXT: bl private_za_callee
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
; CHECK-NEXT: cbnz x8, .LBB5_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB5_2:
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: msr TPIDR2_EL0, x20
; CHECK-NEXT: bl private_za_callee
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
; CHECK-NEXT: cbnz x8, .LBB5_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB5_4:
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: msr TPIDR2_EL0, x20
; CHECK-NEXT: bl private_za_callee
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
; CHECK-NEXT: cbnz x8, .LBB5_6
; CHECK-NEXT: // %bb.5:
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB5_6:
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: msr TPIDR2_EL0, x20
; CHECK-NEXT: bl private_za_callee
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
; CHECK-NEXT: cbnz x8, .LBB5_8
; CHECK-NEXT: // %bb.7:
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB5_8:
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: msr TPIDR2_EL0, x20
; CHECK-NEXT: bl private_za_callee
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
; CHECK-NEXT: cbnz x8, .LBB5_10
; CHECK-NEXT: // %bb.9:
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB5_10:
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: msr TPIDR2_EL0, x20
; CHECK-NEXT: bl private_za_callee
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
; CHECK-NEXT: cbnz x8, .LBB5_12
; CHECK-NEXT: // %bb.11:
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB5_12:
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: bl shared_za_callee
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: ret
;
; CHECK-NEWLOWERING-LABEL: test_many_back2back_private_za_calls:
; CHECK-NEWLOWERING: // %bb.0:
; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: mov x29, sp
; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16
; CHECK-NEWLOWERING-NEXT: .cfi_def_cfa w29, 32
; CHECK-NEWLOWERING-NEXT: .cfi_offset w19, -16
; CHECK-NEWLOWERING-NEXT: .cfi_offset w30, -24
; CHECK-NEWLOWERING-NEXT: .cfi_offset w29, -32
; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1
; CHECK-NEWLOWERING-NEXT: mov x9, sp
; CHECK-NEWLOWERING-NEXT: msub x9, x8, x8, x9
; CHECK-NEWLOWERING-NEXT: mov sp, x9
; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-16]
; CHECK-NEWLOWERING-NEXT: bl shared_za_callee
; CHECK-NEWLOWERING-NEXT: sub x8, x29, #16
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x8
; CHECK-NEWLOWERING-NEXT: bl private_za_callee
; CHECK-NEWLOWERING-NEXT: bl private_za_callee
; CHECK-NEWLOWERING-NEXT: bl private_za_callee
; CHECK-NEWLOWERING-NEXT: bl private_za_callee
; CHECK-NEWLOWERING-NEXT: bl private_za_callee
; CHECK-NEWLOWERING-NEXT: bl private_za_callee
; CHECK-NEWLOWERING-NEXT: smstart za
; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16
; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB5_2
; CHECK-NEWLOWERING-NEXT: // %bb.1:
; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
; CHECK-NEWLOWERING-NEXT: .LBB5_2:
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT: bl shared_za_callee
; CHECK-NEWLOWERING-NEXT: mov sp, x29
; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ret
call void @shared_za_callee()
call void @private_za_callee()
call void @private_za_callee()
call void @private_za_callee()
call void @private_za_callee()
call void @private_za_callee()
call void @private_za_callee()
call void @shared_za_callee()
ret void
}
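; Test a private-ZA call sandwiched between two shared-ZA calls; only the
; private-ZA call needs the lazy save.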
define void @test_shared_private_shared() nounwind "aarch64_inout_za" {
; CHECK-COMMON-LABEL: test_shared_private_shared:
; CHECK-COMMON: // %bb.0:
; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-COMMON-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: mov x29, sp
; CHECK-COMMON-NEXT: sub sp, sp, #16
; CHECK-COMMON-NEXT: rdsvl x8, #1
; CHECK-COMMON-NEXT: mov x9, sp
; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
; CHECK-COMMON-NEXT: mov sp, x9
; CHECK-COMMON-NEXT: stp x9, x8, [x29, #-16]
; CHECK-COMMON-NEXT: bl shared_za_callee
; CHECK-COMMON-NEXT: sub x8, x29, #16
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x8
; CHECK-COMMON-NEXT: bl private_za_callee
; CHECK-COMMON-NEXT: smstart za
; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
; CHECK-COMMON-NEXT: sub x0, x29, #16
; CHECK-COMMON-NEXT: cbnz x8, .LBB6_2
; CHECK-COMMON-NEXT: // %bb.1:
; CHECK-COMMON-NEXT: bl __arm_tpidr2_restore
; CHECK-COMMON-NEXT: .LBB6_2:
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
; CHECK-COMMON-NEXT: bl shared_za_callee
; CHECK-COMMON-NEXT: mov sp, x29
; CHECK-COMMON-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ret
call void @shared_za_callee()
call void @private_za_callee()
call void @shared_za_callee()
ret void
}
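; Test that a function making only shared-ZA calls allocates no lazy-save
; buffer and never writes TPIDR2_EL0.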
define void @test_only_shared_za() nounwind "aarch64_inout_za" {
; CHECK-COMMON-LABEL: test_only_shared_za:
; CHECK-COMMON: // %bb.0:
; CHECK-COMMON-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-COMMON-NEXT: bl shared_za_callee
; CHECK-COMMON-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-COMMON-NEXT: ret
call void @shared_za_callee()
ret void
}
declare i64 @shared_za_callee_i64(i64) "aarch64_inout_za"
declare i64 @private_za_callee_i64(i64)
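; Same shared -> private -> shared pattern as above, but with an i64 value
; threaded through the calls, so the call result must be kept live across the
; conditional restore.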
define i64 @test_shared_private_shared_i64(i64 %x) nounwind "aarch64_inout_za" {
; CHECK-COMMON-LABEL: test_shared_private_shared_i64:
; CHECK-COMMON: // %bb.0:
; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-COMMON-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: mov x29, sp
; CHECK-COMMON-NEXT: sub sp, sp, #16
; CHECK-COMMON-NEXT: rdsvl x8, #1
; CHECK-COMMON-NEXT: mov x9, sp
; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
; CHECK-COMMON-NEXT: mov sp, x9
; CHECK-COMMON-NEXT: stp x9, x8, [x29, #-16]
; CHECK-COMMON-NEXT: bl shared_za_callee_i64
; CHECK-COMMON-NEXT: sub x8, x29, #16
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x8
; CHECK-COMMON-NEXT: bl private_za_callee_i64
; CHECK-COMMON-NEXT: mov x1, x0
; CHECK-COMMON-NEXT: smstart za
; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
; CHECK-COMMON-NEXT: sub x0, x29, #16
; CHECK-COMMON-NEXT: cbnz x8, .LBB8_2
; CHECK-COMMON-NEXT: // %bb.1:
; CHECK-COMMON-NEXT: bl __arm_tpidr2_restore
; CHECK-COMMON-NEXT: .LBB8_2:
; CHECK-COMMON-NEXT: mov x0, x1
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
; CHECK-COMMON-NEXT: bl shared_za_callee_i64
; CHECK-COMMON-NEXT: mov sp, x29
; CHECK-COMMON-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ret
%a = call i64 @shared_za_callee_i64(i64 %x)
%b = call i64 @private_za_callee_i64(i64 %a)
%c = call i64 @shared_za_callee_i64(i64 %b)
ret i64 %c
}
declare i64 @many_args_private_za_callee(
i64, i64, i64, i64, i64, i64, i64, i64, i64, i64)
; In this example some arguments are passed on the stack, which decrements the
; stack pointer before the call. In this test the lazy save should be set up
; before the stack decrement.
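; This is visible in the checks: TPIDR2_EL0 is written before the
; "stp x10, x11, [sp, #-16]!" that pushes the stack arguments.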
define i64 @test_many_callee_arguments(
; CHECK-LABEL: test_many_callee_arguments:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: rdsvl x9, #1
; CHECK-NEXT: msub x8, x9, x9, x8
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: ldp x10, x11, [x29, #32]
; CHECK-NEXT: sub x12, x29, #16
; CHECK-NEXT: stp x8, x9, [x29, #-16]
; CHECK-NEXT: msr TPIDR2_EL0, x12
; CHECK-NEXT: stp x10, x11, [sp, #-16]!
; CHECK-NEXT: bl many_args_private_za_callee
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: mov x1, x0
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
; CHECK-NEXT: cbnz x8, .LBB9_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB9_2:
; CHECK-NEXT: mov x0, x1
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: ret
;
; CHECK-NEWLOWERING-LABEL: test_many_callee_arguments:
; CHECK-NEWLOWERING: // %bb.0:
; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-NEWLOWERING-NEXT: mov x29, sp
; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16
; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1
; CHECK-NEWLOWERING-NEXT: mov x9, sp
; CHECK-NEWLOWERING-NEXT: msub x9, x8, x8, x9
; CHECK-NEWLOWERING-NEXT: mov sp, x9
; CHECK-NEWLOWERING-NEXT: ldp x10, x11, [x29, #32]
; CHECK-NEWLOWERING-NEXT: sub x12, x29, #16
; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-16]
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x12
; CHECK-NEWLOWERING-NEXT: stp x10, x11, [sp, #-16]!
; CHECK-NEWLOWERING-NEXT: bl many_args_private_za_callee
; CHECK-NEWLOWERING-NEXT: add sp, sp, #16
; CHECK-NEWLOWERING-NEXT: mov x8, x0
; CHECK-NEWLOWERING-NEXT: smstart za
; CHECK-NEWLOWERING-NEXT: mrs x9, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16
; CHECK-NEWLOWERING-NEXT: cbnz x9, .LBB9_2
; CHECK-NEWLOWERING-NEXT: // %bb.1:
; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
; CHECK-NEWLOWERING-NEXT: .LBB9_2:
; CHECK-NEWLOWERING-NEXT: mov x0, x8
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT: mov sp, x29
; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ret
i64 %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6, i64 %7, i64 %8, i64 %9
) nounwind "aarch64_inout_za" {
%ret = call i64 @many_args_private_za_callee(
i64 %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6, i64 %7, i64 %8, i64 %9)
ret i64 %ret
}