| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| |
| // This file implements the support routines for the SME ABI, |
| // described here: |
| // https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#sme-support-routines |
| |
| #include "../assembly.h" |
| |
| .set FEAT_SVE_BIT, 30 |
| .set FEAT_SME_BIT, 42 |
| .set FEAT_SME2_BIT, 57 |
| .set FEAT_SME2_MASK, 1 << 57 |
| .set SVCR_PSTATE_SM_BIT, 0 |
| |
| #if !defined(__APPLE__) |
| #define CPU_FEATS_SYMBOL SYMBOL_NAME(__aarch64_cpu_features) |
| #define CPU_FEATS_SYMBOL_OFFSET :lo12:SYMBOL_NAME(__aarch64_cpu_features) |
| #else |
| // MachO requires @page/@pageoff directives because the global is defined |
| // in a different file. Otherwise this file may fail to build. |
| #define CPU_FEATS_SYMBOL SYMBOL_NAME(__aarch64_cpu_features)@page |
| #define CPU_FEATS_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_cpu_features)@pageoff |
| #endif |
| |
| .arch armv9-a+sme2 |
| |
| // Utility function which calls a system's abort() routine. Because the function |
| // is streaming-compatible it should disable streaming-SVE mode before calling |
| // abort(). Note that there is no need to preserve any state before the call, |
| // because the function does not return. |
| // do_abort: leaves streaming mode if it is enabled, then calls abort(). |
| // Takes no arguments and is not expected to return. |
| DEFINE_COMPILERRT_PRIVATE_FUNCTION(do_abort) |
| .cfi_startproc |
| .variant_pcs FUNC_SYMBOL(SYMBOL_NAME(do_abort)) |
| BTI_C |
| // Allocate a 32-byte frame: x29/x30 at [sp], VG spill slot at [sp, #16]. |
| stp x29, x30, [sp, #-32]! |
| cntd x0 |
| // Store VG to a stack location that we describe with .cfi_offset |
| str x0, [sp, #16] |
| .cfi_def_cfa_offset 32 |
| .cfi_offset w30, -24 |
| .cfi_offset w29, -32 |
| // Register number 46 is the slot holding the VG value stored above |
| // (CFA - 16 == sp + 16). |
| .cfi_offset 46, -16 |
| // __arm_sme_state returns SVCR[1:0] in x0[1:0]; bit 0 is PSTATE.SM. |
| bl FUNC_SYMBOL(SYMBOL_NAME(__arm_sme_state)) |
| // Not in streaming mode: skip the smstop. |
| tbz x0, #0, 2f |
| // Label 1 is fall-through only; nothing in this function branches to it. |
| 1: |
| smstop sm |
| 2: |
| // We can't make this into a tail-call because the unwinder would |
| // need to restore the value of VG. |
| bl FUNC_SYMBOL(SYMBOL_NAME(abort)) |
| .cfi_endproc |
| END_COMPILERRT_FUNCTION(do_abort) |
| |
| // __arm_sme_state reports the current SME state in x0 and x1. Whether SME is |
| // available is read from the FEAT_SME bit of the __aarch64_cpu_features global. |
| // SME unavailable: x0 = 0, x1 = 0. |
| // SME available: x0[63:62] = 0b11, x0[1:0] = SVCR[1:0], x1 = TPIDR2_EL0. |
| // x16 is used as scratch. |
| DEFINE_COMPILERRT_FUNCTION(__arm_sme_state) |
| .variant_pcs __arm_sme_state |
| BTI_C |
| // Start from the "SME unavailable" answer. |
| mov x1, xzr |
| mov x0, xzr |
| |
| // Load the CPU feature word and bail out early when FEAT_SME is absent. |
| adrp x16, CPU_FEATS_SYMBOL |
| ldr x16, [x16, CPU_FEATS_SYMBOL_OFFSET] |
| tbz x16, #FEAT_SME_BIT, 1f |
| |
| // x0 is still zero here, so the top two bits can be written directly. |
| mov x0, #0xC000000000000000 |
| // Copy SVCR[1:0] into x0[1:0], leaving the upper bits of x0 untouched. |
| mrs x16, SVCR |
| bfxil x0, x16, #0, #2 |
| mrs x1, TPIDR2_EL0 |
| 1: |
| ret |
| END_COMPILERRT_FUNCTION(__arm_sme_state) |
| |
| // __arm_tpidr2_restore: reloads ZA slices from the TPIDR2 block (BLK) that x0 |
| // points to. Branches to do_abort if TPIDR2_EL0 is nonnull or any reserved |
| // byte of BLK is nonzero; returns without loading anything if the save buffer |
| // pointer or the slice count is zero. Uses x14, x15 and x16 as scratch. |
| DEFINE_COMPILERRT_FUNCTION(__arm_tpidr2_restore) |
| .variant_pcs __arm_tpidr2_restore |
| BTI_C |
| // If TPIDR2_EL0 is nonnull, the subroutine aborts in some platform-specific |
| // manner. |
| mrs x14, TPIDR2_EL0 |
| cbnz x14, 2f |
| |
| // If any of the reserved bytes in the first 16 bytes of BLK are nonzero, |
| // the subroutine [..] aborts in some platform-defined manner. |
| // Bytes 10-11 are checked with the halfword load, bytes 12-15 with the word |
| // load. |
| ldrh w14, [x0, #10] |
| cbnz w14, 2f |
| ldr w14, [x0, #12] |
| cbnz w14, 2f |
| |
| // If BLK.za_save_buffer is NULL, the subroutine does nothing. |
| ldr x16, [x0] |
| cbz x16, 1f |
| |
| // If BLK.num_za_save_slices is zero, the subroutine does nothing. |
| // The halfword load zero-extends, so testing all of x14 is equivalent to |
| // testing w14. |
| ldrh w14, [x0, #8] |
| cbz x14, 1f |
| |
| // Loop state: x15 = slice index, x14 = slice count, x16 = buffer cursor. |
| mov x15, xzr |
| 0: |
| ldr za[w15,0], [x16] |
| // Step the cursor to the next saved slice. |
| addsvl x16, x16, #1 |
| add x15, x15, #1 |
| cmp x14, x15 |
| b.ne 0b |
| 1: |
| ret |
| 2: |
| // Tail-call: do_abort does not return here. |
| b FUNC_SYMBOL(SYMBOL_NAME(do_abort)) |
| END_COMPILERRT_FUNCTION(__arm_tpidr2_restore) |
| |
| // __arm_tpidr2_save: stores ZA slices into the save buffer described by the |
| // TPIDR2 block that TPIDR2_EL0 points to. Takes no register arguments. |
| // Returns without storing anything if FEAT_SME is absent, TPIDR2_EL0 is null, |
| // the slice count is zero or the buffer pointer is null; branches to do_abort |
| // if any reserved byte of the block is nonzero. Uses x14, x15 and x16 as |
| // scratch. |
| DEFINE_COMPILERRT_FUNCTION(__arm_tpidr2_save) |
| .variant_pcs __arm_tpidr2_save |
| BTI_C |
| // If the current thread does not have access to TPIDR2_EL0, the subroutine |
| // does nothing. |
| adrp x14, CPU_FEATS_SYMBOL |
| ldr x14, [x14, CPU_FEATS_SYMBOL_OFFSET] |
| tbz x14, #FEAT_SME_BIT, 1f |
| |
| // If TPIDR2_EL0 is null, the subroutine does nothing. |
| mrs x16, TPIDR2_EL0 |
| cbz x16, 1f |
| |
| // If any of the reserved bytes in the first 16 bytes of the TPIDR2 block are |
| // nonzero, the subroutine [..] aborts in some platform-defined manner. |
| // Bytes 10-11 are checked with the halfword load, bytes 12-15 with the word |
| // load. |
| ldrh w14, [x16, #10] |
| cbnz w14, 2f |
| ldr w14, [x16, #12] |
| cbnz w14, 2f |
| |
| // If num_za_save_slices is zero, the subroutine does nothing. |
| // The halfword load zero-extends, so testing all of x14 is equivalent to |
| // testing w14. |
| ldrh w14, [x16, #8] |
| cbz x14, 1f |
| |
| // If za_save_buffer is NULL, the subroutine does nothing. |
| // From here on x16 holds the buffer address rather than the block address. |
| ldr x16, [x16] |
| cbz x16, 1f |
| |
| // Loop state: x15 = slice index, x14 = slice count, x16 = buffer cursor. |
| mov x15, xzr |
| 0: |
| str za[w15,0], [x16] |
| // Step the cursor to the next slice slot. |
| addsvl x16, x16, #1 |
| add x15, x15, #1 |
| cmp x14, x15 |
| b.ne 0b |
| 1: |
| ret |
| 2: |
| // Tail-call: do_abort does not return here. |
| b FUNC_SYMBOL(SYMBOL_NAME(do_abort)) |
| END_COMPILERRT_FUNCTION(__arm_tpidr2_save) |
| |
| // __arm_za_disable: if FEAT_SME is present, calls __arm_tpidr2_save, clears |
| // TPIDR2_EL0 and turns PSTATE.ZA off. Does nothing when FEAT_SME is absent. |
| // Takes no register arguments; x14 is used as scratch for the feature check. |
| DEFINE_COMPILERRT_FUNCTION(__arm_za_disable) |
| .cfi_startproc |
| .variant_pcs __arm_za_disable |
| BTI_C |
| // If the current thread does not have access to SME, the subroutine does |
| // nothing. |
| adrp x14, CPU_FEATS_SYMBOL |
| ldr x14, [x14, CPU_FEATS_SYMBOL_OFFSET] |
| tbz x14, #FEAT_SME_BIT, 0f |
| |
| // Otherwise, the subroutine behaves as if it did the following: |
| // * Call __arm_tpidr2_save. |
| // A frame record is pushed first because the bl below overwrites x30. |
| stp x29, x30, [sp, #-16]! |
| .cfi_def_cfa_offset 16 |
| mov x29, sp |
| .cfi_def_cfa w29, 16 |
| .cfi_offset w30, -8 |
| .cfi_offset w29, -16 |
| bl FUNC_SYMBOL(SYMBOL_NAME(__arm_tpidr2_save)) |
| |
| // * Set TPIDR2_EL0 to null. |
| msr TPIDR2_EL0, xzr |
| |
| // * Set PSTATE.ZA to 0. |
| smstop za |
| |
| // Pop the frame record; the CFI below mirrors the prologue in reverse. |
| .cfi_def_cfa wsp, 16 |
| ldp x29, x30, [sp], #16 |
| .cfi_def_cfa_offset 0 |
| .cfi_restore w30 |
| .cfi_restore w29 |
| // Label 0 is reached both by fall-through and by the no-SME early exit, which |
| // never pushed a frame. |
| 0: |
| ret |
| .cfi_endproc |
| END_COMPILERRT_FUNCTION(__arm_za_disable) |
| |
| // __arm_get_current_vg returns in x0 the value produced by cntd when vector |
| // instructions are usable in the current mode, and 0 otherwise: |
| // * FEAT_SVE present -> cntd |
| // * FEAT_SME present and PSTATE.SM set -> cntd |
| // * anything else -> 0 |
| // x17 is used as scratch. |
| DEFINE_COMPILERRT_FUNCTION(__arm_get_current_vg) |
| .variant_pcs __arm_get_current_vg |
| BTI_C |
| |
| adrp x17, CPU_FEATS_SYMBOL |
| ldr x17, [x17, CPU_FEATS_SYMBOL_OFFSET] |
| // SVE alone is enough to read the vector granule count. |
| tbnz w17, #FEAT_SVE_BIT, 2f |
| // Without SVE, SME must be present and streaming mode must be enabled. |
| tbz x17, #FEAT_SME_BIT, 1f |
| mrs x17, SVCR |
| tbnz x17, #SVCR_PSTATE_SM_BIT, 2f |
| 1: |
| // No usable vector length in the current mode. |
| mov x0, xzr |
| ret |
| 2: |
| cntd x0 |
| ret |
| END_COMPILERRT_FUNCTION(__arm_get_current_vg) |
| |
| // The diagram below describes the layout used in the following routines: |
| // * __arm_sme_state_size |
| // * __arm_sme_save |
| // * __arm_sme_restore |
| // |
| // +---------------------------------+ |
| // | ... | |
| // | ZA buffer | |
| // | ... | |
| // +---------------------------------+ <- @96 |
| // | ZT0 contents | |
| // +---------------------------------+ <- @32 |
| // | byte 15-10: zero (reserved) | |
| // | byte 9-8: num_za_save_slices | TPIDR2 block |
| // | byte 7-0: za_save_buffer | |
| // +---------------------------------+ <- @16 |
| // | bit 127-1: zero (reserved) | Internal state for __arm_sme_save/restore |
| // | bit 0: VALID | |
| // +---------------------------------+ <- @0 |
| |
| // __arm_sme_state_size returns in x0 the number of bytes needed for the save |
| // area described by the layout diagram in this file: |
| // * 16 when SME is absent, PSTATE.ZA is clear, or TPIDR2_EL0 is nonnull; |
| // * otherwise (FEAT_SME2 ? 96 : 32) + SVLB * SVLB. |
| // x16 and x17 are used as scratch. |
| DEFINE_COMPILERRT_FUNCTION(__arm_sme_state_size) |
| .variant_pcs __arm_sme_state_size |
| BTI_C |
| |
| // Take the minimal-size exit unless SME is present, PSTATE.ZA (SVCR bit 1) |
| // is set and TPIDR2_EL0 is null. |
| adrp x17, CPU_FEATS_SYMBOL |
| ldr x17, [x17, CPU_FEATS_SYMBOL_OFFSET] |
| tbz x17, #FEAT_SME_BIT, 0f |
| mrs x16, SVCR |
| tbz x16, #1, 0f |
| mrs x16, TPIDR2_EL0 |
| cbnz x16, 0f |
| |
| // Fixed part: x16 = 96 when the FEAT_SME2 bit is set, 32 when it is clear. |
| // The feature word in x17 is consumed by tst before x17 is reused. |
| tst x17, #FEAT_SME2_MASK |
| mov w16, #96 |
| mov w17, #32 |
| csel x16, x16, x17, ne |
| |
| // Variable part: x0 = SVLB * SVLB + fixed part. |
| rdsvl x17, #1 |
| madd x0, x17, x17, x16 |
| ret |
| |
| 0: |
| // Minimal size: just the 16-byte slot that holds the VALID bit. |
| mov w0, #16 |
| ret |
| END_COMPILERRT_FUNCTION(__arm_sme_state_size) |
| |
| // __arm_sme_save: x0 = PTR, a 16-byte aligned save area laid out as in the |
| // diagram earlier in this file. |
| // * Branches to do_abort if PTR is not 16-byte aligned. |
| // * Always zeroes the 16-byte internal state at PTR (VALID = 0). |
| // * If SME is present, PSTATE.ZA is set and TPIDR2_EL0 is null: sets VALID, |
| // stores ZT0 at PTR+32 when FEAT_SME2 is present, fills in the TPIDR2 block |
| // at PTR+16 and points TPIDR2_EL0 at it (a lazy save; ZA itself is not |
| // written here). |
| // On the lazy-save path x0 is left pointing at PTR+16 because of the |
| // pre-indexed store. Uses x16, x17 and x18 as scratch. |
| // NOTE(review): x18 is a reserved platform register on some targets - confirm |
| // that writing it is acceptable everywhere this file is built. |
| DEFINE_COMPILERRT_FUNCTION(__arm_sme_save) |
| .variant_pcs __arm_sme_save |
| BTI_C |
| |
| // If PTR is not 16-byte aligned, abort. |
| tst x0, #0xF |
| b.ne 3f |
| |
| // Clear internal state bits |
| stp xzr, xzr, [x0] |
| |
| // If SME is not available, PSTATE.ZA = 0 or TPIDR2_EL0 != 0, return. |
| adrp x17, CPU_FEATS_SYMBOL |
| ldr x17, [x17, CPU_FEATS_SYMBOL_OFFSET] |
| tbz x17, #FEAT_SME_BIT, 2f |
| mrs x16, SVCR |
| tbz x16, #1, 2f |
| mrs x16, TPIDR2_EL0 |
| cbnz x16, 2f |
| |
| // ZA or ZT0 need saving, we can now set internal VALID bit to 1 |
| mov w16, #1 |
| str x16, [x0] |
| |
| // x18 = address of the first byte after the TPIDR2 block (PTR + 32). |
| add x18, x0, #32 |
| tbz x17, #FEAT_SME2_BIT, 1f |
| |
| // Store ZT0 |
| str zt0, [x18] |
| // Skip the 64-byte ZT0 slot so x18 points at the ZA buffer (PTR + 96). |
| add x18, x18, #64 |
| |
| 1: |
| // Set up lazy-save (x18 = pointer to buffer) |
| rdsvl x17, #1 |
| // Pre-indexed store: x0 becomes PTR + 16, the start of the TPIDR2 block, |
| // and za_save_buffer is written at its offset 0. |
| str x18, [x0, #16]! |
| // num_za_save_slices = value read by rdsvl; the reserved bytes stay zero. |
| strh w17, [x0, #8] |
| strh wzr, [x0, #10] |
| str wzr, [x0, #12] |
| msr TPIDR2_EL0, x0 |
| |
| 2: |
| // Do nothing |
| ret |
| |
| 3: |
| // Tail-call: do_abort does not return here. |
| b FUNC_SYMBOL(SYMBOL_NAME(do_abort)) |
| END_COMPILERRT_FUNCTION(__arm_sme_save) |
| |
| // __arm_sme_restore: x0 = PTR, a 16-byte aligned save area previously filled |
| // in by __arm_sme_save (see the layout diagram earlier in this file). |
| // * Branches to do_abort if PTR is misaligned, if VALID is set but SME is |
| // absent, or if VALID is set while TPIDR2_EL0 is null and PSTATE.ZA is 1. |
| // * Returns immediately if the first 8 bytes at PTR are zero (VALID = 0). |
| // * Otherwise enables ZA, reloads ZA through __arm_tpidr2_restore when the |
| // lazy save was committed (TPIDR2_EL0 is null), clears TPIDR2_EL0 and |
| // reloads ZT0 from PTR+32 when FEAT_SME2 is present. |
| // Uses x16 and x17 as scratch, plus whatever __arm_tpidr2_restore clobbers. |
| DEFINE_COMPILERRT_FUNCTION(__arm_sme_restore) |
| .cfi_startproc |
| .variant_pcs __arm_sme_restore |
| BTI_C |
| |
| // A frame record is needed because the bl below overwrites x30. |
| stp x29, x30, [sp, #-16]! |
| .cfi_def_cfa_offset 16 |
| mov x29, sp |
| .cfi_def_cfa w29, 16 |
| .cfi_offset w30, -8 |
| .cfi_offset w29, -16 |
| |
| // If PTR is not 16-byte aligned, abort. |
| tst x0, #0xF |
| b.ne 3f |
| |
| // If the VALID bit is 0, return early. |
| ldr x16, [x0] |
| cbz x16, 2f |
| |
| // If SME is not available, abort. |
| adrp x17, CPU_FEATS_SYMBOL |
| ldr x17, [x17, CPU_FEATS_SYMBOL_OFFSET] |
| tbz x17, #FEAT_SME_BIT, 3f |
| |
| // If TPIDR2_EL0 != nullptr, no lazy-save was committed, try to reload zt0. |
| mrs x16, TPIDR2_EL0 |
| cbnz x16, 1f |
| |
| // If TPIDR2_EL0 == nullptr and PSTATE.ZA = 1 (<=> ZA state is 'active'), |
| // abort. |
| mrs x16, SVCR |
| tbnz x16, #1, 3f |
| |
| // Restore za. The TPIDR2 block lives at PTR + 16; x0 is moved back to PTR |
| // after the call. |
| smstart za |
| add x0, x0, #16 |
| // Use the same symbol decoration as every other call in this file so the |
| // reference resolves on targets with a user label prefix. |
| bl FUNC_SYMBOL(SYMBOL_NAME(__arm_tpidr2_restore)) |
| sub x0, x0, #16 |
| |
| 1: |
| smstart za |
| msr TPIDR2_EL0, xzr |
| |
| // Check if zt0 needs restoring. x17 (IP1) is not guaranteed to survive the |
| // call above, so read the feature word again instead of relying on it. |
| adrp x17, CPU_FEATS_SYMBOL |
| ldr x17, [x17, CPU_FEATS_SYMBOL_OFFSET] |
| tbz x17, #FEAT_SME2_BIT, 2f |
| |
| // Restore zt0. |
| add x16, x0, #32 |
| ldr zt0, [x16] |
| |
| 2: |
| // Common exit: pop the frame record and return. The CFI state with the frame |
| // still live is remembered for the abort path below. |
| .cfi_remember_state |
| .cfi_def_cfa wsp, 16 |
| ldp x29, x30, [sp], #16 |
| .cfi_def_cfa_offset 0 |
| .cfi_restore w30 |
| .cfi_restore w29 |
| ret |
| |
| 3: |
| // Abort path: the frame record is still on the stack here. Pop it so that |
| // do_abort is entered with the caller's sp/x29/x30 and the unwind info |
| // matches the real stack layout. No bl has run on any path that reaches this |
| // label, so the reloaded x30 equals the original return address. |
| .cfi_restore_state |
| .cfi_def_cfa wsp, 16 |
| ldp x29, x30, [sp], #16 |
| .cfi_def_cfa_offset 0 |
| .cfi_restore w30 |
| .cfi_restore w29 |
| b FUNC_SYMBOL(SYMBOL_NAME(do_abort)) |
| .cfi_endproc |
| END_COMPILERRT_FUNCTION(__arm_sme_restore) |
| |
| NO_EXEC_STACK_DIRECTIVE |
| |
| // GNU property note for BTI and PAC |
| GNU_PROPERTY_BTI_PAC |