| //===-- Memcpy implementation for aarch64 -----------------------*- C++ -*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_MEMCPY_H |
| #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_MEMCPY_H |
| |
| #include "src/__support/macros/attributes.h" // LIBC_INLINE |
| #include "src/__support/macros/properties/cpu_features.h" |
| #include "src/string/memory_utils/op_builtin.h" |
| #include "src/string/memory_utils/utils.h" |
| |
| #include <stddef.h> // size_t |
| |
| #if defined(LIBC_TARGET_CPU_HAS_SVE) |
| #include <arm_sve.h> |
| #endif |
| namespace LIBC_NAMESPACE_DECL { |
| [[maybe_unused]] LIBC_INLINE void |
| inline_memcpy_aarch64(Ptr __restrict dst, CPtr __restrict src, size_t count) { |
| // Always avoid emit any memory operation if count == 0. |
| if (count == 0) |
| return; |
| // Use predicated load/store on SVE available targets to avoid branching in |
| // small cases. |
| #ifdef LIBC_TARGET_CPU_HAS_SVE |
| auto src_ptr = reinterpret_cast<const uint8_t *>(src); |
| auto dst_ptr = reinterpret_cast<uint8_t *>(dst); |
| if (count <= 16) { |
| const svbool_t mask = svwhilelt_b8_u64(0, count); |
| svst1_u8(mask, dst_ptr, svld1_u8(mask, src_ptr)); |
| return; |
| } |
| if (count <= 32) { |
| const size_t vlen = svcntb(); |
| svbool_t m0 = svwhilelt_b8_u64(0, count); |
| svbool_t m1 = svwhilelt_b8_u64(vlen, count); |
| svst1_u8(m0, dst_ptr, svld1_u8(m0, src_ptr)); |
| svst1_u8(m1, dst_ptr + vlen, svld1_u8(m1, src_ptr + vlen)); |
| return; |
| } |
| #else |
| if (count == 1) |
| return builtin::Memcpy<1>::block(dst, src); |
| if (count == 2) |
| return builtin::Memcpy<2>::block(dst, src); |
| if (count == 3) |
| return builtin::Memcpy<3>::block(dst, src); |
| if (count == 4) |
| return builtin::Memcpy<4>::block(dst, src); |
| if (count < 8) |
| return builtin::Memcpy<4>::head_tail(dst, src, count); |
| if (count < 16) |
| return builtin::Memcpy<8>::head_tail(dst, src, count); |
| if (count < 32) |
| return builtin::Memcpy<16>::head_tail(dst, src, count); |
| #endif |
| if (count < 64) |
| return builtin::Memcpy<32>::head_tail(dst, src, count); |
| if (count < 128) |
| return builtin::Memcpy<64>::head_tail(dst, src, count); |
| builtin::Memcpy<16>::block(dst, src); |
| align_to_next_boundary<16, Arg::Src>(dst, src, count); |
| return builtin::Memcpy<64>::loop_and_tail(dst, src, count); |
| } |
| |
| } // namespace LIBC_NAMESPACE_DECL |
| |
| #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_MEMCPY_H |