blob: 739f8c1aaddbc0e4f29a0c767dfc1da9b428fd95 [file] [log] [blame]
//===-- Strlen implementation for x86_64 ----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_STRLEN_H
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_STRLEN_H
#include "src/__support/CPP/bit.h" // countr_zero
#include <immintrin.h>
#include <stddef.h> // size_t
namespace LIBC_NAMESPACE_DECL {
namespace string_length_internal {
// Return a bit-mask with the nth bit set if the nth-byte in block_ptr is zero.
template <typename Vector, typename Mask>
[[gnu::no_sanitize_address]] LIBC_INLINE static Mask
compare_and_mask(const Vector *block_ptr);
template <typename Vector, typename Mask,
decltype(compare_and_mask<Vector, Mask>)>
[[gnu::no_sanitize_address]] LIBC_INLINE static size_t
string_length_vector(const char *src) {
uintptr_t misalign_bytes = reinterpret_cast<uintptr_t>(src) % sizeof(Vector);
const Vector *block_ptr =
reinterpret_cast<const Vector *>(src - misalign_bytes);
auto cmp = compare_and_mask<Vector, Mask>(block_ptr) >> misalign_bytes;
if (cmp)
return cpp::countr_zero(cmp);
while (true) {
block_ptr++;
cmp = compare_and_mask<Vector, Mask>(block_ptr);
if (cmp)
return static_cast<size_t>(reinterpret_cast<uintptr_t>(block_ptr) -
reinterpret_cast<uintptr_t>(src) +
cpp::countr_zero(cmp));
}
}
template <>
LIBC_INLINE uint32_t
compare_and_mask<__m128i, uint32_t>(const __m128i *block_ptr) {
__m128i v = _mm_load_si128(block_ptr);
__m128i z = _mm_setzero_si128();
__m128i c = _mm_cmpeq_epi8(z, v);
return _mm_movemask_epi8(c);
}
namespace sse2 {
[[maybe_unused]] LIBC_INLINE size_t string_length(const char *src) {
return string_length_vector<__m128i, uint32_t,
compare_and_mask<__m128i, uint32_t>>(src);
}
} // namespace sse2
#if defined(__AVX2__)
template <>
LIBC_INLINE uint32_t
compare_and_mask<__m256i, uint32_t>(const __m256i *block_ptr) {
__m256i v = _mm256_load_si256(block_ptr);
__m256i z = _mm256_setzero_si256();
__m256i c = _mm256_cmpeq_epi8(z, v);
return _mm256_movemask_epi8(c);
}
namespace avx2 {
[[maybe_unused]] LIBC_INLINE size_t string_length(const char *src) {
return string_length_vector<__m256i, uint32_t,
compare_and_mask<__m256i, uint32_t>>(src);
}
} // namespace avx2
#endif
#if defined(__AVX512F__)
template <>
LIBC_INLINE __mmask64
compare_and_mask<__m512i, __mmask64>(const __m512i *block_ptr) {
__m512i v = _mm512_load_si512(block_ptr);
__m512i z = _mm512_setzero_si512();
return _mm512_cmp_epu8_mask(z, v, _MM_CMPINT_EQ);
}
namespace avx512 {
[[maybe_unused]] LIBC_INLINE size_t string_length(const char *src) {
return string_length_vector<__m512i, __mmask64,
compare_and_mask<__m512i, __mmask64>>(src);
}
} // namespace avx512
#endif
} // namespace string_length_internal
#if defined(__AVX512F__)
namespace string_length_impl = string_length_internal::avx512;
#elif defined(__AVX2__)
namespace string_length_impl = string_length_internal::avx2;
#else
namespace string_length_impl = string_length_internal::sse2;
#endif
} // namespace LIBC_NAMESPACE_DECL
#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_STRLEN_H