| //===-- String Length -------------------------------------------*- C++ -*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // Basic implementation and dispatch mechanism for performance-sensitive string- |
| // related code. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #ifndef LLVM_LIBC_SRC_STRING_STRING_LENGTH_H |
| #define LLVM_LIBC_SRC_STRING_STRING_LENGTH_H |
| |
| #include "hdr/limits_macros.h" |
| #include "hdr/stdint_proxy.h" // uintptr_t |
| #include "hdr/types/size_t.h" |
| #include "src/__support/CPP/type_traits.h" // cpp::is_same_v |
| |
| #if LIBC_HAS_VECTOR_TYPE |
| #include "src/string/memory_utils/generic/inline_strlen.h" |
| #endif |
| #if defined(LIBC_TARGET_ARCH_IS_X86) |
| #include "src/string/memory_utils/x86_64/inline_strlen.h" |
| #elif defined(LIBC_TARGET_ARCH_IS_AARCH64) && \ |
| (defined(LIBC_TARGET_CPU_HAS_SVE) || defined(__ARM_NEON)) |
| #include "src/string/memory_utils/aarch64/inline_strlen.h" |
| #endif |
| |
| // Set sensible defaults |
| #ifndef LIBC_COPT_STRING_LENGTH_IMPL |
| #define LIBC_COPT_STRING_LENGTH_IMPL element |
| #endif |
| #ifndef LIBC_COPT_FIND_FIRST_CHARACTER_IMPL |
| #define LIBC_COPT_FIND_FIRST_CHARACTER_IMPL element |
| #endif |
| |
| namespace LIBC_NAMESPACE_DECL { |
| namespace internal { |
| |
| #if !LIBC_HAS_VECTOR_TYPE |
| // Forward any clang vector impls to architecture specific ones |
| namespace arch_vector {} |
| namespace clang_vector = arch_vector; |
| #endif |
| |
| namespace element { |
| // Element-by-element (usually a byte, but wider for wchar) implementations of |
| // functions that search for data. Slow, but easy to understand and analyze. |
| |
| // Returns the length of a string, denoted by the first occurrence |
| // of a null terminator. |
| LIBC_INLINE size_t string_length(const char *src) { |
| size_t length; |
| for (length = 0; *src; ++src, ++length) |
| ; |
| return length; |
| } |
| |
| template <typename T> LIBC_INLINE size_t string_length_element(const T *src) { |
| size_t length; |
| for (length = 0; *src; ++src, ++length) |
| ; |
| return length; |
| } |
| |
| LIBC_INLINE void *find_first_character(const unsigned char *src, |
| unsigned char ch, size_t n) { |
| for (; n && *src != ch; --n, ++src) |
| ; |
| return n ? const_cast<unsigned char *>(src) : nullptr; |
| } |
| } // namespace element |
| |
| namespace word { |
| // Non-vector, implementations of functions that search for data by reading from |
| // memory word-by-word. |
| |
| template <typename Word> LIBC_INLINE constexpr Word repeat_byte(Word byte) { |
| static_assert(CHAR_BIT == 8, "repeat_byte assumes a byte is 8 bits."); |
| constexpr size_t BITS_IN_BYTE = CHAR_BIT; |
| constexpr size_t BYTE_MASK = 0xff; |
| Word result = 0; |
| byte = byte & BYTE_MASK; |
| for (size_t i = 0; i < sizeof(Word); ++i) |
| result = (result << BITS_IN_BYTE) | byte; |
| return result; |
| } |
| |
| // The goal of this function is to take in a block of arbitrary size and return |
| // if it has any bytes equal to zero without branching. This is done by |
| // transforming the block such that zero bytes become non-zero and non-zero |
| // bytes become zero. |
| // The first transformation relies on the properties of carrying in arithmetic |
| // subtraction. Specifically, if 0x01 is subtracted from a byte that is 0x00, |
| // then the result for that byte must be equal to 0xff (or 0xfe if the next byte |
| // needs a carry as well). |
| // The next transformation is a simple mask. All zero bytes will have the high |
| // bit set after the subtraction, so each byte is masked with 0x80. This narrows |
| // the set of bytes that result in a non-zero value to only zero bytes and bytes |
| // with the high bit and any other bit set. |
| // The final transformation masks the result of the previous transformations |
| // with the inverse of the original byte. This means that any byte that had the |
| // high bit set will no longer have it set, narrowing the list of bytes which |
| // result in non-zero values to just the zero byte. |
| template <typename Word> LIBC_INLINE constexpr bool has_zeroes(Word block) { |
| constexpr unsigned int LOW_BITS = repeat_byte<Word>(0x01); |
| constexpr Word HIGH_BITS = repeat_byte<Word>(0x80); |
| Word subtracted = block - LOW_BITS; |
| Word inverted = ~block; |
| return (subtracted & inverted & HIGH_BITS) != 0; |
| } |
| |
| // Unsigned int is the default size for most processors, and on x86-64 it |
| // performs better than larger sizes when the src pointer can't be assumed to |
| // be aligned to a word boundary, so it's the size we use for reading the |
| // string a block at a time. |
| |
| LIBC_INLINE size_t string_length(const char *src) { |
| using Word = unsigned int; |
| const char *char_ptr = src; |
| // Step 1: read 1 byte at a time to align to block size |
| for (; reinterpret_cast<uintptr_t>(char_ptr) % sizeof(Word) != 0; |
| ++char_ptr) { |
| if (*char_ptr == '\0') |
| return static_cast<size_t>(char_ptr - src); |
| } |
| // Step 2: read blocks |
| for (const Word *block_ptr = reinterpret_cast<const Word *>(char_ptr); |
| !has_zeroes<Word>(*block_ptr); ++block_ptr) { |
| char_ptr = reinterpret_cast<const char *>(block_ptr); |
| } |
| // Step 3: find the zero in the block |
| for (; *char_ptr != '\0'; ++char_ptr) { |
| ; |
| } |
| return static_cast<size_t>(char_ptr - src); |
| } |
| |
| LIBC_NO_SANITIZE_OOB_ACCESS LIBC_INLINE void * |
| find_first_character(const unsigned char *src, unsigned char ch, |
| size_t max_strlen = cpp::numeric_limits<size_t>::max()) { |
| using Word = unsigned int; |
| const unsigned char *char_ptr = src; |
| size_t cur = 0; |
| |
| // If the maximum size of the string is small, the overhead of aligning to a |
| // word boundary and generating a bitmask of the appropriate size may be |
| // greater than the gains from reading larger chunks. Based on some testing, |
| // the crossover point between when it's faster to just read bytewise and read |
| // blocks is somewhere between 16 and 32, so 4 times the size of the block |
| // should be in that range. |
| if (max_strlen < (sizeof(Word) * 4)) { |
| return element::find_first_character(src, ch, max_strlen); |
| } |
| size_t n = max_strlen; |
| // Step 1: read 1 byte at a time to align to block size |
| for (; cur < n && reinterpret_cast<uintptr_t>(char_ptr) % sizeof(Word) != 0; |
| ++cur, ++char_ptr) { |
| if (*char_ptr == ch) |
| return const_cast<unsigned char *>(char_ptr); |
| } |
| |
| const Word ch_mask = repeat_byte<Word>(ch); |
| |
| // Step 2: read blocks |
| const Word *block_ptr = reinterpret_cast<const Word *>(char_ptr); |
| for (; cur < n && !has_zeroes<Word>((*block_ptr) ^ ch_mask); |
| cur += sizeof(Word), ++block_ptr) |
| ; |
| char_ptr = reinterpret_cast<const unsigned char *>(block_ptr); |
| |
| // Step 3: find the match in the block |
| for (; cur < n && *char_ptr != ch; ++cur, ++char_ptr) { |
| ; |
| } |
| |
| if (cur >= n || *char_ptr != ch) |
| return static_cast<void *>(nullptr); |
| |
| return const_cast<unsigned char *>(char_ptr); |
| } |
| |
| } // namespace word |
| |
| // Dispatch mechanism for implementations of performance-sensitive |
| // functions. Always measure, but generally from lower- to higher-performance |
| // order: |
| // |
| // 1. element - read char-by-char or wchar-by-wchar |
| // 3. word - read word-by-word |
| // 3. clang_vector - read using clang's internal vector types |
| // 4. arch_vector - hand-coded per architecture. Possibly in asm, or with |
| // intrinsics. |
| // |
| // The called implemenation is chosen at build-time by setting |
| // LIBC_CONF_{FUNC}_IMPL in config.json |
| static constexpr auto &string_length_impl = |
| LIBC_COPT_STRING_LENGTH_IMPL::string_length; |
| static constexpr auto &find_first_character_impl = |
| LIBC_COPT_FIND_FIRST_CHARACTER_IMPL::find_first_character; |
| |
| template <typename T> LIBC_INLINE size_t string_length(const T *src) { |
| if constexpr (cpp::is_same_v<T, char>) |
| return string_length_impl(src); |
| return element::string_length_element<T>(src); |
| } |
| |
| } // namespace internal |
| } // namespace LIBC_NAMESPACE_DECL |
| |
| #endif // LLVM_LIBC_SRC_STRING_STRING_LENGTH_H |