[libc] add an SVE implementation of strlen (#167259)
This PR adds an SVE-based implementation of strlen, translated from the
in-tree AOR code. Microbenchmarks show an improvement over NEON when
N >= 64. Both implementations still fall behind glibc by a large margin,
but this may be a good starting point for exploring SVE implementations.
This PR also:
1. Adds two more strlen tests covering strings with embedded NUL bytes.
2. Adds a strlen fuzzer and fixes a typo in the existing freelist heap fuzzer.
```
=== strlen(16 bytes) ===
libc: 1.56115 ns/call, 9.54499 GiB/s
neon: 1.59393 ns/call, 9.34867 GiB/s
sve: 1.66097 ns/call, 8.97134 GiB/s
=== strlen(64 bytes) ===
libc: 2.06967 ns/call, 28.7991 GiB/s
neon: 2.59914 ns/call, 22.9325 GiB/s
sve: 2.58628 ns/call, 23.0465 GiB/s
=== strlen(256 bytes) ===
libc: 3.74165 ns/call, 63.7202 GiB/s
neon: 8.98243 ns/call, 26.5428 GiB/s
sve: 7.36426 ns/call, 32.3751 GiB/s
=== strlen(1024 bytes) ===
libc: 10.5327 ns/call, 90.5438 GiB/s
neon: 34.363 ns/call, 27.7529 GiB/s
sve: 26.9329 ns/call, 35.4092 GiB/s
=== strlen(4096 bytes) ===
libc: 37.7304 ns/call, 101.104 GiB/s
neon: 145.911 ns/call, 26.144 GiB/s
sve: 103.208 ns/call, 36.9612 GiB/s
=== strlen(1048576 bytes) ===
libc: 9623.4 ns/call, 101.478 GiB/s
neon: 36138.2 ns/call, 27.023 GiB/s
sve: 26605.6 ns/call, 36.7051 GiB/s
```
diff --git a/libc/fuzzing/__support/freelist_heap_fuzz.cpp b/libc/fuzzing/__support/freelist_heap_fuzz.cpp
index 7b7985a..0b400cb 100644
--- a/libc/fuzzing/__support/freelist_heap_fuzz.cpp
+++ b/libc/fuzzing/__support/freelist_heap_fuzz.cpp
@@ -24,7 +24,7 @@
_end:
.fill 1024
__llvm_libc_heap_limit:
-)";
+)");
using LIBC_NAMESPACE::FreeListHeap;
using LIBC_NAMESPACE::inline_memset;
diff --git a/libc/fuzzing/string/CMakeLists.txt b/libc/fuzzing/string/CMakeLists.txt
index efda80b..0918e925 100644
--- a/libc/fuzzing/string/CMakeLists.txt
+++ b/libc/fuzzing/string/CMakeLists.txt
@@ -40,3 +40,11 @@
DEPENDS
libc.src.strings.bcmp
)
+
+add_libc_fuzzer(
+ strlen_fuzz # traps when LIBC_NAMESPACE::strlen and ::strlen disagree
+ SRCS
+ strlen_fuzz.cpp
+ DEPENDS
+ libc.src.string.strlen
+)
diff --git a/libc/fuzzing/string/strlen_fuzz.cpp b/libc/fuzzing/string/strlen_fuzz.cpp
new file mode 100644
index 0000000..dd72c19
--- /dev/null
+++ b/libc/fuzzing/string/strlen_fuzz.cpp
@@ -0,0 +1,32 @@
+//===-- strlen_fuzz.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// Fuzzing test for llvm-libc strlen implementation.
+///
+//===----------------------------------------------------------------------===//
+
+#include "src/string/strlen.h"
+#include <cstdint>
+#include <cstring>
+
+// Mutation routine defined outside this file (presumably the fuzzing engine's
+// default mutator); declared here so the custom mutator can delegate to it.
+extern "C" size_t LLVMFuzzerMutate(uint8_t *data, size_t size, size_t max_size);
+
+// Custom mutator: delegates to LLVMFuzzerMutate, then overwrites the last byte
+// of the mutated buffer with NUL so the data is always a terminated C string.
+// NOTE(review): assumes LLVMFuzzerMutate never returns 0; a zero result would
+// make `mutated_size - 1` wrap around. Confirm against the fuzzing engine.
+extern "C" size_t LLVMFuzzerCustomMutator(uint8_t *data, size_t size,
+                                          size_t max_size, unsigned int seed) {
+  const size_t mutated_size = LLVMFuzzerMutate(data, size, max_size);
+  data[mutated_size - 1] = '\0';
+  return mutated_size;
+}
+
+// Differential fuzz target: LIBC_NAMESPACE::strlen must return the same length
+// as the ::strlen made visible by <cstring>; any disagreement traps.
+// Always returns 0.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+  // Only buffers produced by the custom mutator are guaranteed to end in NUL.
+  // The engine may also pass the empty input or unmodified corpus files;
+  // calling strlen on those would read past the end of the buffer, so skip
+  // anything that is not a NUL-terminated string.
+  if (size == 0 || data[size - 1] != '\0')
+    return 0;
+  const char *str = reinterpret_cast<const char *>(data);
+  const size_t ref = ::strlen(str);
+  const size_t impl = LIBC_NAMESPACE::strlen(str);
+  if (ref != impl)
+    __builtin_trap();
+  return 0;
+}
diff --git a/libc/src/string/memory_utils/aarch64/inline_strlen.h b/libc/src/string/memory_utils/aarch64/inline_strlen.h
index 87f5ccd..eafaca9 100644
--- a/libc/src/string/memory_utils/aarch64/inline_strlen.h
+++ b/libc/src/string/memory_utils/aarch64/inline_strlen.h
@@ -8,14 +8,13 @@
#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_STRLEN_H
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_STRLEN_H
+#include "src/__support/macros/properties/cpu_features.h"
+
#if defined(__ARM_NEON)
#include "src/__support/CPP/bit.h" // countr_zero
-
#include <arm_neon.h>
#include <stddef.h> // size_t
-
namespace LIBC_NAMESPACE_DECL {
-
namespace neon {
[[maybe_unused]] LIBC_NO_SANITIZE_OOB_ACCESS LIBC_INLINE static size_t
string_length(const char *src) {
@@ -45,9 +44,63 @@
}
}
} // namespace neon
-
-namespace string_length_impl = neon;
-
} // namespace LIBC_NAMESPACE_DECL
#endif // __ARM_NEON
+
+#ifdef LIBC_TARGET_CPU_HAS_SVE
+#include "src/__support/macros/optimization.h"
+#include <arm_sve.h>
+namespace LIBC_NAMESPACE_DECL {
+namespace sve {
+[[maybe_unused]] LIBC_INLINE static size_t string_length(const char *src) {
+ const uint8_t *ptr = reinterpret_cast<const uint8_t *>(src);
+ // Returns the number of bytes before the first zero byte of src, scanning one SVE vector per iteration.
+ svsetffr(); // initialize the first-fault register (FFR) to all true
+ const svbool_t all_true = svptrue_b8(); // predicate with every byte lane active
+ svbool_t cmp_zero; // lanes of the current vector that compare equal to zero
+ size_t len = 0; // bytes scanned so far without finding a zero
+
+ for (;;) {
+ // Read a vector's worth of bytes with a first-faulting load, stopping on first fault.
+ svuint8_t data = svldff1_u8(all_true, &ptr[len]);
+ svbool_t fault_mask = svrdffr_z(all_true); // FFR contents after the load
+ bool has_no_fault = svptest_last(all_true, fault_mask); // last lane still set
+ if (LIBC_LIKELY(has_no_fault)) {
+ // No fault was recorded: the whole vector is valid.
+ // Compare under all_true to avoid depending on the contents of FFR beyond the branch.
+ len += svcntb(); // speculatively advance by one full vector of bytes
+ cmp_zero = svcmpeq_n_u8(all_true, data, 0);
+ bool has_no_zero = !svptest_any(all_true, cmp_zero);
+ if (LIBC_LIKELY(has_no_zero))
+ continue;
+ len -= svcntb(); // a zero was found: undo the speculative increment
+ break;
+ } else {
+ // A fault was recorded: only the lanes set in fault_mask are valid.
+ // Perform the comparison only on those valid bytes.
+ cmp_zero = svcmpeq_n_u8(fault_mask, data, 0);
+ bool has_zero = svptest_any(fault_mask, cmp_zero);
+ if (LIBC_LIKELY(has_zero))
+ break;
+ svsetffr(); // reset the FFR to all true before the next load
+ len += svcntp_b8(all_true, fault_mask); // advance past the valid lanes only
+ continue;
+ }
+ }
+ // Select the lanes before the first zero byte and count them.
+ svbool_t before_zero = svbrkb_z(all_true, cmp_zero);
+ len += svcntp_b8(all_true, before_zero);
+ return len;
+}
+} // namespace sve
+} // namespace LIBC_NAMESPACE_DECL
+#endif // LIBC_TARGET_CPU_HAS_SVE
+
+namespace LIBC_NAMESPACE_DECL {
+#ifdef LIBC_TARGET_CPU_HAS_SVE
+namespace string_length_impl = sve; // SVE is tested first, so it wins over NEON
+#elif defined(__ARM_NEON)
+namespace string_length_impl = neon; // chosen only when the SVE macro is absent
+#endif // with neither macro defined, string_length_impl is not declared here
+} // namespace LIBC_NAMESPACE_DECL
#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_STRLEN_H
diff --git a/libc/test/src/string/strlen_test.cpp b/libc/test/src/string/strlen_test.cpp
index 4eb9d47..784dd7b 100644
--- a/libc/test/src/string/strlen_test.cpp
+++ b/libc/test/src/string/strlen_test.cpp
@@ -22,3 +22,15 @@
size_t result = LIBC_NAMESPACE::strlen(any);
ASSERT_EQ((size_t)12, result);
}
+
+TEST(LlvmLibcStrLenTest, DataAfterNulString) {
+  // Bytes stored after the first NUL must not be counted.
+  constexpr char BUF[10] = {'a', 'b', 'c', 'd', 'e', 'f', '\0', 'h', 'i', 'j'};
+  const size_t length = LIBC_NAMESPACE::strlen(BUF);
+  ASSERT_EQ(static_cast<size_t>(6), length);
+}
+
+TEST(LlvmLibcStrLenTest, MultipleNulsInOneWord) {
+  // With several NULs close together, the length ends at the first one.
+  constexpr char BUF[10] = {'a', 'b', '\0', 'd', 'e', 'f', '\0', 'h', 'i', 'j'};
+  const size_t length = LIBC_NAMESPACE::strlen(BUF);
+  ASSERT_EQ(static_cast<size_t>(2), length);
+}