[libc] Add strspn implementation and std::bitset

Reviewed By: sivachandra, abrachet

Differential Revision: https://reviews.llvm.org/D85103

GitOrigin-RevId: f3b41502554f2948ad00531dde7c3f53973de960
diff --git a/config/linux/aarch64/entrypoints.txt b/config/linux/aarch64/entrypoints.txt
index 909166c..468f2cc 100644
--- a/config/linux/aarch64/entrypoints.txt
+++ b/config/linux/aarch64/entrypoints.txt
@@ -28,6 +28,7 @@
     libc.src.string.strlen
     libc.src.string.strnlen
     libc.src.string.strrchr
+    libc.src.string.strspn
     libc.src.string.strstr
 )
 
diff --git a/config/linux/x86_64/entrypoints.txt b/config/linux/x86_64/entrypoints.txt
index 37a9d71..031812c 100644
--- a/config/linux/x86_64/entrypoints.txt
+++ b/config/linux/x86_64/entrypoints.txt
@@ -46,6 +46,7 @@
     libc.src.string.strlen
     libc.src.string.strnlen
     libc.src.string.strrchr
+    libc.src.string.strspn
     libc.src.string.strstr
 
     # sys/mman.h entrypoints
diff --git a/src/string/CMakeLists.txt b/src/string/CMakeLists.txt
index d0eab63..82d3457 100644
--- a/src/string/CMakeLists.txt
+++ b/src/string/CMakeLists.txt
@@ -94,6 +94,14 @@
     strrchr.h
 )
 
+add_entrypoint_object(
+  strspn
+  SRCS
+    strspn.cpp
+  HDRS
+    strspn.h
+)
+
 # Helper to define a function with multiple implementations
 # - Computes flags to satisfy required/rejected features and arch,
 # - Declares an entry point,
diff --git a/src/string/strspn.cpp b/src/string/strspn.cpp
new file mode 100644
index 0000000..f01bc01
--- /dev/null
+++ b/src/string/strspn.cpp
@@ -0,0 +1,29 @@
+//===-- Implementation of strspn ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/string/strspn.h"
+
+#include "src/__support/common.h"
+#include "utils/CPP/Bitset.h"
+#include <stddef.h>
+#include <stdint.h>
+
+namespace __llvm_libc {
+
+size_t LLVM_LIBC_ENTRYPOINT(strspn)(const char *src, const char *segment) {
+  const char *initial = src;
+  cpp::Bitset<256> bitset;
+
+  for (; *segment; ++segment)
+    bitset.set(*segment);
+  for (; *src && bitset.test(*src); ++src)
+    ;
+  return src - initial;
+}
+
+} // namespace __llvm_libc
diff --git a/src/string/strspn.h b/src/string/strspn.h
new file mode 100644
index 0000000..92321d1
--- /dev/null
+++ b/src/string/strspn.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for strspn ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_STRSPN_H
+#define LLVM_LIBC_SRC_STRING_STRSPN_H
+
+#include <stddef.h>
+
+namespace __llvm_libc {
+
+size_t strspn(const char *src, const char *segment);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STRING_STRSPN_H
diff --git a/test/src/string/CMakeLists.txt b/test/src/string/CMakeLists.txt
index 0fff250..e1db8d6 100644
--- a/test/src/string/CMakeLists.txt
+++ b/test/src/string/CMakeLists.txt
@@ -102,6 +102,17 @@
     libc.src.string.strrchr
 )
 
+add_libc_unittest(
+  strspn_test
+  SUITE
+    libc_string_unittests
+  SRCS
+    strspn_test.cpp
+  DEPENDS
+    libc.src.string.strspn
+)
+
+
 # Tests all implementations that can run on the host.
 function(add_libc_multi_impl_test name)
   get_property(fq_implementations GLOBAL PROPERTY ${name}_implementations)
diff --git a/test/src/string/strspn_test.cpp b/test/src/string/strspn_test.cpp
new file mode 100644
index 0000000..edcfb17
--- /dev/null
+++ b/test/src/string/strspn_test.cpp
@@ -0,0 +1,85 @@
+//===-- Unittests for strspn ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/string/strspn.h"
+
+#include "utils/UnitTest/Test.h"
+
+TEST(StrSpnTest, EmptyStringShouldReturnZeroLengthSpan) {
+  // The search should not include the null terminator.
+  EXPECT_EQ(__llvm_libc::strspn("", ""), size_t{0});
+  EXPECT_EQ(__llvm_libc::strspn("_", ""), size_t{0});
+  EXPECT_EQ(__llvm_libc::strspn("", "_"), size_t{0});
+}
+
+TEST(StrSpnTest, ShouldNotSpanAnythingAfterNullTerminator) {
+  const char src[4] = {'a', 'b', '\0', 'c'};
+  EXPECT_EQ(__llvm_libc::strspn(src, "ab"), size_t{2});
+  EXPECT_EQ(__llvm_libc::strspn(src, "c"), size_t{0});
+
+  // Same goes for the segment to be searched for.
+  const char segment[4] = {'1', '2', '\0', '3'};
+  EXPECT_EQ(__llvm_libc::strspn("123", segment), size_t{2});
+}
+
+TEST(StrSpnTest, SpanEachIndividualCharacter) {
+  const char *src = "12345";
+  EXPECT_EQ(__llvm_libc::strspn(src, "1"), size_t{1});
+  // Since '1' is not within the segment, the span
+  // size should remain zero.
+  EXPECT_EQ(__llvm_libc::strspn(src, "2"), size_t{0});
+  EXPECT_EQ(__llvm_libc::strspn(src, "3"), size_t{0});
+  EXPECT_EQ(__llvm_libc::strspn(src, "4"), size_t{0});
+  EXPECT_EQ(__llvm_libc::strspn(src, "5"), size_t{0});
+}
+
+TEST(StrSpnTest, UnmatchedCharacterShouldNotBeCountedInSpan) {
+  EXPECT_EQ(__llvm_libc::strspn("a", "b"), size_t{0});
+  EXPECT_EQ(__llvm_libc::strspn("abcdef", "1"), size_t{0});
+  EXPECT_EQ(__llvm_libc::strspn("123", "4"), size_t{0});
+}
+
+TEST(StrSpnTest, SequentialCharactersShouldSpan) {
+  const char *src = "abcde";
+  EXPECT_EQ(__llvm_libc::strspn(src, "a"), size_t{1});
+  EXPECT_EQ(__llvm_libc::strspn(src, "ab"), size_t{2});
+  EXPECT_EQ(__llvm_libc::strspn(src, "abc"), size_t{3});
+  EXPECT_EQ(__llvm_libc::strspn(src, "abcd"), size_t{4});
+  EXPECT_EQ(__llvm_libc::strspn(src, "abcde"), size_t{5});
+  // Same thing for when the roles are reversed.
+  EXPECT_EQ(__llvm_libc::strspn("abcde", src), size_t{5});
+  EXPECT_EQ(__llvm_libc::strspn("abcd", src), size_t{4});
+  EXPECT_EQ(__llvm_libc::strspn("abc", src), size_t{3});
+  EXPECT_EQ(__llvm_libc::strspn("ab", src), size_t{2});
+  EXPECT_EQ(__llvm_libc::strspn("a", src), size_t{1});
+}
+
+TEST(StrSpnTest, NonSequentialCharactersShouldNotSpan) {
+  const char *src = "123456789";
+  EXPECT_EQ(__llvm_libc::strspn(src, "_1_abc_2_def_3_"), size_t{3});
+  // Only spans 4 since '5' is not within the span.
+  EXPECT_EQ(__llvm_libc::strspn(src, "67__34abc12"), size_t{4});
+}
+
+TEST(StrSpnTest, ReverseCharacters) {
+  // Since these are still sequential, this should span.
+  EXPECT_EQ(__llvm_libc::strspn("12345", "54321"), size_t{5});
+  // Does not span any since '1' is not within the span.
+  EXPECT_EQ(__llvm_libc::strspn("12345", "432"), size_t{0});
+  // Only spans 1 since '2' is not within the span.
+  EXPECT_EQ(__llvm_libc::strspn("12345", "51"), size_t{1});
+}
+
+TEST(StrSpnTest, DuplicatedCharactersToBeSearchedForShouldStillMatch) {
+  // Only a single character, so only spans 1.
+  EXPECT_EQ(__llvm_libc::strspn("a", "aa"), size_t{1});
+  // This should count once for each 'a' in the source string.
+  EXPECT_EQ(__llvm_libc::strspn("aa", "aa"), size_t{2});
+  EXPECT_EQ(__llvm_libc::strspn("aaa", "aa"), size_t{3});
+  EXPECT_EQ(__llvm_libc::strspn("aaaa", "aa"), size_t{4});
+}
diff --git a/test/utils/CMakeLists.txt b/test/utils/CMakeLists.txt
index 90ff4bb..ee9bff4 100644
--- a/test/utils/CMakeLists.txt
+++ b/test/utils/CMakeLists.txt
@@ -1 +1,2 @@
 add_subdirectory(FPUtil)
+add_subdirectory(CPP)
diff --git a/test/utils/CPP/CMakeLists.txt b/test/utils/CPP/CMakeLists.txt
new file mode 100644
index 0000000..f770334
--- /dev/null
+++ b/test/utils/CPP/CMakeLists.txt
@@ -0,0 +1,11 @@
+add_libc_testsuite(libc_cpp_utils_unittests)
+
+add_libc_unittest(
+  bitset_test
+  SUITE
+    libc_cpp_utils_unittests
+  SRCS
+    bitset_test.cpp
+  DEPENDS
+    libc.utils.CPP.standalone_cpp
+)
diff --git a/test/utils/CPP/bitset_test.cpp b/test/utils/CPP/bitset_test.cpp
new file mode 100644
index 0000000..4613f94
--- /dev/null
+++ b/test/utils/CPP/bitset_test.cpp
@@ -0,0 +1,102 @@
+//===-- Unittests for Bitset ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "utils/CPP/Bitset.h"
+#include "utils/UnitTest/Test.h"
+
+TEST(BitsetTest, SetBitForSizeEqualToOne) {
+  __llvm_libc::cpp::Bitset<1> bitset;
+  EXPECT_FALSE(bitset.test(0));
+  bitset.set(0);
+  EXPECT_TRUE(bitset.test(0));
+}
+
+TEST(BitsetTest, SetsBitsForSizeEqualToTwo) {
+  __llvm_libc::cpp::Bitset<2> bitset;
+  bitset.set(0);
+  EXPECT_TRUE(bitset.test(0));
+  bitset.set(1);
+  EXPECT_TRUE(bitset.test(1));
+}
+
+TEST(BitsetTest, SetsAllBitsForSizeLessThanEight) {
+  __llvm_libc::cpp::Bitset<7> bitset;
+  for (size_t i = 0; i < 7; ++i)
+    bitset.set(i);
+  // Verify all bits are now set.
+  for (size_t j = 0; j < 7; ++j)
+    EXPECT_TRUE(bitset.test(j));
+}
+
+TEST(BitsetTest, SetsAllBitsForSizeLessThanSixteen) {
+  __llvm_libc::cpp::Bitset<15> bitset;
+  for (size_t i = 0; i < 15; ++i)
+    bitset.set(i);
+  // Verify all bits are now set.
+  for (size_t j = 0; j < 15; ++j)
+    EXPECT_TRUE(bitset.test(j));
+}
+
+TEST(BitsetTest, SetsAllBitsForSizeLessThanThirtyTwo) {
+  __llvm_libc::cpp::Bitset<31> bitset;
+  for (size_t i = 0; i < 31; ++i)
+    bitset.set(i);
+  // Verify all bits are now set.
+  for (size_t j = 0; j < 31; ++j)
+    EXPECT_TRUE(bitset.test(j));
+}
+
+TEST(BitsetTest, DefaultHasNoSetBits) {
+  __llvm_libc::cpp::Bitset<64> bitset;
+  for (size_t i = 0; i < 64; ++i) {
+    EXPECT_FALSE(bitset.test(i));
+  }
+  // Same for odd number.
+  __llvm_libc::cpp::Bitset<65> odd_bitset;
+  for (size_t i = 0; i < 65; ++i) {
+    EXPECT_FALSE(odd_bitset.test(i));
+  }
+}
+
+TEST(BitsetTest, SettingBitXDoesNotSetBitY) {
+  for (size_t i = 0; i < 256; ++i) {
+    // Initialize within the loop to start with a fresh Bitset.
+    __llvm_libc::cpp::Bitset<256> bitset;
+    bitset.set(i);
+
+    for (size_t neighbor = 0; neighbor < 256; ++neighbor) {
+      if (neighbor == i)
+        EXPECT_TRUE(bitset.test(neighbor));
+      else
+        EXPECT_FALSE(bitset.test(neighbor));
+    }
+  }
+  // Same for odd number.
+  for (size_t i = 0; i < 255; ++i) {
+
+    __llvm_libc::cpp::Bitset<255> bitset;
+    bitset.set(i);
+
+    for (size_t neighbor = 0; neighbor < 255; ++neighbor) {
+      if (neighbor == i)
+        EXPECT_TRUE(bitset.test(neighbor));
+      else
+        EXPECT_FALSE(bitset.test(neighbor));
+    }
+  }
+}
+
+TEST(BitsetTest, SettingBitXDoesNotResetBitY) {
+  __llvm_libc::cpp::Bitset<128> bitset;
+  for (size_t i = 0; i < 128; ++i)
+    bitset.set(i);
+
+  // Verify all bits are now set.
+  for (size_t j = 0; j < 128; ++j)
+    EXPECT_TRUE(bitset.test(j));
+}
diff --git a/utils/CPP/Bitset.h b/utils/CPP/Bitset.h
new file mode 100644
index 0000000..304a6fe
--- /dev/null
+++ b/utils/CPP/Bitset.h
@@ -0,0 +1,39 @@
+//===-- A self contained equivalent of std::bitset --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_UTILS_CPP_BITSET_H
+#define LLVM_LIBC_UTILS_CPP_BITSET_H
+
+#include <stddef.h> // For size_t.
+#include <stdint.h> // For uintptr_t.
+
+namespace __llvm_libc {
+namespace cpp {
+
+template <size_t NumberOfBits> struct Bitset {
+  static_assert(NumberOfBits != 0,
+                "Cannot create a __llvm_libc::cpp::Bitset of size 0.");
+
+  constexpr void set(size_t Index) {
+    Data[Index / BitsPerUnit] |= (uintptr_t{1} << (Index % BitsPerUnit));
+  }
+
+  constexpr bool test(size_t Index) const {
+    return Data[Index / BitsPerUnit] & (uintptr_t{1} << (Index % BitsPerUnit));
+  }
+
+private:
+  static constexpr size_t BitsPerByte = 8;
+  static constexpr size_t BitsPerUnit = BitsPerByte * sizeof(uintptr_t);
+  uintptr_t Data[(NumberOfBits + BitsPerUnit - 1) / BitsPerUnit] = {0};
+};
+
+} // namespace cpp
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_UTILS_CPP_BITSET_H
diff --git a/utils/CPP/CMakeLists.txt b/utils/CPP/CMakeLists.txt
index 4c7f5e9..60975fe 100644
--- a/utils/CPP/CMakeLists.txt
+++ b/utils/CPP/CMakeLists.txt
@@ -3,6 +3,7 @@
   HDRS
     Array.h
     ArrayRef.h
+    Bitset.h
     Functional.h
     StringRef.h
     TypeTraits.h