[libc] Fix signed char issues in strsep()/strtok()/strtok_r(). (#156705)

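Where char is signed, bytes with the top bit set (e.g. 0x80) are negative.
The old code cast each char directly to size_t before using it as a bitset
index, which yields a value far outside the 256-entry bitset. Read the
strings through unsigned char instead.

Also add the missing tests for all the related functions (even the ones
that were already right), and add the missing bazel build rules.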

GitOrigin-RevId: ded5f433dedf4a3419548929ac3dfa826e11d2e1
diff --git a/src/string/string_utils.h b/src/string/string_utils.h
index 26e9add..1080348 100644
--- a/src/string/string_utils.h
+++ b/src/string/string_utils.h
@@ -212,28 +212,28 @@
   static_assert(CHAR_BIT == 8, "bitset of 256 assumes char is 8 bits");
   cpp::bitset<256> delims;
   for (; *delimiter_string != '\0'; ++delimiter_string)
-    delims.set(static_cast<size_t>(*delimiter_string));
+    delims.set(*reinterpret_cast<const unsigned char *>(delimiter_string));
 
-  char *tok_start = src;
+  unsigned char *tok_start = reinterpret_cast<unsigned char *>(src);
   if constexpr (SkipDelim)
-    while (*tok_start != '\0' && delims.test(static_cast<size_t>(*tok_start)))
+    while (*tok_start != '\0' && delims.test(*tok_start))
       ++tok_start;
   if (*tok_start == '\0' && SkipDelim) {
     *context = nullptr;
     return nullptr;
   }
 
-  char *tok_end = tok_start;
-  while (*tok_end != '\0' && !delims.test(static_cast<size_t>(*tok_end)))
+  unsigned char *tok_end = tok_start;
+  while (*tok_end != '\0' && !delims.test(*tok_end))
     ++tok_end;
 
   if (*tok_end == '\0') {
     *context = nullptr;
   } else {
     *tok_end = '\0';
-    *context = tok_end + 1;
+    *context = reinterpret_cast<char *>(tok_end + 1);
   }
-  return tok_start;
+  return reinterpret_cast<char *>(tok_start);
 }
 
 LIBC_INLINE size_t strlcpy(char *__restrict dst, const char *__restrict src,
diff --git a/test/src/string/strcspn_test.cpp b/test/src/string/strcspn_test.cpp
index d83b3cf..ec98f72 100644
--- a/test/src/string/strcspn_test.cpp
+++ b/test/src/string/strcspn_test.cpp
@@ -48,3 +48,7 @@
   EXPECT_EQ(LIBC_NAMESPACE::strcspn("aaaa", "aa"), size_t{0});
   EXPECT_EQ(LIBC_NAMESPACE::strcspn("aaaa", "baa"), size_t{0});
 }
+
+TEST(LlvmLibcStrCSpnTest, TopBitSet) { // 0x80 (top bit set) is at index 5
+  EXPECT_EQ(LIBC_NAMESPACE::strcspn("hello\x80world", "\x80"), size_t{5});
+}
diff --git a/test/src/string/strpbrk_test.cpp b/test/src/string/strpbrk_test.cpp
index fbe14da..cc80246 100644
--- a/test/src/string/strpbrk_test.cpp
+++ b/test/src/string/strpbrk_test.cpp
@@ -60,3 +60,7 @@
 TEST(LlvmLibcStrPBrkTest, FindsFirstInBreakset) {
   EXPECT_STREQ(LIBC_NAMESPACE::strpbrk("12345", "34"), "345");
 }
+
+TEST(LlvmLibcStrPBrkTest, TopBitSet) { // only 0x80 of the breakset occurs
+  EXPECT_STREQ(LIBC_NAMESPACE::strpbrk("hello\x80world", "\x80 "), "\x80world");
+}
diff --git a/test/src/string/strsep_test.cpp b/test/src/string/strsep_test.cpp
index e2a5d52..553edd9 100644
--- a/test/src/string/strsep_test.cpp
+++ b/test/src/string/strsep_test.cpp
@@ -61,6 +61,14 @@
   ASSERT_EQ(LIBC_NAMESPACE::strsep(&string, ":"), nullptr);
 }
 
+TEST(LlvmLibcStrsepTest, TopBitSet) { // delimiter 0x80 has the top bit set
+  char top_bit_set_str[] = "hello\x80world";
+  char *p = top_bit_set_str; // cursor handed to strsep() as &p
+  ASSERT_STREQ(LIBC_NAMESPACE::strsep(&p, "\x80"), "hello");
+  ASSERT_STREQ(LIBC_NAMESPACE::strsep(&p, "\x80"), "world");
+  ASSERT_EQ(LIBC_NAMESPACE::strsep(&p, "\x80"), nullptr); // input exhausted
+}
+
 #if defined(LIBC_ADD_NULL_CHECKS)
 
 TEST(LlvmLibcStrsepTest, CrashOnNullPtr) {
diff --git a/test/src/string/strspn_test.cpp b/test/src/string/strspn_test.cpp
index 82f9b2a..813612f 100644
--- a/test/src/string/strspn_test.cpp
+++ b/test/src/string/strspn_test.cpp
@@ -85,6 +85,10 @@
   EXPECT_EQ(LIBC_NAMESPACE::strspn("aaaa", "aa"), size_t{4});
 }
 
+TEST(LlvmLibcStrSpnTest, TopBitSet) { // span ends at 'w', right after 0x80
+  EXPECT_EQ(LIBC_NAMESPACE::strspn("hello\x80world", "helo\x80rld"), size_t{6});
+}
+
 #if defined(LIBC_ADD_NULL_CHECKS)
 
 TEST(LlvmLibcStrSpnTest, CrashOnNullPtr) {
diff --git a/test/src/string/strtok_r_test.cpp b/test/src/string/strtok_r_test.cpp
index a19390d..8c4d3c3 100644
--- a/test/src/string/strtok_r_test.cpp
+++ b/test/src/string/strtok_r_test.cpp
@@ -131,3 +131,11 @@
   ASSERT_EQ(LIBC_NAMESPACE::strtok_r(nullptr, ":", &reserve), nullptr);
   ASSERT_EQ(LIBC_NAMESPACE::strtok_r(nullptr, ":", &reserve), nullptr);
 }
+
+TEST(LlvmLibcStrTokReentrantTest, TopBitSet) { // delimiter 0x80: top bit set
+  char top_bit_set_str[] = "hello\x80world";
+  char *p; // save-state pointer handed to strtok_r() as &p
+  ASSERT_STREQ(LIBC_NAMESPACE::strtok_r(top_bit_set_str, "\x80", &p), "hello");
+  ASSERT_STREQ(LIBC_NAMESPACE::strtok_r(nullptr, "\x80", &p), "world");
+  ASSERT_EQ(LIBC_NAMESPACE::strtok_r(nullptr, "\x80", &p), nullptr); // no more
+}
diff --git a/test/src/string/strtok_test.cpp b/test/src/string/strtok_test.cpp
index 76efedd..3c097fd 100644
--- a/test/src/string/strtok_test.cpp
+++ b/test/src/string/strtok_test.cpp
@@ -83,3 +83,10 @@
   ASSERT_EQ(LIBC_NAMESPACE::strtok(nullptr, ":"), nullptr);
   ASSERT_EQ(LIBC_NAMESPACE::strtok(nullptr, ":"), nullptr);
 }
+
+TEST(LlvmLibcStrTokTest, TopBitSet) { // delimiter 0x80 has the top bit set
+  char top_bit_set_str[] = "hello\x80world";
+  ASSERT_STREQ(LIBC_NAMESPACE::strtok(top_bit_set_str, "\x80"), "hello");
+  ASSERT_STREQ(LIBC_NAMESPACE::strtok(nullptr, "\x80"), "world");
+  ASSERT_EQ(LIBC_NAMESPACE::strtok(nullptr, "\x80"), nullptr); // no tokens left
+}