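[libc] Fix signed char issues in strsep()/strtok()/strtok_r(). (#156705) Bytes with the top bit set (e.g. '\x80') were converted from a possibly signed char directly to size_t when indexing the 256-entry delimiter bitset; they are now read as unsigned char. Also add the missing tests for all the related functions (even the ones that were already right), and add the missing bazel build rules. GitOrigin-RevId: ded5f433dedf4a3419548929ac3dfa826e11d2e1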
diff --git a/src/string/string_utils.h b/src/string/string_utils.h index 26e9add..1080348 100644 --- a/src/string/string_utils.h +++ b/src/string/string_utils.h
@@ -212,28 +212,28 @@ static_assert(CHAR_BIT == 8, "bitset of 256 assumes char is 8 bits"); cpp::bitset<256> delims; for (; *delimiter_string != '\0'; ++delimiter_string) - delims.set(static_cast<size_t>(*delimiter_string)); + delims.set(*reinterpret_cast<const unsigned char *>(delimiter_string)); - char *tok_start = src; + unsigned char *tok_start = reinterpret_cast<unsigned char *>(src); if constexpr (SkipDelim) - while (*tok_start != '\0' && delims.test(static_cast<size_t>(*tok_start))) + while (*tok_start != '\0' && delims.test(*tok_start)) ++tok_start; if (*tok_start == '\0' && SkipDelim) { *context = nullptr; return nullptr; } - char *tok_end = tok_start; - while (*tok_end != '\0' && !delims.test(static_cast<size_t>(*tok_end))) + unsigned char *tok_end = tok_start; + while (*tok_end != '\0' && !delims.test(*tok_end)) ++tok_end; if (*tok_end == '\0') { *context = nullptr; } else { *tok_end = '\0'; - *context = tok_end + 1; + *context = reinterpret_cast<char *>(tok_end + 1); } - return tok_start; + return reinterpret_cast<char *>(tok_start); } LIBC_INLINE size_t strlcpy(char *__restrict dst, const char *__restrict src,
diff --git a/test/src/string/strcspn_test.cpp b/test/src/string/strcspn_test.cpp index d83b3cf..ec98f72 100644 --- a/test/src/string/strcspn_test.cpp +++ b/test/src/string/strcspn_test.cpp
@@ -48,3 +48,7 @@ EXPECT_EQ(LIBC_NAMESPACE::strcspn("aaaa", "aa"), size_t{0}); EXPECT_EQ(LIBC_NAMESPACE::strcspn("aaaa", "baa"), size_t{0}); } + +TEST(LlvmLibcStrCSpnTest, TopBitSet) { + EXPECT_EQ(LIBC_NAMESPACE::strcspn("hello\x80world", "\x80"), size_t{5}); +}
diff --git a/test/src/string/strpbrk_test.cpp b/test/src/string/strpbrk_test.cpp index fbe14da..cc80246 100644 --- a/test/src/string/strpbrk_test.cpp +++ b/test/src/string/strpbrk_test.cpp
@@ -60,3 +60,7 @@ TEST(LlvmLibcStrPBrkTest, FindsFirstInBreakset) { EXPECT_STREQ(LIBC_NAMESPACE::strpbrk("12345", "34"), "345"); } + +TEST(LlvmLibcStrPBrkTest, TopBitSet) { + EXPECT_STREQ(LIBC_NAMESPACE::strpbrk("hello\x80world", "\x80 "), "\x80world"); +}
diff --git a/test/src/string/strsep_test.cpp b/test/src/string/strsep_test.cpp index e2a5d52..553edd9 100644 --- a/test/src/string/strsep_test.cpp +++ b/test/src/string/strsep_test.cpp
@@ -61,6 +61,14 @@ ASSERT_EQ(LIBC_NAMESPACE::strsep(&string, ":"), nullptr); } +TEST(LlvmLibcStrsepTest, TopBitSet) { + char top_bit_set_str[] = "hello\x80world"; + char *p = top_bit_set_str; + ASSERT_STREQ(LIBC_NAMESPACE::strsep(&p, "\x80"), "hello"); + ASSERT_STREQ(LIBC_NAMESPACE::strsep(&p, "\x80"), "world"); + ASSERT_EQ(LIBC_NAMESPACE::strsep(&p, "\x80"), nullptr); +} + #if defined(LIBC_ADD_NULL_CHECKS) TEST(LlvmLibcStrsepTest, CrashOnNullPtr) {
diff --git a/test/src/string/strspn_test.cpp b/test/src/string/strspn_test.cpp index 82f9b2a..813612f 100644 --- a/test/src/string/strspn_test.cpp +++ b/test/src/string/strspn_test.cpp
@@ -85,6 +85,10 @@ EXPECT_EQ(LIBC_NAMESPACE::strspn("aaaa", "aa"), size_t{4}); } +TEST(LlvmLibcStrSpnTest, TopBitSet) { + EXPECT_EQ(LIBC_NAMESPACE::strspn("hello\x80world", "helo\x80rld"), size_t{6}); +} + #if defined(LIBC_ADD_NULL_CHECKS) TEST(LlvmLibcStrSpnTest, CrashOnNullPtr) {
diff --git a/test/src/string/strtok_r_test.cpp b/test/src/string/strtok_r_test.cpp index a19390d..8c4d3c3 100644 --- a/test/src/string/strtok_r_test.cpp +++ b/test/src/string/strtok_r_test.cpp
@@ -131,3 +131,11 @@ ASSERT_EQ(LIBC_NAMESPACE::strtok_r(nullptr, ":", &reserve), nullptr); ASSERT_EQ(LIBC_NAMESPACE::strtok_r(nullptr, ":", &reserve), nullptr); } + +TEST(LlvmLibcStrTokReentrantTest, TopBitSet) { + char top_bit_set_str[] = "hello\x80world"; + char *p; + ASSERT_STREQ(LIBC_NAMESPACE::strtok_r(top_bit_set_str, "\x80", &p), "hello"); + ASSERT_STREQ(LIBC_NAMESPACE::strtok_r(nullptr, "\x80", &p), "world"); + ASSERT_EQ(LIBC_NAMESPACE::strtok_r(nullptr, "\x80", &p), nullptr); +}
diff --git a/test/src/string/strtok_test.cpp b/test/src/string/strtok_test.cpp index 76efedd..3c097fd 100644 --- a/test/src/string/strtok_test.cpp +++ b/test/src/string/strtok_test.cpp
@@ -83,3 +83,10 @@ ASSERT_EQ(LIBC_NAMESPACE::strtok(nullptr, ":"), nullptr); ASSERT_EQ(LIBC_NAMESPACE::strtok(nullptr, ":"), nullptr); } + +TEST(LlvmLibcStrTokTest, TopBitSet) { + char top_bit_set_str[] = "hello\x80world"; + ASSERT_STREQ(LIBC_NAMESPACE::strtok(top_bit_set_str, "\x80"), "hello"); + ASSERT_STREQ(LIBC_NAMESPACE::strtok(nullptr, "\x80"), "world"); + ASSERT_EQ(LIBC_NAMESPACE::strtok(nullptr, "\x80"), nullptr); +}