[libc] fix strtointeger hex prefix parsing

Fix edge case where "0x" would be considered a complete hexadecimal
number for purposes of str_end. Now the hexadecimal prefix needs a valid
digit after it, else just the 0 will be counted as the number.

Reviewed By: sivachandra

Differential Revision: https://reviews.llvm.org/D109084

GitOrigin-RevId: 37ce7349f7e95ef19cd6169e70414286b33114cb
diff --git a/src/__support/str_conv_utils.h b/src/__support/str_conv_utils.h
index adde158..fb87ca0 100644
--- a/src/__support/str_conv_utils.h
+++ b/src/__support/str_conv_utils.h
@@ -34,18 +34,25 @@
   return 0;
 }
 
+// checks if the next 3 characters of the string pointer are the start of a
+// hexadecimal number. Does not advance the string pointer.
+static inline bool is_hex_start(const char *__restrict src) {
+  return *src == '0' && (*(src + 1) | 32) == 'x' && isalnum(*(src + 2)) &&
+         b36_char_to_int(*(src + 2)) < 16;
+}
+
 // Takes the address of the string pointer and parses the base from the start of
 // it. This will advance the string pointer.
 static inline int infer_base(const char *__restrict *__restrict src) {
-  if (**src == '0') {
+  if (is_hex_start(*src)) {
+    (*src) += 2;
+    return 16;
+  } else if (**src == '0') {
     ++(*src);
-    if ((**src | 32) == 'x') {
-      ++(*src);
-      return 16;
-    }
     return 8;
+  } else {
+    return 10;
   }
-  return 10;
 }
 
 // Takes a pointer to a string, a pointer to a string pointer, and the base to
@@ -71,7 +78,7 @@
 
   if (base == 0) {
     base = infer_base(&src);
-  } else if (base == 16 && *src == '0' && (*(src + 1) | 32) == 'x') {
+  } else if (base == 16 && is_hex_start(src)) {
     src = src + 2;
   }
 
diff --git a/test/src/stdlib/strtol_test.cpp b/test/src/stdlib/strtol_test.cpp
index 2999763..1d91b8a 100644
--- a/test/src/stdlib/strtol_test.cpp
+++ b/test/src/stdlib/strtol_test.cpp
@@ -261,6 +261,38 @@
   ASSERT_EQ(__llvm_libc::strtol(yes_prefix, &str_end, 16), 0x456defl);
   ASSERT_EQ(errno, 0);
   EXPECT_EQ(str_end - yes_prefix, ptrdiff_t(8));
+
+  const char *letter_after_prefix = "0xabc123";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtol(letter_after_prefix, &str_end, 16), 0xabc123l);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - letter_after_prefix, ptrdiff_t(8));
+}
+
+TEST(LlvmLibcStrToLTest, MessyBaseSixteenDecode) {
+  char *str_end = nullptr;
+
+  const char *just_prefix = "0x";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtol(just_prefix, &str_end, 16), 0l);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - just_prefix, ptrdiff_t(1));
+
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtol(just_prefix, &str_end, 0), 0l);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - just_prefix, ptrdiff_t(1));
+
+  const char *prefix_with_x_after = "0xx";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtol(prefix_with_x_after, &str_end, 16), 0l);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - prefix_with_x_after, ptrdiff_t(1));
+
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtol(prefix_with_x_after, &str_end, 0), 0l);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - prefix_with_x_after, ptrdiff_t(1));
 }
 
 TEST(LlvmLibcStrToLTest, AutomaticBaseSelection) {
diff --git a/test/src/stdlib/strtoll_test.cpp b/test/src/stdlib/strtoll_test.cpp
index 8e3ce85..03e6e2d 100644
--- a/test/src/stdlib/strtoll_test.cpp
+++ b/test/src/stdlib/strtoll_test.cpp
@@ -287,6 +287,39 @@
   ASSERT_EQ(__llvm_libc::strtoll(yes_prefix, &str_end, 16), 0x456defll);
   ASSERT_EQ(errno, 0);
   EXPECT_EQ(str_end - yes_prefix, ptrdiff_t(8));
+
+  const char *letter_after_prefix = "0xabc123";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(letter_after_prefix, &str_end, 16),
+            0xabc123ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - letter_after_prefix, ptrdiff_t(8));
+}
+
+TEST(LlvmLibcStrToLLTest, MessyBaseSixteenDecode) {
+  char *str_end = nullptr;
+
+  const char *just_prefix = "0x";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(just_prefix, &str_end, 16), 0ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - just_prefix, ptrdiff_t(1));
+
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(just_prefix, &str_end, 0), 0ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - just_prefix, ptrdiff_t(1));
+
+  const char *prefix_with_x_after = "0xx";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(prefix_with_x_after, &str_end, 16), 0ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - prefix_with_x_after, ptrdiff_t(1));
+
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(prefix_with_x_after, &str_end, 0), 0ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - prefix_with_x_after, ptrdiff_t(1));
 }
 
 TEST(LlvmLibcStrToLLTest, AutomaticBaseSelection) {
diff --git a/test/src/stdlib/strtoul_test.cpp b/test/src/stdlib/strtoul_test.cpp
index 7b08a39..07976b1 100644
--- a/test/src/stdlib/strtoul_test.cpp
+++ b/test/src/stdlib/strtoul_test.cpp
@@ -255,6 +255,39 @@
   ASSERT_EQ(__llvm_libc::strtoul(yes_prefix, &str_end, 16), 0x456deful);
   ASSERT_EQ(errno, 0);
   EXPECT_EQ(str_end - yes_prefix, ptrdiff_t(8));
+
+  const char *letter_after_prefix = "0xabc123";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoul(letter_after_prefix, &str_end, 16),
+            0xabc123ul);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - letter_after_prefix, ptrdiff_t(8));
+}
+
+TEST(LlvmLibcStrToULTest, MessyBaseSixteenDecode) {
+  char *str_end = nullptr;
+
+  const char *just_prefix = "0x";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoul(just_prefix, &str_end, 16), 0ul);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - just_prefix, ptrdiff_t(1));
+
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoul(just_prefix, &str_end, 0), 0ul);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - just_prefix, ptrdiff_t(1));
+
+  const char *prefix_with_x_after = "0xx";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoul(prefix_with_x_after, &str_end, 16), 0ul);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - prefix_with_x_after, ptrdiff_t(1));
+
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoul(prefix_with_x_after, &str_end, 0), 0ul);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - prefix_with_x_after, ptrdiff_t(1));
 }
 
 TEST(LlvmLibcStrToULTest, AutomaticBaseSelection) {
diff --git a/test/src/stdlib/strtoull_test.cpp b/test/src/stdlib/strtoull_test.cpp
index 8a2ca11..9536da2 100644
--- a/test/src/stdlib/strtoull_test.cpp
+++ b/test/src/stdlib/strtoull_test.cpp
@@ -263,6 +263,39 @@
   ASSERT_EQ(__llvm_libc::strtoull(yes_prefix, &str_end, 16), 0x456defull);
   ASSERT_EQ(errno, 0);
   EXPECT_EQ(str_end - yes_prefix, ptrdiff_t(8));
+
+  const char *letter_after_prefix = "0xabc123";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoull(letter_after_prefix, &str_end, 16),
+            0xabc123ull);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - letter_after_prefix, ptrdiff_t(8));
+}
+
+TEST(LlvmLibcStrToULLTest, MessyBaseSixteenDecode) {
+  char *str_end = nullptr;
+
+  const char *just_prefix = "0x";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoull(just_prefix, &str_end, 16), 0ull);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - just_prefix, ptrdiff_t(1));
+
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoull(just_prefix, &str_end, 0), 0ull);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - just_prefix, ptrdiff_t(1));
+
+  const char *prefix_with_x_after = "0xx";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoull(prefix_with_x_after, &str_end, 16), 0ull);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - prefix_with_x_after, ptrdiff_t(1));
+
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoull(prefix_with_x_after, &str_end, 0), 0ull);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - prefix_with_x_after, ptrdiff_t(1));
 }
 
 TEST(LlvmLibcStrToULLTest, AutomaticBaseSelection) {