[libc++] Fix ambiguous call in {ranges, std}::count (#122529)

This PR fixes an ambiguous call encountered when using the
`std::ranges::count` and `std::count` algorithms on a `vector<bool>`
that has a small `size_type`.

The ambiguity arises from integral promotions during the internal
bitwise arithmetic of the `count` algorithms for small integral types.
This results in multiple viable candidates:
`__libcpp_popcount(unsigned)`, `__libcpp_popcount(unsigned long)`, and
`__libcpp_popcount(unsigned long long)`, leading to an ambiguous call
error. To resolve this ambiguity, we introduce a dispatcher function,
`__popcount`, which directs calls to the appropriate overloads of
`__libcpp_popcount`. This closes #122528.
diff --git a/libcxx/include/__algorithm/count.h b/libcxx/include/__algorithm/count.h
index cd91257..0cbe9b6 100644
--- a/libcxx/include/__algorithm/count.h
+++ b/libcxx/include/__algorithm/count.h
@@ -55,18 +55,18 @@
   if (__first.__ctz_ != 0) {
     __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_);
     __storage_type __dn    = std::min(__clz_f, __n);
-    __storage_type __m     = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
-    __r                    = std::__libcpp_popcount(std::__invert_if<!_ToCount>(*__first.__seg_) & __m);
+    __storage_type __m     = std::__middle_mask<__storage_type>(__clz_f - __dn, __first.__ctz_);
+    __r                    = std::__popcount(__storage_type(std::__invert_if<!_ToCount>(*__first.__seg_) & __m));
     __n -= __dn;
     ++__first.__seg_;
   }
   // do middle whole words
   for (; __n >= __bits_per_word; ++__first.__seg_, __n -= __bits_per_word)
-    __r += std::__libcpp_popcount(std::__invert_if<!_ToCount>(*__first.__seg_));
+    __r += std::__popcount(std::__invert_if<!_ToCount>(*__first.__seg_));
   // do last partial word
   if (__n > 0) {
-    __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
-    __r += std::__libcpp_popcount(std::__invert_if<!_ToCount>(*__first.__seg_) & __m);
+    __storage_type __m = std::__trailing_mask<__storage_type>(__bits_per_word - __n);
+    __r += std::__popcount(__storage_type(std::__invert_if<!_ToCount>(*__first.__seg_) & __m));
   }
   return __r;
 }
diff --git a/libcxx/include/__bit/popcount.h b/libcxx/include/__bit/popcount.h
index 5cf0a01..b1d93ca 100644
--- a/libcxx/include/__bit/popcount.h
+++ b/libcxx/include/__bit/popcount.h
@@ -15,6 +15,7 @@
 #include <__bit/rotate.h>
 #include <__concepts/arithmetic.h>
 #include <__config>
+#include <__type_traits/is_unsigned.h>
 #include <limits>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -38,31 +39,48 @@
   return __builtin_popcountll(__x);
 }
 
+template <class _Tp> // counts the set bits of __t by forwarding to one fixed-width __libcpp_popcount overload
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __popcount_impl(_Tp __t) _NOEXCEPT {
+  if _LIBCPP_CONSTEXPR (sizeof(_Tp) <= sizeof(unsigned int)) { // explicit casts pick exactly one overload each
+    return std::__libcpp_popcount(static_cast<unsigned int>(__t));
+  } else if _LIBCPP_CONSTEXPR (sizeof(_Tp) <= sizeof(unsigned long)) {
+    return std::__libcpp_popcount(static_cast<unsigned long>(__t));
+  } else if _LIBCPP_CONSTEXPR (sizeof(_Tp) <= sizeof(unsigned long long)) {
+    return std::__libcpp_popcount(static_cast<unsigned long long>(__t));
+  } else { // _Tp is wider than unsigned long long: add up one unsigned long long-sized chunk at a time
+#if _LIBCPP_STD_VER == 11 // recursive single-return form; presumably needed for C++11 constexpr rules (confirm)
+    return __t != 0 ? std::__libcpp_popcount(static_cast<unsigned long long>(__t)) + // low chunk
+                          std::__popcount_impl<_Tp>(__t >> numeric_limits<unsigned long long>::digits) // rest
+                    : 0; // nothing left to count
+#else // loop form of the same chunked sum
+    int __ret = 0;
+    while (__t != 0) {
+      __ret += std::__libcpp_popcount(static_cast<unsigned long long>(__t)); // count the low chunk
+      __t >>= std::numeric_limits<unsigned long long>::digits; // discard the chunk just counted
+    }
+    return __ret;
+#endif
+  }
+}
+
+template <class _Tp> // internal dispatcher: returns the number of set bits in __t for any unsigned _Tp
+[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __popcount(_Tp __t) _NOEXCEPT {
+  static_assert(is_unsigned<_Tp>::value, "__popcount only works with unsigned types");
+#if __has_builtin(__builtin_popcountg) // TODO (LLVM 21): This can be dropped once we only support Clang >= 19.
+  return __builtin_popcountg(__t); // builtin receives __t unconverted, so no overload choice is involved
+#else // builtin not available
+  return std::__popcount_impl(__t); // fall back to the sizeof-based overload selection
+#endif
+}
+
 #if _LIBCPP_STD_VER >= 20
 
 template <__libcpp_unsigned_integer _Tp>
 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr int popcount(_Tp __t) noexcept {
-#  if __has_builtin(__builtin_popcountg)
-  return __builtin_popcountg(__t);
-#  else  // __has_builtin(__builtin_popcountg)
-  if (sizeof(_Tp) <= sizeof(unsigned int))
-    return std::__libcpp_popcount(static_cast<unsigned int>(__t));
-  else if (sizeof(_Tp) <= sizeof(unsigned long))
-    return std::__libcpp_popcount(static_cast<unsigned long>(__t));
-  else if (sizeof(_Tp) <= sizeof(unsigned long long))
-    return std::__libcpp_popcount(static_cast<unsigned long long>(__t));
-  else {
-    int __ret = 0;
-    while (__t != 0) {
-      __ret += std::__libcpp_popcount(static_cast<unsigned long long>(__t));
-      __t >>= numeric_limits<unsigned long long>::digits;
-    }
-    return __ret;
-  }
-#  endif // __has_builtin(__builtin_popcountg)
+  return std::__popcount(__t); // width dispatch now lives in the internal __popcount helper
 }
 
-#endif // _LIBCPP_STD_VER >= 20
+#endif
 
 _LIBCPP_END_NAMESPACE_STD
 
diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.count/count.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.count/count.pass.cpp
index 7250c49..7bc61a9 100644
--- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.count/count.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.count/count.pass.cpp
@@ -15,12 +15,14 @@
 
 // ADDITIONAL_COMPILE_FLAGS(has-fconstexpr-steps): -fconstexpr-steps=20000000
 // ADDITIONAL_COMPILE_FLAGS(has-fconstexpr-ops-limit): -fconstexpr-ops-limit=80000000
+// XFAIL: FROZEN-CXX03-HEADERS-FIXME
 
 #include <algorithm>
 #include <cassert>
 #include <cstddef>
 #include <vector>
 
+#include "sized_allocator.h"
 #include "test_macros.h"
 #include "test_iterators.h"
 #include "type_algorithms.h"
@@ -39,16 +41,47 @@
 TEST_CONSTEXPR_CXX20 bool test() {
   types::for_each(types::cpp17_input_iterator_list<const int*>(), Test());
 
-  if (TEST_STD_AT_LEAST_20_OR_RUNTIME_EVALUATED) {
-    std::vector<bool> vec(256 + 64);
-    for (ptrdiff_t i = 0; i != 256; ++i) {
-      for (size_t offset = 0; offset != 64; ++offset) {
-        std::fill(vec.begin(), vec.end(), false);
-        std::fill(vec.begin() + offset, vec.begin() + i + offset, true);
-        assert(std::count(vec.begin() + offset, vec.begin() + offset + 256, true) == i);
-        assert(std::count(vec.begin() + offset, vec.begin() + offset + 256, false) == 256 - i);
+  // Tests for std::count with std::vector<bool>::iterator optimizations.
+  {
+    { // check that vector<bool>::iterator optimization works as expected
+      std::vector<bool> vec(256 + 64);
+      for (ptrdiff_t i = 0; i != 256; ++i) {
+        for (size_t offset = 0; offset != 64; ++offset) {
+          std::fill(vec.begin(), vec.end(), false);
+          std::fill(vec.begin() + offset, vec.begin() + i + offset, true);
+          assert(std::count(vec.begin() + offset, vec.begin() + offset + 256, true) == i);
+          assert(std::count(vec.begin() + offset, vec.begin() + offset + 256, false) == 256 - i);
+        }
       }
     }
+
+    // Fix std::count for std::vector<bool> with small storage types, e.g., std::uint16_t, unsigned short.
+    // See https://github.com/llvm/llvm-project/issues/122528
+    {
+      using Alloc = sized_allocator<bool, std::uint8_t, std::int8_t>;
+      std::vector<bool, Alloc> in(100, true, Alloc(1));
+      assert(std::count(in.begin(), in.end(), true) == 100);
+    }
+    {
+      using Alloc = sized_allocator<bool, std::uint16_t, std::int16_t>;
+      std::vector<bool, Alloc> in(199, true, Alloc(1));
+      assert(std::count(in.begin(), in.end(), true) == 199);
+    }
+    {
+      using Alloc = sized_allocator<bool, unsigned short, short>;
+      std::vector<bool, Alloc> in(200, true, Alloc(1));
+      assert(std::count(in.begin(), in.end(), true) == 200);
+    }
+    {
+      using Alloc = sized_allocator<bool, std::uint32_t, std::int32_t>;
+      std::vector<bool, Alloc> in(205, true, Alloc(1));
+      assert(std::count(in.begin(), in.end(), true) == 205);
+    }
+    {
+      using Alloc = sized_allocator<bool, std::uint64_t, std::int64_t>;
+      std::vector<bool, Alloc> in(257, true, Alloc(1));
+      assert(std::count(in.begin(), in.end(), true) == 257);
+    }
   }
 
   return true;
diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.count/ranges.count.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.count/ranges.count.pass.cpp
index 6030bed..ab9d6a0 100644
--- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.count/ranges.count.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.count/ranges.count.pass.cpp
@@ -29,6 +29,7 @@
 #include <ranges>
 #include <vector>
 
+#include "sized_allocator.h"
 #include "almost_satisfies_types.h"
 #include "test_iterators.h"
 
@@ -67,13 +68,13 @@
   {
     // simple test
     {
-      int a[] = {1, 2, 3, 4};
+      int a[]                               = {1, 2, 3, 4};
       std::same_as<std::ptrdiff_t> auto ret = std::ranges::count(It(a), Sent(It(a + 4)), 3);
       assert(ret == 1);
     }
     {
-      int a[] = {1, 2, 3, 4};
-      auto range = std::ranges::subrange(It(a), Sent(It(a + 4)));
+      int a[]                               = {1, 2, 3, 4};
+      auto range                            = std::ranges::subrange(It(a), Sent(It(a + 4)));
       std::same_as<std::ptrdiff_t> auto ret = std::ranges::count(range, 3);
       assert(ret == 1);
     }
@@ -83,13 +84,13 @@
     // check that an empty range works
     {
       std::array<int, 0> a = {};
-      auto ret = std::ranges::count(It(a.data()), Sent(It(a.data() + a.size())), 1);
+      auto ret             = std::ranges::count(It(a.data()), Sent(It(a.data() + a.size())), 1);
       assert(ret == 0);
     }
     {
       std::array<int, 0> a = {};
-      auto range = std::ranges::subrange(It(a.data()), Sent(It(a.data() + a.size())));
-      auto ret = std::ranges::count(range, 1);
+      auto range           = std::ranges::subrange(It(a.data()), Sent(It(a.data() + a.size())));
+      auto ret             = std::ranges::count(range, 1);
       assert(ret == 0);
     }
   }
@@ -98,13 +99,13 @@
     // check that a range with a single element works
     {
       std::array a = {2};
-      auto ret = std::ranges::count(It(a.data()), Sent(It(a.data() + a.size())), 2);
+      auto ret     = std::ranges::count(It(a.data()), Sent(It(a.data() + a.size())), 2);
       assert(ret == 1);
     }
     {
       std::array a = {2};
-      auto range = std::ranges::subrange(It(a.data()), Sent(It(a.data() + a.size())));
-      auto ret = std::ranges::count(range, 2);
+      auto range   = std::ranges::subrange(It(a.data()), Sent(It(a.data() + a.size())));
+      auto ret     = std::ranges::count(range, 2);
       assert(ret == 1);
     }
   }
@@ -113,13 +114,13 @@
     // check that 0 is returned with no match
     {
       std::array a = {1, 1, 1};
-      auto ret = std::ranges::count(It(a.data()), Sent(It(a.data() + a.size())), 0);
+      auto ret     = std::ranges::count(It(a.data()), Sent(It(a.data() + a.size())), 0);
       assert(ret == 0);
     }
     {
       std::array a = {1, 1, 1};
-      auto range = std::ranges::subrange(It(a.data()), Sent(It(a.data() + a.size())));
-      auto ret = std::ranges::count(range, 0);
+      auto range   = std::ranges::subrange(It(a.data()), Sent(It(a.data() + a.size())));
+      auto ret     = std::ranges::count(range, 0);
       assert(ret == 0);
     }
   }
@@ -128,13 +129,13 @@
     // check that more than one element is counted
     {
       std::array a = {3, 3, 4, 3, 3};
-      auto ret = std::ranges::count(It(a.data()), Sent(It(a.data() + a.size())), 3);
+      auto ret     = std::ranges::count(It(a.data()), Sent(It(a.data() + a.size())), 3);
       assert(ret == 4);
     }
     {
       std::array a = {3, 3, 4, 3, 3};
-      auto range = std::ranges::subrange(It(a.data()), Sent(It(a.data() + a.size())));
-      auto ret = std::ranges::count(range, 3);
+      auto range   = std::ranges::subrange(It(a.data()), Sent(It(a.data() + a.size())));
+      auto ret     = std::ranges::count(range, 3);
       assert(ret == 4);
     }
   }
@@ -143,13 +144,13 @@
     // check that all elements are counted
     {
       std::array a = {5, 5, 5, 5};
-      auto ret = std::ranges::count(It(a.data()), Sent(It(a.data() + a.size())), 5);
+      auto ret     = std::ranges::count(It(a.data()), Sent(It(a.data() + a.size())), 5);
       assert(ret == 4);
     }
     {
       std::array a = {5, 5, 5, 5};
-      auto range = std::ranges::subrange(It(a.data()), Sent(It(a.data() + a.size())));
-      auto ret = std::ranges::count(range, 5);
+      auto range   = std::ranges::subrange(It(a.data()), Sent(It(a.data() + a.size())));
+      auto ret     = std::ranges::count(range, 5);
       assert(ret == 4);
     }
   }
@@ -167,12 +168,12 @@
   {
     // check that projections are used properly and that they are called with the iterator directly
     {
-      int a[] = {1, 2, 3, 4};
+      int a[]  = {1, 2, 3, 4};
       auto ret = std::ranges::count(a, a + 4, a + 3, [](int& i) { return &i; });
       assert(ret == 1);
     }
     {
-      int a[] = {1, 2, 3, 4};
+      int a[]  = {1, 2, 3, 4};
       auto ret = std::ranges::count(a, a + 3, [](int& i) { return &i; });
       assert(ret == 1);
     }
@@ -180,8 +181,10 @@
 
   {
     // check that std::invoke is used
-    struct S { int i; };
-    S a[] = { S{1}, S{3}, S{2} };
+    struct S {
+      int i;
+    };
+    S a[]                                 = {S{1}, S{3}, S{2}};
     std::same_as<std::ptrdiff_t> auto ret = std::ranges::count(a, 4, &S::i);
     assert(ret == 0);
   }
@@ -189,16 +192,22 @@
   {
     // count invocations of the projection
     {
-      int a[] = {1, 2, 3, 4};
+      int a[]              = {1, 2, 3, 4};
       int projection_count = 0;
-      auto ret = std::ranges::count(a, a + 4, 2, [&](int i) { ++projection_count; return i; });
+      auto ret             = std::ranges::count(a, a + 4, 2, [&](int i) {
+        ++projection_count;
+        return i;
+      });
       assert(ret == 1);
       assert(projection_count == 4);
     }
     {
-      int a[] = {1, 2, 3, 4};
+      int a[]              = {1, 2, 3, 4};
       int projection_count = 0;
-      auto ret = std::ranges::count(a, 2, [&](int i) { ++projection_count; return i; });
+      auto ret             = std::ranges::count(a, 2, [&](int i) {
+        ++projection_count;
+        return i;
+      });
       assert(ret == 1);
       assert(projection_count == 4);
     }
@@ -208,7 +217,7 @@
     // check that an immobile type works
     struct NonMovable {
       NonMovable(const NonMovable&) = delete;
-      NonMovable(NonMovable&&) = delete;
+      NonMovable(NonMovable&&)      = delete;
       constexpr NonMovable(int i_) : i(i_) {}
       int i;
 
@@ -216,12 +225,12 @@
     };
     {
       NonMovable a[] = {9, 8, 4, 3};
-      auto ret = std::ranges::count(a, a + 4, NonMovable(8));
+      auto ret       = std::ranges::count(a, a + 4, NonMovable(8));
       assert(ret == 1);
     }
     {
       NonMovable a[] = {9, 8, 4, 3};
-      auto ret = std::ranges::count(a, NonMovable(8));
+      auto ret       = std::ranges::count(a, NonMovable(8));
       assert(ret == 1);
     }
   }
@@ -230,7 +239,7 @@
     // check that difference_type is used
     struct DiffTypeIterator {
       using difference_type = signed char;
-      using value_type = int;
+      using value_type      = int;
 
       int* it = nullptr;
 
@@ -238,7 +247,10 @@
       constexpr DiffTypeIterator(int* i) : it(i) {}
 
       constexpr int& operator*() const { return *it; }
-      constexpr DiffTypeIterator& operator++() { ++it; return *this; }
+      constexpr DiffTypeIterator& operator++() {
+        ++it;
+        return *this;
+      }
       constexpr void operator++(int) { ++it; }
 
       bool operator==(const DiffTypeIterator&) const = default;
@@ -251,23 +263,54 @@
       assert(ret == 1);
     }
     {
-      int a[] = {5, 5, 4, 3, 2, 1};
+      int a[]                                      = {5, 5, 4, 3, 2, 1};
       auto range = std::ranges::subrange(DiffTypeIterator(a), DiffTypeIterator(a + 6));
       std::same_as<signed char> decltype(auto) ret = std::ranges::count(range, 4);
       assert(ret == 1);
     }
   }
 
-  { // check that __bit_iterator optimizations work as expected
-    std::vector<bool> vec(256 + 64);
-    for (ptrdiff_t i = 0; i != 256; ++i) {
-      for (size_t offset = 0; offset != 64; ++offset) {
-        std::fill(vec.begin(), vec.end(), false);
-        std::fill(vec.begin() + offset, vec.begin() + i + offset, true);
-        assert(std::ranges::count(vec.begin() + offset, vec.begin() + offset + 256, true) == i);
-        assert(std::ranges::count(vec.begin() + offset, vec.begin() + offset + 256, false) == 256 - i);
+  // Tests for std::ranges::count with std::vector<bool>::iterator optimizations.
+  {
+    { // check that vector<bool>::iterator optimization works as expected
+      std::vector<bool> vec(256 + 64);
+      for (ptrdiff_t i = 0; i != 256; ++i) {
+        for (size_t offset = 0; offset != 64; ++offset) {
+          std::fill(vec.begin(), vec.end(), false);
+          std::fill(vec.begin() + offset, vec.begin() + i + offset, true);
+          assert(std::ranges::count(vec.begin() + offset, vec.begin() + offset + 256, true) == i);
+          assert(std::ranges::count(vec.begin() + offset, vec.begin() + offset + 256, false) == 256 - i);
+        }
       }
     }
+
+    // Fix std::ranges::count for std::vector<bool> with small storage types, e.g., std::uint16_t, unsigned short.
+    // See https://github.com/llvm/llvm-project/issues/122528
+    {
+      using Alloc = sized_allocator<bool, std::uint8_t, std::int8_t>;
+      std::vector<bool, Alloc> in(100, true, Alloc(1));
+      assert(std::ranges::count(in, true) == 100);
+    }
+    {
+      using Alloc = sized_allocator<bool, std::uint16_t, std::int16_t>;
+      std::vector<bool, Alloc> in(199, true, Alloc(1));
+      assert(std::ranges::count(in, true) == 199);
+    }
+    {
+      using Alloc = sized_allocator<bool, unsigned short, short>;
+      std::vector<bool, Alloc> in(200, true, Alloc(1));
+      assert(std::ranges::count(in, true) == 200);
+    }
+    {
+      using Alloc = sized_allocator<bool, std::uint32_t, std::int32_t>;
+      std::vector<bool, Alloc> in(205, true, Alloc(1));
+      assert(std::ranges::count(in, true) == 205);
+    }
+    {
+      using Alloc = sized_allocator<bool, std::uint64_t, std::int64_t>;
+      std::vector<bool, Alloc> in(257, true, Alloc(1));
+      assert(std::ranges::count(in, true) == 257);
+    }
   }
 
   return true;