[libcxx] Optimize `std::generate_n` for segmented iterators (#164266) Part of #102817. This is a natural follow-up to #163006. We are forwarding `std::generate_n` to `std::__for_each_n` (`std::for_each_n` needs c++17), resulting in improved performance for segmented iterators. before: ``` std::generate_n(deque<int>)/32 17.5 ns 17.3 ns 40727273 std::generate_n(deque<int>)/50 25.7 ns 25.5 ns 26352941 std::generate_n(deque<int>)/1024 490 ns 487 ns 1445161 std::generate_n(deque<int>)/8192 3908 ns 3924 ns 179200 ``` after: ``` std::generate_n(deque<int>)/32 11.1 ns 11.0 ns 64000000 std::generate_n(deque<int>)/50 16.1 ns 16.0 ns 44800000 std::generate_n(deque<int>)/1024 291 ns 292 ns 2357895 std::generate_n(deque<int>)/8192 2269 ns 2250 ns 298667 ``` GitOrigin-RevId: c06ae43e26aa5cd472d0b25d5904c44d20e84067
diff --git a/docs/ReleaseNotes/22.rst b/docs/ReleaseNotes/22.rst index e95cbc0..25d33a9 100644 --- a/docs/ReleaseNotes/22.rst +++ b/docs/ReleaseNotes/22.rst
@@ -76,8 +76,8 @@ - The ``std::{fill, fill_n}`` and ``std::ranges::{fill, fill_n}`` algorithms have been optimized for segmented iterators, resulting in a performance improvement of at least 10x for ``std::deque<int>`` iterators and ``std::join_view<std::vector<std::vector<int>>>`` iterators. -- The ``std::generate`` algorithm has been optimized for segmented iterators, resulting in a performance improvement for - ``std::deque<short>`` and ``std::join_view<vector<vector<short>>>`` iterators. +- The ``std::generate`` and ``std::generate_n`` algorithms have been optimized for segmented iterators, resulting in a + performance improvement for ``std::deque<short>`` and ``std::join_view<vector<vector<short>>>`` iterators. Deprecations and Removals -------------------------
diff --git a/include/__algorithm/generate_n.h b/include/__algorithm/generate_n.h index f36403f..e9da133 100644 --- a/include/__algorithm/generate_n.h +++ b/include/__algorithm/generate_n.h
@@ -9,8 +9,10 @@ #ifndef _LIBCPP___ALGORITHM_GENERATE_N_H #define _LIBCPP___ALGORITHM_GENERATE_N_H +#include <__algorithm/for_each_n.h> #include <__config> -#include <__utility/convert_to_integral.h> +#include <__functional/identity.h> +#include <__utility/forward.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -21,11 +23,10 @@ template <class _OutputIterator, class _Size, class _Generator> inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator generate_n(_OutputIterator __first, _Size __orig_n, _Generator __gen) { - typedef decltype(std::__convert_to_integral(__orig_n)) _IntegralSize; - _IntegralSize __n = __orig_n; - for (; __n > 0; ++__first, (void)--__n) - *__first = __gen(); - return __first; + using __iter_ref = decltype(*__first); + __identity __proj; + auto __f = [&](__iter_ref __element) { std::forward<__iter_ref>(__element) = __gen(); }; + return std::__for_each_n(__first, __orig_n, __f, __proj); } _LIBCPP_END_NAMESPACE_STD
diff --git a/test/std/algorithms/alg.modifying.operations/alg.generate/generate_n.pass.cpp b/test/std/algorithms/alg.modifying.operations/alg.generate/generate_n.pass.cpp index 13fd1cb..525737c 100644 --- a/test/std/algorithms/alg.modifying.operations/alg.generate/generate_n.pass.cpp +++ b/test/std/algorithms/alg.modifying.operations/alg.generate/generate_n.pass.cpp
@@ -16,6 +16,7 @@ #include <algorithm> #include <cassert> +#include <deque> #include "test_iterators.h" #include "test_macros.h" @@ -71,12 +72,22 @@ test2<Iter, long double>(); } +void deque_test() { + int sizes[] = {0, 1, 2, 1023, 1024, 1025, 2047, 2048, 2049}; + for (const int size : sizes) { + std::deque<int> d(size); + std::generate_n(d.begin(), size, gen_test()); + assert(std::all_of(d.begin(), d.end(), [](int x) { return x == 2; })); + } +} + int main(int, char**) { test<forward_iterator<int*> >(); test<bidirectional_iterator<int*> >(); test<random_access_iterator<int*> >(); test<int*>(); + deque_test(); #if TEST_STD_VER > 17 static_assert(test_constexpr());