Optimize vector::assign for InputIterator-only pair inputs (#113852)
This PR optimizes the input iterator overload of `assign(_InputIterator,
_InputIterator)` in `std::vector<_Tp, _Allocator>` by directly assigning
to already initialized memory, rather than first destroying existing
elements and then constructing new ones. By eliminating unnecessary
destruction and construction, the proposed algorithm enhances the
performance by up to 2x for trivial element types (e.g.,
`std::vector<int>`), up to 2.6x for non-trivial element types like
`std::vector<std::string>`, and up to 3.4x for more complex non-trivial
types (e.g., `std::vector<std::vector<int>>`).
### Google Benchmarks
Benchmark tests (`libcxx/test/benchmarks/vector_operations.bench.cpp`)
were conducted for the `assign()` implementations before and after this
patch. The tests focused on trivial element types like
`std::vector<int>`, and non-trivial element types such as
`std::vector<std::string>` and `std::vector<std::vector<int>>`.
#### Before
```
-------------------------------------------------------------------------------------------------
Benchmark Time CPU Iterations
-------------------------------------------------------------------------------------------------
BM_AssignInputIterIter/vector_int/1024/1024 1157 ns 1169 ns 608188
BM_AssignInputIterIter<32>/vector_string/1024/1024 14559 ns 14710 ns 47277
BM_AssignInputIterIter<32>/vector_vector_int/1024/1024 26846 ns 27129 ns 25925
```
#### After
```
-------------------------------------------------------------------------------------------------
Benchmark Time CPU Iterations
-------------------------------------------------------------------------------------------------
BM_AssignInputIterIter/vector_int/1024/1024 561 ns 566 ns 1242251
BM_AssignInputIterIter<32>/vector_string/1024/1024 5604 ns 5664 ns 128365
BM_AssignInputIterIter<32>/vector_vector_int/1024/1024 7927 ns 8012 ns 88579
```
GitOrigin-RevId: 056153f36eca184f81969f5cd5c8cd967c935f96
diff --git a/docs/ReleaseNotes/20.rst b/docs/ReleaseNotes/20.rst
index 93d45f5..a72fd2d 100644
--- a/docs/ReleaseNotes/20.rst
+++ b/docs/ReleaseNotes/20.rst
@@ -69,6 +69,10 @@
- The ``_LIBCPP_ABI_BOUNDED_ITERATORS_IN_STD_ARRAY`` ABI configuration was added, which allows storing valid bounds
in ``std::array::iterator`` and detecting OOB accesses when the appropriate hardening mode is enabled.
+- The input iterator overload of `assign(_InputIterator, _InputIterator)` in `std::vector<_Tp, _Allocator>` has been
+ optimized, resulting in a performance improvement of up to 2x for trivial element types (e.g., `std::vector<int>`),
+ and up to 3.4x for non-trivial element types (e.g., `std::vector<std::vector<int>>`).
+
Deprecations and Removals
-------------------------
diff --git a/include/__vector/vector.h b/include/__vector/vector.h
index 4b9ba24..6ba7ba7 100644
--- a/include/__vector/vector.h
+++ b/include/__vector/vector.h
@@ -1003,9 +1003,15 @@
template <class _Iterator, class _Sentinel>
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void
vector<_Tp, _Allocator>::__assign_with_sentinel(_Iterator __first, _Sentinel __last) {
- clear();
- for (; __first != __last; ++__first)
- emplace_back(*__first);
+ pointer __cur = __begin_;
+ for (; __first != __last && __cur != __end_; ++__first, (void)++__cur)
+ *__cur = *__first;
+ if (__cur != __end_) {
+ __destruct_at_end(__cur);
+ } else {
+ for (; __first != __last; ++__first)
+ emplace_back(*__first);
+ }
}
template <class _Tp, class _Allocator>
diff --git a/test/benchmarks/ContainerBenchmarks.h b/test/benchmarks/ContainerBenchmarks.h
index 38e1177..458134c 100644
--- a/test/benchmarks/ContainerBenchmarks.h
+++ b/test/benchmarks/ContainerBenchmarks.h
@@ -14,8 +14,9 @@
#include <iterator>
#include <utility>
-#include "Utilities.h"
#include "benchmark/benchmark.h"
+#include "Utilities.h"
+#include "test_iterators.h"
namespace ContainerBenchmarks {
@@ -50,6 +51,19 @@
}
}
+template <std::size_t... sz, typename Container, typename GenInputs>
+void BM_AssignInputIterIter(benchmark::State& st, Container c, GenInputs gen) {
+ auto v = gen(1, sz...);
+ c.resize(st.range(0), v[0]);
+ auto in = gen(st.range(1), sz...);
+ benchmark::DoNotOptimize(&in);
+ benchmark::DoNotOptimize(&c);
+ for (auto _ : st) {
+ c.assign(cpp17_input_iterator(in.begin()), cpp17_input_iterator(in.end()));
+ benchmark::ClobberMemory();
+ }
+}
+
template <class Container>
void BM_ConstructSizeValue(benchmark::State& st, Container, typename Container::value_type const& val) {
const auto size = st.range(0);
diff --git a/test/benchmarks/GenerateInput.h b/test/benchmarks/GenerateInput.h
index db37661..67ce798 100644
--- a/test/benchmarks/GenerateInput.h
+++ b/test/benchmarks/GenerateInput.h
@@ -108,16 +108,29 @@
return inputs;
}
+inline std::vector<std::string> getRandomStringInputsWithLength(std::size_t N, std::size_t len) { // N-by-len
+ std::vector<std::string> inputs;
+ inputs.reserve(N);
+ for (std::size_t i = 0; i < N; ++i)
+ inputs.push_back(getRandomString(len));
+ return inputs;
+}
+
inline std::vector<std::string> getDuplicateStringInputs(std::size_t N) {
std::vector<std::string> inputs(N, getRandomString(1024));
return inputs;
}
inline std::vector<std::string> getRandomStringInputs(std::size_t N) {
- std::vector<std::string> inputs;
+ return getRandomStringInputsWithLength(N, 1024);
+}
+
+template <class IntT>
+std::vector<std::vector<IntT>> getRandomIntegerInputsWithLength(std::size_t N, std::size_t len) { // N-by-len
+ std::vector<std::vector<IntT>> inputs;
inputs.reserve(N);
for (std::size_t i = 0; i < N; ++i)
- inputs.push_back(getRandomString(1024));
+ inputs.push_back(getRandomIntegerInputs<IntT>(len));
return inputs;
}
diff --git a/test/benchmarks/vector_operations.bench.cpp b/test/benchmarks/vector_operations.bench.cpp
index 1855861..3a72eae 100644
--- a/test/benchmarks/vector_operations.bench.cpp
+++ b/test/benchmarks/vector_operations.bench.cpp
@@ -18,7 +18,6 @@
#include <vector>
#include "benchmark/benchmark.h"
-
#include "ContainerBenchmarks.h"
#include "GenerateInput.h"
@@ -79,4 +78,17 @@
BENCHMARK(bm_grow<std::unique_ptr<int>>);
BENCHMARK(bm_grow<std::deque<int>>);
+BENCHMARK_CAPTURE(BM_AssignInputIterIter, vector_int, std::vector<int>{}, getRandomIntegerInputs<int>)
+ ->Args({TestNumInputs, TestNumInputs});
+
+BENCHMARK_CAPTURE(
+ BM_AssignInputIterIter<32>, vector_string, std::vector<std::string>{}, getRandomStringInputsWithLength)
+ ->Args({TestNumInputs, TestNumInputs});
+
+BENCHMARK_CAPTURE(BM_AssignInputIterIter<100>,
+ vector_vector_int,
+ std::vector<std::vector<int>>{},
+ getRandomIntegerInputsWithLength<int>)
+ ->Args({TestNumInputs, TestNumInputs});
+
BENCHMARK_MAIN();