third-party/boost-math/include/boost/math/optimization/cma_es.hpp - llvm-project.git - Git at Google

 /*
  * Copyright Nick Thompson, 2024
  * Use, modification and distribution are subject to the
  * Boost Software License, Version 1.0. (See accompanying file
  * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  */
 #ifndef BOOST_MATH_OPTIMIZATION_CMA_ES_HPP
 #define BOOST_MATH_OPTIMIZATION_CMA_ES_HPP
 #include <atomic>
 #include <cmath>
 #include <iostream>
 #include <limits>
 #include <random>
 #include <sstream>
 #include <stdexcept>
 #include <utility>
 #include <vector>
 #include <boost/math/optimization/detail/common.hpp>
 #include <boost/math/tools/assert.hpp>
 #if __has_include(<Eigen/Dense>)
 #include <Eigen/Dense>
 #else
 #error "CMA-ES requires Eigen."
 #endif

 // Follows the notation in:
 // https://arxiv.org/pdf/1604.00772.pdf
 // This is a (hopefully) faithful reproduction of the pseudocode in the arxiv review
 // by Nikolaus Hansen.
 // Comments referring to equations all refer to this arxiv review.
 // A slide deck by the same author is given here:
 // http://www.cmap.polytechnique.fr/~nikolaus.hansen/CmaTutorialGecco2023-no-audio.pdf
 // which is also a very useful reference.

 #ifndef BOOST_MATH_DEBUG_CMA_ES
 #define BOOST_MATH_DEBUG_CMA_ES 0
 #endif

 namespace boost::math::optimization {

 template <typename ArgumentContainer> struct cma_es_parameters {
   using Real = typename ArgumentContainer::value_type;
   using DimensionlessReal = decltype(Real()/Real());
   ArgumentContainer lower_bounds;
   ArgumentContainer upper_bounds;
   size_t max_generations = 1000;
   ArgumentContainer const *initial_guess = nullptr;
   // In the reference, population size = \lambda.
   // If the population size is zero, it is set to equation (48) of the reference
   // and rounded up to the nearest multiple of threads:
   size_t population_size = 0;
   // In the reference, learning_rate = c_m:
   DimensionlessReal learning_rate = 1;
 };

 template <typename ArgumentContainer>
 void validate_cma_es_parameters(cma_es_parameters<ArgumentContainer> &params) {
   using Real = typename ArgumentContainer::value_type;
   using DimensionlessReal = decltype(Real()/Real());
   using std::isfinite;
   using std::isnan;
   using std::log;
   using std::ceil;
   using std::floor;

   std::ostringstream oss;
   detail::validate_bounds(params.lower_bounds, params.upper_bounds);
   if (params.initial_guess) {
     detail::validate_initial_guess(*params.initial_guess, params.lower_bounds, params.upper_bounds);
   }
   const size_t n = params.upper_bounds.size();
   // Equation 48 of the arxiv review:
   if (params.population_size == 0) {
     //auto tmp = 4.0 + floor(3*log(n));
     // But round to the nearest multiple of the thread count:
     //auto k = static_cast<size_t>(std::ceil(tmp/params.threads));
     //params.population_size = k*params.threads;
     params.population_size = static_cast<size_t>(4 + floor(3*log(n)));
   }
   if (params.learning_rate <= DimensionlessReal(0) || !isfinite(params.learning_rate)) {
     oss << __FILE__ << ":" << __LINE__ << ":" << __func__;
     oss << ": The learning rate must be > 0, but got " << params.learning_rate << ".";
     throw std::invalid_argument(oss.str());
   }
 }

 template <typename ArgumentContainer, class Func, class URBG>
 ArgumentContainer cma_es(
     const Func cost_function,
     cma_es_parameters<ArgumentContainer> &params,
     URBG &gen,
     std::invoke_result_t<Func, ArgumentContainer> target_value = std::numeric_limits<std::invoke_result_t<Func, ArgumentContainer>>::quiet_NaN(),
     std::atomic<bool> *cancellation = nullptr,
     std::atomic<std::invoke_result_t<Func, ArgumentContainer>> *current_minimum_cost = nullptr,
     std::vector<std::pair<ArgumentContainer, std::invoke_result_t<Func, ArgumentContainer>>> *queries = nullptr)
  {
   using Real = typename ArgumentContainer::value_type;
   using DimensionlessReal = decltype(Real()/Real());
   using ResultType = std::invoke_result_t<Func, ArgumentContainer>;
   using std::abs;
   using std::log;
   using std::exp;
   using std::pow;
   using std::min;
   using std::max;
   using std::sqrt;
   using std::isnan;
   using std::isfinite;
   using std::uniform_real_distribution;
   using std::normal_distribution;
   validate_cma_es_parameters(params);
   // n = dimension of problem:
   const size_t n = params.lower_bounds.size();
   std::atomic<bool> target_attained = false;
   std::atomic<ResultType> lowest_cost = std::numeric_limits<ResultType>::infinity();
   ArgumentContainer best_vector;
   // p_{c} := evolution path, equation (24) of the arxiv review:
   Eigen::Vector<DimensionlessReal, Eigen::Dynamic> p_c(n);
   // p_{\sigma} := conjugate evolution path, equation (31) of the arxiv review:
   Eigen::Vector<DimensionlessReal, Eigen::Dynamic> p_sigma(n);
   if constexpr (detail::has_resize_v<ArgumentContainer>) {
     best_vector.resize(n, std::numeric_limits<Real>::quiet_NaN());
   }
   for (size_t i = 0; i < n; ++i) {
     p_c[i] = DimensionlessReal(0);
     p_sigma[i] = DimensionlessReal(0);
   }
   // Table 1, \mu = floor(\lambda/2):
   size_t mu = params.population_size/2;
   std::vector<DimensionlessReal> w_prime(params.population_size, std::numeric_limits<DimensionlessReal>::quiet_NaN());
   for (size_t i = 0; i < params.population_size; ++i) {
     // Equation (49), but 0-indexed:
     w_prime[i] = log(static_cast<DimensionlessReal>(params.population_size + 1)/(2*(i+1)));
   }
   // Table 1, notes at top:
   DimensionlessReal positive_weight_sum = 0;
   DimensionlessReal sq_weight_sum = 0;
   for (size_t i = 0; i < mu; ++i) {
     BOOST_MATH_ASSERT(w_prime[i] > 0);
     positive_weight_sum += w_prime[i];
     sq_weight_sum += w_prime[i]*w_prime[i];
   }
   DimensionlessReal mu_eff = positive_weight_sum*positive_weight_sum/sq_weight_sum;
   BOOST_MATH_ASSERT(1 <= mu_eff);
   BOOST_MATH_ASSERT(mu_eff <= mu);
   DimensionlessReal negative_weight_sum = 0;
   sq_weight_sum = 0;
   for (size_t i = mu; i < params.population_size; ++i) {
     BOOST_MATH_ASSERT(w_prime[i] <= 0);
     negative_weight_sum += w_prime[i];
     sq_weight_sum += w_prime[i]*w_prime[i];
   }
   DimensionlessReal mu_eff_m = negative_weight_sum*negative_weight_sum/sq_weight_sum;
   // Equation (54):
   DimensionlessReal c_m = params.learning_rate;
   // Equation (55):
   DimensionlessReal c_sigma = (mu_eff + 2)/(n + mu_eff + 5);
   BOOST_MATH_ASSERT(c_sigma < 1);
   DimensionlessReal d_sigma = 1 + 2*(max)(DimensionlessReal(0), sqrt(DimensionlessReal((mu_eff - 1)/(n + 1))) - DimensionlessReal(1)) + c_sigma;
   // Equation (56):
   DimensionlessReal c_c = (4 + mu_eff/n)/(n + 4 + 2*mu_eff/n);
   BOOST_MATH_ASSERT(c_c <= 1);
   // Equation (57):
   DimensionlessReal c_1 = DimensionlessReal(2)/(pow(n + 1.3, 2) + mu_eff);
   // Equation (58)
   DimensionlessReal c_mu = (min)(1 - c_1, 2*(DimensionlessReal(0.25)  + mu_eff  + 1/mu_eff - 2)/((n+2)*(n+2) + mu_eff));
   BOOST_MATH_ASSERT(c_1 + c_mu <= DimensionlessReal(1));
   // Equation (50):
   DimensionlessReal alpha_mu_m = 1 + c_1/c_mu;
   // Equation (51):
   DimensionlessReal alpha_mu_eff_m = 1 + 2*mu_eff_m/(mu_eff + 2);
   // Equation (52):
   DimensionlessReal alpha_m_pos_def = (1- c_1 - c_mu)/(n*c_mu);
   // Equation (53):
   std::vector<DimensionlessReal> weights(params.population_size, std::numeric_limits<DimensionlessReal>::quiet_NaN());
   for (size_t i = 0; i < mu; ++i) {
     weights[i] = w_prime[i]/positive_weight_sum;
   }
   DimensionlessReal min_alpha = (min)(alpha_mu_m, (min)(alpha_mu_eff_m, alpha_m_pos_def));
   for (size_t i = mu; i < params.population_size; ++i) {
     weights[i] = min_alpha*w_prime[i]/abs(negative_weight_sum);
   }
   // mu:= number of parents, lambda := number of offspring.
   Eigen::Matrix<DimensionlessReal, Eigen::Dynamic, Eigen::Dynamic> C = Eigen::Matrix<DimensionlessReal, Eigen::Dynamic, Eigen::Dynamic>::Identity(n, n);
   ArgumentContainer mean_vector;
   // See the footnote in Figure 6 of the arxiv review:
   // We should consider the more robust initialization described there. . .
   Real sigma = DimensionlessReal(0.3)*(params.upper_bounds[0] - params.lower_bounds[0]);;
   if (params.initial_guess) {
     mean_vector = *params.initial_guess;
   }
   else {
     mean_vector = detail::random_initial_population(params.lower_bounds, params.upper_bounds, 1, gen)[0];
   }
   auto initial_cost = cost_function(mean_vector);
   if (!isnan(initial_cost)) {
     best_vector = mean_vector;
     lowest_cost = initial_cost;
     if (current_minimum_cost) {
       *current_minimum_cost = initial_cost;
     }
   }
 #if BOOST_MATH_DEBUG_CMA_ES
   {
     std::cout << __FILE__ << ":" << __LINE__ << ":" << __func__ << "\n";
     std::cout << "\tRunning a (" << params.population_size/2 << "/" << params.population_size/2 << "_W, " << params.population_size << ")-aCMA Evolutionary Strategy on " << params.threads << " threads.\n";
     std::cout << "\tInitial mean vector: {";
     for (size_t i = 0; i < n - 1; ++i) {
       std::cout << mean_vector[i] << ", ";
     }
     std::cout << mean_vector[n - 1] << "}.\n";
     std::cout << "\tCost: " << lowest_cost << ".\n";
     std::cout << "\tInitial step length: " << sigma << ".\n";
     std::cout << "\tVariance effective selection mass: " << mu_eff << ".\n";
     std::cout << "\tLearning rate for rank-one update of covariance matrix: " << c_1 << ".\n";
     std::cout << "\tLearning rate for rank-mu update of covariance matrix: " << c_mu << ".\n";
     std::cout << "\tDecay rate for cumulation path for step-size control: " << c_sigma << ".\n";
     std::cout << "\tLearning rate for the mean: " << c_m << ".\n";
     std::cout << "\tDamping parameter for step-size update: " << d_sigma << ".\n";
   }
 #endif
   size_t generation = 0;

   std::vector<Eigen::Vector<DimensionlessReal, Eigen::Dynamic>> ys(params.population_size);
   std::vector<ArgumentContainer> xs(params.population_size);
   std::vector<ResultType> costs(params.population_size, std::numeric_limits<ResultType>::quiet_NaN());
   Eigen::Vector<DimensionlessReal, Eigen::Dynamic> weighted_avg_y(n);
   Eigen::Vector<DimensionlessReal, Eigen::Dynamic> z(n);
   if constexpr (detail::has_resize_v<ArgumentContainer>) {
     for (auto & x : xs) {
       x.resize(n, std::numeric_limits<Real>::quiet_NaN());
     }
   }
   for (auto & y : ys) {
     y.resize(n);
   }
   normal_distribution<DimensionlessReal> dis(DimensionlessReal(0), DimensionlessReal(1));
   do {
     if (cancellation && *cancellation) {
       break;
     }
     // TODO: The reference contends the following in
     // Section B.2 "Strategy internal numerical effort":
     // "In practice, the re-calculation of B and D needs to be done not until about
     // max(1, floor(1/(10n(c_1+c_mu)))) generations."
     // Note that sigma can be dimensionless, in which case C carries the units.
     // This is a weird decision-we're not gonna do that!
     Eigen::SelfAdjointEigenSolver<Eigen::Matrix<DimensionlessReal, Eigen::Dynamic, Eigen::Dynamic>> eigensolver(C);
     if (eigensolver.info() != Eigen::Success) {
       std::ostringstream oss;
       oss << __FILE__ << ":" << __LINE__ << ":" << __func__;
       oss << ": Could not decompose the covariance matrix as BDB^{T}.";
       throw std::logic_error(oss.str());
     }
     Eigen::Matrix<DimensionlessReal, Eigen::Dynamic, Eigen::Dynamic> B = eigensolver.eigenvectors();
     // Eigen returns D^2, in the notation of the survey:
     auto D = eigensolver.eigenvalues();
     // So make it better:
     for (auto & d : D) {
       if (d <= 0 || isnan(d)) {
         std::ostringstream oss;
         oss << __FILE__ << ":" << __LINE__ << ":" << __func__;
         oss << ": The covariance matrix is not positive definite. This breaks the evolution path computation downstream.\n";
         oss << "C=\n" << C << "\n";
         oss << "Eigenvalues: " << D;
         throw std::domain_error(oss.str());
       }
       d = sqrt(d);
     }

     for (size_t k = 0; k < params.population_size; ++k) {
       auto & y = ys[k];
       auto & x = xs[k];
       BOOST_MATH_ASSERT(static_cast<size_t>(x.size()) == n);
       BOOST_MATH_ASSERT(static_cast<size_t>(y.size()) == n);
       size_t resample_counter = 0;
       do {
         // equation (39) of Figure 6:
         // Also see equation (4):
         for (size_t i = 0; i < n; ++i) {
           z[i] = dis(gen);
         }
         Eigen::Vector<DimensionlessReal, Eigen::Dynamic> Dz(n);
         for (size_t i = 0; i < n; ++i) {
           Dz[i] = D[i]*z[i];
         }
         y = B*Dz;
         for (size_t i = 0; i < n; ++i) {
           BOOST_MATH_ASSERT(!isnan(mean_vector[i]));
           BOOST_MATH_ASSERT(!isnan(y[i]));
           x[i] = mean_vector[i] + sigma*y[i]; // equation (40) of Figure 6.
         }
         costs[k] = cost_function(x);
         if (resample_counter++ == 50) {
           std::ostringstream oss;
           oss << __FILE__ << ":" << __LINE__ << ":" << __func__;
           oss << ": 50 resamples was not sufficient to find an argument to the cost function which did not return NaN.";
           oss << " Giving up.";
           throw std::domain_error(oss.str());
         }
       } while (isnan(costs[k]));

       if (queries) {
         queries->emplace_back(std::make_pair(x, costs[k]));
       }
       if (costs[k] < lowest_cost) {
         lowest_cost = costs[k];
         best_vector = x;
         if (current_minimum_cost && costs[k] < *current_minimum_cost) {
           *current_minimum_cost = costs[k];
         }
         if (lowest_cost < target_value) {
           target_attained = true;
           break;
         }
       }
     }
     if (target_attained) {
       break;
     }
     if (cancellation && *cancellation) {
       break;
     }
     auto indices = detail::best_indices(costs);
     // Equation (41), Figure 6:
     for (size_t j = 0; j < n; ++j) {
       weighted_avg_y[j] = 0;
       for (size_t i = 0; i < mu; ++i) {
         BOOST_MATH_ASSERT(!isnan(weights[i]));
         BOOST_MATH_ASSERT(!isnan(ys[indices[i]][j]));
         weighted_avg_y[j] += weights[i]*ys[indices[i]][j];
       }
     }
     // Equation (42), Figure 6:
     for (size_t j = 0; j < n; ++j) {
       mean_vector[j] = mean_vector[j] + c_m*sigma*weighted_avg_y[j];
     }
     // Equation (43), Figure 6: Start with C^{-1/2}<y>_{w}
     Eigen::Vector<DimensionlessReal, Eigen::Dynamic> inv_D_B_transpose_y = B.transpose()*weighted_avg_y;
     for (long j = 0; j < inv_D_B_transpose_y.size(); ++j) {
       inv_D_B_transpose_y[j] /= D[j];
     }
     Eigen::Vector<DimensionlessReal, Eigen::Dynamic> C_inv_sqrt_y_avg = B*inv_D_B_transpose_y;
     // Equation (43), Figure 6:
     DimensionlessReal p_sigma_norm = 0;
     for (size_t j = 0; j < n; ++j) {
       p_sigma[j] = (1-c_sigma)*p_sigma[j] + sqrt(c_sigma*(2-c_sigma)*mu_eff)*C_inv_sqrt_y_avg[j];
       p_sigma_norm += p_sigma[j]*p_sigma[j];
     }
     p_sigma_norm = sqrt(p_sigma_norm);
     // A: Algorithm Summary: E[||N(0,1)||]:
     const DimensionlessReal expectation_norm_0I = sqrt(static_cast<DimensionlessReal>(n))*(DimensionlessReal(1) - DimensionlessReal(1)/(4*n) + DimensionlessReal(1)/(21*n*n));
     // Equation (44), Figure 6:
     sigma = sigma*exp(c_sigma*(p_sigma_norm/expectation_norm_0I -1)/d_sigma);
     // A: Algorithm Summary:
     DimensionlessReal h_sigma = 0;
     DimensionlessReal rhs = (DimensionlessReal(1.4) + DimensionlessReal(2)/(n+1))*expectation_norm_0I*sqrt(1 - pow(1-c_sigma, 2*(generation+1)));
     if (p_sigma_norm < rhs) {
       h_sigma = 1;
     }
     // Equation (45), Figure 6:
     p_c = (1-c_c)*p_c + h_sigma*sqrt(c_c*(2-c_c)*mu_eff)*weighted_avg_y;
     DimensionlessReal delta_h_sigma = (1-h_sigma)*c_c*(2-c_c);
     DimensionlessReal weight_sum = 0;
     for (auto & w : weights) {
       weight_sum += w;
     }
     // Equation (47), Figure 6:
     DimensionlessReal K = (1 + c_1*delta_h_sigma - c_1 - c_mu*weight_sum);
     // Can these operations be sped up using `.selfadjointView<Eigen::Upper>`?
     // Maybe: A.selfadjointView<Eigen::Lower>().rankUpdate(p_c, c_1);?
     C = K*C + c_1*p_c*p_c.transpose();
     // Incorporate positive weights of Equation (46):
     for (size_t i = 0; i < params.population_size/2; ++i) {
       C += c_mu*weights[i]*ys[indices[i]]*ys[indices[i]].transpose();
     }
     for (size_t i = params.population_size/2; i < params.population_size; ++i) {
       Eigen::Vector<DimensionlessReal, Eigen::Dynamic> D_inv_BTy = B.transpose()*ys[indices[i]];
       for (size_t j = 0; j < n; ++j) {
         D_inv_BTy[j] /= D[j];
       }
       DimensionlessReal squared_norm = D_inv_BTy.squaredNorm();
       DimensionlessReal K2 = c_mu*weights[i]/squared_norm;
       C += K2*ys[indices[i]]*ys[indices[i]].transpose();
     }
   } while (generation++ < params.max_generations);

   return best_vector;
 }

 } // namespace boost::math::optimization
 #endif
	/*
	* Copyright Nick Thompson, 2024
	* Use, modification and distribution are subject to the
	* Boost Software License, Version 1.0. (See accompanying file
	* LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
	*/
	#ifndef BOOST_MATH_OPTIMIZATION_CMA_ES_HPP
	#define BOOST_MATH_OPTIMIZATION_CMA_ES_HPP
	#include <atomic>
	#include <cmath>
	#include <iostream>
	#include <limits>
	#include <random>
	#include <sstream>
	#include <stdexcept>
	#include <utility>
	#include <vector>
	#include <boost/math/optimization/detail/common.hpp>
	#include <boost/math/tools/assert.hpp>
	#if __has_include(<Eigen/Dense>)
	#include <Eigen/Dense>
	#else
	#error "CMA-ES requires Eigen."
	#endif

	// Follows the notation in:
	// https://arxiv.org/pdf/1604.00772.pdf
	// This is a (hopefully) faithful reproduction of the pseudocode in the arxiv review
	// by Nikolaus Hansen.
	// Comments referring to equations all refer to this arxiv review.
	// A slide deck by the same author is given here:
	// http://www.cmap.polytechnique.fr/~nikolaus.hansen/CmaTutorialGecco2023-no-audio.pdf
	// which is also a very useful reference.

	#ifndef BOOST_MATH_DEBUG_CMA_ES
	#define BOOST_MATH_DEBUG_CMA_ES 0
	#endif

	namespace boost::math::optimization {

	template <typename ArgumentContainer> struct cma_es_parameters {
	using Real = typename ArgumentContainer::value_type;
	using DimensionlessReal = decltype(Real()/Real());
	ArgumentContainer lower_bounds;
	ArgumentContainer upper_bounds;
	size_t max_generations = 1000;
	ArgumentContainer const *initial_guess = nullptr;
	// In the reference, population size = \lambda.
	// If the population size is zero, it is set to equation (48) of the reference
	// and rounded up to the nearest multiple of threads:
	size_t population_size = 0;
	// In the reference, learning_rate = c_m:
	DimensionlessReal learning_rate = 1;
	};

	template <typename ArgumentContainer>
	void validate_cma_es_parameters(cma_es_parameters<ArgumentContainer> &params) {
	using Real = typename ArgumentContainer::value_type;
	using DimensionlessReal = decltype(Real()/Real());
	using std::isfinite;
	using std::isnan;
	using std::log;
	using std::ceil;
	using std::floor;

	std::ostringstream oss;
	detail::validate_bounds(params.lower_bounds, params.upper_bounds);
	if (params.initial_guess) {
	detail::validate_initial_guess(*params.initial_guess, params.lower_bounds, params.upper_bounds);
	}
	const size_t n = params.upper_bounds.size();
	// Equation 48 of the arxiv review:
	if (params.population_size == 0) {
	//auto tmp = 4.0 + floor(3*log(n));
	// But round to the nearest multiple of the thread count:
	//auto k = static_cast<size_t>(std::ceil(tmp/params.threads));
	//params.population_size = k*params.threads;
	params.population_size = static_cast<size_t>(4 + floor(3*log(n)));
	}
	if (params.learning_rate <= DimensionlessReal(0) \|\| !isfinite(params.learning_rate)) {
	oss << __FILE__ << ":" << __LINE__ << ":" << __func__;
	oss << ": The learning rate must be > 0, but got " << params.learning_rate << ".";
	throw std::invalid_argument(oss.str());
	}
	}

	template <typename ArgumentContainer, class Func, class URBG>
	ArgumentContainer cma_es(
	const Func cost_function,
	cma_es_parameters<ArgumentContainer> &params,
	URBG &gen,
	std::invoke_result_t<Func, ArgumentContainer> target_value = std::numeric_limits<std::invoke_result_t<Func, ArgumentContainer>>::quiet_NaN(),
	std::atomic<bool> *cancellation = nullptr,
	std::atomic<std::invoke_result_t<Func, ArgumentContainer>> *current_minimum_cost = nullptr,
	std::vector<std::pair<ArgumentContainer, std::invoke_result_t<Func, ArgumentContainer>>> *queries = nullptr)
	{
	using Real = typename ArgumentContainer::value_type;
	using DimensionlessReal = decltype(Real()/Real());
	using ResultType = std::invoke_result_t<Func, ArgumentContainer>;
	using std::abs;
	using std::log;
	using std::exp;
	using std::pow;
	using std::min;
	using std::max;
	using std::sqrt;
	using std::isnan;
	using std::isfinite;
	using std::uniform_real_distribution;
	using std::normal_distribution;
	validate_cma_es_parameters(params);
	// n = dimension of problem:
	const size_t n = params.lower_bounds.size();
	std::atomic<bool> target_attained = false;
	std::atomic<ResultType> lowest_cost = std::numeric_limits<ResultType>::infinity();
	ArgumentContainer best_vector;
	// p_{c} := evolution path, equation (24) of the arxiv review:
	Eigen::Vector<DimensionlessReal, Eigen::Dynamic> p_c(n);
	// p_{\sigma} := conjugate evolution path, equation (31) of the arxiv review:
	Eigen::Vector<DimensionlessReal, Eigen::Dynamic> p_sigma(n);
	if constexpr (detail::has_resize_v<ArgumentContainer>) {
	best_vector.resize(n, std::numeric_limits<Real>::quiet_NaN());
	}
	for (size_t i = 0; i < n; ++i) {
	p_c[i] = DimensionlessReal(0);
	p_sigma[i] = DimensionlessReal(0);
	}
	// Table 1, \mu = floor(\lambda/2):
	size_t mu = params.population_size/2;
	std::vector<DimensionlessReal> w_prime(params.population_size, std::numeric_limits<DimensionlessReal>::quiet_NaN());
	for (size_t i = 0; i < params.population_size; ++i) {
	// Equation (49), but 0-indexed:
	w_prime[i] = log(static_cast<DimensionlessReal>(params.population_size + 1)/(2*(i+1)));
	}
	// Table 1, notes at top:
	DimensionlessReal positive_weight_sum = 0;
	DimensionlessReal sq_weight_sum = 0;
	for (size_t i = 0; i < mu; ++i) {
	BOOST_MATH_ASSERT(w_prime[i] > 0);
	positive_weight_sum += w_prime[i];
	sq_weight_sum += w_prime[i]*w_prime[i];
	}
	DimensionlessReal mu_eff = positive_weight_sum*positive_weight_sum/sq_weight_sum;
	BOOST_MATH_ASSERT(1 <= mu_eff);
	BOOST_MATH_ASSERT(mu_eff <= mu);
	DimensionlessReal negative_weight_sum = 0;
	sq_weight_sum = 0;
	for (size_t i = mu; i < params.population_size; ++i) {
	BOOST_MATH_ASSERT(w_prime[i] <= 0);
	negative_weight_sum += w_prime[i];
	sq_weight_sum += w_prime[i]*w_prime[i];
	}
	DimensionlessReal mu_eff_m = negative_weight_sum*negative_weight_sum/sq_weight_sum;
	// Equation (54):
	DimensionlessReal c_m = params.learning_rate;
	// Equation (55):
	DimensionlessReal c_sigma = (mu_eff + 2)/(n + mu_eff + 5);
	BOOST_MATH_ASSERT(c_sigma < 1);
	DimensionlessReal d_sigma = 1 + 2*(max)(DimensionlessReal(0), sqrt(DimensionlessReal((mu_eff - 1)/(n + 1))) - DimensionlessReal(1)) + c_sigma;
	// Equation (56):
	DimensionlessReal c_c = (4 + mu_eff/n)/(n + 4 + 2*mu_eff/n);
	BOOST_MATH_ASSERT(c_c <= 1);
	// Equation (57):
	DimensionlessReal c_1 = DimensionlessReal(2)/(pow(n + 1.3, 2) + mu_eff);
	// Equation (58)
	DimensionlessReal c_mu = (min)(1 - c_1, 2(DimensionlessReal(0.25) + mu_eff + 1/mu_eff - 2)/((n+2)(n+2) + mu_eff));
	BOOST_MATH_ASSERT(c_1 + c_mu <= DimensionlessReal(1));
	// Equation (50):
	DimensionlessReal alpha_mu_m = 1 + c_1/c_mu;
	// Equation (51):
	DimensionlessReal alpha_mu_eff_m = 1 + 2*mu_eff_m/(mu_eff + 2);
	// Equation (52):
	DimensionlessReal alpha_m_pos_def = (1- c_1 - c_mu)/(n*c_mu);
	// Equation (53):
	std::vector<DimensionlessReal> weights(params.population_size, std::numeric_limits<DimensionlessReal>::quiet_NaN());
	for (size_t i = 0; i < mu; ++i) {
	weights[i] = w_prime[i]/positive_weight_sum;
	}
	DimensionlessReal min_alpha = (min)(alpha_mu_m, (min)(alpha_mu_eff_m, alpha_m_pos_def));
	for (size_t i = mu; i < params.population_size; ++i) {
	weights[i] = min_alpha*w_prime[i]/abs(negative_weight_sum);
	}
	// mu:= number of parents, lambda := number of offspring.
	Eigen::Matrix<DimensionlessReal, Eigen::Dynamic, Eigen::Dynamic> C = Eigen::Matrix<DimensionlessReal, Eigen::Dynamic, Eigen::Dynamic>::Identity(n, n);
	ArgumentContainer mean_vector;
	// See the footnote in Figure 6 of the arxiv review:
	// We should consider the more robust initialization described there. . .
	Real sigma = DimensionlessReal(0.3)*(params.upper_bounds[0] - params.lower_bounds[0]);;
	if (params.initial_guess) {
	mean_vector = *params.initial_guess;
	}
	else {
	mean_vector = detail::random_initial_population(params.lower_bounds, params.upper_bounds, 1, gen)[0];
	}
	auto initial_cost = cost_function(mean_vector);
	if (!isnan(initial_cost)) {
	best_vector = mean_vector;
	lowest_cost = initial_cost;
	if (current_minimum_cost) {
	*current_minimum_cost = initial_cost;
	}
	}
	#if BOOST_MATH_DEBUG_CMA_ES
	{
	std::cout << __FILE__ << ":" << __LINE__ << ":" << __func__ << "\n";
	std::cout << "\tRunning a (" << params.population_size/2 << "/" << params.population_size/2 << "_W, " << params.population_size << ")-aCMA Evolutionary Strategy on " << params.threads << " threads.\n";
	std::cout << "\tInitial mean vector: {";
	for (size_t i = 0; i < n - 1; ++i) {
	std::cout << mean_vector[i] << ", ";
	}
	std::cout << mean_vector[n - 1] << "}.\n";
	std::cout << "\tCost: " << lowest_cost << ".\n";
	std::cout << "\tInitial step length: " << sigma << ".\n";
	std::cout << "\tVariance effective selection mass: " << mu_eff << ".\n";
	std::cout << "\tLearning rate for rank-one update of covariance matrix: " << c_1 << ".\n";
	std::cout << "\tLearning rate for rank-mu update of covariance matrix: " << c_mu << ".\n";
	std::cout << "\tDecay rate for cumulation path for step-size control: " << c_sigma << ".\n";
	std::cout << "\tLearning rate for the mean: " << c_m << ".\n";
	std::cout << "\tDamping parameter for step-size update: " << d_sigma << ".\n";
	}
	#endif
	size_t generation = 0;

	std::vector<Eigen::Vector<DimensionlessReal, Eigen::Dynamic>> ys(params.population_size);
	std::vector<ArgumentContainer> xs(params.population_size);
	std::vector<ResultType> costs(params.population_size, std::numeric_limits<ResultType>::quiet_NaN());
	Eigen::Vector<DimensionlessReal, Eigen::Dynamic> weighted_avg_y(n);
	Eigen::Vector<DimensionlessReal, Eigen::Dynamic> z(n);
	if constexpr (detail::has_resize_v<ArgumentContainer>) {
	for (auto & x : xs) {
	x.resize(n, std::numeric_limits<Real>::quiet_NaN());
	}
	}
	for (auto & y : ys) {
	y.resize(n);
	}
	normal_distribution<DimensionlessReal> dis(DimensionlessReal(0), DimensionlessReal(1));
	do {
	if (cancellation && *cancellation) {
	break;
	}
	// TODO: The reference contends the following in
	// Section B.2 "Strategy internal numerical effort":
	// "In practice, the re-calculation of B and D needs to be done not until about
	// max(1, floor(1/(10n(c_1+c_mu)))) generations."
	// Note that sigma can be dimensionless, in which case C carries the units.
	// This is a weird decision-we're not gonna do that!
	Eigen::SelfAdjointEigenSolver<Eigen::Matrix<DimensionlessReal, Eigen::Dynamic, Eigen::Dynamic>> eigensolver(C);
	if (eigensolver.info() != Eigen::Success) {
	std::ostringstream oss;
	oss << __FILE__ << ":" << __LINE__ << ":" << __func__;
	oss << ": Could not decompose the covariance matrix as BDB^{T}.";
	throw std::logic_error(oss.str());
	}
	Eigen::Matrix<DimensionlessReal, Eigen::Dynamic, Eigen::Dynamic> B = eigensolver.eigenvectors();
	// Eigen returns D^2, in the notation of the survey:
	auto D = eigensolver.eigenvalues();
	// So make it better:
	for (auto & d : D) {
	if (d <= 0 \|\| isnan(d)) {
	std::ostringstream oss;
	oss << __FILE__ << ":" << __LINE__ << ":" << __func__;
	oss << ": The covariance matrix is not positive definite. This breaks the evolution path computation downstream.\n";
	oss << "C=\n" << C << "\n";
	oss << "Eigenvalues: " << D;
	throw std::domain_error(oss.str());
	}
	d = sqrt(d);
	}

	for (size_t k = 0; k < params.population_size; ++k) {
	auto & y = ys[k];
	auto & x = xs[k];
	BOOST_MATH_ASSERT(static_cast<size_t>(x.size()) == n);
	BOOST_MATH_ASSERT(static_cast<size_t>(y.size()) == n);
	size_t resample_counter = 0;
	do {
	// equation (39) of Figure 6:
	// Also see equation (4):
	for (size_t i = 0; i < n; ++i) {
	z[i] = dis(gen);
	}
	Eigen::Vector<DimensionlessReal, Eigen::Dynamic> Dz(n);
	for (size_t i = 0; i < n; ++i) {
	Dz[i] = D[i]*z[i];
	}
	y = B*Dz;
	for (size_t i = 0; i < n; ++i) {
	BOOST_MATH_ASSERT(!isnan(mean_vector[i]));
	BOOST_MATH_ASSERT(!isnan(y[i]));
	x[i] = mean_vector[i] + sigma*y[i]; // equation (40) of Figure 6.
	}
	costs[k] = cost_function(x);
	if (resample_counter++ == 50) {
	std::ostringstream oss;
	oss << __FILE__ << ":" << __LINE__ << ":" << __func__;
	oss << ": 50 resamples was not sufficient to find an argument to the cost function which did not return NaN.";
	oss << " Giving up.";
	throw std::domain_error(oss.str());
	}
	} while (isnan(costs[k]));

	if (queries) {
	queries->emplace_back(std::make_pair(x, costs[k]));
	}
	if (costs[k] < lowest_cost) {
	lowest_cost = costs[k];
	best_vector = x;
	if (current_minimum_cost && costs[k] < *current_minimum_cost) {
	*current_minimum_cost = costs[k];
	}
	if (lowest_cost < target_value) {
	target_attained = true;
	break;
	}
	}
	}
	if (target_attained) {
	break;
	}
	if (cancellation && *cancellation) {
	break;
	}
	auto indices = detail::best_indices(costs);
	// Equation (41), Figure 6:
	for (size_t j = 0; j < n; ++j) {
	weighted_avg_y[j] = 0;
	for (size_t i = 0; i < mu; ++i) {
	BOOST_MATH_ASSERT(!isnan(weights[i]));
	BOOST_MATH_ASSERT(!isnan(ys[indices[i]][j]));
	weighted_avg_y[j] += weights[i]*ys[indices[i]][j];
	}
	}
	// Equation (42), Figure 6:
	for (size_t j = 0; j < n; ++j) {
	mean_vector[j] = mean_vector[j] + c_msigmaweighted_avg_y[j];
	}
	// Equation (43), Figure 6: Start with C^{-1/2}<y>_{w}
	Eigen::Vector<DimensionlessReal, Eigen::Dynamic> inv_D_B_transpose_y = B.transpose()*weighted_avg_y;
	for (long j = 0; j < inv_D_B_transpose_y.size(); ++j) {
	inv_D_B_transpose_y[j] /= D[j];
	}
	Eigen::Vector<DimensionlessReal, Eigen::Dynamic> C_inv_sqrt_y_avg = B*inv_D_B_transpose_y;
	// Equation (43), Figure 6:
	DimensionlessReal p_sigma_norm = 0;
	for (size_t j = 0; j < n; ++j) {
	p_sigma[j] = (1-c_sigma)p_sigma[j] + sqrt(c_sigma(2-c_sigma)mu_eff)C_inv_sqrt_y_avg[j];
	p_sigma_norm += p_sigma[j]*p_sigma[j];
	}
	p_sigma_norm = sqrt(p_sigma_norm);
	// A: Algorithm Summary: E[\|\|N(0,1)\|\|]:
	const DimensionlessReal expectation_norm_0I = sqrt(static_cast<DimensionlessReal>(n))(DimensionlessReal(1) - DimensionlessReal(1)/(4n) + DimensionlessReal(1)/(21nn));
	// Equation (44), Figure 6:
	sigma = sigmaexp(c_sigma(p_sigma_norm/expectation_norm_0I -1)/d_sigma);
	// A: Algorithm Summary:
	DimensionlessReal h_sigma = 0;
	DimensionlessReal rhs = (DimensionlessReal(1.4) + DimensionlessReal(2)/(n+1))expectation_norm_0Isqrt(1 - pow(1-c_sigma, 2*(generation+1)));
	if (p_sigma_norm < rhs) {
	h_sigma = 1;
	}
	// Equation (45), Figure 6:
	p_c = (1-c_c)p_c + h_sigmasqrt(c_c(2-c_c)mu_eff)*weighted_avg_y;
	DimensionlessReal delta_h_sigma = (1-h_sigma)c_c(2-c_c);
	DimensionlessReal weight_sum = 0;
	for (auto & w : weights) {
	weight_sum += w;
	}
	// Equation (47), Figure 6:
	DimensionlessReal K = (1 + c_1delta_h_sigma - c_1 - c_muweight_sum);
	// Can these operations be sped up using `.selfadjointView<Eigen::Upper>`?
	// Maybe: A.selfadjointView<Eigen::Lower>().rankUpdate(p_c, c_1);?
	C = KC + c_1p_c*p_c.transpose();
	// Incorporate positive weights of Equation (46):
	for (size_t i = 0; i < params.population_size/2; ++i) {
	C += c_muweights[i]ys[indices[i]]*ys[indices[i]].transpose();
	}
	for (size_t i = params.population_size/2; i < params.population_size; ++i) {
	Eigen::Vector<DimensionlessReal, Eigen::Dynamic> D_inv_BTy = B.transpose()*ys[indices[i]];
	for (size_t j = 0; j < n; ++j) {
	D_inv_BTy[j] /= D[j];
	}
	DimensionlessReal squared_norm = D_inv_BTy.squaredNorm();
	DimensionlessReal K2 = c_mu*weights[i]/squared_norm;
	C += K2ys[indices[i]]ys[indices[i]].transpose();
	}
	} while (generation++ < params.max_generations);

	return best_vector;
	}

	} // namespace boost::math::optimization
	#endif