PaGMO 1.1.5
cmaes.cpp
/*****************************************************************************
 * Copyright (C) 2004-2015 The PaGMO development team,
 * Advanced Concepts Team (ACT), European Space Agency (ESA)
 *
 * https://github.com/esa/pagmo
 *
 * act@esa.int
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the
 * Free Software Foundation, Inc.,
 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *****************************************************************************/

#include <string>
#include <iomanip>
#include <vector>
#include <algorithm>
#include <cmath>
#include <boost/random/normal_distribution.hpp>
#include <boost/random/variate_generator.hpp>
#include <boost/random/uniform_real.hpp>
#include <boost/numeric/conversion/cast.hpp>
#include <boost/math/special_functions/fpclassify.hpp>

#include "cmaes.h"
#include "../exceptions.h"
#include "../population.h"
#include "../problem/base_stochastic.h"
#include "../types.h"
#include "../Eigen/Dense"

namespace pagmo { namespace algorithm {

cmaes::cmaes(int gen, double cc, double cs, double c1, double cmu, double sigma0, double ftol, double xtol, bool memory):
    base(), m_gen(boost::numeric_cast<std::size_t>(gen)), m_cc(cc), m_cs(cs), m_c1(c1),
    m_cmu(cmu), m_sigma(sigma0), m_ftol(ftol), m_xtol(xtol), m_memory(memory) {
    if (gen < 0) {
        pagmo_throw(value_error,"number of generations must be nonnegative");
    }
    if ( ((cc < 0) || (cc > 1)) && !(cc==-1) ){
        pagmo_throw(value_error,"cc needs to be in [0,1] or -1 for auto value");
    }
    if ( ((cs < 0) || (cs > 1)) && !(cs==-1) ){
        pagmo_throw(value_error,"cs needs to be in [0,1] or -1 for auto value");
    }
    if ( ((c1 < 0) || (c1 > 1)) && !(c1==-1) ){
        pagmo_throw(value_error,"c1 needs to be in [0,1] or -1 for auto value");
    }
    if ( ((cmu < 0) || (cmu > 1)) && !(cmu==-1) ){
        pagmo_throw(value_error,"cmu needs to be in [0,1] or -1 for auto value");
    }

    // Initialize the algorithm memory
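    // The memory containers get a dummy dimension of 1 here; they are resized to the actual
    // problem dimension and population size on the first call to evolve().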
    m_mean = Eigen::VectorXd::Zero(1);
    m_variation = Eigen::VectorXd::Zero(1);
    m_newpop = std::vector<Eigen::VectorXd>();
    m_B = Eigen::MatrixXd::Identity(1,1);
    m_D = Eigen::MatrixXd::Identity(1,1);
    m_C = Eigen::MatrixXd::Identity(1,1);
    m_invsqrtC = Eigen::MatrixXd::Identity(1,1);
    m_pc = Eigen::VectorXd::Zero(1);
    m_ps = Eigen::VectorXd::Zero(1);
    m_counteval = 0;
    m_eigeneval = 0;
}
base_ptr cmaes::clone() const
{
    return base_ptr(new cmaes(*this));
}

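// Functor ranking two individuals (given by their index in the population) according to their
// current fitness and constraint vectors, via the problem's compare_fc criterion.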
struct cmp_using_cur
{
    cmp_using_cur(const population &pop):m_pop(pop) {}
    bool operator()(const population::size_type &i1, const population::size_type &i2) const
    {
        return (
            m_pop.problem().compare_fc(
                m_pop.get_individual(i1).cur_f, m_pop.get_individual(i1).cur_c,
                m_pop.get_individual(i2).cur_f, m_pop.get_individual(i2).cur_c)
            );
    }
    const population &m_pop;
};

void cmaes::evolve(population &pop) const
{
    // Let's store some useful variables.
    const problem::base &prob = pop.problem();
    const problem::base::size_type prob_i_dimension = prob.get_i_dimension(), dim = prob.get_dimension(), N = dim - prob_i_dimension, prob_c_dimension = prob.get_c_dimension();
    const decision_vector &lb = prob.get_lb(), &ub = prob.get_ub();
    const population::size_type lam = pop.size();
    const population::size_type mu = boost::numeric_cast<population::size_type>(lam/2);

    // We perform some checks to determine whether the problem/population are suitable for CMA-ES
    if ( N == 0 ) {
        pagmo_throw(value_error,"There is no continuous part in the problem decision vector for CMA-ES to optimise");
    }

    if ( prob.get_f_dimension() != 1 ) {
        pagmo_throw(value_error,"The problem is not single objective and CMA-ES is not suitable to solve it");
    }

    if ( prob_c_dimension != 0 ) {
        pagmo_throw(value_error,"The problem is not box constrained and CMA-ES is not suitable to solve it");
    }

    if ( prob_i_dimension != 0 ) {
        pagmo_throw(value_error,"The problem has an integer part and CMA-ES is not suitable to solve it");
    }

    if (lam < 5) {
        pagmo_throw(value_error,"for CMA-ES at least 5 individuals in the population are required");
    }

    // Get out if there is nothing to do.
    if (m_gen == 0) {
        return;
    }

    using namespace Eigen;

    // Initializing the random number generators
    boost::normal_distribution<double> normal(0.0,1.0);
    boost::variate_generator<boost::lagged_fibonacci607 &, boost::normal_distribution<double> > normally_distributed_number(m_drng,normal);
    boost::uniform_real<double> uniform(0.0,1.0);
    boost::variate_generator<boost::lagged_fibonacci607 &, boost::uniform_real<double> > randomly_distributed_number(m_drng,uniform);

    // Setting coefficients for Selection
    VectorXd weights(mu);
    for (int i = 0; i < weights.rows(); ++i){
        weights(i) = std::log(mu+0.5) - std::log(i+1.0);
    }
    weights /= weights.sum();                              // weights for weighted recombination
    double mueff = 1.0 / (weights.transpose()*weights);    // variance-effectiveness of sum w_i x_i

    // Setting coefficients for Adaptation automatically or to user defined data
    double cc(m_cc), cs(m_cs), c1(m_c1), cmu(m_cmu);
    if (cc == -1) {
        cc = (4 + mueff/N) / (N+4 + 2*mueff/N);            // t-const for cumulation for C
    }
    if (cs == -1) {
        cs = (mueff+2) / (N+mueff+5);                      // t-const for cumulation for sigma control
    }
    if (c1 == -1) {
        c1 = 2.0 / ((N+1.3)*(N+1.3)+mueff);                // learning rate for rank-one update of C
    }
    if (cmu == -1) {
        cmu = 2.0 * (mueff-2+1/mueff) / ((N+2)*(N+2)+mueff); // and for rank-mu update
    }

    double damps = 1 + 2*std::max(0.0, std::sqrt((mueff-1)/(N+1))-1) + cs;  // damping for sigma
    double chiN = std::sqrt(N) * (1-1.0/(4*N)+1.0/(21*N*N));                // expectation of ||N(0,I)|| == norm(randn(N,1))

    // Initializing and allocating (here one could use mutable data members to avoid redefinition of non const data)

    // Algorithm's Memory. This allows the algorithm to start from its last "state"
    VectorXd mean(m_mean);
    VectorXd variation(m_variation);
    std::vector<VectorXd> newpop(m_newpop);
    MatrixXd B(m_B);
    MatrixXd D(m_D);
    MatrixXd C(m_C);
    MatrixXd invsqrtC(m_invsqrtC);
    VectorXd pc(m_pc);
    VectorXd ps(m_ps);
    int counteval(m_counteval);
    int eigeneval(m_eigeneval);
    double sigma(m_sigma);
    double var_norm = 0;

    // Some buffers
    VectorXd meanold = VectorXd::Zero(N);
    MatrixXd Dinv = MatrixXd::Identity(N,N);
    MatrixXd Cold = MatrixXd::Identity(N,N);
    VectorXd tmp = VectorXd::Zero(N);
    std::vector<VectorXd> elite(mu,tmp);
    decision_vector dumb(N,0);

    // If the algorithm is called for the first time on this problem dimension / pop size, or if m_memory is false, we erase the memory of past calls
    if ( (m_newpop.size() != lam) || ((unsigned int)(m_newpop[0].rows() ) != N) || (m_memory==false) ) {
        mean.resize(N);
        for (problem::base::size_type i=0;i<N;++i){
            mean(i) = pop.champion().x[i];
        }
        newpop = std::vector<VectorXd>(lam,tmp);
        variation.resize(N);

        // We define the starting B,D,C
        B.resize(N,N); B = MatrixXd::Identity(N,N);           // B defines the coordinate system
        D.resize(N,N); D = MatrixXd::Identity(N,N);           // diagonal D defines the scaling. By default this is the width of the box.
                                                              // If this is too small ... then 1e-6 is used
        for (problem::base::size_type j=0; j<N; ++j){
            D(j,j) = std::max((ub[j]-lb[j]),1e-6);
        }
        C.resize(N,N); C = MatrixXd::Identity(N,N);           // covariance matrix C
        C = D*D;
        invsqrtC.resize(N,N); invsqrtC = MatrixXd::Identity(N,N); // inverse of sqrt(C)
        for (problem::base::size_type j=0; j<N; ++j){
            invsqrtC(j,j) = 1 / D(j,j);
        }
        pc.resize(N); pc = VectorXd::Zero(N);
        ps.resize(N); ps = VectorXd::Zero(N);
        counteval = 0;
        eigeneval = 0;
    }

    // ----------------------------------------------//
    // HERE WE START THE REAL ALGORITHM               //
    // ----------------------------------------------//

    if (m_screen_output) {
        std::cout << "CMAES 4 PaGMO: " << std::endl;
        std::cout << "mu: " << mu
            << " - lambda: " << lam
            << " - mueff: " << mueff
            << " - N: " << N << std::endl;

        std::cout << "cc: " << cc
            << " - cs: " << cs
            << " - c1: " << c1
            << " - cmu: " << cmu
            << " - sigma: " << sigma
            << " - damps: " << damps
            << " - chiN: " << chiN << std::endl;
    }

    SelfAdjointEigenSolver<MatrixXd> es(N);
    for (std::size_t g = 0; g < m_gen; ++g) {
        // 1 - We generate and evaluate lam new individuals

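        // Each candidate is drawn as x = mean + sigma * B * D * z with z ~ N(0,I), that is
        // x ~ N(mean, sigma^2 * C), since C = B * D^2 * B^T (B: eigenvectors, D: sqrt of eigenvalues).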
        for (population::size_type i = 0; i<lam; ++i ) {
            // 1a - we create a normally distributed random vector
            for (problem::base::size_type j=0; j<N; ++j){
                tmp(j) = normally_distributed_number();
            }
            // 1b - and store its transformed value in the newpop
            newpop[i] = mean + (sigma * B * D * tmp);
        }
        // This is evaluated here on the last generated tmp and will be used only as
        // a stopping criterion
        var_norm = (sigma * B * D * tmp).norm();

        // 1b - Check the exit conditions (every 5 generations); we need to do it here as
        // termination is defined on tmp
        if (g%5 == 0) {
            if ( (sigma*B*D*tmp).norm() < m_xtol ) {
                if (m_screen_output) {
                    std::cout << "Exit condition -- xtol < " << m_xtol << std::endl;
                }
                return;
            }

            double mah = std::fabs(pop.get_individual(pop.get_worst_idx()).best_f[0] - pop.get_individual(pop.get_best_idx()).best_f[0]);

            if (mah < m_ftol) {
                if (m_screen_output) {
                    std::cout << "Exit condition -- ftol < " << m_ftol << std::endl;
                }
                return;
            }
        }

        // 1c - we fix the bounds
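        // Components that fall outside [lb, ub] are resampled uniformly inside the box rather than clipped to the boundary.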
        for (population::size_type i = 0; i<lam; ++i ) {
            for (decision_vector::size_type j = 0; j<N; ++j ) {
                if ( (newpop[i](j) < lb[j]) || (newpop[i](j) > ub[j]) ) {
                    newpop[i](j) = lb[j] + randomly_distributed_number() * (ub[j] - lb[j]);
                }
            }
        }

        // 2 - We evaluate the new population (if the problem is stochastic we change the seed first)
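        // The dynamic_cast only succeeds for stochastic problems: in that case the seed is changed and the
        // population is rebuilt from scratch. For deterministic problems the cast throws std::bad_cast and
        // the new individuals are simply reinserted via set_x (the original reinsertion method).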
        try
        {   // TODO: check if it is really necessary to clear the pop, also
            // would it make sense to use best_x also?
            dynamic_cast<const pagmo::problem::base_stochastic &>(prob).set_seed(m_urng());
            pop.clear();    // Removes memory based on different seeds (champion and best_x, best_f, best_c)
            for (population::size_type i = 0; i<lam; ++i ) {
                for (decision_vector::size_type j = 0; j<N; ++j ) {
                    dumb[j] = newpop[i](j);
                }
                pop.push_back(dumb);
            }
            counteval += lam;
        }
        catch (const std::bad_cast& e)
        {
            // Reinsertion (original method)
            for (population::size_type i = 0; i<lam; ++i ) {
                for (decision_vector::size_type j = 0; j<N; ++j ) {
                    dumb[j] = newpop[i](j);
                }
                pop.set_x(i,dumb);
            }
            counteval += lam;
        }

        // 2 - We extract the elite from this generation. We use cur_f, equivalent to the
        // original method
        std::vector<population::size_type> best_idx;
        best_idx.reserve(pop.size());
        for (population::size_type i=0; i<pop.size(); ++i){
            best_idx.push_back(i);
        }
        cmp_using_cur cmp(pop);
        std::sort(best_idx.begin(),best_idx.end(),cmp);
        best_idx.resize(mu);
        for (population::size_type i = 0; i<mu; ++i ) {
            for (decision_vector::size_type j = 0; j<N; ++j ) {
                elite[i](j) = pop.get_individual(best_idx[i]).cur_x[j];
            }
        }

        // 3 - Compute the new elite mean storing the old one
        meanold = mean;
        mean = elite[0]*weights(0);
        for (population::size_type i = 1; i<mu; ++i ) {
            mean += elite[i]*weights(i);
        }

        // 4 - Update evolution paths
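        // ps accumulates steps of the mean in the coordinate system where C is the identity (hence invsqrtC);
        // hsig is the Heaviside trigger that stalls the rank-one update of pc whenever ||ps|| is too large,
        // which prevents C from growing out of control right after a large increase of sigma.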
        ps = (1 - cs) * ps + std::sqrt(cs*(2-cs)*mueff) * invsqrtC * (mean-meanold) / sigma;
        double hsig = (ps.squaredNorm() / N / (1-std::pow((1-cs),(2.0*counteval/lam))) ) < (2.0 + 4.0/(N+1));
        pc = (1-cc) * pc + hsig * std::sqrt(cc*(2-cc)*mueff) * (mean-meanold) / sigma;

        // 5 - Adapt Covariance Matrix
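        // C is blended from three terms: the old covariance (weight 1-c1-cmu), the rank-mu update built from
        // the weighted elite deviations from the old mean, and the rank-one update pc*pc^T; when hsig is 0 the
        // missing rank-one contribution is compensated by the extra (1-hsig)*cc*(2-cc)*Cold term.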
        Cold = C;
        C = (elite[0]-meanold)*(elite[0]-meanold).transpose()*weights(0);
        for (population::size_type i = 1; i<mu; ++i ) {
            C += (elite[i]-meanold)*(elite[i]-meanold).transpose()*weights(i);
        }
        C /= sigma*sigma;
        C = (1-c1-cmu) * Cold +
            cmu * C +
            c1 * ((pc * pc.transpose()) + (1-hsig) * cc * (2-cc) * Cold);

        // 6 - Adapt sigma
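        // Cumulative step-size adaptation: sigma grows when ||ps|| exceeds its expected length chiN under
        // random selection and shrinks otherwise; the exponent is capped at 0.6 to avoid huge single-step jumps.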
        sigma *= std::exp( std::min( 0.6, (cs/damps) * (ps.norm()/chiN - 1) ) );
        if ( (boost::math::isnan)(sigma) || (boost::math::isinf)(sigma) || (boost::math::isnan)(var_norm) || (boost::math::isinf)(var_norm) ) {
            std::cout << "eigen: " << es.info() << std::endl;
            std::cout << "B: " << B << std::endl;
            std::cout << "D: " << D << std::endl;
            std::cout << "Dinv: " << Dinv << std::endl;
            std::cout << "invsqrtC: " << invsqrtC << std::endl;
            pagmo_throw(value_error,"NaN!!!!! in CMAES");
        }

        // 7 - Perform eigen-decomposition of C
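        // B and D are refreshed only every lam/(c1+cmu)/N/10 evaluations, so the O(N^3) decomposition is
        // amortised and the per-generation cost stays O(N^2); until then sampling keeps using the previous B and D.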
        if ( (counteval - eigeneval) > (lam/(c1+cmu)/N/10) ) {   // achieve O(N^2)
            eigeneval = counteval;
            C = (C+C.transpose())/2;    // enforce symmetry
            es.compute(C);              // eigen decomposition
            if (es.info()==Success) {
                B = es.eigenvectors();
                D = es.eigenvalues().asDiagonal();
                for (decision_vector::size_type j = 0; j<N; ++j ) {
                    D(j,j) = std::sqrt( std::max(1e-20,D(j,j)) );   // D contains standard deviations now
                }
                for (decision_vector::size_type j = 0; j<N; ++j ) {
                    Dinv(j,j) = 1.0 / D(j,j);
                }
                invsqrtC = B*Dinv*B.transpose();
            }   // if the eigendecomposition fails just skip it and keep the previous successful one.
        }

        // 8 - We print on screen if required
        if (m_screen_output) {
            if (!(g%20)) {
                std::cout << std::endl << std::left << std::setw(20) <<
                    "Gen." << std::setw(20) <<
                    "Champion " << std::setw(20) <<
                    "Highest " << std::setw(20) <<
                    "Lowest" << std::setw(20) <<
                    "Variation" << std::setw(20) <<
                    "Step" << std::endl;
            }

            std::cout << std::left << std::setprecision(14) << std::setw(20) <<
                g << std::setw(20) <<
                pop.champion().f[0] << std::setw(20) <<
                pop.get_individual(pop.get_best_idx()).best_f[0] << std::setw(20) <<
                pop.get_individual(pop.get_worst_idx()).best_f[0] << std::setw(20) <<
                var_norm << std::setw(20) <<
                sigma << std::endl;
        }

        // Update algorithm memory
        if (m_memory) {
            m_mean = mean;
            m_variation = variation;
            m_newpop = newpop;
            m_B = B;
            m_D = D;
            m_C = C;
            m_invsqrtC = invsqrtC;
            m_pc = pc;
            m_ps = ps;
            m_counteval = counteval;
            m_eigeneval = eigeneval;
            m_sigma = sigma;
        }

    } // end loop on g
}

void cmaes::set_gen(const int gen) {m_gen = gen;}
int cmaes::get_gen() const {return m_gen;}

void cmaes::set_cc(const double cc) {m_cc = cc;}
double cmaes::get_cc() const {return m_cc;}

void cmaes::set_cs(const double cs) {m_cs = cs;}
double cmaes::get_cs() const {return m_cs;}

void cmaes::set_c1(const double c1) {m_c1 = c1;}
double cmaes::get_c1() const {return m_c1;}

void cmaes::set_cmu(const double cmu) {m_cmu = cmu;}
double cmaes::get_cmu() const {return m_cmu;}

void cmaes::set_sigma(const double sigma) {m_sigma = sigma;}
double cmaes::get_sigma() const {return m_sigma;}

void cmaes::set_ftol(const double ftol) {m_ftol = ftol;}
double cmaes::get_ftol() const {return m_ftol;}

void cmaes::set_xtol(const double xtol) {m_xtol = xtol;}
double cmaes::get_xtol() const {return m_xtol;}

std::string cmaes::get_name() const
{
    return "CMAES";
}


std::string cmaes::human_readable_extra() const
{
    std::ostringstream s;
    s << "gen:" << m_gen << ' '
      << "cc:" << m_cc << ' '
      << "cs:" << m_cs << ' '
      << "c1:" << m_c1 << ' '
      << "cmu:" << m_cmu << ' '
      << "sigma0:" << m_sigma << ' '
      << "ftol:" << m_ftol << ' '
      << "xtol:" << m_xtol << ' '
      << "memory:" << m_memory;
    return s.str();
}

}} // namespaces

BOOST_CLASS_EXPORT_IMPLEMENT(pagmo::algorithm::cmaes)
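
For context, a minimal usage sketch (not part of cmaes.cpp) of how this algorithm is typically driven from client code. Names outside this file (the pagmo.h umbrella header and its include path, problem::rosenbrock, population, set_screen_output) are recalled from PaGMO 1.x and should be checked against your own checkout:

#include <iostream>
#include "pagmo.h"   // PaGMO 1.x umbrella header; the exact include path depends on your installation

int main()
{
    // 10-dimensional Rosenbrock problem (box constrained, single objective).
    pagmo::problem::rosenbrock prob(10);

    // Random population of 20 individuals (CMA-ES requires at least 5).
    pagmo::population pop(prob, 20);

    // 500 generations; cc, cs, c1, cmu left at -1 (auto), sigma0 = 0.5, memory enabled.
    pagmo::algorithm::cmaes algo(500);
    algo.set_screen_output(true);

    // Evolve the population in place and read off the best fitness found.
    algo.evolve(pop);
    std::cout << "Champion f: " << pop.champion().f[0] << std::endl;
    return 0;
}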