00001 /* 00002 ------------------------------------------------------------------- 00003 00004 Copyright (C) 2006, 2007, Andrew W. Steiner 00005 00006 This file is part of O2scl. 00007 00008 O2scl is free software; you can redistribute it and/or modify 00009 it under the terms of the GNU General Public License as published by 00010 the Free Software Foundation; either version 3 of the License, or 00011 (at your option) any later version. 00012 00013 O2scl is distributed in the hope that it will be useful, 00014 but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00016 GNU General Public License for more details. 00017 00018 You should have received a copy of the GNU General Public License 00019 along with O2scl. If not, see <http://www.gnu.org/licenses/>. 00020 00021 ------------------------------------------------------------------- 00022 */ 00023 #ifndef O2SCL_GSL_MMIN_BFGS2_H 00024 #define O2SCL_GSL_MMIN_BFGS2_H 00025 00026 #include <gsl/gsl_blas.h> 00027 #include <gsl/gsl_poly.h> 00028 #include <gsl/gsl_multimin.h> 00029 #include <o2scl/multi_min.h> 00030 00031 #ifndef DOXYGENP 00032 namespace o2scl { 00033 #endif 00034 00035 /** 00036 \brief Virtual base for the gsl_mmin_bfgs2 wrapper 00037 00038 This is useful so that the gsl_mmin_linmin class doesn't need to 00039 depend on any template parameters, even though it will need a 00040 wrapping object as an argument for the 00041 gsl_mmin_linmin::minimize() function. 
00042 */ 00043 class gsl_mmin_wrap_base { 00044 public: 00045 virtual ~gsl_mmin_wrap_base() {} 00046 /// Function 00047 virtual double wrap_f(double alpha, void *params)=0; 00048 /// Derivative 00049 virtual double wrap_df(double alpha, void *params)=0; 00050 /// Function and derivative 00051 virtual void wrap_fdf(double alpha, void *params, double *f, double *df)=0; 00052 }; 00053 00054 /** 00055 \brief Wrapper class for the gsl_mmin_bfgs2 minimizer 00056 00057 \future There's a bit of extra vector copying here which 00058 could potentially be avoided. 00059 */ 00060 template<class param_t, class func_t, 00061 class vec_t=ovector_view, class alloc_vec_t=ovector, 00062 class alloc_t=ovector_alloc, class dfunc_t=func_t> 00063 class gsl_mmin_wrapper : public gsl_mmin_wrap_base { 00064 00065 #ifndef DOXYGEN_INTERNAL 00066 00067 protected: 00068 00069 /// Function 00070 func_t *func; 00071 00072 /// Derivative 00073 dfunc_t *dfunc; 00074 00075 /// Parameters 00076 param_t *pa; 00077 00078 /** \name fixed values 00079 */ 00080 //@{ 00081 gsl_vector *x; 00082 gsl_vector *g; 00083 gsl_vector *p; 00084 //@} 00085 00086 /** \name cached values, for x(alpha) = x + alpha * p 00087 */ 00088 //@{ 00089 double f_alpha; 00090 double df_alpha; 00091 //@} 00092 00093 /** \name cache "keys" 00094 */ 00095 //@{ 00096 double f_cache_key; 00097 double df_cache_key; 00098 double x_cache_key; 00099 double g_cache_key; 00100 //@} 00101 00102 /// Move to a new point, using the cached value if possible 00103 void moveto(double alpha) { 00104 00105 if (alpha == x_cache_key) { 00106 /* using previously cached position */ 00107 return; 00108 } 00109 00110 /* set x_alpha = x + alpha * p */ 00111 00112 gsl_vector *tmp=gsl_vector_alloc(dim); 00113 for(size_t i=0;i<dim;i++) { 00114 av_x_alpha[i]=gsl_vector_get(x,i); 00115 gsl_vector_set(tmp,i,av_x_alpha[i]); 00116 } 00117 00118 gsl_blas_daxpy(alpha,p,tmp); 00119 for(size_t i=0;i<dim;i++) { 00120 av_x_alpha[i]=gsl_vector_get(tmp,i); 00121 } 00122 
gsl_vector_free(tmp); 00123 00124 x_cache_key = alpha; 00125 } 00126 00127 /// Compute the slope 00128 double slope() { 00129 double df; 00130 gsl_vector *tmp=gsl_vector_alloc(dim); 00131 for(size_t i=0;i<dim;i++) { 00132 gsl_vector_set(tmp,i,av_g_alpha[i]); 00133 } 00134 gsl_blas_ddot(tmp,p,&df); 00135 for(size_t i=0;i<dim;i++) { 00136 av_g_alpha[i]=gsl_vector_get(tmp,i); 00137 } 00138 gsl_vector_free(tmp); 00139 return df; 00140 } 00141 00142 /// Evaluate the function 00143 virtual double wrap_f(double alpha, void *params) { 00144 if (alpha==f_cache_key) { 00145 return f_alpha; 00146 } 00147 moveto(alpha); 00148 (*func)(dim,av_x_alpha,f_alpha,*pa); 00149 00150 f_cache_key = alpha; 00151 return f_alpha; 00152 } 00153 00154 /// Evaluate the derivative 00155 virtual double wrap_df(double alpha, void *params) { 00156 00157 /* using previously cached df(alpha) */ 00158 if (alpha==df_cache_key) return df_alpha; 00159 00160 moveto(alpha); 00161 if (alpha!=g_cache_key) { 00162 simple_df(av_x_alpha,av_g_alpha); 00163 g_cache_key=alpha; 00164 } 00165 df_alpha=slope(); 00166 df_cache_key=alpha; 00167 00168 return df_alpha; 00169 } 00170 00171 /// A simple derivative 00172 int simple_df(vec_t &x2, vec_t &g2) { 00173 00174 double fv1, fv2, deriv_h=1.0e-4; 00175 00176 (*func)(dim,x2,fv1,*pa); 00177 00178 for(size_t i=0;i<dim;i++) { 00179 x2[i]+=deriv_h; 00180 (*func)(dim,x2,fv2,*pa); 00181 x2[i]-=deriv_h; 00182 g2[i]=(fv2-fv1)/deriv_h; 00183 } 00184 00185 return 0; 00186 } 00187 00188 /// Evaluate the function and the derivative 00189 virtual void wrap_fdf(double alpha, void *params, double *f, double *df) { 00190 00191 /* Check for previously cached values */ 00192 if (alpha == f_cache_key && alpha == df_cache_key) { 00193 *f = f_alpha; 00194 *df = df_alpha; 00195 return; 00196 } 00197 if (alpha == f_cache_key || alpha == df_cache_key) { 00198 *f = wrap_f (alpha, params); 00199 *df = wrap_df (alpha, params); 00200 return; 00201 } 00202 00203 moveto(alpha); 00204 
(*func)(dim,av_x_alpha,f_alpha,*pa); 00205 simple_df(av_x_alpha,av_g_alpha); 00206 f_cache_key = alpha; 00207 g_cache_key = alpha; 00208 00209 df_alpha = slope(); 00210 df_cache_key = alpha; 00211 00212 *f = f_alpha; 00213 *df = df_alpha; 00214 00215 return; 00216 } 00217 00218 #endif 00219 00220 public: 00221 00222 /// Temporary storage 00223 alloc_vec_t av_x_alpha; 00224 00225 /// Temporary storage 00226 alloc_vec_t av_g_alpha; 00227 00228 /// Number of minimization dimensions 00229 size_t dim; 00230 00231 /// Initialize wrapper 00232 void prepare_wrapper(func_t &ufunc, param_t &upa, gsl_vector *t_x, 00233 double f, gsl_vector *t_g, gsl_vector *t_p) { 00234 00235 func=&ufunc; 00236 //dfunc=&udfunc; 00237 pa=&upa; 00238 00239 x=t_x; 00240 g=t_g; 00241 p=t_p; 00242 00243 x_cache_key=0.0; 00244 f_cache_key=0.0; 00245 g_cache_key=0.0; 00246 df_cache_key=0.0; 00247 00248 for(size_t i=0;i<dim;i++) { 00249 av_x_alpha[i]=gsl_vector_get(x,i); 00250 av_g_alpha[i]=gsl_vector_get(g,i); 00251 } 00252 00253 f_alpha=f; 00254 df_alpha=slope(); 00255 00256 return; 00257 } 00258 00259 /// Update position 00260 void update_position(double alpha, gsl_vector *t_x, double *t_f, 00261 gsl_vector *t_g) { 00262 00263 /* ensure that everything is fully cached */ 00264 { 00265 double t_f_alpha, t_df_alpha; 00266 wrap_fdf(alpha,0,&t_f_alpha,&t_df_alpha); 00267 } 00268 00269 *t_f = f_alpha; 00270 for(size_t i=0;i<dim;i++) { 00271 gsl_vector_set(t_x,i,av_x_alpha[i]); 00272 gsl_vector_set(t_g,i,av_g_alpha[i]); 00273 } 00274 } 00275 00276 /// Convert cache values to the new minimizer direction 00277 void change_direction() { 00278 00279 /* Convert the cache values from the end of the current minimisation 00280 to those needed for the start of the next minimisation, alpha=0 */ 00281 00282 /* The new x_alpha for alpha=0 is the current position and the 00283 new g_alpha for alpha=0 is the current gradient at the 00284 endpoint */ 00285 for(size_t i=0;i<dim;i++) { 00286 
av_x_alpha[i]=gsl_vector_get(x,i); 00287 av_g_alpha[i]=gsl_vector_get(g,i); 00288 } 00289 x_cache_key = 0.0; 00290 g_cache_key = 0.0; 00291 00292 /* The function value does not change */ 00293 f_cache_key = 0.0; 00294 00295 /* Calculate the slope along the new direction vector, p */ 00296 df_alpha = slope (); 00297 df_cache_key = 0.0; 00298 00299 return; 00300 } 00301 00302 }; 00303 00304 /** 00305 \brief The line minimizer for gsl_mmin_bfgs2 00306 */ 00307 class gsl_mmin_linmin { 00308 00309 #ifndef DOXYGEN_INTERNAL 00310 00311 protected: 00312 00313 /** 00314 \brief Minimize the interpolating quadratic 00315 00316 Find a minimum in x=[0,1] of the interpolating quadratic through 00317 (0,f0) (1,f1) with derivative fp0 at x=0. The interpolating 00318 polynomial is q(x) = f0 + fp0 * z + (f1-f0-fp0) * z^2 00319 */ 00320 double interp_quad(double f0, double fp0, double f1, double zl, 00321 double zh); 00322 00323 /** 00324 \brief Minimize the interpolating cubic 00325 00326 Find a minimum in x=[0,1] of the interpolating cubic through 00327 (0,f0) (1,f1) with derivatives fp0 at x=0 and fp1 at x=1. 00328 00329 The interpolating polynomial is: 00330 00331 c(x) = f0 + fp0 * z + eta * z^2 + xi * z^3 00332 00333 where eta=3*(f1-f0)-2*fp0-fp1, xi=fp0+fp1-2*(f1-f0). 
00334 */ 00335 double cubic(double c0, double c1, double c2, double c3, double z); 00336 00337 /// Test to see curvature is positive 00338 void check_extremum(double c0, double c1, double c2, double c3, double z, 00339 double *zmin, double *fmin); 00340 00341 /// Interpolate using a cubic 00342 double interp_cubic(double f0, double fp0, double f1, 00343 double fp1, double zl, double zh); 00344 00345 /// Perform the interpolation 00346 double interpolate(double a, double fa, double fpa, double b, 00347 double fb, double fpb, double xmin, double xmax, 00348 int order); 00349 #endif 00350 00351 public: 00352 00353 /** 00354 \brief The line minimization 00355 00356 recommended values from Fletcher are 00357 rho = 0.01, sigma = 0.1, tau1 = 9, tau2 = 0.05, tau3 = 0.5 00358 */ 00359 int minimize(gsl_mmin_wrap_base &wrap, double rho, double sigma, 00360 double tau1, double tau2, double tau3, 00361 int order, double alpha1, double *alpha_new); 00362 }; 00363 00364 /** \brief Multidimensional minimization by the BFGS 00365 algorithm (GSL) 00366 00367 This class includes the optimizations from the GSL minimizer \c 00368 vector_bfgs2. 
00369 */ 00370 template<class param_t, class func_t, 00371 class vec_t=ovector_view, class alloc_vec_t=ovector, 00372 class alloc_t=ovector_alloc, class dfunc_t=func_t> 00373 class gsl_mmin_bfgs2 : public multi_min<param_t,func_t,func_t,vec_t> { 00374 00375 #ifndef DOXYGEN_INTERNAL 00376 00377 protected: 00378 00379 /// \name The original variables from the GSL state structure 00380 //@{ 00381 int iter; 00382 double step; 00383 double g0norm; 00384 double pnorm; 00385 double delta_f; 00386 /* f'(0) for f(x-alpha*p) */ 00387 double fp0; 00388 gsl_vector *x0; 00389 gsl_vector *g0; 00390 gsl_vector *p; 00391 /* work space */ 00392 gsl_vector *dx0; 00393 gsl_vector *dg0; 00394 /* wrapper function */ 00395 gsl_mmin_wrapper<param_t,func_t,vec_t,alloc_vec_t,alloc_t,dfunc_t> wrap; 00396 /* minimization parameters */ 00397 double rho; 00398 double sigma; 00399 double tau1; 00400 double tau2; 00401 double tau3; 00402 int order; 00403 //@} 00404 00405 /// The line minimizer 00406 gsl_mmin_linmin lm; 00407 00408 /// \name Store the arguments to set() so we can use them for iterate() 00409 //@{ 00410 vec_t *st_x; 00411 gsl_vector *st_dx; 00412 gsl_vector *st_grad; 00413 double st_f; 00414 //@} 00415 00416 /// Memory size 00417 size_t dim; 00418 00419 /// Memory allocation 00420 alloc_t ao; 00421 00422 #endif 00423 00424 public: 00425 00426 gsl_mmin_bfgs2() { 00427 lmin_tol=1.0e-4; 00428 this->tolf=1.0e-3; 00429 step_size=0.01; 00430 } 00431 00432 virtual ~gsl_mmin_bfgs2() {} 00433 00434 /// Perform an iteration 00435 virtual int iterate() { 00436 00437 double alpha = 0.0, alpha1; 00438 00439 double pg, dir; 00440 int status; 00441 00442 double f0 = st_f; 00443 00444 if (pnorm == 0.0 || g0norm == 0.0 || fp0 == 0) 00445 { 00446 gsl_vector_set_zero (st_dx); 00447 return GSL_ENOPROG; 00448 } 00449 00450 if (delta_f < 0) { 00451 double del = GSL_MAX_DBL (-delta_f, 10 * GSL_DBL_EPSILON * fabs(f0)); 00452 alpha1 = GSL_MIN_DBL (1.0, 2.0 * del / (- fp0)); 00453 } else { 00454 alpha1 = 
fabs( step); 00455 } 00456 00457 /* line minimisation, with cubic interpolation (order = 3) */ 00458 00459 status=lm.minimize(wrap,rho,sigma,tau1,tau2,tau3,order, 00460 alpha1,&alpha); 00461 00462 if (status != GSL_SUCCESS) { 00463 return status; 00464 } 00465 00466 wrap.update_position(alpha,st_x,&st_f,st_grad); 00467 00468 delta_f = st_f - f0; 00469 00470 /* Choose a new direction for the next step */ 00471 00472 { 00473 /* This is the BFGS update: */ 00474 /* p' = g1 - A dx - B dg */ 00475 /* A = - (1+ dg.dg/dx.dg) B + dg.g/dx.dg */ 00476 /* B = dx.g/dx.dg */ 00477 00478 double dxg, dgg, dxdg, dgnorm, A, B; 00479 00480 /* dx0 = x - x0 */ 00481 gsl_vector_memcpy (dx0, st_x); 00482 gsl_blas_daxpy (-1.0, x0, dx0); 00483 00484 gsl_vector_memcpy (st_dx, dx0); /* keep a copy */ 00485 00486 /* dg0 = g - g0 */ 00487 gsl_vector_memcpy (dg0, st_grad); 00488 gsl_blas_daxpy (-1.0, g0, dg0); 00489 00490 gsl_blas_ddot (dx0, st_grad, &dxg); 00491 gsl_blas_ddot (dg0, st_grad, &dgg); 00492 gsl_blas_ddot (dx0, dg0, &dxdg); 00493 00494 dgnorm = gsl_blas_dnrm2 (dg0); 00495 00496 if (dxdg != 0) { 00497 B = dxg / dxdg; 00498 A = -(1.0 + dgnorm * dgnorm / dxdg) * B + dgg / dxdg; 00499 } else { 00500 B = 0; 00501 A = 0; 00502 } 00503 00504 gsl_vector_memcpy (p, st_grad); 00505 gsl_blas_daxpy (-A, dx0, p); 00506 gsl_blas_daxpy (-B, dg0, p); 00507 } 00508 00509 gsl_vector_memcpy (g0, st_grad); 00510 gsl_vector_memcpy (x0, st_x); 00511 g0norm = gsl_blas_dnrm2 (g0); 00512 pnorm = gsl_blas_dnrm2 (p); 00513 /* update direction and fp0 */ 00514 00515 gsl_blas_ddot (p, st_grad, &pg); 00516 dir = (pg >= 0.0) ? 
-1.0 : +1.0; 00517 gsl_blas_dscal (dir / pnorm, p); 00518 pnorm = gsl_blas_dnrm2 (p); 00519 gsl_blas_ddot (p, g0, & fp0); 00520 00521 wrap.change_direction(); 00522 00523 return GSL_SUCCESS; 00524 00525 } 00526 00527 /// Return string denoting type("gsl_mmin_bfgs2") 00528 virtual const char *type() { return "gsl_mmin_bfgs2";} 00529 00530 /// Allocate the memory 00531 virtual int allocate(size_t n) { 00532 00533 p = gsl_vector_calloc (n); 00534 00535 if ( p == 0) { 00536 GSL_ERROR ("failed to allocate space for p", GSL_ENOMEM); 00537 } 00538 00539 x0 = gsl_vector_calloc (n); 00540 00541 if ( x0 == 0) { 00542 gsl_vector_free ( p); 00543 GSL_ERROR ("failed to allocate space for g0", GSL_ENOMEM); 00544 } 00545 00546 g0 = gsl_vector_calloc (n); 00547 00548 if ( g0 == 0) { 00549 gsl_vector_free ( x0); 00550 gsl_vector_free ( p); 00551 GSL_ERROR ("failed to allocate space for g0", GSL_ENOMEM); 00552 } 00553 00554 dx0 = gsl_vector_calloc (n); 00555 00556 if ( dx0 == 0) { 00557 gsl_vector_free ( g0); 00558 gsl_vector_free ( x0); 00559 gsl_vector_free ( p); 00560 GSL_ERROR ("failed to allocate space for g0", GSL_ENOMEM); 00561 } 00562 00563 dg0 = gsl_vector_calloc (n); 00564 00565 if ( dg0 == 0) { 00566 gsl_vector_free ( dx0); 00567 gsl_vector_free ( g0); 00568 gsl_vector_free ( x0); 00569 gsl_vector_free ( p); 00570 GSL_ERROR ("failed to allocate space for g0", GSL_ENOMEM); 00571 } 00572 00573 st_dx = gsl_vector_alloc(n); 00574 st_grad = gsl_vector_alloc(n); 00575 00576 ao.allocate(wrap.av_x_alpha,n); 00577 ao.allocate(wrap.av_g_alpha,n); 00578 wrap.dim=n; 00579 dim=n; 00580 00581 return GSL_SUCCESS; 00582 } 00583 00584 /// Free the allocated memory 00585 virtual int free() { 00586 ao.free(wrap.av_x_alpha); 00587 ao.free(wrap.av_g_alpha); 00588 gsl_vector_free(dg0); 00589 gsl_vector_free(dx0); 00590 gsl_vector_free(g0); 00591 gsl_vector_free(x0); 00592 gsl_vector_free(p); 00593 gsl_vector_free(st_dx); 00594 gsl_vector_free(st_grad); 00595 wrap.dim=0; 00596 dim=0; 00597 
return 0; 00598 } 00599 00600 /// Reset the minimizer to use the current point as a new starting point 00601 int restart() { 00602 iter = 0; 00603 return gsl_success; 00604 } 00605 00606 /// Set the function and initial guess 00607 virtual int set(vec_t &x, double u_step_size, double tol_u, 00608 func_t &ufunc, param_t &upa) { 00609 00610 iter = 0; 00611 step = u_step_size; 00612 delta_f = 0; 00613 00614 st_x=&x; 00615 00616 ufunc(dim,x,st_f,upa); 00617 { 00618 double fv2, deriv_h=1.0e-4; 00619 00620 for(size_t i=0;i<dim;i++) { 00621 x[i]+=deriv_h; 00622 ufunc(dim,x,fv2,upa); 00623 x[i]-=deriv_h; 00624 gsl_vector_set(st_grad,i,(fv2-st_f)/deriv_h); 00625 } 00626 } 00627 00628 /* Use the gradient as the initial direction */ 00629 00630 for(size_t i=0;i<dim;i++) { 00631 gsl_vector_set(x0,i,x[i]); 00632 } 00633 gsl_vector_memcpy(g0,st_grad); 00634 g0norm = gsl_blas_dnrm2 ( g0); 00635 00636 gsl_vector_memcpy(p,st_grad); 00637 gsl_blas_dscal(-1/g0norm,p); 00638 pnorm = gsl_blas_dnrm2 ( p); /* should be 1 */ 00639 fp0 = - g0norm; 00640 00641 /* Prepare the wrapper */ 00642 00643 wrap.prepare_wrapper(ufunc,upa,x0,st_f,g0,p); 00644 00645 /* Prepare 1d minimisation parameters */ 00646 00647 rho = 0.01; 00648 sigma = tol_u; 00649 tau1 = 9; 00650 tau2 = 0.05; 00651 tau3 = 0.5; 00652 order = 3; /* use cubic interpolation where possible */ 00653 00654 return GSL_SUCCESS; 00655 00656 } 00657 00658 /// The size of the first trial step 00659 double step_size; 00660 00661 /// The tolerance for the 1-dimensional minimizer 00662 double lmin_tol; 00663 00664 /** \brief Calculate the minimum \c min of \c func w.r.t the 00665 array \c x of size \c nvar. 
00666 */ 00667 virtual int mmin(size_t nn, vec_t &xx, double &fmin, param_t &pa, 00668 func_t &ufunc) { 00669 00670 int xiter=0, status; 00671 00672 allocate(nn); 00673 00674 set(xx,step_size,lmin_tol,ufunc,pa); 00675 00676 do { 00677 xiter++; 00678 00679 status=iterate(); 00680 00681 if (status) { 00682 break; 00683 } 00684 00685 // Equivalent to gsl_multimin_test_gradient with 00686 // additional code to print out present iteration 00687 00688 double norm=gsl_blas_dnrm2(st_grad); 00689 00690 if(this->verbose>0) { 00691 this->print_iter(nn,*st_x,st_f,xiter, 00692 norm,this->tolf,"gsl_mmin_bfgs2"); 00693 } 00694 00695 if (norm<this->tolf) status=gsl_success; 00696 else status=gsl_continue; 00697 00698 } 00699 while (status == GSL_CONTINUE && xiter < this->ntrial); 00700 00701 for(size_t i=0;i<nn;i++) xx[i]=(*st_x)[i]; 00702 00703 fmin=st_f; 00704 00705 free(); 00706 00707 this->last_ntrial=xiter; 00708 00709 return status; 00710 } 00711 00712 }; 00713 00714 #ifndef DOXYGENP 00715 } 00716 #endif 00717 00718 #endif
Documentation generated with Doxygen and provided under the GNU Free Documentation License. See License Information for details.
Project hosting provided by SourceForge, O2scl Sourceforge Project Page