00001 /* 00002 ------------------------------------------------------------------- 00003 00004 Copyright (C) 2006, 2007, 2008, Andrew W. Steiner 00005 00006 This file is part of O2scl. 00007 00008 O2scl is free software; you can redistribute it and/or modify 00009 it under the terms of the GNU General Public License as published by 00010 the Free Software Foundation; either version 3 of the License, or 00011 (at your option) any later version. 00012 00013 O2scl is distributed in the hope that it will be useful, 00014 but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00016 GNU General Public License for more details. 00017 00018 You should have received a copy of the GNU General Public License 00019 along with O2scl. If not, see <http://www.gnu.org/licenses/>. 00020 00021 ------------------------------------------------------------------- 00022 */ 00023 #ifndef O2SCL_GSL_MMIN_BFGS2_H 00024 #define O2SCL_GSL_MMIN_BFGS2_H 00025 00026 #include <gsl/gsl_blas.h> 00027 #include <gsl/gsl_poly.h> 00028 #include <gsl/gsl_multimin.h> 00029 #include <o2scl/multi_min.h> 00030 00031 #ifndef DOXYGENP 00032 namespace o2scl { 00033 #endif 00034 00035 /** 00036 \brief Virtual base for the gsl_mmin_bfgs2 wrapper 00037 00038 This is useful so that the gsl_mmin_linmin class doesn't need to 00039 depend on any template parameters, even though it will need a 00040 wrapping object as an argument for the 00041 gsl_mmin_linmin::minimize() function. 
00042 */ 00043 class gsl_mmin_wrap_base { 00044 public: 00045 virtual ~gsl_mmin_wrap_base() {} 00046 /// Function 00047 virtual double wrap_f(double alpha, void *params)=0; 00048 /// Derivative 00049 virtual double wrap_df(double alpha, void *params)=0; 00050 /// Function and derivative 00051 virtual void wrap_fdf(double alpha, void *params, double *f, double *df)=0; 00052 }; 00053 00054 /** 00055 \brief Wrapper class for the gsl_mmin_bfgs2 minimizer 00056 00057 \future There's a bit of extra vector copying here which 00058 could potentially be avoided. 00059 */ 00060 template<class param_t, class func_t, 00061 class vec_t=ovector_view, class alloc_vec_t=ovector, 00062 class alloc_t=ovector_alloc, class dfunc_t=func_t> 00063 class gsl_mmin_wrapper : public gsl_mmin_wrap_base { 00064 00065 #ifndef DOXYGEN_INTERNAL 00066 00067 protected: 00068 00069 /// Function 00070 func_t *func; 00071 00072 /// Derivative 00073 dfunc_t *dfunc; 00074 00075 /// Parameters 00076 param_t *pa; 00077 00078 /** \name fixed values 00079 */ 00080 //@{ 00081 gsl_vector *x; 00082 gsl_vector *g; 00083 gsl_vector *p; 00084 //@} 00085 00086 /** \name cached values, for x(alpha) = x + alpha * p 00087 */ 00088 //@{ 00089 double f_alpha; 00090 double df_alpha; 00091 //@} 00092 00093 /** \name cache "keys" 00094 */ 00095 //@{ 00096 double f_cache_key; 00097 double df_cache_key; 00098 double x_cache_key; 00099 double g_cache_key; 00100 //@} 00101 00102 /// Move to a new point, using the cached value if possible 00103 void moveto(double alpha) { 00104 00105 if (alpha == x_cache_key) { 00106 /* using previously cached position */ 00107 return; 00108 } 00109 00110 /* set x_alpha = x + alpha * p */ 00111 00112 gsl_vector *tmp=gsl_vector_alloc(dim); 00113 for(size_t i=0;i<dim;i++) { 00114 av_x_alpha[i]=gsl_vector_get(x,i); 00115 gsl_vector_set(tmp,i,av_x_alpha[i]); 00116 } 00117 00118 gsl_blas_daxpy(alpha,p,tmp); 00119 for(size_t i=0;i<dim;i++) { 00120 av_x_alpha[i]=gsl_vector_get(tmp,i); 00121 } 00122 
gsl_vector_free(tmp); 00123 00124 x_cache_key = alpha; 00125 } 00126 00127 /// Compute the slope 00128 double slope() { 00129 double df; 00130 gsl_vector *tmp=gsl_vector_alloc(dim); 00131 for(size_t i=0;i<dim;i++) { 00132 gsl_vector_set(tmp,i,av_g_alpha[i]); 00133 } 00134 gsl_blas_ddot(tmp,p,&df); 00135 for(size_t i=0;i<dim;i++) { 00136 av_g_alpha[i]=gsl_vector_get(tmp,i); 00137 } 00138 gsl_vector_free(tmp); 00139 return df; 00140 } 00141 00142 /// Evaluate the function 00143 virtual double wrap_f(double alpha, void *params) { 00144 if (alpha==f_cache_key) { 00145 return f_alpha; 00146 } 00147 moveto(alpha); 00148 (*func)(dim,av_x_alpha,f_alpha,*pa); 00149 00150 f_cache_key = alpha; 00151 return f_alpha; 00152 } 00153 00154 /// Evaluate the derivative 00155 virtual double wrap_df(double alpha, void *params) { 00156 00157 /* using previously cached df(alpha) */ 00158 if (alpha==df_cache_key) return df_alpha; 00159 00160 moveto(alpha); 00161 if (alpha!=g_cache_key) { 00162 simple_df(av_x_alpha,av_g_alpha); 00163 g_cache_key=alpha; 00164 } 00165 df_alpha=slope(); 00166 df_cache_key=alpha; 00167 00168 return df_alpha; 00169 } 00170 00171 /// A simple derivative 00172 int simple_df(vec_t &x2, vec_t &g2) { 00173 00174 double fv1, fv2, deriv_h=1.0e-4; 00175 00176 (*func)(dim,x2,fv1,*pa); 00177 00178 for(size_t i=0;i<dim;i++) { 00179 x2[i]+=deriv_h; 00180 (*func)(dim,x2,fv2,*pa); 00181 x2[i]-=deriv_h; 00182 g2[i]=(fv2-fv1)/deriv_h; 00183 } 00184 00185 return 0; 00186 } 00187 00188 /// Evaluate the function and the derivative 00189 virtual void wrap_fdf(double alpha, void *params, double *f, double *df) { 00190 00191 /* Check for previously cached values */ 00192 if (alpha == f_cache_key && alpha == df_cache_key) { 00193 *f = f_alpha; 00194 *df = df_alpha; 00195 return; 00196 } 00197 if (alpha == f_cache_key || alpha == df_cache_key) { 00198 *f = wrap_f (alpha, params); 00199 *df = wrap_df (alpha, params); 00200 return; 00201 } 00202 00203 moveto(alpha); 00204 
(*func)(dim,av_x_alpha,f_alpha,*pa); 00205 simple_df(av_x_alpha,av_g_alpha); 00206 f_cache_key = alpha; 00207 g_cache_key = alpha; 00208 00209 df_alpha = slope(); 00210 df_cache_key = alpha; 00211 00212 *f = f_alpha; 00213 *df = df_alpha; 00214 00215 return; 00216 } 00217 00218 #endif 00219 00220 public: 00221 00222 /// Temporary storage 00223 alloc_vec_t av_x_alpha; 00224 00225 /// Temporary storage 00226 alloc_vec_t av_g_alpha; 00227 00228 /// Number of minimization dimensions 00229 size_t dim; 00230 00231 /// Initialize wrapper 00232 void prepare_wrapper(func_t &ufunc, param_t &upa, gsl_vector *t_x, 00233 double f, gsl_vector *t_g, gsl_vector *t_p) { 00234 00235 func=&ufunc; 00236 //dfunc=&udfunc; 00237 pa=&upa; 00238 00239 x=t_x; 00240 g=t_g; 00241 p=t_p; 00242 00243 x_cache_key=0.0; 00244 f_cache_key=0.0; 00245 g_cache_key=0.0; 00246 df_cache_key=0.0; 00247 00248 for(size_t i=0;i<dim;i++) { 00249 av_x_alpha[i]=gsl_vector_get(x,i); 00250 av_g_alpha[i]=gsl_vector_get(g,i); 00251 } 00252 00253 f_alpha=f; 00254 df_alpha=slope(); 00255 00256 return; 00257 } 00258 00259 /// Update position 00260 void update_position(double alpha, gsl_vector *t_x, double *t_f, 00261 gsl_vector *t_g) { 00262 00263 /* ensure that everything is fully cached */ 00264 { 00265 double t_f_alpha, t_df_alpha; 00266 wrap_fdf(alpha,0,&t_f_alpha,&t_df_alpha); 00267 } 00268 00269 *t_f = f_alpha; 00270 for(size_t i=0;i<dim;i++) { 00271 gsl_vector_set(t_x,i,av_x_alpha[i]); 00272 gsl_vector_set(t_g,i,av_g_alpha[i]); 00273 } 00274 } 00275 00276 /// Convert cache values to the new minimizer direction 00277 void change_direction() { 00278 00279 /* Convert the cache values from the end of the current minimisation 00280 to those needed for the start of the next minimisation, alpha=0 */ 00281 00282 /* The new x_alpha for alpha=0 is the current position and the 00283 new g_alpha for alpha=0 is the current gradient at the 00284 endpoint */ 00285 for(size_t i=0;i<dim;i++) { 00286 
av_x_alpha[i]=gsl_vector_get(x,i); 00287 av_g_alpha[i]=gsl_vector_get(g,i); 00288 } 00289 x_cache_key = 0.0; 00290 g_cache_key = 0.0; 00291 00292 /* The function value does not change */ 00293 f_cache_key = 0.0; 00294 00295 /* Calculate the slope along the new direction vector, p */ 00296 df_alpha = slope (); 00297 df_cache_key = 0.0; 00298 00299 return; 00300 } 00301 00302 }; 00303 00304 /** 00305 \brief The line minimizer for gsl_mmin_bfgs2 00306 */ 00307 class gsl_mmin_linmin { 00308 00309 #ifndef DOXYGEN_INTERNAL 00310 00311 protected: 00312 00313 /** 00314 \brief Minimize the interpolating quadratic 00315 00316 Find a minimum in x=[0,1] of the interpolating quadratic through 00317 (0,f0) (1,f1) with derivative fp0 at x=0. The interpolating 00318 polynomial is q(x) = f0 + fp0 * z + (f1-f0-fp0) * z^2 00319 */ 00320 double interp_quad(double f0, double fp0, double f1, double zl, 00321 double zh); 00322 00323 /** 00324 \brief Minimize the interpolating cubic 00325 00326 Find a minimum in x=[0,1] of the interpolating cubic through 00327 (0,f0) (1,f1) with derivatives fp0 at x=0 and fp1 at x=1. 00328 00329 The interpolating polynomial is: 00330 00331 c(x) = f0 + fp0 * z + eta * z^2 + xi * z^3 00332 00333 where eta=3*(f1-f0)-2*fp0-fp1, xi=fp0+fp1-2*(f1-f0). 
00334 */ 00335 double cubic(double c0, double c1, double c2, double c3, double z); 00336 00337 /// Test to see curvature is positive 00338 void check_extremum(double c0, double c1, double c2, double c3, double z, 00339 double *zmin, double *fmin); 00340 00341 /// Interpolate using a cubic 00342 double interp_cubic(double f0, double fp0, double f1, 00343 double fp1, double zl, double zh); 00344 00345 /// Perform the interpolation 00346 double interpolate(double a, double fa, double fpa, double b, 00347 double fb, double fpb, double xmin, double xmax, 00348 int order); 00349 #endif 00350 00351 public: 00352 00353 /** 00354 \brief The line minimization 00355 00356 recommended values from Fletcher are 00357 rho = 0.01, sigma = 0.1, tau1 = 9, tau2 = 0.05, tau3 = 0.5 00358 */ 00359 int minimize(gsl_mmin_wrap_base &wrap, double rho, double sigma, 00360 double tau1, double tau2, double tau3, 00361 int order, double alpha1, double *alpha_new); 00362 }; 00363 00364 /** \brief Multidimensional minimization by the BFGS 00365 algorithm (GSL) 00366 00367 This class includes the optimizations from the GSL minimizer \c 00368 vector_bfgs2. 
00369 */ 00370 template<class param_t, class func_t, 00371 class vec_t=ovector_view, class alloc_vec_t=ovector, 00372 class alloc_t=ovector_alloc, class dfunc_t=func_t> 00373 class gsl_mmin_bfgs2 : public multi_min<param_t,func_t,func_t,vec_t> { 00374 00375 #ifndef DOXYGEN_INTERNAL 00376 00377 protected: 00378 00379 /// \name The original variables from the GSL state structure 00380 //@{ 00381 int iter; 00382 double step; 00383 double g0norm; 00384 double pnorm; 00385 double delta_f; 00386 /* f'(0) for f(x-alpha*p) */ 00387 double fp0; 00388 gsl_vector *x0; 00389 gsl_vector *g0; 00390 gsl_vector *p; 00391 /* work space */ 00392 gsl_vector *dx0; 00393 gsl_vector *dg0; 00394 /* wrapper function */ 00395 gsl_mmin_wrapper<param_t,func_t,vec_t,alloc_vec_t,alloc_t,dfunc_t> wrap; 00396 /* minimization parameters */ 00397 double rho; 00398 double sigma; 00399 double tau1; 00400 double tau2; 00401 double tau3; 00402 int order; 00403 //@} 00404 00405 /// The line minimizer 00406 gsl_mmin_linmin lm; 00407 00408 /// \name Store the arguments to set() so we can use them for iterate() 00409 //@{ 00410 vec_t *st_x; 00411 gsl_vector *st_dx; 00412 gsl_vector *st_grad; 00413 double st_f; 00414 //@} 00415 00416 /// Memory size 00417 size_t dim; 00418 00419 /// Memory allocation 00420 alloc_t ao; 00421 00422 #endif 00423 00424 public: 00425 00426 gsl_mmin_bfgs2() { 00427 lmin_tol=1.0e-4; 00428 this->tolf=1.0e-3; 00429 step_size=0.01; 00430 } 00431 00432 virtual ~gsl_mmin_bfgs2() {} 00433 00434 /// Perform an iteration 00435 virtual int iterate() { 00436 00437 double alpha = 0.0, alpha1; 00438 00439 double pg, dir; 00440 int status; 00441 00442 double f0 = st_f; 00443 00444 if (pnorm == 0.0 || g0norm == 0.0 || fp0 == 0) { 00445 gsl_vector_set_zero (st_dx); 00446 return GSL_ENOPROG; 00447 } 00448 00449 if (delta_f < 0) { 00450 double del = GSL_MAX_DBL (-delta_f, 10 * GSL_DBL_EPSILON * fabs(f0)); 00451 alpha1 = GSL_MIN_DBL (1.0, 2.0 * del / (- fp0)); 00452 } else { 00453 alpha1 = fabs( 
step); 00454 } 00455 00456 /* line minimisation, with cubic interpolation (order = 3) */ 00457 00458 status=lm.minimize(wrap,rho,sigma,tau1,tau2,tau3,order, 00459 alpha1,&alpha); 00460 00461 if (status != GSL_SUCCESS) { 00462 return status; 00463 } 00464 00465 wrap.update_position(alpha,st_x,&st_f,st_grad); 00466 00467 delta_f = st_f - f0; 00468 00469 /* Choose a new direction for the next step */ 00470 00471 { 00472 /* This is the BFGS update: */ 00473 /* p' = g1 - A dx - B dg */ 00474 /* A = - (1+ dg.dg/dx.dg) B + dg.g/dx.dg */ 00475 /* B = dx.g/dx.dg */ 00476 00477 double dxg, dgg, dxdg, dgnorm, A, B; 00478 00479 /* dx0 = x - x0 */ 00480 gsl_vector_memcpy (dx0, st_x); 00481 gsl_blas_daxpy (-1.0, x0, dx0); 00482 00483 gsl_vector_memcpy (st_dx, dx0); /* keep a copy */ 00484 00485 /* dg0 = g - g0 */ 00486 gsl_vector_memcpy (dg0, st_grad); 00487 gsl_blas_daxpy (-1.0, g0, dg0); 00488 00489 gsl_blas_ddot (dx0, st_grad, &dxg); 00490 gsl_blas_ddot (dg0, st_grad, &dgg); 00491 gsl_blas_ddot (dx0, dg0, &dxdg); 00492 00493 dgnorm = gsl_blas_dnrm2 (dg0); 00494 00495 if (dxdg != 0) { 00496 B = dxg / dxdg; 00497 A = -(1.0 + dgnorm * dgnorm / dxdg) * B + dgg / dxdg; 00498 } else { 00499 B = 0; 00500 A = 0; 00501 } 00502 00503 gsl_vector_memcpy (p, st_grad); 00504 gsl_blas_daxpy (-A, dx0, p); 00505 gsl_blas_daxpy (-B, dg0, p); 00506 } 00507 00508 gsl_vector_memcpy (g0, st_grad); 00509 gsl_vector_memcpy (x0, st_x); 00510 g0norm = gsl_blas_dnrm2 (g0); 00511 pnorm = gsl_blas_dnrm2 (p); 00512 /* update direction and fp0 */ 00513 00514 gsl_blas_ddot (p, st_grad, &pg); 00515 dir = (pg >= 0.0) ? 
-1.0 : +1.0; 00516 gsl_blas_dscal (dir / pnorm, p); 00517 pnorm = gsl_blas_dnrm2 (p); 00518 gsl_blas_ddot (p, g0, & fp0); 00519 00520 wrap.change_direction(); 00521 00522 return GSL_SUCCESS; 00523 00524 } 00525 00526 /// Return string denoting type("gsl_mmin_bfgs2") 00527 virtual const char *type() { return "gsl_mmin_bfgs2";} 00528 00529 /// Allocate the memory 00530 virtual int allocate(size_t n) { 00531 00532 p=gsl_vector_calloc(n); 00533 if (p == 0) { 00534 set_err_ret("Failed to allocate p in gsl_mmin_bfgs2::allocate().", 00535 gsl_enomem); 00536 } 00537 00538 x0=gsl_vector_calloc(n); 00539 if (x0 == 0) { 00540 gsl_vector_free(p); 00541 set_err_ret("Failed to allocate x0 in gsl_mmin_bfgs2::allocate().", 00542 gsl_enomem); 00543 } 00544 00545 g0=gsl_vector_calloc(n); 00546 if (g0 == 0) { 00547 gsl_vector_free(x0); 00548 gsl_vector_free(p); 00549 set_err_ret("Failed to allocate g0 in gsl_mmin_bfgs2::allocate().", 00550 gsl_enomem); 00551 } 00552 00553 dx0=gsl_vector_calloc(n); 00554 if (dx0 == 0) { 00555 gsl_vector_free(g0); 00556 gsl_vector_free(x0); 00557 gsl_vector_free(p); 00558 set_err_ret("Failed to allocate dx0 in gsl_mmin_bfgs2::allocate().", 00559 gsl_enomem); 00560 } 00561 00562 dg0=gsl_vector_calloc(n); 00563 if (dg0 == 0) { 00564 gsl_vector_free(dx0); 00565 gsl_vector_free(g0); 00566 gsl_vector_free(x0); 00567 gsl_vector_free(p); 00568 set_err_ret("Failed to allocate dg0 in gsl_mmin_bfgs2::allocate().", 00569 gsl_enomem); 00570 } 00571 00572 st_dx=gsl_vector_alloc(n); 00573 st_grad=gsl_vector_alloc(n); 00574 00575 ao.allocate(wrap.av_x_alpha,n); 00576 ao.allocate(wrap.av_g_alpha,n); 00577 wrap.dim=n; 00578 dim=n; 00579 00580 return GSL_SUCCESS; 00581 } 00582 00583 /// Free the allocated memory 00584 virtual int free() { 00585 ao.free(wrap.av_x_alpha); 00586 ao.free(wrap.av_g_alpha); 00587 gsl_vector_free(dg0); 00588 gsl_vector_free(dx0); 00589 gsl_vector_free(g0); 00590 gsl_vector_free(x0); 00591 gsl_vector_free(p); 00592 gsl_vector_free(st_dx); 00593 
gsl_vector_free(st_grad); 00594 wrap.dim=0; 00595 dim=0; 00596 return 0; 00597 } 00598 00599 /// Reset the minimizer to use the current point as a new starting point 00600 int restart() { 00601 iter=0; 00602 return gsl_success; 00603 } 00604 00605 /// Set the function and initial guess 00606 virtual int set(vec_t &x, double u_step_size, double tol_u, 00607 func_t &ufunc, param_t &upa) { 00608 00609 iter=0; 00610 step=u_step_size; 00611 delta_f=0; 00612 00613 st_x=&x; 00614 00615 ufunc(dim,x,st_f,upa); 00616 { 00617 double fv2, deriv_h=1.0e-4; 00618 00619 for(size_t i=0;i<dim;i++) { 00620 x[i]+=deriv_h; 00621 ufunc(dim,x,fv2,upa); 00622 x[i]-=deriv_h; 00623 gsl_vector_set(st_grad,i,(fv2-st_f)/deriv_h); 00624 } 00625 } 00626 00627 /* Use the gradient as the initial direction */ 00628 00629 for(size_t i=0;i<dim;i++) { 00630 gsl_vector_set(x0,i,x[i]); 00631 } 00632 gsl_vector_memcpy(g0,st_grad); 00633 g0norm=gsl_blas_dnrm2(g0); 00634 00635 gsl_vector_memcpy(p,st_grad); 00636 gsl_blas_dscal(-1/g0norm,p); 00637 pnorm=gsl_blas_dnrm2(p); /* should be 1 */ 00638 fp0=- g0norm; 00639 00640 /* Prepare the wrapper */ 00641 00642 wrap.prepare_wrapper(ufunc,upa,x0,st_f,g0,p); 00643 00644 /* Prepare 1d minimisation parameters */ 00645 00646 rho=0.01; 00647 sigma=tol_u; 00648 tau1=9; 00649 tau2=0.05; 00650 tau3=0.5; 00651 order=3; /* use cubic interpolation where possible */ 00652 00653 return GSL_SUCCESS; 00654 00655 } 00656 00657 /// The size of the first trial step 00658 double step_size; 00659 00660 /// The tolerance for the 1-dimensional minimizer 00661 double lmin_tol; 00662 00663 /** \brief Calculate the minimum \c min of \c func w.r.t the 00664 array \c x of size \c nvar. 
00665 */ 00666 virtual int mmin(size_t nn, vec_t &xx, double &fmin, param_t &pa, 00667 func_t &ufunc) { 00668 00669 int xiter=0, status; 00670 00671 allocate(nn); 00672 00673 set(xx,step_size,lmin_tol,ufunc,pa); 00674 00675 do { 00676 xiter++; 00677 00678 status=iterate(); 00679 00680 if (status) { 00681 break; 00682 } 00683 00684 // Equivalent to gsl_multimin_test_gradient with 00685 // additional code to print out present iteration 00686 00687 double norm=gsl_blas_dnrm2(st_grad); 00688 00689 if(this->verbose>0) { 00690 this->print_iter(nn,*st_x,st_f,xiter, 00691 norm,this->tolf,"gsl_mmin_bfgs2"); 00692 } 00693 00694 if (norm<this->tolf) status=gsl_success; 00695 else status=gsl_continue; 00696 00697 } 00698 while (status == GSL_CONTINUE && xiter < this->ntrial); 00699 00700 for(size_t i=0;i<nn;i++) xx[i]=(*st_x)[i]; 00701 00702 fmin=st_f; 00703 00704 free(); 00705 00706 this->last_ntrial=xiter; 00707 00708 return status; 00709 } 00710 00711 }; 00712 00713 #ifndef DOXYGENP 00714 } 00715 #endif 00716 00717 #endif
Documentation generated with Doxygen and provided under the GNU Free Documentation License. See License Information for details.
Project hosting provided by SourceForge; see the O2scl Sourceforge Project Page.