gsl_mmin_bfgs2.h

00001 /*
00002   -------------------------------------------------------------------
00003 
00004   Copyright (C) 2006, 2007, 2008, Andrew W. Steiner
00005 
00006   This file is part of O2scl.
00007 
00008   O2scl is free software; you can redistribute it and/or modify
00009   it under the terms of the GNU General Public License as published by
00010   the Free Software Foundation; either version 3 of the License, or
00011   (at your option) any later version.
00012 
00013   O2scl is distributed in the hope that it will be useful,
00014   but WITHOUT ANY WARRANTY; without even the implied warranty of
00015   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016   GNU General Public License for more details.
00017 
00018   You should have received a copy of the GNU General Public License
00019   along with O2scl. If not, see <http://www.gnu.org/licenses/>.
00020 
00021   -------------------------------------------------------------------
00022 */
00023 #ifndef O2SCL_GSL_MMIN_BFGS2_H
00024 #define O2SCL_GSL_MMIN_BFGS2_H
00025 
00026 #include <gsl/gsl_blas.h>
00027 #include <gsl/gsl_poly.h>
00028 #include <gsl/gsl_multimin.h>
00029 #include <o2scl/multi_min.h>
00030 
00031 #ifndef DOXYGENP
00032 namespace o2scl {
00033 #endif
00034 
00035   /** 
00036       \brief Virtual base for the gsl_mmin_bfgs2 wrapper
00037 
00038       This is useful so that the gsl_mmin_linmin class doesn't need to
00039       depend on any template parameters, even though it will need a
00040       wrapping object as an argument for the
00041       gsl_mmin_linmin::minimize() function.
00042   */
00043   class gsl_mmin_wrap_base {
00044   public:
00045     virtual ~gsl_mmin_wrap_base() {}
00046     /// Function
00047     virtual double wrap_f(double alpha, void *params)=0;
00048     /// Derivative
00049     virtual double wrap_df(double alpha, void *params)=0;
00050     /// Function and derivative
00051     virtual void wrap_fdf(double alpha, void *params, double *f, double *df)=0;
00052   };
00053 
00054   /**
00055      \brief Wrapper class for the gsl_mmin_bfgs2 minimizer
00056 
00057      \future There's a bit of extra vector copying here which 
00058      could potentially be avoided.
00059   */
00060   template<class param_t, class func_t,
00061     class vec_t=ovector_view, class alloc_vec_t=ovector,
00062     class alloc_t=ovector_alloc, class dfunc_t=func_t>
00063     class gsl_mmin_wrapper : public gsl_mmin_wrap_base {
00064 
00065 #ifndef DOXYGEN_INTERNAL
00066     
00067     protected:
00068 
00069     /// Function
00070     func_t *func;
00071 
00072     /// Derivative
00073     dfunc_t *dfunc;
00074 
00075     /// Parameters
00076     param_t *pa;
00077     
00078     /** \name fixed values 
00079      */
00080     //@{
00081     gsl_vector *x;
00082     gsl_vector *g;
00083     gsl_vector *p;
00084     //@}
00085     
00086     /** \name cached values, for x(alpha) = x + alpha * p 
00087      */
00088     //@{
00089     double f_alpha;
00090     double df_alpha;
00091     //@}
00092     
00093     /** \name cache "keys" 
00094      */
00095     //@{
00096     double f_cache_key;
00097     double df_cache_key;
00098     double x_cache_key;
00099     double g_cache_key;
00100     //@}
00101     
00102     /// Move to a new point, using the cached value if possible
00103     void moveto(double alpha) {
00104 
00105       if (alpha == x_cache_key) {
00106         /* using previously cached position */
00107         return;
00108       }
00109       
00110       /* set x_alpha = x + alpha * p */
00111       
00112       gsl_vector *tmp=gsl_vector_alloc(dim);
00113       for(size_t i=0;i<dim;i++) {
00114         av_x_alpha[i]=gsl_vector_get(x,i);
00115         gsl_vector_set(tmp,i,av_x_alpha[i]);
00116       }
00117 
00118       gsl_blas_daxpy(alpha,p,tmp);
00119       for(size_t i=0;i<dim;i++) {
00120         av_x_alpha[i]=gsl_vector_get(tmp,i);
00121       }
00122       gsl_vector_free(tmp);
00123 
00124       x_cache_key = alpha;
00125     }
00126 
00127     /// Compute the slope
00128     double slope() {
00129       double df;
00130       gsl_vector *tmp=gsl_vector_alloc(dim);
00131       for(size_t i=0;i<dim;i++) {
00132         gsl_vector_set(tmp,i,av_g_alpha[i]);
00133       }
00134       gsl_blas_ddot(tmp,p,&df);
00135       for(size_t i=0;i<dim;i++) {
00136         av_g_alpha[i]=gsl_vector_get(tmp,i);
00137       }
00138       gsl_vector_free(tmp);
00139       return df;
00140     }
00141 
00142     /// Evaluate the function
00143     virtual double wrap_f(double alpha, void *params) {
00144       if (alpha==f_cache_key) {
00145         return f_alpha;
00146       }
00147       moveto(alpha);
00148       (*func)(dim,av_x_alpha,f_alpha,*pa);
00149 
00150       f_cache_key = alpha;
00151       return f_alpha;
00152     }
00153 
00154     /// Evaluate the derivative
00155     virtual double wrap_df(double alpha, void *params) {
00156 
00157       /* using previously cached df(alpha) */
00158       if (alpha==df_cache_key) return df_alpha;
00159 
00160       moveto(alpha);
00161       if (alpha!=g_cache_key) {
00162         simple_df(av_x_alpha,av_g_alpha);
00163         g_cache_key=alpha;
00164       }
00165       df_alpha=slope();
00166       df_cache_key=alpha;
00167 
00168       return df_alpha;
00169     }
00170 
00171     /// A simple derivative
00172     int simple_df(vec_t &x2, vec_t &g2) {
00173       
00174       double fv1, fv2, deriv_h=1.0e-4;
00175       
00176       (*func)(dim,x2,fv1,*pa);
00177       
00178       for(size_t i=0;i<dim;i++) {
00179         x2[i]+=deriv_h;
00180         (*func)(dim,x2,fv2,*pa);
00181         x2[i]-=deriv_h;
00182         g2[i]=(fv2-fv1)/deriv_h;
00183       }
00184       
00185       return 0;
00186     }
00187     
00188     /// Evaluate the function and the derivative
00189     virtual void wrap_fdf(double alpha, void *params, double *f, double *df) {
00190 
00191       /* Check for previously cached values */
00192       if (alpha ==  f_cache_key && alpha ==  df_cache_key) {
00193         *f =  f_alpha;
00194         *df =  df_alpha;
00195         return;
00196       }
00197       if (alpha ==  f_cache_key || alpha ==  df_cache_key) {
00198         *f = wrap_f (alpha, params);
00199         *df = wrap_df (alpha, params);
00200         return;
00201       }
00202       
00203       moveto(alpha);
00204       (*func)(dim,av_x_alpha,f_alpha,*pa);
00205       simple_df(av_x_alpha,av_g_alpha);
00206       f_cache_key = alpha;
00207       g_cache_key = alpha;
00208       
00209       df_alpha = slope();
00210       df_cache_key = alpha;
00211       
00212       *f =  f_alpha;
00213       *df =  df_alpha;
00214       
00215       return;
00216     }
00217     
00218 #endif
00219 
00220     public:
00221 
00222     /// Temporary storage
00223     alloc_vec_t av_x_alpha;
00224 
00225     /// Temporary storage
00226     alloc_vec_t av_g_alpha;
00227 
00228     /// Number of minimization dimensions
00229     size_t dim;
00230     
00231     /// Initialize wrapper
00232     void prepare_wrapper(func_t &ufunc, param_t &upa, gsl_vector *t_x,
00233                          double f, gsl_vector *t_g, gsl_vector *t_p) {
00234       
00235       func=&ufunc;
00236       //dfunc=&udfunc;
00237       pa=&upa;
00238 
00239       x=t_x;
00240       g=t_g;
00241       p=t_p;
00242       
00243       x_cache_key=0.0;
00244       f_cache_key=0.0;
00245       g_cache_key=0.0;
00246       df_cache_key=0.0;
00247 
00248       for(size_t i=0;i<dim;i++) {
00249         av_x_alpha[i]=gsl_vector_get(x,i);
00250         av_g_alpha[i]=gsl_vector_get(g,i);
00251       }
00252       
00253       f_alpha=f;
00254       df_alpha=slope();
00255 
00256       return;
00257     }
00258     
00259     /// Update position
00260     void update_position(double alpha, gsl_vector *t_x, double *t_f, 
00261                          gsl_vector *t_g) {
00262       
00263       /* ensure that everything is fully cached */
00264       { 
00265         double t_f_alpha, t_df_alpha; 
00266         wrap_fdf(alpha,0,&t_f_alpha,&t_df_alpha); 
00267       }
00268       
00269       *t_f = f_alpha;
00270       for(size_t i=0;i<dim;i++) {
00271         gsl_vector_set(t_x,i,av_x_alpha[i]);
00272         gsl_vector_set(t_g,i,av_g_alpha[i]);
00273       }
00274     }
00275     
00276     /// Convert cache values to the new minimizer direction
00277     void change_direction() {
00278       
00279       /* Convert the cache values from the end of the current minimisation
00280          to those needed for the start of the next minimisation, alpha=0 */
00281       
00282       /* The new x_alpha for alpha=0 is the current position and the
00283          new g_alpha for alpha=0 is the current gradient at the
00284          endpoint */
00285       for(size_t i=0;i<dim;i++) {
00286         av_x_alpha[i]=gsl_vector_get(x,i);
00287         av_g_alpha[i]=gsl_vector_get(g,i);
00288       }
00289       x_cache_key = 0.0;
00290       g_cache_key = 0.0;
00291       
00292       /* The function value does not change */
00293       f_cache_key = 0.0;
00294       
00295       /* Calculate the slope along the new direction vector, p */
00296       df_alpha = slope ();
00297       df_cache_key = 0.0;
00298       
00299       return;
00300     }
00301     
00302   };
00303 
00304   /**
00305      \brief The line minimizer for gsl_mmin_bfgs2
00306   */
00307   class gsl_mmin_linmin {
00308 
00309 #ifndef DOXYGEN_INTERNAL
00310 
00311   protected:
00312 
00313     /**
00314        \brief Minimize the interpolating quadratic
00315 
00316        Find a minimum in x=[0,1] of the interpolating quadratic through
00317        (0,f0) (1,f1) with derivative fp0 at x=0.  The interpolating
00318        polynomial is q(x) = f0 + fp0 * z + (f1-f0-fp0) * z^2
00319     */
00320     double interp_quad(double f0, double fp0, double f1, double zl, 
00321                        double zh);
00322     
00323     /**
00324        \brief Minimize the interpolating cubic
00325        
00326        Find a minimum in x=[0,1] of the interpolating cubic through
00327        (0,f0) (1,f1) with derivatives fp0 at x=0 and fp1 at x=1.
00328        
00329        The interpolating polynomial is:
00330        
00331        c(x) = f0 + fp0 * z + eta * z^2 + xi * z^3
00332        
00333        where eta=3*(f1-f0)-2*fp0-fp1, xi=fp0+fp1-2*(f1-f0).
00334     */
00335     double cubic(double c0, double c1, double c2, double c3, double z);
00336     
00337     /// Test to see curvature is positive
00338     void check_extremum(double c0, double c1, double c2, double c3, double z,
00339                         double *zmin, double *fmin);
00340     
00341     /// Interpolate using a cubic
00342     double interp_cubic(double f0, double fp0, double f1, 
00343                         double fp1, double zl, double zh);
00344     
00345     /// Perform the interpolation
00346     double interpolate(double a, double fa, double fpa, double b, 
00347                        double fb, double fpb, double xmin, double xmax,
00348                        int order);
00349 #endif
00350     
00351   public:
00352 
00353     /** 
00354         \brief The line minimization
00355         
00356         recommended values from Fletcher are
00357         rho = 0.01, sigma = 0.1, tau1 = 9, tau2 = 0.05, tau3 = 0.5 
00358     */
00359     int minimize(gsl_mmin_wrap_base &wrap, double rho, double sigma,
00360                  double tau1, double tau2, double tau3,
00361                  int order, double alpha1, double *alpha_new);
00362   };
00363 
00364   /** \brief Multidimensional minimization by the BFGS
00365       algorithm (GSL)
00366 
00367       This class includes the optimizations from the GSL minimizer \c
00368       vector_bfgs2.
00369   */
00370   template<class param_t, class func_t, 
00371     class vec_t=ovector_view, class alloc_vec_t=ovector,
00372     class alloc_t=ovector_alloc, class dfunc_t=func_t> 
00373     class gsl_mmin_bfgs2 : public multi_min<param_t,func_t,func_t,vec_t> {
00374     
00375 #ifndef DOXYGEN_INTERNAL
00376 
00377     protected:
00378 
00379     /// \name The original variables from the GSL state structure
00380     //@{
00381     int iter;
00382     double step;
00383     double g0norm;
00384     double pnorm;
00385     double delta_f;
00386     /* f'(0) for f(x-alpha*p) */
00387     double fp0;                   
00388     gsl_vector *x0;
00389     gsl_vector *g0;
00390     gsl_vector *p;
00391     /* work space */
00392     gsl_vector *dx0;
00393     gsl_vector *dg0;
00394     /* wrapper function */
00395     gsl_mmin_wrapper<param_t,func_t,vec_t,alloc_vec_t,alloc_t,dfunc_t> wrap;
00396     /* minimization parameters */
00397     double rho;
00398     double sigma;
00399     double tau1;
00400     double tau2;
00401     double tau3;
00402     int order;
00403     //@}
00404 
00405     /// The line minimizer
00406     gsl_mmin_linmin lm;
00407       
00408     /// \name Store the arguments to set() so we can use them for iterate()
00409     //@{
00410     vec_t *st_x;
00411     gsl_vector *st_dx;
00412     gsl_vector *st_grad;
00413     double st_f;
00414     //@}
00415 
00416     /// Memory size
00417     size_t dim;
00418 
00419     /// Memory allocation
00420     alloc_t ao;
00421 
00422 #endif
00423     
00424     public:
00425      
00426     gsl_mmin_bfgs2() {
00427       lmin_tol=1.0e-4;
00428       this->tolf=1.0e-3;
00429       step_size=0.01;
00430     }
00431     
00432     virtual ~gsl_mmin_bfgs2() {}
00433 
00434     /// Perform an iteration
00435     virtual int iterate() {
00436 
00437       double alpha = 0.0, alpha1;
00438 
00439       double pg, dir;
00440       int status;
00441       
00442       double f0 = st_f;
00443       
00444       if (pnorm == 0.0 || g0norm == 0.0 || fp0 == 0) {
00445         gsl_vector_set_zero (st_dx);
00446         return GSL_ENOPROG;
00447       }
00448       
00449       if (delta_f < 0) {
00450         double del = GSL_MAX_DBL (-delta_f, 10 * GSL_DBL_EPSILON * fabs(f0));
00451         alpha1 = GSL_MIN_DBL (1.0, 2.0 * del / (- fp0));
00452       } else {
00453         alpha1 = fabs( step);
00454       }
00455         
00456       /* line minimisation, with cubic interpolation (order = 3) */
00457       
00458       status=lm.minimize(wrap,rho,sigma,tau1,tau2,tau3,order,
00459                          alpha1,&alpha);
00460       
00461       if (status != GSL_SUCCESS) {
00462         return status;
00463       }
00464       
00465       wrap.update_position(alpha,st_x,&st_f,st_grad);
00466 
00467       delta_f = st_f - f0;
00468 
00469       /* Choose a new direction for the next step */
00470 
00471       {
00472         /* This is the BFGS update: */
00473         /* p' = g1 - A dx - B dg */
00474         /* A = - (1+ dg.dg/dx.dg) B + dg.g/dx.dg */
00475         /* B = dx.g/dx.dg */
00476 
00477         double dxg, dgg, dxdg, dgnorm, A, B;
00478 
00479         /* dx0 = x - x0 */
00480         gsl_vector_memcpy (dx0, st_x);
00481         gsl_blas_daxpy (-1.0, x0, dx0);
00482 
00483         gsl_vector_memcpy (st_dx, dx0);  /* keep a copy */
00484 
00485         /* dg0 = g - g0 */
00486         gsl_vector_memcpy (dg0, st_grad);
00487         gsl_blas_daxpy (-1.0, g0, dg0);
00488 
00489         gsl_blas_ddot (dx0, st_grad, &dxg);
00490         gsl_blas_ddot (dg0, st_grad, &dgg);
00491         gsl_blas_ddot (dx0, dg0, &dxdg);
00492 
00493         dgnorm = gsl_blas_dnrm2 (dg0);
00494 
00495         if (dxdg != 0) {
00496           B = dxg / dxdg;
00497           A = -(1.0 + dgnorm * dgnorm / dxdg) * B + dgg / dxdg;
00498         } else {
00499           B = 0;
00500           A = 0;
00501         }
00502         
00503         gsl_vector_memcpy (p, st_grad);
00504         gsl_blas_daxpy (-A, dx0, p);
00505         gsl_blas_daxpy (-B, dg0, p);
00506       }
00507 
00508       gsl_vector_memcpy (g0, st_grad);
00509       gsl_vector_memcpy (x0, st_x);
00510       g0norm = gsl_blas_dnrm2 (g0);
00511       pnorm = gsl_blas_dnrm2 (p);
00512       /* update direction and fp0 */
00513 
00514       gsl_blas_ddot (p, st_grad, &pg);
00515       dir = (pg >= 0.0) ? -1.0 : +1.0;
00516       gsl_blas_dscal (dir /  pnorm, p);
00517       pnorm = gsl_blas_dnrm2 (p);
00518       gsl_blas_ddot (p, g0, & fp0);
00519 
00520       wrap.change_direction();
00521 
00522       return GSL_SUCCESS;
00523 
00524     }
00525     
00526     /// Return string denoting type("gsl_mmin_bfgs2")
00527     virtual const char *type() { return "gsl_mmin_bfgs2";}
00528 
00529     /// Allocate the memory
00530     virtual int allocate(size_t n) {
00531 
00532       p=gsl_vector_calloc(n);
00533       if (p == 0) {
00534         set_err_ret("Failed to allocate p in gsl_mmin_bfgs2::allocate().",
00535                     gsl_enomem);
00536       }
00537 
00538       x0=gsl_vector_calloc(n);
00539       if (x0 == 0) {
00540         gsl_vector_free(p);
00541         set_err_ret("Failed to allocate x0 in gsl_mmin_bfgs2::allocate().",
00542                     gsl_enomem);
00543       }
00544 
00545       g0=gsl_vector_calloc(n);
00546       if (g0 == 0) {
00547         gsl_vector_free(x0);
00548         gsl_vector_free(p);
00549         set_err_ret("Failed to allocate g0 in gsl_mmin_bfgs2::allocate().",
00550                     gsl_enomem);
00551       }
00552 
00553       dx0=gsl_vector_calloc(n);
00554       if (dx0 == 0) {
00555         gsl_vector_free(g0);
00556         gsl_vector_free(x0);
00557         gsl_vector_free(p);
00558         set_err_ret("Failed to allocate dx0 in gsl_mmin_bfgs2::allocate().",
00559                     gsl_enomem);
00560       }
00561 
00562       dg0=gsl_vector_calloc(n);
00563       if (dg0 == 0) {
00564         gsl_vector_free(dx0);
00565         gsl_vector_free(g0);
00566         gsl_vector_free(x0);
00567         gsl_vector_free(p);
00568         set_err_ret("Failed to allocate dg0 in gsl_mmin_bfgs2::allocate().",
00569                     gsl_enomem);
00570       }
00571 
00572       st_dx=gsl_vector_alloc(n);
00573       st_grad=gsl_vector_alloc(n);
00574 
00575       ao.allocate(wrap.av_x_alpha,n);
00576       ao.allocate(wrap.av_g_alpha,n);
00577       wrap.dim=n;
00578       dim=n;
00579 
00580       return GSL_SUCCESS;
00581     }
00582     
00583     /// Free the allocated memory
00584     virtual int free() {
00585       ao.free(wrap.av_x_alpha);
00586       ao.free(wrap.av_g_alpha);
00587       gsl_vector_free(dg0);
00588       gsl_vector_free(dx0);
00589       gsl_vector_free(g0);
00590       gsl_vector_free(x0);
00591       gsl_vector_free(p);
00592       gsl_vector_free(st_dx);
00593       gsl_vector_free(st_grad);
00594       wrap.dim=0;
00595       dim=0;
00596       return 0;
00597     }
00598       
00599     /// Reset the minimizer to use the current point as a new starting point
00600     int restart() {
00601       iter=0;
00602       return gsl_success;
00603     }
00604 
00605     /// Set the function and initial guess
00606     virtual int set(vec_t &x, double u_step_size, double tol_u, 
00607                     func_t &ufunc, param_t &upa) {
00608       
00609       iter=0;
00610       step=u_step_size;
00611       delta_f=0;
00612       
00613       st_x=&x;
00614       
00615       ufunc(dim,x,st_f,upa);
00616       {
00617         double fv2, deriv_h=1.0e-4;
00618         
00619         for(size_t i=0;i<dim;i++) {
00620           x[i]+=deriv_h;
00621           ufunc(dim,x,fv2,upa);
00622           x[i]-=deriv_h;
00623           gsl_vector_set(st_grad,i,(fv2-st_f)/deriv_h);
00624         }
00625       }
00626       
00627       /* Use the gradient as the initial direction */
00628 
00629       for(size_t i=0;i<dim;i++) {
00630         gsl_vector_set(x0,i,x[i]);
00631       }
00632       gsl_vector_memcpy(g0,st_grad);
00633       g0norm=gsl_blas_dnrm2(g0);
00634 
00635       gsl_vector_memcpy(p,st_grad);
00636       gsl_blas_dscal(-1/g0norm,p);
00637       pnorm=gsl_blas_dnrm2(p);     /* should be 1 */
00638       fp0=- g0norm;
00639 
00640       /* Prepare the wrapper */
00641       
00642       wrap.prepare_wrapper(ufunc,upa,x0,st_f,g0,p);
00643       
00644       /* Prepare 1d minimisation parameters */
00645 
00646       rho=0.01;
00647       sigma=tol_u;
00648       tau1=9;
00649       tau2=0.05;
00650       tau3=0.5;
00651       order=3;  /* use cubic interpolation where possible */
00652 
00653       return GSL_SUCCESS;
00654  
00655     }
00656 
00657     /// The size of the first trial step
00658     double step_size;
00659     
00660     /// The tolerance for the 1-dimensional minimizer
00661     double lmin_tol;
00662 
00663     /** \brief Calculate the minimum \c min of \c func w.r.t the
00664         array \c x of size \c nvar.
00665     */
00666     virtual int mmin(size_t nn, vec_t &xx, double &fmin, param_t &pa,
00667                      func_t &ufunc) {
00668 
00669       int xiter=0, status;
00670 
00671       allocate(nn);
00672 
00673       set(xx,step_size,lmin_tol,ufunc,pa);
00674 
00675       do {
00676         xiter++;
00677 
00678         status=iterate();
00679         
00680         if (status) {
00681           break;
00682         }
00683 
00684         // Equivalent to gsl_multimin_test_gradient with
00685         // additional code to print out present iteration
00686         
00687         double norm=gsl_blas_dnrm2(st_grad);
00688         
00689         if(this->verbose>0) {
00690           this->print_iter(nn,*st_x,st_f,xiter,
00691                            norm,this->tolf,"gsl_mmin_bfgs2");
00692         }
00693 
00694         if (norm<this->tolf) status=gsl_success;
00695         else status=gsl_continue;
00696 
00697       }
00698       while (status == GSL_CONTINUE && xiter < this->ntrial);
00699       
00700       for(size_t i=0;i<nn;i++) xx[i]=(*st_x)[i];
00701 
00702       fmin=st_f;
00703       
00704       free();
00705 
00706       this->last_ntrial=xiter;
00707         
00708       return status;
00709     }
00710 
00711   };
00712 
00713 #ifndef DOXYGENP
00714 }
00715 #endif
00716 
00717 #endif

Documentation generated with Doxygen and provided under the GNU Free Documentation License. See License Information for details.

Project hosting provided by SourceForge.net. See the O2scl SourceForge project page.