gsl_mmin_bfgs2.h

00001 /*
00002   -------------------------------------------------------------------
00003 
00004   Copyright (C) 2006, 2007, Andrew W. Steiner
00005 
00006   This file is part of O2scl.
00007 
00008   O2scl is free software; you can redistribute it and/or modify
00009   it under the terms of the GNU General Public License as published by
00010   the Free Software Foundation; either version 3 of the License, or
00011   (at your option) any later version.
00012 
00013   O2scl is distributed in the hope that it will be useful,
00014   but WITHOUT ANY WARRANTY; without even the implied warranty of
00015   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016   GNU General Public License for more details.
00017 
00018   You should have received a copy of the GNU General Public License
00019   along with O2scl. If not, see <http://www.gnu.org/licenses/>.
00020 
00021   -------------------------------------------------------------------
00022 */
00023 #ifndef O2SCL_GSL_MMIN_BFGS2_H
00024 #define O2SCL_GSL_MMIN_BFGS2_H
00025 
00026 #include <gsl/gsl_blas.h>
00027 #include <gsl/gsl_poly.h>
00028 #include <gsl/gsl_multimin.h>
00029 #include <o2scl/multi_min.h>
00030 
00031 #ifndef DOXYGENP
00032 namespace o2scl {
00033 #endif
00034 
00035   /** 
00036       \brief Virtual base for the gsl_mmin_bfgs2 wrapper
00037 
00038       This is useful so that the gsl_mmin_linmin class doesn't need to
00039       depend on any template parameters, even though it will need a
00040       wrapping object as an argument for the
00041       gsl_mmin_linmin::minimize() function.
00042   */
00043   class gsl_mmin_wrap_base {
00044   public:
00045     virtual ~gsl_mmin_wrap_base() {}
00046     /// Function
00047     virtual double wrap_f(double alpha, void *params)=0;
00048     /// Derivative
00049     virtual double wrap_df(double alpha, void *params)=0;
00050     /// Function and derivative
00051     virtual void wrap_fdf(double alpha, void *params, double *f, double *df)=0;
00052   };
00053 
00054   /**
00055      \brief Wrapper class for the gsl_mmin_bfgs2 minimizer
00056 
00057      \future There's a bit of extra vector copying here which 
00058      could potentially be avoided.
00059   */
00060   template<class param_t, class func_t,
00061     class vec_t=ovector_view, class alloc_vec_t=ovector,
00062     class alloc_t=ovector_alloc, class dfunc_t=func_t>
00063     class gsl_mmin_wrapper : public gsl_mmin_wrap_base {
00064 
00065 #ifndef DOXYGEN_INTERNAL
00066     
00067     protected:
00068 
00069     /// Function
00070     func_t *func;
00071 
00072     /// Derivative
00073     dfunc_t *dfunc;
00074 
00075     /// Parameters
00076     param_t *pa;
00077     
00078     /** \name fixed values 
00079      */
00080     //@{
00081     gsl_vector *x;
00082     gsl_vector *g;
00083     gsl_vector *p;
00084     //@}
00085     
00086     /** \name cached values, for x(alpha) = x + alpha * p 
00087      */
00088     //@{
00089     double f_alpha;
00090     double df_alpha;
00091     //@}
00092     
00093     /** \name cache "keys" 
00094      */
00095     //@{
00096     double f_cache_key;
00097     double df_cache_key;
00098     double x_cache_key;
00099     double g_cache_key;
00100     //@}
00101     
00102     /// Move to a new point, using the cached value if possible
00103     void moveto(double alpha) {
00104 
00105       if (alpha == x_cache_key) {
00106         /* using previously cached position */
00107         return;
00108       }
00109       
00110       /* set x_alpha = x + alpha * p */
00111       
00112       gsl_vector *tmp=gsl_vector_alloc(dim);
00113       for(size_t i=0;i<dim;i++) {
00114         av_x_alpha[i]=gsl_vector_get(x,i);
00115         gsl_vector_set(tmp,i,av_x_alpha[i]);
00116       }
00117 
00118       gsl_blas_daxpy(alpha,p,tmp);
00119       for(size_t i=0;i<dim;i++) {
00120         av_x_alpha[i]=gsl_vector_get(tmp,i);
00121       }
00122       gsl_vector_free(tmp);
00123 
00124       x_cache_key = alpha;
00125     }
00126 
00127     /// Compute the slope
00128     double slope() {
00129       double df;
00130       gsl_vector *tmp=gsl_vector_alloc(dim);
00131       for(size_t i=0;i<dim;i++) {
00132         gsl_vector_set(tmp,i,av_g_alpha[i]);
00133       }
00134       gsl_blas_ddot(tmp,p,&df);
00135       for(size_t i=0;i<dim;i++) {
00136         av_g_alpha[i]=gsl_vector_get(tmp,i);
00137       }
00138       gsl_vector_free(tmp);
00139       return df;
00140     }
00141 
00142     /// Evaluate the function
00143     virtual double wrap_f(double alpha, void *params) {
00144       if (alpha==f_cache_key) {
00145         return f_alpha;
00146       }
00147       moveto(alpha);
00148       (*func)(dim,av_x_alpha,f_alpha,*pa);
00149 
00150       f_cache_key = alpha;
00151       return f_alpha;
00152     }
00153 
00154     /// Evaluate the derivative
00155     virtual double wrap_df(double alpha, void *params) {
00156 
00157       /* using previously cached df(alpha) */
00158       if (alpha==df_cache_key) return df_alpha;
00159 
00160       moveto(alpha);
00161       if (alpha!=g_cache_key) {
00162         simple_df(av_x_alpha,av_g_alpha);
00163         g_cache_key=alpha;
00164       }
00165       df_alpha=slope();
00166       df_cache_key=alpha;
00167 
00168       return df_alpha;
00169     }
00170 
00171     /// A simple derivative
00172     int simple_df(vec_t &x2, vec_t &g2) {
00173       
00174       double fv1, fv2, deriv_h=1.0e-4;
00175       
00176       (*func)(dim,x2,fv1,*pa);
00177       
00178       for(size_t i=0;i<dim;i++) {
00179         x2[i]+=deriv_h;
00180         (*func)(dim,x2,fv2,*pa);
00181         x2[i]-=deriv_h;
00182         g2[i]=(fv2-fv1)/deriv_h;
00183       }
00184       
00185       return 0;
00186     }
00187     
00188     /// Evaluate the function and the derivative
00189     virtual void wrap_fdf(double alpha, void *params, double *f, double *df) {
00190 
00191       /* Check for previously cached values */
00192       if (alpha ==  f_cache_key && alpha ==  df_cache_key) {
00193         *f =  f_alpha;
00194         *df =  df_alpha;
00195         return;
00196       }
00197       if (alpha ==  f_cache_key || alpha ==  df_cache_key) {
00198         *f = wrap_f (alpha, params);
00199         *df = wrap_df (alpha, params);
00200         return;
00201       }
00202       
00203       moveto(alpha);
00204       (*func)(dim,av_x_alpha,f_alpha,*pa);
00205       simple_df(av_x_alpha,av_g_alpha);
00206       f_cache_key = alpha;
00207       g_cache_key = alpha;
00208       
00209       df_alpha = slope();
00210       df_cache_key = alpha;
00211       
00212       *f =  f_alpha;
00213       *df =  df_alpha;
00214       
00215       return;
00216     }
00217     
00218 #endif
00219 
00220     public:
00221 
00222     /// Temporary storage
00223     alloc_vec_t av_x_alpha;
00224 
00225     /// Temporary storage
00226     alloc_vec_t av_g_alpha;
00227 
00228     /// Number of minimization dimensions
00229     size_t dim;
00230     
00231     /// Initialize wrapper
00232     void prepare_wrapper(func_t &ufunc, param_t &upa, gsl_vector *t_x,
00233                          double f, gsl_vector *t_g, gsl_vector *t_p) {
00234       
00235       func=&ufunc;
00236       //dfunc=&udfunc;
00237       pa=&upa;
00238 
00239       x=t_x;
00240       g=t_g;
00241       p=t_p;
00242       
00243       x_cache_key=0.0;
00244       f_cache_key=0.0;
00245       g_cache_key=0.0;
00246       df_cache_key=0.0;
00247 
00248       for(size_t i=0;i<dim;i++) {
00249         av_x_alpha[i]=gsl_vector_get(x,i);
00250         av_g_alpha[i]=gsl_vector_get(g,i);
00251       }
00252       
00253       f_alpha=f;
00254       df_alpha=slope();
00255 
00256       return;
00257     }
00258     
00259     /// Update position
00260     void update_position(double alpha, gsl_vector *t_x, double *t_f, 
00261                          gsl_vector *t_g) {
00262       
00263       /* ensure that everything is fully cached */
00264       { 
00265         double t_f_alpha, t_df_alpha; 
00266         wrap_fdf(alpha,0,&t_f_alpha,&t_df_alpha); 
00267       }
00268       
00269       *t_f = f_alpha;
00270       for(size_t i=0;i<dim;i++) {
00271         gsl_vector_set(t_x,i,av_x_alpha[i]);
00272         gsl_vector_set(t_g,i,av_g_alpha[i]);
00273       }
00274     }
00275     
00276     /// Convert cache values to the new minimizer direction
00277     void change_direction() {
00278       
00279       /* Convert the cache values from the end of the current minimisation
00280          to those needed for the start of the next minimisation, alpha=0 */
00281       
00282       /* The new x_alpha for alpha=0 is the current position and the
00283          new g_alpha for alpha=0 is the current gradient at the
00284          endpoint */
00285       for(size_t i=0;i<dim;i++) {
00286         av_x_alpha[i]=gsl_vector_get(x,i);
00287         av_g_alpha[i]=gsl_vector_get(g,i);
00288       }
00289       x_cache_key = 0.0;
00290       g_cache_key = 0.0;
00291       
00292       /* The function value does not change */
00293       f_cache_key = 0.0;
00294       
00295       /* Calculate the slope along the new direction vector, p */
00296       df_alpha = slope ();
00297       df_cache_key = 0.0;
00298       
00299       return;
00300     }
00301     
00302   };
00303 
00304   /**
00305      \brief The line minimizer for gsl_mmin_bfgs2
00306   */
00307   class gsl_mmin_linmin {
00308 
00309 #ifndef DOXYGEN_INTERNAL
00310 
00311   protected:
00312 
00313     /**
00314        \brief Minimize the interpolating quadratic
00315 
00316        Find a minimum in x=[0,1] of the interpolating quadratic through
00317        (0,f0) (1,f1) with derivative fp0 at x=0.  The interpolating
00318        polynomial is q(x) = f0 + fp0 * z + (f1-f0-fp0) * z^2
00319     */
00320     double interp_quad(double f0, double fp0, double f1, double zl, 
00321                        double zh);
00322     
00323     /**
00324        \brief Minimize the interpolating cubic
00325        
00326        Find a minimum in x=[0,1] of the interpolating cubic through
00327        (0,f0) (1,f1) with derivatives fp0 at x=0 and fp1 at x=1.
00328        
00329        The interpolating polynomial is:
00330        
00331        c(x) = f0 + fp0 * z + eta * z^2 + xi * z^3
00332        
00333        where eta=3*(f1-f0)-2*fp0-fp1, xi=fp0+fp1-2*(f1-f0).
00334     */
00335     double cubic(double c0, double c1, double c2, double c3, double z);
00336     
00337     /// Test to see curvature is positive
00338     void check_extremum(double c0, double c1, double c2, double c3, double z,
00339                         double *zmin, double *fmin);
00340     
00341     /// Interpolate using a cubic
00342     double interp_cubic(double f0, double fp0, double f1, 
00343                         double fp1, double zl, double zh);
00344     
00345     /// Perform the interpolation
00346     double interpolate(double a, double fa, double fpa, double b, 
00347                        double fb, double fpb, double xmin, double xmax,
00348                        int order);
00349 #endif
00350     
00351   public:
00352 
00353     /** 
00354         \brief The line minimization
00355         
00356         recommended values from Fletcher are
00357         rho = 0.01, sigma = 0.1, tau1 = 9, tau2 = 0.05, tau3 = 0.5 
00358     */
00359     int minimize(gsl_mmin_wrap_base &wrap, double rho, double sigma,
00360                  double tau1, double tau2, double tau3,
00361                  int order, double alpha1, double *alpha_new);
00362   };
00363 
00364   /** \brief Multidimensional minimization by the BFGS
00365       algorithm (GSL)
00366 
00367       This class includes the optimizations from the GSL minimizer \c
00368       vector_bfgs2.
00369   */
00370   template<class param_t, class func_t, 
00371     class vec_t=ovector_view, class alloc_vec_t=ovector,
00372     class alloc_t=ovector_alloc, class dfunc_t=func_t> 
00373     class gsl_mmin_bfgs2 : public multi_min<param_t,func_t,func_t,vec_t> {
00374     
00375 #ifndef DOXYGEN_INTERNAL
00376 
00377     protected:
00378 
00379     /// \name The original variables from the GSL state structure
00380     //@{
00381     int iter;
00382     double step;
00383     double g0norm;
00384     double pnorm;
00385     double delta_f;
00386     /* f'(0) for f(x-alpha*p) */
00387     double fp0;                   
00388     gsl_vector *x0;
00389     gsl_vector *g0;
00390     gsl_vector *p;
00391     /* work space */
00392     gsl_vector *dx0;
00393     gsl_vector *dg0;
00394     /* wrapper function */
00395     gsl_mmin_wrapper<param_t,func_t,vec_t,alloc_vec_t,alloc_t,dfunc_t> wrap;
00396     /* minimization parameters */
00397     double rho;
00398     double sigma;
00399     double tau1;
00400     double tau2;
00401     double tau3;
00402     int order;
00403     //@}
00404 
00405     /// The line minimizer
00406     gsl_mmin_linmin lm;
00407       
00408     /// \name Store the arguments to set() so we can use them for iterate()
00409     //@{
00410     vec_t *st_x;
00411     gsl_vector *st_dx;
00412     gsl_vector *st_grad;
00413     double st_f;
00414     //@}
00415 
00416     /// Memory size
00417     size_t dim;
00418 
00419     /// Memory allocation
00420     alloc_t ao;
00421 
00422 #endif
00423     
00424     public:
00425       
00426     gsl_mmin_bfgs2() {
00427       lmin_tol=1.0e-4;
00428       this->tolf=1.0e-3;
00429       step_size=0.01;
00430     }
00431     
00432     virtual ~gsl_mmin_bfgs2() {}
00433 
00434     /// Perform an iteration
00435     virtual int iterate() {
00436 
00437       double alpha = 0.0, alpha1;
00438 
00439       double pg, dir;
00440       int status;
00441       
00442       double f0 = st_f;
00443       
00444       if (pnorm == 0.0 || g0norm == 0.0 ||  fp0 == 0)
00445         {
00446           gsl_vector_set_zero (st_dx);
00447           return GSL_ENOPROG;
00448         }
00449 
00450       if (delta_f < 0) {
00451         double del = GSL_MAX_DBL (-delta_f, 10 * GSL_DBL_EPSILON * fabs(f0));
00452         alpha1 = GSL_MIN_DBL (1.0, 2.0 * del / (- fp0));
00453       } else {
00454         alpha1 = fabs( step);
00455       }
00456         
00457       /* line minimisation, with cubic interpolation (order = 3) */
00458       
00459       status=lm.minimize(wrap,rho,sigma,tau1,tau2,tau3,order,
00460                          alpha1,&alpha);
00461       
00462       if (status != GSL_SUCCESS) {
00463         return status;
00464       }
00465       
00466       wrap.update_position(alpha,st_x,&st_f,st_grad);
00467 
00468       delta_f = st_f - f0;
00469 
00470       /* Choose a new direction for the next step */
00471 
00472       {
00473         /* This is the BFGS update: */
00474         /* p' = g1 - A dx - B dg */
00475         /* A = - (1+ dg.dg/dx.dg) B + dg.g/dx.dg */
00476         /* B = dx.g/dx.dg */
00477 
00478         double dxg, dgg, dxdg, dgnorm, A, B;
00479 
00480         /* dx0 = x - x0 */
00481         gsl_vector_memcpy (dx0, st_x);
00482         gsl_blas_daxpy (-1.0, x0, dx0);
00483 
00484         gsl_vector_memcpy (st_dx, dx0);  /* keep a copy */
00485 
00486         /* dg0 = g - g0 */
00487         gsl_vector_memcpy (dg0, st_grad);
00488         gsl_blas_daxpy (-1.0, g0, dg0);
00489 
00490         gsl_blas_ddot (dx0, st_grad, &dxg);
00491         gsl_blas_ddot (dg0, st_grad, &dgg);
00492         gsl_blas_ddot (dx0, dg0, &dxdg);
00493 
00494         dgnorm = gsl_blas_dnrm2 (dg0);
00495 
00496         if (dxdg != 0) {
00497           B = dxg / dxdg;
00498           A = -(1.0 + dgnorm * dgnorm / dxdg) * B + dgg / dxdg;
00499         } else {
00500           B = 0;
00501           A = 0;
00502         }
00503         
00504         gsl_vector_memcpy (p, st_grad);
00505         gsl_blas_daxpy (-A, dx0, p);
00506         gsl_blas_daxpy (-B, dg0, p);
00507       }
00508 
00509       gsl_vector_memcpy (g0, st_grad);
00510       gsl_vector_memcpy (x0, st_x);
00511       g0norm = gsl_blas_dnrm2 (g0);
00512       pnorm = gsl_blas_dnrm2 (p);
00513       /* update direction and fp0 */
00514 
00515       gsl_blas_ddot (p, st_grad, &pg);
00516       dir = (pg >= 0.0) ? -1.0 : +1.0;
00517       gsl_blas_dscal (dir /  pnorm, p);
00518       pnorm = gsl_blas_dnrm2 (p);
00519       gsl_blas_ddot (p, g0, & fp0);
00520 
00521       wrap.change_direction();
00522 
00523       return GSL_SUCCESS;
00524 
00525     }
00526     
00527     /// Return string denoting type("gsl_mmin_bfgs2")
00528     virtual const char *type() { return "gsl_mmin_bfgs2";}
00529 
00530     /// Allocate the memory
00531     virtual int allocate(size_t n) {
00532 
00533       p = gsl_vector_calloc (n);
00534 
00535       if ( p == 0) {
00536         GSL_ERROR ("failed to allocate space for p", GSL_ENOMEM);
00537       }
00538 
00539       x0 = gsl_vector_calloc (n);
00540 
00541       if ( x0 == 0) {
00542         gsl_vector_free ( p);
00543         GSL_ERROR ("failed to allocate space for g0", GSL_ENOMEM);
00544       }
00545 
00546       g0 = gsl_vector_calloc (n);
00547 
00548       if ( g0 == 0) {
00549         gsl_vector_free ( x0);
00550         gsl_vector_free ( p);
00551         GSL_ERROR ("failed to allocate space for g0", GSL_ENOMEM);
00552       }
00553 
00554       dx0 = gsl_vector_calloc (n);
00555 
00556       if ( dx0 == 0) {
00557         gsl_vector_free ( g0);
00558         gsl_vector_free ( x0);
00559         gsl_vector_free ( p);
00560         GSL_ERROR ("failed to allocate space for g0", GSL_ENOMEM);
00561       }
00562 
00563       dg0 = gsl_vector_calloc (n);
00564 
00565       if ( dg0 == 0) {
00566         gsl_vector_free ( dx0);
00567         gsl_vector_free ( g0);
00568         gsl_vector_free ( x0);
00569         gsl_vector_free ( p);
00570         GSL_ERROR ("failed to allocate space for g0", GSL_ENOMEM);
00571       }
00572 
00573       st_dx = gsl_vector_alloc(n);
00574       st_grad = gsl_vector_alloc(n);
00575 
00576       ao.allocate(wrap.av_x_alpha,n);
00577       ao.allocate(wrap.av_g_alpha,n);
00578       wrap.dim=n;
00579       dim=n;
00580 
00581       return GSL_SUCCESS;
00582     }
00583     
00584     /// Free the allocated memory
00585     virtual int free() {
00586       ao.free(wrap.av_x_alpha);
00587       ao.free(wrap.av_g_alpha);
00588       gsl_vector_free(dg0);
00589       gsl_vector_free(dx0);
00590       gsl_vector_free(g0);
00591       gsl_vector_free(x0);
00592       gsl_vector_free(p);
00593       gsl_vector_free(st_dx);
00594       gsl_vector_free(st_grad);
00595       wrap.dim=0;
00596       dim=0;
00597       return 0;
00598     }
00599       
00600     /// Reset the minimizer to use the current point as a new starting point
00601     int restart() {
00602       iter = 0;
00603       return gsl_success;
00604     }
00605 
00606     /// Set the function and initial guess
00607     virtual int set(vec_t &x, double u_step_size, double tol_u, 
00608                     func_t &ufunc, param_t &upa) {
00609       
00610       iter = 0;
00611       step = u_step_size;
00612       delta_f = 0;
00613       
00614       st_x=&x;
00615       
00616       ufunc(dim,x,st_f,upa);
00617       {
00618         double fv2, deriv_h=1.0e-4;
00619         
00620         for(size_t i=0;i<dim;i++) {
00621           x[i]+=deriv_h;
00622           ufunc(dim,x,fv2,upa);
00623           x[i]-=deriv_h;
00624           gsl_vector_set(st_grad,i,(fv2-st_f)/deriv_h);
00625         }
00626       }
00627       
00628       /* Use the gradient as the initial direction */
00629 
00630       for(size_t i=0;i<dim;i++) {
00631         gsl_vector_set(x0,i,x[i]);
00632       }
00633       gsl_vector_memcpy(g0,st_grad);
00634       g0norm = gsl_blas_dnrm2 ( g0);
00635 
00636       gsl_vector_memcpy(p,st_grad);
00637       gsl_blas_dscal(-1/g0norm,p);
00638       pnorm = gsl_blas_dnrm2 ( p);     /* should be 1 */
00639       fp0 = - g0norm;
00640 
00641       /* Prepare the wrapper */
00642       
00643       wrap.prepare_wrapper(ufunc,upa,x0,st_f,g0,p);
00644       
00645       /* Prepare 1d minimisation parameters */
00646 
00647       rho = 0.01;
00648       sigma = tol_u;
00649       tau1 = 9;
00650       tau2 = 0.05;
00651       tau3 = 0.5;
00652       order = 3;  /* use cubic interpolation where possible */
00653 
00654       return GSL_SUCCESS;
00655  
00656     }
00657 
00658     /// The size of the first trial step
00659     double step_size;
00660     
00661     /// The tolerance for the 1-dimensional minimizer
00662     double lmin_tol;
00663 
00664     /** \brief Calculate the minimum \c min of \c func w.r.t the
00665         array \c x of size \c nvar.
00666     */
00667     virtual int mmin(size_t nn, vec_t &xx, double &fmin, param_t &pa,
00668                      func_t &ufunc) {
00669 
00670       int xiter=0, status;
00671 
00672       allocate(nn);
00673 
00674       set(xx,step_size,lmin_tol,ufunc,pa);
00675 
00676       do {
00677         xiter++;
00678 
00679         status=iterate();
00680         
00681         if (status) {
00682           break;
00683         }
00684 
00685         // Equivalent to gsl_multimin_test_gradient with
00686         // additional code to print out present iteration
00687         
00688         double norm=gsl_blas_dnrm2(st_grad);
00689         
00690         if(this->verbose>0) {
00691           this->print_iter(nn,*st_x,st_f,xiter,
00692                            norm,this->tolf,"gsl_mmin_bfgs2");
00693         }
00694 
00695         if (norm<this->tolf) status=gsl_success;
00696         else status=gsl_continue;
00697 
00698       }
00699       while (status == GSL_CONTINUE && xiter < this->ntrial);
00700       
00701       for(size_t i=0;i<nn;i++) xx[i]=(*st_x)[i];
00702 
00703       fmin=st_f;
00704       
00705       free();
00706 
00707       this->last_ntrial=xiter;
00708         
00709       return status;
00710     }
00711 
00712   };
00713 
00714 #ifndef DOXYGENP
00715 }
00716 #endif
00717 
00718 #endif

Documentation generated with Doxygen and provided under the GNU Free Documentation License. See License Information for details.

Project hosting provided by SourceForge.net (see the O2scl SourceForge project page).