Object-oriented Scientific Computing Library: Version 0.910
ool_mmin_spg.h
00001 /*
00002   -------------------------------------------------------------------
00003   
00004   Copyright (C) 2006-2012, Andrew W. Steiner
00005   
00006   This file is part of O2scl.
00007   
00008   O2scl is free software; you can redistribute it and/or modify
00009   it under the terms of the GNU General Public License as published by
00010   the Free Software Foundation; either version 3 of the License, or
00011   (at your option) any later version.
00012   
00013   O2scl is distributed in the hope that it will be useful,
00014   but WITHOUT ANY WARRANTY; without even the implied warranty of
00015   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016   GNU General Public License for more details.
00017   
00018   You should have received a copy of the GNU General Public License
00019   along with O2scl. If not, see <http://www.gnu.org/licenses/>.
00020 
00021   -------------------------------------------------------------------
00022 */
00023 /*------------------------------------------------------------------------*
00024  * Open Optimization Library - Spectral Projected Gradient Method
00025  * 
00026  * spg/spg.c
00027  *
00028  * This program is free software; you can redistribute it and/or modify
00029  * it under the terms of the GNU General Public License as published by
00030  * the Free Software Foundation; either version 2 of the License, or (at
00031  * your option) any later version.
00032  * 
00033  * This program is distributed in the hope that it will be useful, but
00034  * WITHOUT ANY WARRANTY; without even the implied warranty of
00035  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00036  * General Public License for more details.
00037  * 
00038  * You should have received a copy of the GNU General Public License
00039  * along with this program; if not, write to the Free Software
00040  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00041  *
00042  * Ricardo Biloti
00043  * since: May 3rd, 2005
00044  *
00045  * $Id: spg.c,v 1.4 2005/05/10 20:24:27 biloti Exp $
00046  *------------------------------------------------------------------------*/
00047 #ifndef O2SCL_OOL_MMIN_SPG_H
00048 #define O2SCL_OOL_MMIN_SPG_H
00049 
00050 #include <o2scl/ovector_tlate.h>
00051 #include <o2scl/multi_funct.h>
00052 #include <o2scl/ool_constr_mmin.h>
00053 #include <gsl/gsl_math.h>
00054 
00055 #ifndef DOXYGENP
00056 namespace o2scl {
00057 #endif
00058 
00059   /** \brief Constrained minimization by the spectral projected
00060       gradient method (OOL)
00061 
00062       This class applies a non-monotone line search strategy 
00063       to the classical projected gradient method. 
00064       
00065       As in \ref Birgin00, this class applies a nonmonotone Armijo
00066       sufficient decrease condition for accepting trial points as an
00067       improvement over the classical spectral projected gradient
00068       method. This method may be competitive with large problems
00069       because it has low memory requirements.
00070 
00071       Default template arguments
00072       - \c func_t - \ref multi_funct<>
00073       - \c dfunc_t - \ref grad_funct<>
00074       - \c vec_t - \ref ovector_base 
00075       - \c alloc_vec_t - \ref ovector
00076       - \c alloc_t - \ref ovector_alloc
00077 
00078       \future There is some memory allocation which isn't
00079       deallocated until the destructor, and should be handled
00080       a bit more elegantly.
00081   */
00082   template<class func_t=multi_funct<>, class dfunc_t=grad_funct<>, 
00083     class vec_t=ovector_base, class alloc_vec_t=ovector,
00084     class alloc_t=ovector_alloc> class ool_mmin_spg : 
00085   public ool_constr_mmin<func_t,dfunc_t,ool_hfunct<int>,vec_t,
00086     alloc_vec_t,alloc_t> {
00087 
00088 #ifndef DOXYGEN_INTERNAL
00089 
00090     protected:
00091 
00092     /// A convenient typedef for the unused Hessian product type
00093     typedef ool_hfunct<int> hfunc_t;
00094 
00095     /// Armijo parameter
00096     double alpha;
00097 
00098     /// Temporary vector
00099     alloc_vec_t xx;
00100 
00101     /// Temporary vector
00102     alloc_vec_t d;
00103 
00104     /// Temporary vector
00105     alloc_vec_t s;
00106 
00107     /// Temporary vector
00108     alloc_vec_t y;
00109 
00110     /// Temporary vector
00111     alloc_vec_t fvec;
00112 
00113     /// Non-monotone parameter
00114     size_t m;
00115 
00116     /// Desc
00117     int tail;
00118     
00119     /// Line search
00120     int line_search() {
00121 
00122       double lambda, lambdanew;
00123       double fmax, faux, fxx, dTg;
00124       short armijo_failure;
00125       size_t ii;
00126 
00127       // Saving the previous gradient and compute 
00128       // d = P(x - alpha * g) - x
00129       for(size_t i=0;i<this->dim;i++) {
00130         y[i]=this->gradient[i];
00131         d[i]=this->x[i];
00132       }
00133       for(size_t i=0;i<this->dim;i++) {
00134         d[i]-=alpha*this->gradient[i];
00135       }
00136       proj(d);
00137       for(size_t i=0;i<this->dim;i++) {
00138         d[i]-=this->x[i];
00139       }
00140 
00141       dTg=0.0;
00142       for(size_t i=0;i<this->dim;i++) dTg+=d[i]*this->gradient[i];
00143 
00144       lambda=1;
00145       armijo_failure=1;
00146       while (armijo_failure) {
00147 
00148         /* x trial */
00149         for(size_t i=0;i<this->dim;i++) xx[i]=this->x[i];
00150         for(size_t i=0;i<this->dim;i++) xx[i]+=lambda*d[i];
00151         fxx=(*this->func)(this->dim,xx);
00152 
00153         fmax=0;
00154         for(ii=0;ii<m;ii++) {
00155           faux=fvec[ii]+gamma*lambda*dTg;
00156           fmax=GSL_MAX(fmax,faux);
00157         }
00158         
00159         if (fxx>fmax) {
00160           armijo_failure=1;
00161           lambdanew=-lambda*lambda*dTg/(2*(fxx-this->f-lambda*dTg));
00162           lambda=GSL_MAX(sigma1*lambda,GSL_MIN(sigma2*lambda,lambdanew));
00163         } else {
00164           armijo_failure=0;
00165         }
00166       }
00167       
00168       // st->s = x_{k+1} - x_k 
00169       for(size_t i=0;i<this->dim;i++) s[i]=xx[i];
00170       for(size_t i=0;i<this->dim;i++) s[i]-=this->x[i];
00171 
00172       // M->x = x_{k+1} 
00173       for(size_t i=0;i<this->dim;i++) this->x[i]=xx[i];
00174       (*this->dfunc)(this->dim,this->x,this->gradient);
00175       this->f=fxx;
00176       
00177       // Infinite norm of g1 = d <- [P(x - g) - x] 
00178       for(size_t i=0;i<this->dim;i++) d[i]=this->x[i];
00179       for(size_t i=0;i<this->dim;i++) d[i]-=this->gradient[i];
00180       proj(d);
00181       for(size_t i=0;i<this->dim;i++) d[i]-=this->x[i];
00182       
00183       double maxval=fabs(d[0]);
00184       for(size_t i=1;i<this->dim;i++) {
00185         if (fabs(d[i])>maxval) {
00186           maxval=fabs(d[i]);
00187         }
00188       }
00189       this->size=fabs(maxval);
00190       
00191       // st->y = - (g(x_{k+1}) - g(x_k)) 
00192       for(size_t i=0;i<this->dim;i++) y[i]-=this->gradient[i];
00193 
00194       m++;
00195       m=GSL_MIN(M,m);
00196       tail++;
00197       tail=tail % M;
00198       fvec[tail]=this->f;
00199 
00200       return 0;
00201     }
00202     
00203     /// Project into feasible region
00204     int proj(vec_t &xt) {
00205       size_t ii, n=this->dim;
00206       
00207       for(ii=0;ii<n;ii++) {
00208         xt[ii]=GSL_MAX(this->L[ii],GSL_MIN(xt[ii],this->U[ii]));
00209       }
00210       return 0;
00211     }
00212 
00213 #endif
00214 
00215     public:
00216 
00217     ool_mmin_spg() {
00218       fmin=-1.0e99;
00219       tol=1.0e-4;
00220       alphamin=1.0e-30;
00221       alphamax=1.0e30;
00222       gamma=1.0e-4;
00223       sigma1=0.1;
00224       sigma2=0.9;
00225       M=10;
00226     }
00227 
00228     ~ool_mmin_spg() {
00229       this->ao.free(fvec);
00230     }
00231 
00232     /** \brief Minimum function value (default \f$ 10^{-99} \f$)
00233         
00234         If the function value is below this value, then the algorithm
00235         assumes that the function is not bounded and exits.
00236     */
00237     double fmin;
00238 
00239     /// Tolerance on infinite norm (default \f$ 10^{-4} \f$)
00240     double tol;
00241 
00242     /// Lower bound to spectral step size (default \f$ 10^{-30} \f$)
00243     double alphamin;
00244 
00245     /// Upper bound to spectral step size (default \f$ 10^{30} \f$)
00246     double alphamax;
00247 
00248     /// Sufficient decrease parameter (default \f$ 10^{-4} \f$)
00249     double gamma;
00250 
00251     /// Lower bound to the step length reduction (default 0.1)
00252     double sigma1;
00253 
00254     /// Upper bound to the step length reduction (default 0.9)
00255     double sigma2;
00256 
00257     /// Monotonicity parameter (M=1 forces monotonicity) (default 10)
00258     size_t M;
00259     
00260     /// Allocate memory
00261     virtual int allocate(const size_t n) {
00262       if (this->dim>0) free();
00263       this->ao.allocate(xx,n);
00264       this->ao.allocate(d,n);
00265       this->ao.allocate(s,n);
00266       this->ao.allocate(y,n);
00267       return ool_constr_mmin<func_t,dfunc_t,hfunc_t,vec_t,alloc_vec_t,
00268         alloc_t>::allocate(n);
00269     }
00270     
00271     /// Free previously allocated memory
00272     virtual int free() {
00273       if (this->dim>0) this->ao.free(xx);
00274       if (this->dim>0) this->ao.free(d);
00275       if (this->dim>0) this->ao.free(s);
00276       if (this->dim>0) this->ao.free(y);
00277       return ool_constr_mmin<func_t,dfunc_t,hfunc_t,vec_t,alloc_vec_t,
00278       alloc_t>::free();
00279     }
00280     
00281     /// Set the function, the initial guess, and the parameters
00282     virtual int set(func_t &fn, dfunc_t &dfn, vec_t &init) {
00283       
00284       int ret=ool_constr_mmin<func_t,dfunc_t,hfunc_t,vec_t,alloc_vec_t,
00285       alloc_t>::set(fn,dfn,init);
00286       
00287       // Turn initial guess into a feasible point
00288       proj(this->x);
00289       
00290       // Evaluate function and gradient
00291       this->f=(*this->func)(this->dim,this->x);
00292       (*this->dfunc)(this->dim,this->x,this->gradient);
00293       
00294       /* Infinite norm of d <- g1 = [P(x - g) - x] */
00295       o2scl::vector_copy(this->dim,this->x,d);
00296       for(size_t i=0;i<this->dim;i++) {
00297         d[i]-=this->gradient[i];
00298       }
00299       proj(d);
00300       for(size_t i=0;i<this->dim;i++) d[i]-=this->x[i];
00301 
00302       double maxval=fabs(d[0]);
00303       for(size_t i=1;i<this->dim;i++) {
00304         if (fabs(d[i])>maxval) {
00305           maxval=fabs(d[i]);
00306         }
00307       }
00308       this->size=fabs(maxval);
00309       
00310       /* alpha_0 */
00311       maxval=fabs(this->gradient[0]);
00312       for(size_t i=1;i<this->dim;i++) {
00313         if (fabs(this->gradient[i])>maxval) {
00314           maxval=fabs(this->gradient[i]);
00315         }
00316       }
00317       alpha=1.0/fabs(maxval);
00318 
00319       // FIXME: at the moment, this memory allocation is 
00320       // left hanging. It's taken care of the destructor,
00321       // but this isn't so elegant.
00322       this->ao.allocate(fvec,M);
00323       m=1;
00324       tail=0;
00325       fvec[tail]=this->f;
00326 
00327       return 0;
00328     }
00329 
00330     /// Restart the minimizer
00331     virtual int restart() {
00332 
00333       proj(this->x);
00334 
00335       // Evaluate function and gradient
00336       this->f=(*this->func)(this->dim,this->x);
00337       (*this->dfunc)(this->dim,this->x,this->gradient);
00338 
00339       // alpha_0 
00340       double maxval=fabs(this->gradient[0]);
00341       for(size_t i=1;i<this->dim;i++) {
00342         if (fabs(this->gradient[i])>maxval) {
00343           maxval=fabs(this->gradient[i]);
00344         }
00345       }
00346       alpha=1.0/fabs(maxval);
00347 
00348       // Infinite norm of d <- g1 = [P(x - g) - x]
00349       o2scl::vector_copy(this->dim,this->x,d);
00350       for(size_t i=0;i<this->dim;i++) {
00351         d[i]-=this->gradient[i];
00352       }
00353       proj(d);
00354       for(size_t i=0;i<this->dim;i++) d[i]-=this->x[i];
00355 
00356       maxval=fabs(d[0]);
00357       for(size_t i=1;i<this->dim;i++) {
00358         if (fabs(d[i])>maxval) {
00359           maxval=fabs(d[i]);
00360         }
00361       }
00362       this->size=fabs(maxval);
00363 
00364       m=1;
00365       tail=0;
00366       fvec[tail]=this->f;
00367 
00368       return 0;
00369     }
00370 
00371     /// Perform an iteration
00372     virtual int iterate() {
00373       double t;
00374       
00375       line_search();
00376 
00377       double bk=0.0;
00378       for(size_t i=0;i<this->dim;i++) bk+=s[i]*y[i];
00379 
00380       if (bk>=0.0) {
00381         alpha=alphamax;
00382       } else {
00383         double ak=0.0;
00384         for(size_t i=0;i<this->dim;i++) ak+=s[i]*s[i];
00385         ak=-ak/bk;
00386         alpha=GSL_MIN(alphamax,GSL_MAX(alphamin,ak));
00387       }
00388 
00389       return 0;
00390     }
00391 
00392     /// See if we're finished
00393     virtual int is_optimal() {
00394       if (this->size>tol && this->f>fmin) {
00395         return GSL_CONTINUE;
00396       }
00397       return GSL_SUCCESS;
00398     }
00399 
00400     /// Return string denoting type ("ool_mmin_spg")
00401     const char *type() { return "ool_mmin_spg"; }
00402       
00403   };
00404   
00405 #ifndef DOXYGENP
00406 }
00407 #endif
00408 
00409 #endif
00410 
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

Documentation generated with Doxygen. Provided under the GNU Free Documentation License (see License Information).

Get Object-oriented Scientific Computing
Lib at SourceForge.net. Fast, secure and Free Open Source software
downloads.