Object-oriented Scientific Computing Library: Version 0.910
hist.h
00001 /*
00002   -------------------------------------------------------------------
00003   
00004   Copyright (C) 2010-2012, Andrew W. Steiner
00005   
00006   This file is part of O2scl.
00007   
00008   O2scl is free software; you can redistribute it and/or modify
00009   it under the terms of the GNU General Public License as published by
00010   the Free Software Foundation; either version 3 of the License, or
00011   (at your option) any later version.
00012   
00013   O2scl is distributed in the hope that it will be useful,
00014   but WITHOUT ANY WARRANTY; without even the implied warranty of
00015   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016   GNU General Public License for more details.
00017   
00018   You should have received a copy of the GNU General Public License
00019   along with O2scl. If not, see <http://www.gnu.org/licenses/>.
00020 
00021   -------------------------------------------------------------------
00022 */
00023 #ifndef O2SCL_HIST_H
00024 #define O2SCL_HIST_H
00025 
00026 #include <iostream>
00027 #include <o2scl/convert_units.h>
00028 #include <o2scl/smart_interp.h>
00029 #include <o2scl/uvector_tlate.h>
00030 #include <o2scl/uniform_grid.h>
00031 
00032 #ifndef DOXYGENP
00033 namespace o2scl {
00034 #endif
00035   
00036   /** \brief A one-dimensional histogram class
00037       
00038       Experimental.
00039 
00040       One may set the histogram bins using \ref set_bins() or one may
00041       manually set the limit of one bin using the reference returned
00042       by get_bin_low(), get_bin_low_i(), get_bin_high(), or
00043       get_bin_high_i(). Note that if one attempts to set the bins on a
00044       histogram where the bins have already been set, one must ensure
00045       that the new and old binnings have the same size. This
00046       requirement is designed to help prevent accidental data loss.
00047 
00048       The upper edge of bin \c i is always equal to the lower edge
00049       of bin \c i+1. 
00050 
00051       By convention, all functions which take a bin index as an
00052       argument have an extra \c "_i" suffix to distinguish them from
00053       functions which take a floating-point value to be binned as
00054       their argument.
00055 
00056       Empty histograms have zero size. 
00057 
00058       The \ref set_bins() functions can only be used if (i) the current
00059       histogram is empty or (ii) the new number of bins is equal to
00060       the histogram size so that no reallocation is necessary.
00061 
00062       To save space, representative vectors are not allocated until
00063       they are used.
00064 
00065       \hline
00066       \b Bin Representatives 
00067 
00068       One way to operate on a histogram as a function is to designate
00069       a particular value inside each bin which represents the
00070       coordinate associated with that bin. These "bin representative
00071       values" are automatically created and can be used by the
00072       function evaluation and interpolation functions
00073       operator(double), interp(), deriv(), integ() for some particular
00074       interpolation type. By default, these representative values are
00075       taken to be the midpoint of each bin, but this option is
00076       configurable and the representative values may be set by the
00077       user for each individual bin.
00078 
00079       \hline
00080 
00081       \todo Check that the actions of set_bins(), clear() and 
00082       other functions perform the correct actions on the user_rep
00083       vector. I'm a bit concerned that set_bins_auto() shouldn't
00084       call user_rep.allocate().
00085 
00086       \todo More documentation and testing.
00087 
00088       \future Would be nice not to have to create a new
00089       \ref search_vec object in get_bin_index().
00090       \future Consider adding the analogs of the GSL histogram
00091       sampling functions 
00092       \future Add a function which computes the bin sizes?
00093   */
00094   class hist {
00095     
00096   protected:
00097 
00098     /// Bin locations (N+1)
00099     uvector ubin;
00100     
00101     /// Bin contents (N)
00102     uvector uwgt;
00103 
00104     /// Bin representative values (N)
00105     uvector urep;
00106     
00107     /// User-defined representative values (N)
00108     uvector user_rep;
00109     
00110     /// Number of bins
00111     size_t hsize;
00112 
00113     /// Representative mode
00114     size_t rmode;
00115 
00116     /// A pointer to the interpolation manager
00117     base_interp_mgr<uvector_const_view> *bim1;
00118     
00119     /// A pointer to the subvector interpolation manager
00120     base_interp_mgr<uvector_const_subvector> *bim2;
00121     
00122     /// Default interpolation manager
00123     def_interp_mgr<uvector_const_view,cspline_interp> dim1;
00124 
00125     /// Default interpolation manager
00126     def_interp_mgr<uvector_const_subvector,cspline_interp> dim2;
00127 
00128     /// Set the representative array according to current rmode
00129     void set_reps_auto();
00130 
00131     /// Interpolation typedef
00132     typedef o2scl_interp_vec<uvector_const_view> interp_t;
00133 
00134     /** \brief Allocate vectors for a histogram of size \c n
00135 
00136         This function also sets all the weights to zero.
00137      */
00138     void allocate(size_t n);
00139 
00140   public:
00141 
00142     hist();
00143 
00144     ~hist();
00145 
00146     /// Copy constructor
00147     hist(const hist &h);
00148 
00149     /// Copy constructor
00150     hist &operator=(const hist &h);
00151 
00152     /// The histogram size
00153     size_t size() const {
00154       return hsize;
00155     }
00156 
00157     /** \brief If true, allow abcissa larger than largest bin limit
00158         to correspond to the highest bin (default false)
00159     */
00160     bool extend_rhs;
00161 
00162     /// \name Initial bin setup
00163     //@{
00164     /** \brief Set bins from a \ref uniform_grid object
00165         
00166         \note If the size of the histogram uniform_grid is not equal
00167         to the current histogram size, this function requires the
00168         histogram be empty so it can reallocate the vectors for both
00169         the bins and the weights.
00170     */
00171     int set_bins(uniform_grid<double> g);
00172 
00173     /** \brief Set the bins from a vector
00174 
00175         \note If \c n is not equal to the current histogram size,
00176         this function requires the histogram be empty so it can
00177         reallocate the vectors for both the bins and the weights.
00178      */
00179     template<class vec_t> int set_bins(size_t n, const vec_t &v) {
00180       if (n!=hsize) {
00181         if (hsize!=0) {
00182           O2SCL_ERR2_RET("Requested binning change in non-empty ",
00183                          "histogram in hist::set_bins().",gsl_efailed);
00184         }
00185         allocate(n-1);
00186       }
00187       for(size_t i=0;i<n;i++) ubin[i]=v[i];
00188       set_reps_auto();
00189       return gsl_success;
00190     }
00191     //@}
00192 
00193     /// \name Weight functions
00194     //@{
00195     /// Increment bin for \c x by value \c val
00196     int update(double x, double val=1.0);
00197 
00198     /// Increment bin with index \c i by value \c val
00199     int update_i(size_t i, double val=1.0) {
00200       uwgt[i]+=val;
00201       return 0;
00202     }
00203 
00204     /// Return contents of bin with index \c i
00205     const double &get_wgt_i(size_t i) const;
00206 
00207     /// Return contents of bin with index \c i
00208     double &get_wgt_i(size_t i);
00209 
00210     /// Return contents of bin for \c x
00211     const double &get_wgt(double x) const {
00212       return get_wgt_i(get_bin_index(x));
00213     }
00214 
00215     /// Return contents of bin for \c x
00216     double &get_wgt(double x) {
00217       return get_wgt_i(get_bin_index(x));
00218     }
00219 
00220     /// Set contents of bin with index \c i to value \c val
00221     int set_wgt_i(size_t i, double val);
00222 
00223     /// Set contents of bin for \c x to value \c val
00224     int set_wgt(double x, double val) {
00225       return set_wgt_i(get_bin_index(x),val);
00226     }
00227     
00228     /// Get a reference to the full y vector
00229     const uvector &get_wgts() const {
00230       return uwgt;
00231     }
00232 
00233     /// Get a reference to the weight for the bin at index \c i
00234     const double &operator[](size_t i) const {
00235       return uwgt[i];
00236     }
00237     
00238     /// Get a reference to the weight for the bin at index \c i
00239     double &operator[](size_t i) {
00240       return uwgt[i];
00241     }
00242     //@}
00243 
00244     /// \name Bin manipulation
00245     //@{
00246     /// Get the index of the bin which holds \c x
00247     size_t get_bin_index(double x) const;
00248 
00249     /// Get the lower edge of bin of index \c i
00250     double &get_bin_low_i(size_t i);
00251 
00252     /// Get the lower edge of bin of index \c i
00253     const double &get_bin_low_i(size_t i) const;
00254 
00255     /// Get the upper edge of bin of index \c i
00256     double &get_bin_high_i(size_t i);
00257     
00258     /// Get the upper edge of bin of index \c i
00259     const double &get_bin_high_i(size_t i) const;
00260 
00261     /// Get the lower edge of bin of index \c i
00262     double &get_bin_low(double x) {
00263       return get_bin_low_i(get_bin_index(x));
00264     }
00265 
00266     /// Get the lower edge of bin of index \c i
00267     const double &get_bin_low(double x) const {
00268       return get_bin_low_i(get_bin_index(x));
00269     }
00270 
00271     /// Get the upper edge of bin of index \c i
00272     double &get_bin_high(double x) {
00273       return get_bin_high_i(get_bin_index(x));
00274     }
00275     
00276     /// Get the upper edge of bin of index \c i
00277     const double &get_bin_high(double x) const {
00278       return get_bin_high_i(get_bin_index(x));
00279     }
00280 
00281     /// Get a reference to the full vector of bin specifications
00282     const uvector &get_bins() const {
00283       return ubin;
00284     }
00285     //@}
00286 
00287     /** \brief Get maximum weight
00288      */
00289     double get_max_wgt() {
00290       double max=uwgt[0];
00291       for(size_t i=1;i<hsize;i++) {
00292         if (uwgt[i]>max) max=uwgt[i];
00293       }
00294       return max;
00295     }
00296 
00297     /** \brief Get the index of the maximum weight
00298      */
00299     size_t get_max_index() {
00300       double max=uwgt[0];
00301       size_t max_ix=0;
00302       for(size_t i=1;i<hsize;i++) {
00303         if (uwgt[i]>max) {
00304           max=uwgt[i];
00305           max_ix=i;
00306         }
00307       }
00308       return max_ix;
00309     }
00310 
00311     /** \brief Get the representative for the bin with maximum weight
00312      */
00313     double get_max_rep() {
00314       double max=uwgt[0];
00315       size_t max_ix=0;
00316       for(size_t i=1;i<hsize;i++) {
00317         if (uwgt[i]>max) {
00318           max=uwgt[i];
00319           max_ix=i;
00320         }
00321       }
00322       if (urep.size()==0) set_reps_auto();
00323       return urep[max_ix];
00324     }
00325 
00326     /// \name Delete functions
00327     //@{
00328     /// Clear the data, but leave the bins as is
00329     int clear_wgts();
00330 
00331     /// Clear the entire histogram
00332     int clear();
00333     //@}
00334 
00335     /// \name Rep modes (default is \c rmode_avg)
00336     //@{
00337     static const size_t rmode_avg=0;
00338     static const size_t rmode_user=1;
00339     static const size_t rmode_low=2;
00340     static const size_t rmode_high=3;
00341     static const size_t rmode_gmean=4;
00342     //@}
00343     
00344     /// \name Representative functions
00345     //@{
00346     /// Set the representative x-values for each bin
00347     template<class vec_t> int set_reps(size_t n, const vec_t &v) {
00348       rmode=rmode_user;
00349       if (user_rep.size()>0) user_rep.free();
00350       user_rep.allocate(n);
00351       if (user_rep.size()!=n) {
00352         std::string s="Expected a vector of size "+itos(user_rep.size())+
00353           " and got a vector of size "+itos(n)+" in hist::set_reps().";
00354         O2SCL_ERR_RET(s.c_str(),gsl_einval);
00355       }
00356       for(size_t i=0;i<n;i++) user_rep[i]=v[i];
00357       return gsl_success;
00358     }
00359 
00360     /// Set mode used to compute bin representatives
00361     int set_rep_mode(size_t mode);
00362 
00363     /// Get mode used to compute bin representatives
00364     size_t get_rep_mode() const {
00365       return rmode;
00366     }
00367 
00368     /// Return the representative of bin of index \c i
00369     double &get_rep_i(size_t i);
00370 
00371     /// Return the representative of bin containing \c x
00372     double &get_rep(double x) {
00373       return get_rep_i(get_bin_index(x));
00374     }
00375 
00376     /// Get a reference to the full representative vector
00377     const uvector &get_reps() {
00378       if (urep.size()==0) set_reps_auto();
00379       return urep;
00380     }
00381 
00382     /// Get a reference to the full data vector
00383     const uvector &get_user_reps() const {
00384       return user_rep;
00385     }
00386     //@}
00387 
00388     /// \name Evaluation and interpolation functions
00389     //@{
00390     /// Return the value of the function at \c x
00391     double operator()(double x);
00392 
00393     /// Return the value of the function at \c x
00394     double interp(double x);
00395 
00396     /// Return the derivative of the function at \c x
00397     double deriv(double x);
00398 
00399     /// Return the second derivative of the function at \c x
00400     double deriv2(double x);
00401 
00402     /// Return the integral of the function between \c x and \c y
00403     double integ(double x, double y);
00404 
00405     /// Set the base interpolation objects
00406     int set_interp(base_interp_mgr<uvector_const_view> &bi1,
00407                    base_interp_mgr<uvector_const_subvector> &bi2);
00408     //@}
00409 
00410   };
00411 
00412 #ifndef DOXYGENP
00413 }
00414 #endif
00415 
00416 #endif
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

Documentation generated with Doxygen. Provided under the GNU Free Documentation License (see License Information).

Get Object-oriented Scientific Computing
Lib at SourceForge.net. Fast, secure and Free Open Source software
downloads.