Object-oriented Scientific Computing Library: Version 0.910
vec_stats.h
Go to the documentation of this file.
00001 /*
00002   -------------------------------------------------------------------
00003   
00004   Copyright (C) 2006-2012, Andrew W. Steiner
00005   
00006   This file is part of O2scl.
00007   
00008   O2scl is free software; you can redistribute it and/or modify
00009   it under the terms of the GNU General Public License as published by
00010   the Free Software Foundation; either version 3 of the License, or
00011   (at your option) any later version.
00012   
00013   O2scl is distributed in the hope that it will be useful,
00014   but WITHOUT ANY WARRANTY; without even the implied warranty of
00015   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016   GNU General Public License for more details.
00017   
00018   You should have received a copy of the GNU General Public License
00019   along with O2scl. If not, see <http://www.gnu.org/licenses/>.
00020 
00021   -------------------------------------------------------------------
00022 */
00023 #ifndef O2SCL_VEC_STATS_H
00024 #define O2SCL_VEC_STATS_H
00025 
00026 /** \file vec_stats.h
00027     \brief File containing statistics template functions
00028 
00029     This file contains several function templates for computing
00030     statistics of vectors of double-precision data. It includes mean,
00031     median, variance, standard deviation, covariance, correlation, and
00032     other functions.
00033 
00034     No additional range checking is done on the vectors.
00035 
00036     \future Consider generalizing to other data types.
00037 */
00038 
00039 #include <o2scl/err_hnd.h>
00040 #include <gsl/gsl_sort.h>
00041 
00042 #ifndef DOXYGENP
00043 namespace o2scl {
00044 #endif
00045 
00046   /// \name Vector functions
00047   //@{
/** \brief Mean of the first \c n entries of \c data

    The result is built up as a running average held in extended
    precision: each element moves the current estimate by its
    distance from that estimate divided by the number of elements
    seen so far.

    This function produces the same results
    as <tt>gsl_stats_mean()</tt>.

    \param n Number of leading elements to average
    \param data Object indexable with <tt>operator[]</tt>
    \return The mean; zero when \c n is zero, since the loop body
    never executes
*/
template<class vec_t> double vector_mean(size_t n, const vec_t &data) {
  long double running=0.0;
  size_t seen=0;
  while (seen<n) {
    const long double step=data[seen]-running;
    ++seen;
    running+=step/seen;
  }
  return running;
}
00063 
/** \brief Variance about a mean that the caller already knows

    Evaluates
    \f[
    \frac{1}{N} \sum_{i} \left( x_i - \mu \right)^2
    \f]
    with \f$ \mu \f$ supplied in \c mean. The sum is never formed
    explicitly; a running average of the squared deviations is
    maintained in extended precision instead.

    This function produces the same results
    as <tt>gsl_stats_variance_with_fixed_mean()</tt>.

    \param n Number of leading elements to use
    \param data Object indexable with <tt>operator[]</tt>
    \param mean The value of \f$ \mu \f$
    \return The variance; zero when \c n is zero
*/
template<class vec_t>
double vector_variance_fmean(size_t n, const vec_t &data, double mean) {
  long double acc=0.0;
  size_t seen=0;
  while (seen<n) {
    // The subtraction is evaluated first and only then widened
    const long double dev=(data[seen]-mean);
    const long double sq=dev*dev;
    ++seen;
    acc+=(sq-acc)/seen;
  }
  return acc;
}
00084 
00085   /** \brief Compute the variance with specified mean
00086 
00087       This function computes
00088       \f[
00089       \frac{1}{N-1} \sum_{i} \left( x_i - \mu \right)^2
00090       \f]
00091       where the value of \f$ \mu \f$ is given in \c mean.
00092       
00093       This function produces the same results
00094       as <tt>gsl_stats_variance_m</tt>.
00095 
00096       If \c n is 0 or 1, this function will call the error
00097       handler.
00098   */
00099   template<class vec_t>
00100     double vector_variance(size_t n, const vec_t &data, double mean) {
00101 
00102     if (n<2) {
00103       O2SCL_ERR2_RET("Cannot compute variance with less than 2 elements",
00104                      " in vector_variance().",gsl_einval);
00105     }
00106 
00107     double var=vector_variance_fmean<vec_t>(n,data,mean);
00108     return var*n/(n-1);
00109   }
00110 
00111   /** \brief Compute the variance 
00112 
00113       This function computes
00114       \f[
00115       \frac{1}{N-1} \sum_{i} \left( x_i - \mu \right)n^2
00116       \f]
00117       where \f$ \mu \f$ is the mean computed with \ref vector_mean().
00118       
00119       This function produces the same results
00120       as <tt>gsl_stats_variance</tt>.
00121 
00122       If \c n is 0 or 1, this function will call the error handler.
00123   */
00124   template<class vec_t> double vector_variance(size_t n, const vec_t &data) {
00125 
00126     if (n<2) {
00127       O2SCL_ERR2_RET("Cannot compute variance with less than 2 elements",
00128                      " in vector_variance().",gsl_einval);
00129     }
00130     
00131     double mean=vector_mean<vec_t>(n,data);
00132     double var=vector_variance_fmean<vec_t>(n,data,mean);
00133     return var*n/(n-1);
00134   }
00135 
00136   /** \brief Standard deviation with specified mean known in advance
00137 
00138       This function computes
00139       \f[
00140       \sqrt{\frac{1}{N} \sum_{i} \left( x_i - \mu \right)^2}
00141       \f]
00142       where the value of \f$ \mu \f$ is given in \c mean. 
00143 
00144       This function produces the same results
00145       as <tt>gsl_stats_sd_with_fixed_mean()</tt>.
00146 
00147       If \c n is zero, this function will return zero without calling
00148       the error handler.
00149   */
00150   template<class vec_t>
00151     double vector_stddev_fmean(size_t n, const vec_t &data, 
00152                                double mean) {
00153     double sd=vector_variance_fmean<vec_t>(n,data,mean);
00154     return std::sqrt(sd);
00155   }
00156 
00157   /** \brief Standard deviation with specified mean
00158 
00159       This function computes
00160       \f[
00161       \sqrt{\frac{1}{N-1} \sum_{i} \left( x_i - \mu \right)^2}
00162       \f]
00163       where \f$ \mu \f$ is the mean computed with \ref vector_mean().
00164 
00165       This function produces the same results
00166       as <tt>gsl_stats_sd()</tt>.
00167 
00168       If \c n is 0 or 1, this function will call the error handler.
00169   */
00170   template<class vec_t> double vector_stddev(size_t n, const vec_t &data) {
00171     
00172     if (n<2) {
00173       O2SCL_ERR2_RET("Cannot compute std. dev. with less than 2 elements",
00174                      " in vector_stddev().",gsl_einval);
00175     }
00176     
00177     double mean=vector_mean<vec_t>(n,data);
00178     double var=vector_variance_fmean<vec_t>(n,data,mean);
00179     return std::sqrt(var*n/(n-1));
00180   }
00181 
00182   /** \brief Standard deviation with specified mean
00183 
00184       This function computes
00185       \f[
00186       \sqrt{\frac{1}{N-1} \sum_{i} \left( x_i - \mu \right)^2}
00187       \f]
00188       where the value of \f$ \mu \f$ is given in \c mean. 
00189 
00190       This function produces the same results
00191       as <tt>gsl_stats_sd_m()</tt>.
00192 
00193       If \c n is 0 or 1, this function will call the error
00194       handler.
00195   */
00196   template<class vec_t> double vector_stddev(size_t n, const vec_t &data, 
00197                                              double mean) {
00198 
00199     if (n<2) {
00200       O2SCL_ERR2_RET("Cannot compute std. dev. with less than 2 elements",
00201                      " in vector_stddev().",gsl_einval);
00202     }
00203     
00204     double sd=vector_variance_fmean<vec_t>(n,data,mean);
00205     return std::sqrt(sd*n/(n-1));
00206   }
00207   
/** \brief Mean absolute deviation from a caller-supplied mean

    Evaluates
    \f[
    \frac{1}{N} \sum_i | x_i - \mu |
    \f]
    with \f$ \mu \f$ supplied in \c mean. The absolute deviations
    are summed in extended precision and divided by \c n at the end.

    This function produces the same results
    as <tt>gsl_stats_absdev_m()</tt>.

    \param n Number of leading elements to use
    \param data Object indexable with <tt>operator[]</tt>
    \param mean The value of \f$ \mu \f$
    \return The mean absolute deviation; zero when \c n is zero
    (no error handler call is made)
*/
template<class vec_t> double vector_absdev(size_t n, const vec_t &data,
                                           double mean) {

  // Empty input: avoid dividing by zero below
  if (n==0) {
    return 0.0;
  }

  long double total=0.0;
  for(size_t k=0;k!=n;++k) {
    total+=fabs(data[k]-mean);
  }
  return total/n;
}
00233   
00234   /** \brief Absolute deviation from the computed mean
00235 
00236       This function computes
00237       \f[
00238       \sum_i | x_i - \mu |
00239       \f]
00240       where the value of \f$ \mu \f$ is mean as computed
00241       from \ref vector_mean().
00242 
00243       This function produces the same results
00244       as <tt>gsl_stats_absdev()</tt>.
00245 
00246       If \c n is zero, this function will return zero
00247       without calling the error handler.
00248   */
00249   template<class vec_t>
00250     double vector_absdev(size_t n, const vec_t &data) {
00251     double mean=vector_mean<vec_t>(n,data);
00252     return vector_absdev(n,data,mean);
00253   }
00254 
/** \brief Skewness about a given mean and standard deviation

    Evaluates
    \f[
    \frac{1}{N} \sum_i \left[
    \frac{ \left(x_i - \mu \right)}{ \sigma }\right]^3
    \f]
    with \f$ \mu \f$ and \f$ \sigma \f$ supplied in \c mean and
    \c stddev. A running average of the cubed standardized values
    is kept in extended precision.

    This function produces the same results
    as <tt>gsl_stats_skew_m_sd()</tt>.

    \param n Number of leading elements to use
    \param data Object indexable with <tt>operator[]</tt>
    \param mean The value of \f$ \mu \f$
    \param stddev The value of \f$ \sigma \f$ (not checked for zero)
    \return The skewness; zero when \c n is zero (no error handler
    call is made)
*/
template<class vec_t> double vector_skew(size_t n, const vec_t &data,
                                         double mean, double stddev) {
  long double acc=0.0;
  size_t seen=0;
  while (seen<n) {
    // Standardized value, evaluated before widening
    const long double z=(data[seen]-mean)/stddev;
    const long double cube=z*z*z;
    ++seen;
    acc+=(cube-acc)/seen;
  }
  return acc;
}
00280 
00281   /** \brief Skewness with computed mean and standard deviation
00282 
00283       This function computes 
00284       \f[
00285       \frac{1}{N} \sum_i \left[ 
00286       \frac{ \left(x_i - \mu \right)}{ \sigma }\right]^3
00287       \f]
00288       where the values of \f$ \mu \f$ and \f$ \sigma \f$ 
00289       are computed using \ref vector_mean() and \ref vector_stddev().
00290 
00291       This function produces the same results
00292       as <tt>gsl_stats_skew()</tt>.
00293 
00294       If \c n is zero, this function will return zero
00295       without calling the error handler.
00296   */
00297   template<class vec_t> double vector_skew(size_t n, const vec_t &data) {
00298     double mean=vector_mean<vec_t>(n,data);
00299     double sd=vector_stddev<vec_t>(n,data,mean);
00300     return vector_skew(n,data,mean,sd);
00301   }
00302 
/** \brief Excess kurtosis about a given mean and standard deviation

    Evaluates
    \f[
    -3 + \frac{1}{N} \sum_i \left[
    \frac{ \left(x_i - \mu \right)}{ \sigma }\right]^4
    \f]
    with \f$ \mu \f$ and \f$ \sigma \f$ supplied in \c mean and
    \c stddev. A running average of the fourth powers is kept in
    extended precision and 3 is subtracted at the end.

    This function produces the same results
    as <tt>gsl_stats_kurtosis_m_sd()</tt>.

    \param n Number of leading elements to use
    \param data Object indexable with <tt>operator[]</tt>
    \param mean The value of \f$ \mu \f$
    \param stddev The value of \f$ \sigma \f$ (not checked for zero)
    \return The excess kurtosis. No error handler call is made for
    \c n equal to zero; the running average is then zero, so the
    value returned is -3.
*/
template<class vec_t>
double vector_kurtosis(size_t n, const vec_t &data, double mean,
                       double stddev) {
  long double acc=0.0;
  size_t seen=0;
  while (seen<n) {
    // Standardized value, evaluated before widening
    const long double z=(data[seen]-mean)/stddev;
    const long double fourth=z*z*z*z;
    ++seen;
    acc+=(fourth-acc)/seen;
  }
  return acc-3.0;
}
00329 
00330   /** \brief Kurtosis with computed mean and standard deviation
00331 
00332       This function computes 
00333       \f[
00334       -3 + \frac{1}{N} \sum_i \left[ 
00335       \frac{ \left(x_i - \mu \right)}{ \sigma }\right]^4
00336       \f]
00337       where the values of \f$ \mu \f$ and \f$ \sigma \f$ 
00338       are computed using \ref vector_mean() and \ref vector_stddev().
00339 
00340       This function produces the same results
00341       as <tt>gsl_stats_kurtosis()</tt>.
00342 
00343       If \c n is zero, this function will return zero
00344       without calling the error handler.
00345   */
00346   template<class vec_t> double vector_kurtosis(size_t n, const vec_t &data) {
00347     double mean=vector_mean<vec_t>(n,data);
00348     double sd=vector_stddev<vec_t>(n,data,mean);
00349     return vector_kurtosis(n,data,mean,sd);
00350   }
00351 
00352   /** \brief Lag-1 autocorrelation
00353 
00354       This function computes
00355       \f[
00356       \left[
00357       \sum_i \left(x_i - \mu\right) \left(x_{i-1} - \mu \right)
00358       \right] \left[ 
00359       \sum_i \left(x_i - \mu\right)^2 
00360       \right]^{-1}
00361       \f]
00362 
00363       This function produces the same results
00364       as <tt>gsl_stats_lag1_autocorrelation_m()</tt>.
00365 
00366       If \c n is zero, this function will call the error handler.
00367   */
00368   template<class vec_t>
00369     double vector_lag1_autocorr(size_t n, const vec_t &data, double mean) {
00370     
00371     if (n<2) {
00372       O2SCL_ERR2_RET("Cannot compute lag1 with less than 2 elements",
00373                      " in vector_lag1_autocorr().",gsl_einval);
00374     }
00375 
00376     long double q=0.0;
00377     long double v=(data[0]-mean)*(data[0]-mean);
00378     for(size_t i=1;i<n;i++) {
00379       long double delta0=data[i-1]-mean;
00380       long double delta1=data[i]-mean;
00381       q+=(delta0*delta1-q)/(i+1);
00382       v+=(delta1*delta1-v)/(i+1);
00383     }
00384     return q/v;
00385   }
00386 
00387   /** \brief Lag-1 autocorrelation
00388 
00389       This function computes
00390       \f[
00391       \left[
00392       \sum_i \left(x_i - \mu\right) \left(x_{i-1} - \mu \right)
00393       \right] \left[ 
00394       \sum_i \left(x_i - \mu\right)^2 
00395       \right]^{-1}
00396       \f]
00397       
00398       This function produces the same results
00399       as <tt>gsl_stats_lag1_autocorrelation()</tt>.
00400 
00401       If \c n is zero, this function will call the error handler.
00402   */
00403   template<class vec_t> double vector_lag1_autocorr
00404     (size_t n, const vec_t &data) {
00405     double mean=vector_mean<vec_t>(n,data);
00406     return vector_lag1_autocorr(n,data,mean);
00407   }
00408 
/** \brief Covariance of two vectors about caller-supplied means

    Evaluates
    \f[
    \frac{1}{n-1} \sum_i \left(x_i - {\bar{x}}\right)
    \left(y_i - {\bar{y}}\right)
    \f]
    with \f$ {\bar{x}} \f$ and \f$ {\bar{y}} \f$ supplied in
    \c mean1 and \c mean2. A running average of the products is
    formed in double precision and then scaled by \f$ n/(n-1) \f$.

    This function produces the same results
    as <tt>gsl_stats_covariance_m()</tt>.

    \param n Number of leading elements to use from each vector
    \param data1 First vector (the \f$ x_i \f$)
    \param data2 Second vector (the \f$ y_i \f$)
    \param mean1 The value of \f$ {\bar{x}} \f$
    \param mean2 The value of \f$ {\bar{y}} \f$

    No size check is made and the error handler is never called;
    for \c n equal to zero the result is zero.
*/
template<class vec_t>
double vector_covariance(size_t n, const vec_t &data1, const vec_t &data2,
                         double mean1, double mean2) {
  double acc=0.0;
  size_t seen=0;
  while (seen<n) {
    const double dx=(data1[seen]-mean1);
    const double dy=(data2[seen]-mean2);
    ++seen;
    acc+=(dx*dy-acc)/seen;
  }
  return acc*n/(n-1);
}
00436 
00437   /** \brief Compute the covariance of two vectors
00438 
00439       This function computes
00440       \f[
00441       \frac{1}{n-1} \sum_i \left(x_i - {\bar{x}}\right)
00442       \left(y_i - {\bar{y}}\right)
00443       \f]
00444       where \f$ {\bar{x}} \f$ and \f$ {\bar{y}} \f$ are 
00445       the averages of \c data1 and \c data2 and are computed
00446       automatically using \ref vector_mean().
00447 
00448       This function produces the same
00449       results as <tt>gsl_stats_covariance()</tt>.
00450 
00451       If \c n is zero, this function will return zero
00452       without calling the error handler.
00453   */
00454   template<class vec_t>
00455     double vector_covariance(size_t n, const vec_t &data1, 
00456                              const vec_t &data2) {
00457     double covar=0.0;
00458     double mean1=vector_mean<vec_t>(n,data1);
00459     double mean2=vector_mean<vec_t>(n,data2);
00460     for(size_t i=0;i<n;i++) {
00461       long double delta1=(data1[i]-mean1);
00462       long double delta2=(data2[i]-mean2);
00463       covar+=(delta1*delta2-covar)/(i+1);
00464     }
00465     return covar*n/(n-1);
00466   }
00467   
00468   /** \brief Pearson's correlation
00469 
00470       This function computes the Pearson correlation coefficient 
00471       between \c data1 and \c data2 .
00472       
00473       This function produces the same
00474       results as <tt>gsl_stats_correlation()</tt>.
00475 
00476       \comment
00477       r = cov(x, y) / (\Hat\sigma_x \Hat\sigma_y)
00478       = {1/(n-1) \sum (x_i - \Hat x) (y_i - \Hat y)
00479       \over
00480       \sqrt{1/(n-1) \sum (x_i - \Hat x)^2} \sqrt{1/(n-1) 
00481       \sum (y_i - \Hat y)^2}
00482       }
00483       \endcomment
00484 
00485       If \c n is zero, this function will call the error handler.
00486   */
00487   template<class vec_t>
00488     double vector_correlation(size_t n, const vec_t &data1, 
00489                               const vec_t &data2) {
00490     size_t i;
00491 
00492     if (n<1) {
00493       O2SCL_ERR2_RET("Cannot compute correlation with no elements",
00494                      " in vector_correlation().",gsl_einval);
00495     }
00496 
00497     double sum_xsq = 0.0;
00498     double sum_ysq = 0.0;
00499     double sum_cross = 0.0;
00500     double ratio;
00501     double delta_x, delta_y;
00502     double mean_x, mean_y;
00503     double r;
00504 
00505     /*
00506      * Compute:
00507      * sum_xsq = Sum [ (x_i - mu_x)^2 ],
00508      * sum_ysq = Sum [ (y_i - mu_y)^2 ] and
00509      * sum_cross = Sum [ (x_i - mu_x) * (y_i - mu_y) ]
00510      * using the above relation from Welford's paper
00511      */
00512 
00513     mean_x = data1[0];
00514     mean_y = data2[0];
00515 
00516     for (i = 1; i < n; ++i) {
00517       ratio = i / (i + 1.0);
00518       delta_x = data1[i] - mean_x;
00519       delta_y = data2[i] - mean_y;
00520       sum_xsq += delta_x * delta_x * ratio;
00521       sum_ysq += delta_y * delta_y * ratio;
00522       sum_cross += delta_x * delta_y * ratio;
00523       mean_x += delta_x / (i + 1.0);
00524       mean_y += delta_y / (i + 1.0);
00525     }
00526     
00527     r = sum_cross / (std::sqrt(sum_xsq) * std::sqrt(sum_ysq));
00528     
00529     return r;
00530   }
00531 
00532   /** \brief Pooled variance
00533 
00534       \todo Document this
00535 
00536       This function produces the same
00537       results as <tt>gsl_stats_pvariance()</tt>.
00538       
00539       If \c n is zero, this function will return zero without calling
00540       the error handler.
00541   */
00542   template<class vec_t, class vec2_t>
00543     double vector_pvariance(size_t n1, const vec_t &data1, 
00544                             size_t n2, const vec2_t &data2) {
00545     double var1=vector_variance<vec_t>(n1,data1);
00546     double var2=vector_variance<vec2_t>(n2,data2);
00547     return (((n1-1)*var1)+((n2-1)*var2))/(n1+n2-2);
00548   }
00549 
/** \brief Quantile from sorted data (ascending only)

    Returns the quantile \c f of data which has already been sorted
    in ascending order. The quantile, \f$ q \f$ , is found by linear
    interpolation between neighbouring elements,
    \f[
    q = \left(1-\delta\right) x_i + \delta x_{i+1}
    \f]
    where \f$ i = \mathrm{floor}[ (n-1)f ] \f$ and
    \f$ \delta = (n-1)f -i \f$ .

    This function produces the same
    results as <tt>gsl_stats_quantile_from_sorted_data()</tt>.

    \param n Number of leading elements to use
    \param data Sorted object indexable with <tt>operator[]</tt>
    \param f The requested quantile
    \return The interpolated quantile, or zero when \c n is zero

    No checks are made to ensure the data is sorted, or to ensure
    that \f$ 0 \leq f \leq 1 \f$. If \c n is zero, this function
    returns zero without calling the error handler.
*/
template<class vec_t>
double vector_quantile_sorted(size_t n, const vec_t &data,
                              const double f) {

  // Handle empty input first: n-1 would wrap around for an unsigned
  // n, and converting the resulting huge position to size_t is not
  // guaranteed to be representable.
  if (n==0) return 0.0;

  const double index=f*(n-1);
  const size_t lhs=static_cast<size_t>(index);
  const double delta=index-lhs;

  // Last element: there is no right-hand neighbour to interpolate with
  if (lhs==n-1) return data[lhs];
  return (1-delta)*data[lhs]+delta*data[lhs+1];
}
00580   
/** \brief Median of data that is already sorted

    The data may be sorted in either direction. For an odd number
    of elements the median is the element at index
    \f$ (n-1)/2 \f$; for an even number it is the average of the
    elements at indices \f$ (n-1)/2 \f$ and \f$ n/2 \f$ .

    This function produces the same
    results as <tt>gsl_stats_median_from_sorted_data()</tt>.

    \param n Number of leading elements to use
    \param data Sorted object indexable with <tt>operator[]</tt>
    \return The median, or zero when \c n is zero

    No checks are made to ensure the data is sorted. If \c n is
    zero, zero is returned without calling the error handler.
*/
template<class vec_t>
double vector_median_sorted(size_t n, const vec_t &data) {

  if (n==0) {
    return 0.0;
  }

  // The two middle positions coincide exactly when n is odd
  const size_t lower=(n-1)/2;
  const size_t upper=n/2;

  if (lower!=upper) {
    return (data[lower]+data[upper])/2.0;
  }
  return data[lower];
}
00609 
/** \brief Compute the chi-squared statistic

    This function computes
    \f[
    \sum_i \left( \frac{\mathrm{obs}_i - \mathrm{exp}_i}
    {\mathrm{err}_i}\right)^2
    \f]
    where \f$ \mathrm{obs} \f$ are the observed values,
    \f$ \mathrm{exp} \f$ are the expected values, and
    \f$ \mathrm{err} \f$ are the errors.

    \param n Number of leading elements to use from each vector
    \param obs Observed values
    \param exp Expected values
    \param err Errors (not checked for zero)
    \return The sum above; zero when \c n is zero
 */
template<class vec_t, class vec2_t, class vec3_t>
double vector_chi_squared(size_t n, const vec_t &obs, const vec2_t &exp,
                          const vec3_t &err) {
  double chi2=0.0;
  for(size_t i=0;i<n;i++) {
    // Element-wise residual scaled by its error
    const double pull=(obs[i]-exp[i])/err[i];
    chi2+=pull*pull;
  }
  return chi2;
}
00630   //@}
00631 
00632   /// \name Weighted vector functions
00633   //@{
/** \brief Compute the mean of weighted data

    This function computes
    \f[
    \left( \sum_i w_i x_i \right) \left( \sum_i w_i \right)^{-1}
    \f]
    using the running update
    \f$ M_n = M_{n-1} + (x_n - M_{n-1}) \, w_n / W_n \f$ with
    \f$ W_n = W_{n-1} + w_n \f$ . Entries whose weight is not
    strictly positive are skipped.

    The data and weight vectors may be of different types, in
    line with the other weighted functions in this file.

    This function produces the same results
    as <tt>gsl_stats_wmean()</tt>.

    \param n Number of leading elements to use from each vector
    \param data The values \f$ x_i \f$
    \param weights The weights \f$ w_i \f$
    \return The weighted mean; zero when no entry has a positive
    weight (including \c n equal to zero)
*/
template<class vec_t, class vec2_t>
double wvector_mean(size_t n, const vec_t &data, const vec2_t &weights) {

  long double wmean=0.0;
  long double W=0.0;
  for(size_t i=0;i<n;i++) {
    double wi=weights[i];
    if (wi>0.0) {
      W+=wi;
      wmean+=(data[i]-wmean)*(wi/W);
    }
  }

  return wmean;
}
00664 
/** \brief Normalization factor for weighted variances

    Returns
    \f[
    \frac{\left(\sum_i w_i\right)^2}
    {\left(\sum_i w_i\right)^2 - \sum_i w_i^2}
    \f]
    taken over the strictly positive weights only.

    This is used by the \ref wvector_variance() and
    \ref wvector_stddev() overloads that take an explicit
    weighted mean.

    \param n Number of leading weights to use
    \param weights The weights \f$ w_i \f$
*/
template<class vec_t> double wvector_factor(size_t n, const vec_t &weights) {

  long double sum_w=0.0;
  long double sum_w2=0.0;
  for(size_t k=0;k<n;++k) {
    const double w=weights[k];
    if (!(w>0.0)) continue;
    sum_w+=w;
    sum_w2+=w*w;
  }
  const long double sq=sum_w*sum_w;
  return sq/(sq-sum_w2);
}
00687   
/** \brief Variance of weighted data about a mean known in advance

    Evaluates
    \f[
    \left[ \sum_i w_i \left(x_i-\mu\right)^2 \right]
    \left[ \sum_i w_i \right]^{-1}
    \f]
    with \f$ \mu \f$ supplied in \c wmean. Entries whose weight is
    not strictly positive are skipped.

    This function produces the same results
    as <tt>gsl_stats_wvariance_with_fixed_mean()</tt>.

    \param n Number of leading elements to use from each vector
    \param data The values \f$ x_i \f$
    \param weights The weights \f$ w_i \f$
    \param wmean The value of \f$ \mu \f$
*/
template<class vec_t, class vec2_t>
double wvector_variance_fmean(size_t n, const vec_t &data,
                              const vec2_t &weights, double wmean) {
  long double acc=0.0;
  long double total_w=0.0;
  for(size_t k=0;k<n;++k) {
    const double w=weights[k];
    if (!(w>0.0)) continue;
    const long double dev=data[k]-wmean;
    total_w+=w;
    // Move the running average toward the new squared deviation
    acc+=(dev*dev-acc)*(w/total_w);
  }
  return acc;
}
00717 
/** \brief Variance of weighted data about a caller-supplied mean

    The result of \ref wvector_variance_fmean() is multiplied by
    the normalization from \ref wvector_factor().

    This function produces the same results
    as <tt>gsl_stats_wvariance_m()</tt>.

    \param n Number of leading elements to use from each vector
    \param data The values
    \param weights The weights
    \param wmean The weighted mean to measure deviations from
*/
template<class vec_t, class vec2_t>
double wvector_variance(size_t n, const vec_t &data,
                        const vec2_t &weights, double wmean) {

  const double raw=wvector_variance_fmean(n,data,weights,wmean);
  const double norm=wvector_factor(n,weights);
  return norm*raw;
}
00734 
/** \brief Variance of weighted data with the mean computed internally

    The weighted mean is obtained from \ref wvector_mean() and
    passed to the overload taking an explicit mean.

    This function produces the same results
    as <tt>gsl_stats_wvariance()</tt>.

    \param n Number of leading elements to use from each vector
    \param data The values
    \param weights The weights
 */
template<class vec_t, class vec2_t>
double wvector_variance(size_t n, const vec_t &data,
                        const vec2_t &weights) {

  const double mu=wvector_mean(n,data,weights);
  return wvector_variance<vec_t,vec2_t>(n,data,weights,mu);
}
00748 
/** \brief Standard deviation of weighted data about a mean known
    in advance

    Returns the square root of \ref wvector_variance_fmean().

    This function produces the same results
    as <tt>gsl_stats_wsd_with_fixed_mean()</tt>.

    \param n Number of leading elements to use from each vector
    \param data The values
    \param weights The weights
    \param wmean The mean to measure deviations from
*/
template<class vec_t, class vec2_t>
double wvector_stddev_fmean(size_t n, const vec_t &data,
                            const vec2_t &weights, double wmean) {
  const double variance=wvector_variance_fmean(n,data,weights,wmean);
  return sqrt(variance);
}
00760 
/** \brief Standard deviation of weighted data with the mean
    computed internally

    The weighted mean is obtained from \ref wvector_mean(); the
    result is the square root of the \ref wvector_variance()
    overload taking that mean.

    This function produces the same results
    as <tt>gsl_stats_wsd()</tt>.

    \param n Number of leading elements to use from each vector
    \param data The values
    \param weights The weights
 */
template<class vec_t, class vec2_t>
double wvector_stddev(size_t n, const vec_t &data,
                      const vec2_t &weights) {
  const double mu=wvector_mean(n,data,weights);
  const double variance=wvector_variance(n,data,weights,mu);
  return sqrt(variance);
}
00773 
/** \brief Standard deviation of weighted data about a
    caller-supplied mean

    Returns the square root of the product of
    \ref wvector_variance_fmean() and \ref wvector_factor().

    This function produces the same results
    as <tt>gsl_stats_wsd_m()</tt>.

    \param n Number of leading elements to use from each vector
    \param data The values
    \param weights The weights
    \param wmean The weighted mean to measure deviations from
*/
template<class vec_t, class vec2_t>
double wvector_stddev(size_t n, const vec_t &data,
                      const vec2_t &weights, double wmean) {
  const double raw=wvector_variance_fmean(n,data,weights,wmean);
  const double norm=wvector_factor(n,weights);
  const double scaled=norm*raw;
  return sqrt(scaled);
}
00789 
/** \brief Weighted sum of squared deviations about a specified
    weighted mean

    Evaluates
    \f[
    \sum_i w_i \left(x_i-\mu\right)^2
    \f]
    with \f$ \mu \f$ supplied in \c wmean. Entries whose weight is
    not strictly positive are skipped.

    This function produces the same results
    as <tt>gsl_stats_wtss_m()</tt>.

    \param n Number of leading elements to use from each vector
    \param data The values \f$ x_i \f$
    \param weights The weights \f$ w_i \f$
    \param wmean The value of \f$ \mu \f$
 */
template<class vec_t, class vec2_t>
double wvector_sumsq(size_t n, const vec_t &data,
                     const vec2_t &weights, double wmean) {
  long double total=0.0;
  for(size_t k=0;k<n;++k) {
    const double w=weights[k];
    if (!(w>0.0)) continue;
    const long double dev=data[k]-wmean;
    total+=w*dev*dev;
  }
  return total;
}
00810 
/** \brief Weighted sum of squared deviations about the weighted mean

    The weighted mean is obtained from \ref wvector_mean() and
    passed to the overload taking an explicit mean.

    This function produces the same results
    as <tt>gsl_stats_wtss()</tt>.

    \param n Number of leading elements to use from each vector
    \param data The values
    \param weights The weights
 */
template<class vec_t, class vec2_t>
double wvector_sumsq(size_t n, const vec_t &data,
                     const vec2_t &weights) {
  const double mu=wvector_mean(n,data,weights);
  return wvector_sumsq(n,data,weights,mu);
}
00824 
/** \brief Weighted absolute deviation about a specified mean

    Evaluates
    \f[
    \left[ \sum_i w_i \left| x_i-\mu \right| \right]
    \left[ \sum_i w_i \right]^{-1}
    \f]
    with \f$ \mu \f$ supplied in \c wmean. Entries whose weight is
    not strictly positive are skipped.

    This function produces the same results
    as <tt>gsl_stats_wabsdev_m()</tt>.

    \param n Number of leading elements to use from each vector
    \param data The values \f$ x_i \f$
    \param weights The weights \f$ w_i \f$
    \param wmean The value of \f$ \mu \f$
 */
template<class vec_t, class vec2_t>
double wvector_absdev(size_t n, const vec_t &data, const vec2_t &weights,
                      double wmean) {
  long double acc=0.0;
  long double total_w=0.0;
  for(size_t k=0;k<n;++k) {
    const double w=weights[k];
    if (!(w>0.0)) continue;
    const long double dist=fabs(data[k]-wmean);
    total_w+=w;
    // Move the running average toward the new absolute deviation
    acc+=(dist-acc)*(w/total_w);
  }
  return acc;
}
00845 
/** \brief Weighted absolute deviation about the weighted mean

    The weighted mean is obtained from \ref wvector_mean() and
    passed to the overload taking an explicit mean.

    This function produces the same results
    as <tt>gsl_stats_wabsdev()</tt>.

    \param n Number of leading elements to use from each vector
    \param data The values
    \param weights The weights
*/
template<class vec_t, class vec2_t>
double wvector_absdev(size_t n, const vec_t &data,
                      const vec2_t &weights) {
  const double mu=wvector_mean(n,data,weights);
  return wvector_absdev(n,data,weights,mu);
}
00858 
/** \brief Weighted skewness about a specified mean and standard
    deviation

    Evaluates
    \f[
    \left[ \sum_i w_i \left( \frac{x_i-\mu}{\sigma} \right)^3 \right]
    \left[ \sum_i w_i \right]^{-1}
    \f]
    with \f$ \mu \f$ and \f$ \sigma \f$ supplied in \c wmean and
    \c wsd. Entries whose weight is not strictly positive are
    skipped.

    This function produces the same results
    as <tt>gsl_stats_wskew_m_sd()</tt>.

    \param n Number of leading elements to use from each vector
    \param data The values \f$ x_i \f$
    \param weights The weights \f$ w_i \f$
    \param wmean The value of \f$ \mu \f$
    \param wsd The value of \f$ \sigma \f$ (not checked for zero)
*/
template<class vec_t, class vec2_t>
double wvector_skew(size_t n, const vec_t &data, const vec2_t &weights,
                    double wmean, double wsd) {
  long double acc=0.0;
  long double total_w=0.0;
  for(size_t k=0;k<n;++k) {
    const double w=weights[k];
    if (!(w>0.0)) continue;
    const long double z=(data[k]-wmean)/wsd;
    total_w+=w;
    acc+=(z*z*z-acc)*(w/total_w);
  }
  return acc;
}
00880   
/** \brief Weighted skewness with mean and standard deviation
    computed internally

    The weighted mean comes from \ref wvector_mean() and the
    standard deviation from the \ref wvector_stddev() overload
    taking that mean; both are then passed to the overload with
    explicit parameters.

    This function produces the same results
    as <tt>gsl_stats_wskew()</tt>.

    \param n Number of leading elements to use from each vector
    \param data The values
    \param weights The weights
*/
template<class vec_t, class vec2_t>
double wvector_skew(size_t n, const vec_t &data, const vec2_t &weights) {
  const double mu=wvector_mean(n,data,weights);
  const double sigma=wvector_stddev(n,data,weights,mu);
  return wvector_skew(n,data,weights,mu,sigma);
}
00893 
/** \brief Weighted excess kurtosis about a specified mean and
    standard deviation

    Evaluates
    \f[
    -3 + \left[ \sum_i w_i \left( \frac{x_i-\mu}{\sigma} \right)^4
    \right] \left[ \sum_i w_i \right]^{-1}
    \f]
    with \f$ \mu \f$ and \f$ \sigma \f$ supplied in \c wmean and
    \c wsd. Entries whose weight is not strictly positive are
    skipped.

    This function produces the same results
    as <tt>gsl_stats_wkurtosis_m_sd()</tt>.

    \param n Number of leading elements to use from each vector
    \param data The values \f$ x_i \f$
    \param weights The weights \f$ w_i \f$
    \param wmean The value of \f$ \mu \f$
    \param wsd The value of \f$ \sigma \f$ (not checked for zero)
*/
template<class vec_t, class vec2_t>
double wvector_kurtosis(size_t n, const vec_t &data, const vec2_t &weights,
                        double wmean, double wsd) {
  long double acc=0.0;
  long double total_w=0.0;
  for(size_t k=0;k<n;++k) {
    const double w=weights[k];
    if (!(w>0.0)) continue;
    const long double z=(data[k]-wmean)/wsd;
    total_w+=w;
    acc+=(z*z*z*z-acc)*(w/total_w);
  }
  return acc-3.0;
}
00915 
/** \brief Weighted excess kurtosis with mean and standard deviation
    computed internally

    The weighted mean comes from \ref wvector_mean() and the
    standard deviation from the \ref wvector_stddev() overload
    taking that mean; both are then passed to the overload with
    explicit parameters.

    This function produces the same results
    as <tt>gsl_stats_wkurtosis()</tt>.

    \param n Number of leading elements to use from each vector
    \param data The values
    \param weights The weights
*/
template<class vec_t, class vec2_t>
double wvector_kurtosis(size_t n, const vec_t &data,
                        const vec2_t &weights) {
  const double mu=wvector_mean(n,data,weights);
  const double sigma=wvector_stddev(n,data,weights,mu);
  return wvector_kurtosis(n,data,weights,mu,sigma);
}
00929   //@}
00930 
00931 #ifndef DOXYGENP
00932 }
00933 #endif
00934 
00935 #endif
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

Documentation generated with Doxygen. Provided under the GNU Free Documentation License (see License Information).

Get Object-oriented Scientific Computing
Lib at SourceForge.net. Fast, secure and Free Open Source software
downloads.