vec_stats.h

Go to the documentation of this file.
00001 /*
00002   -------------------------------------------------------------------
00003   
00004   Copyright (C) 2006, 2007, 2008, Andrew W. Steiner
00005   
00006   This file is part of O2scl.
00007   
00008   O2scl is free software; you can redistribute it and/or modify
00009   it under the terms of the GNU General Public License as published by
00010   the Free Software Foundation; either version 3 of the License, or
00011   (at your option) any later version.
00012   
00013   O2scl is distributed in the hope that it will be useful,
00014   but WITHOUT ANY WARRANTY; without even the implied warranty of
00015   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016   GNU General Public License for more details.
00017   
00018   You should have received a copy of the GNU General Public License
00019   along with O2scl. If not, see <http://www.gnu.org/licenses/>.
00020 
00021   -------------------------------------------------------------------
00022 */
00023 #ifndef O2SCL_VEC_STATS_H
00024 #define O2SCL_VEC_STATS_H
00025 
00026 /** \file vec_stats.h
00027     \brief File containing statistics template functions
00028 */
00029 
00030 #include <iostream>
00031 #include <cmath>
00032 #include <string>
00033 #include <fstream>
00034 #include <sstream>
00035 #include <o2scl/err_hnd.h>
00036 #include <gsl/gsl_ieee_utils.h>
00037 #include <gsl/gsl_sort.h>
00038 
00039 #ifndef DOXYGENP
00040 namespace o2scl
00041 {
00042 #endif
00043 
00044   /// Compute the maximum of the first \c n elements of a vector
00045   template<class vec_t>
00046     double vector_max(const size_t n, vec_t &data) {
00047     
00048     if (n==0) {
00049       set_err_ret("Sent size=0 to vector_max().",gsl_efailed);
00050     }
00051     double max=data[0];
00052     for(size_t i=1;i<n;i++) {
00053       if (data[i]>max) {
00054         max=data[i];
00055       }
00056     }
00057     return max;
00058   }
00059 
00060   /// Compute the minimum of the first \c n elements of a vector
00061   template<class vec_t>
00062     double vector_min(const size_t n, vec_t &data) {
00063     
00064     if (n==0) {
00065       set_err_ret("Sent size=0 to vector_min().",gsl_efailed);
00066     }
00067     double min=data[0];
00068     for(size_t i=1;i<n;i++) {
00069       if (data[i]<min) {
00070         min=data[i];
00071       }
00072     }
00073     return min;
00074   }
00075 
00076   /// Compute the minimum and maximum of the first \c n elements of a vector
00077   template<class vec_t>
00078     int vector_minmax(const size_t n, vec_t &data, double &min, double &max) {
00079     
00080     if (n==0) {
00081       set_err_ret("Sent size=0 to vector_min().",gsl_efailed);
00082     }
00083     min=data[0];
00084     max=min;
00085     for(size_t i=1;i<n;i++) {
00086       if (data[i]<min) {
00087         min=data[i];
00088       }
00089       if (data[i]>max) {
00090         max=data[i];
00091       }
00092     }
00093     return 0;
00094   }
00095 
00096   /// Compute the maximum of the first \c n elements of a vector
00097   template<class vec_t>
00098     size_t vector_max_index(const size_t n, vec_t &data, double &max) {
00099     
00100     if (n==0) {
00101       set_err_ret("Sent size=0 to vector_max().",gsl_efailed);
00102     }
00103     size_t ix=0;
00104     max=data[0];
00105     for(size_t i=1;i<n;i++) {
00106       if (data[i]>max) {
00107         max=data[i];
00108         ix=i;
00109       }
00110     }
00111     return ix;
00112   }
00113 
00114   /// Compute the minimum of the first \c n elements of a vector
00115   template<class vec_t>
00116     int vector_min_index(const size_t n, vec_t &data, double &min) {
00117     
00118     if (n==0) {
00119       set_err_ret("Sent size=0 to vector_min().",gsl_efailed);
00120     }
00121     size_t ix=0;
00122     min=data[0];
00123     for(size_t i=1;i<n;i++) {
00124       if (data[i]<min) {
00125         min=data[i];
00126         ix=i;
00127       }
00128     }
00129     return ix;
00130   }
00131 
00132   /// Compute the minimum and maximum of the first \c n elements of a vector
00133   template<class vec_t>
00134     int vector_minmax_index(const size_t n, vec_t &data, double &min, 
00135                             size_t &ix, double &max, size_t &ix2) {
00136     
00137     if (n==0) {
00138       set_err_ret("Sent size=0 to vector_min().",gsl_efailed);
00139     }
00140     ix=0;
00141     ix2=0;
00142     min=data[0];
00143     max=min;
00144     for(size_t i=1;i<n;i++) {
00145       if (data[i]<min) {
00146         min=data[i];
00147         ix=i;
00148       }
00149       if (data[i]>max) {
00150         max=data[i];
00151         ix2=i;
00152       }
00153     }
00154     return 0;
00155   }
00156 
/** \brief Add up the first \c n elements of \c data

    Elements are accumulated in increasing index order into a
    \c double that starts at zero, so an empty range
    (<tt>n==0</tt>) yields 0.

    \param n number of leading elements to include
    \param data container indexed with <tt>operator[]</tt>
    \return the accumulated total
*/
template<class vec_t>
double vector_sum(const size_t n, vec_t &data) {
  double total=0;
  size_t k=0;
  while (k<n) {
    total+=data[k];
    ++k;
  }
  return total;
}
00172 
/** \brief Arithmetic mean of the first \c n elements of \c data

    Uses the running-mean recurrence
    <tt>m += (x_i - m)/(i+1)</tt>, carried in \c long \c double,
    instead of summing and dividing at the end. With
    <tt>n==0</tt> the loop never runs and 0 is returned.

    \param n number of leading elements to include
    \param data container indexed with <tt>operator[]</tt>
    \return the mean, converted to \c double
*/
template<class vec_t>
double vector_mean(const size_t n, vec_t &data) {
  long double running=0;
  size_t k=0;
  while (k<n) {
    running+=(data[k]-running)/(k+1);
    ++k;
  }
  return running;
}
00188 
/** \brief Mean squared deviation of the first \c n elements of
    \c data about a caller-supplied \c mean

    The result is the running average of <tt>(x_i - mean)^2</tt>,
    i.e. it is normalized by \c n and no <tt>n/(n-1)</tt> factor is
    applied here. Accumulation is done in \c long \c double. With
    <tt>n==0</tt> the result is 0.

    \param n number of leading elements to include
    \param data container indexed with <tt>operator[]</tt>
    \param mean value about which deviations are taken
    \return the averaged squared deviation, converted to \c double
*/
template<class vec_t>
double vector_variance_fmean(const size_t n, vec_t &data,
                             double mean) {
  long double acc=0;
  size_t k=0;
  while (k<n) {
    const long double dev=(data[k]-mean);
    acc+=(dev*dev-acc)/(k+1);
    ++k;
  }
  return acc;
}
00200 
00201   /// Standard deviation
00202   template<class vec_t>
00203     double vector_stddev_fmean(const size_t n, vec_t &data, 
00204                                double mean) {
00205     double sd=vector_variance_fmean<vec_t>(n,data,mean);
00206     return sqrt(sd);
00207   }
00208 
00209   /** \brief Compute the variance of the first \c n elements of a vector
00210       given the mean \c mean.
00211 
00212       If \c n is zero, this will set \c avg to zero and return 
00213       \ref gsl_success.
00214   */
00215   template<class vec_t>
00216     double vector_variance(const size_t n, vec_t &data, double mean) {
00217     double var=vector_variance_fmean<vec_t>(n,data,mean);
00218     return var*n/(n-1);
00219   }
00220 
00221   /// Variance
00222   template<class vec_t>
00223     double vector_variance(const size_t n, vec_t &data) {
00224     
00225     double mean=vector_mean<vec_t>(n,data);
00226     double var=vector_variance_fmean<vec_t>(n,data,mean);
00227     return var*n/(n-1);
00228   }
00229 
00230 
00231   /// Standard deviation
00232   template<class vec_t>
00233     double vector_stddev(const size_t n, vec_t &data) {
00234     
00235     double mean=vector_mean<vec_t>(n,data);
00236     double var=vector_variance_fmean<vec_t>(n,data,mean);
00237     return sqrt(var*n/(n-1));
00238   }
00239 
00240   /// Standard deviation
00241   template<class vec_t>
00242     double vector_stddev(const size_t n, vec_t &data, 
00243                          double mean) {
00244     double sd=vector_variance_fmean<vec_t>(n,data,mean);
00245     return sqrt(sd*n/(n-1));
00246   }
00247   
/** \brief Mean absolute deviation of the first \c n elements of
    \c data about a caller-supplied \c mean

    Sums <tt>|x_i - mean|</tt> in \c long \c double and divides by
    \c n. For <tt>n==0</tt> that division would be 0/0, so 0 is
    returned instead, matching what vector_mean() yields for an
    empty range.

    \param n number of leading elements to include
    \param data container indexed with <tt>operator[]</tt>
    \param mean value about which deviations are taken
    \return the average absolute deviation, or 0 when <tt>n==0</tt>
*/
template<class vec_t>
double vector_absdev(const size_t n, vec_t &data,
                     double mean) {
  // Avoid the 0/0 division below for an empty range.
  if (n==0) {
    return 0.0;
  }
  long double sum=0;
  for(size_t i=0;i<n;i++) {
    sum+=std::fabs(data[i]-mean);
  }
  return sum/n;
}
00258   
00259   /// Absolute deviation from the mean
00260   template<class vec_t>
00261     double vector_absdev(const size_t n, vec_t &data) {
00262     double mean=vector_mean<vec_t>(n,data);
00263     return vector_absdev(n,data,mean);
00264   }
00265 
/** \brief Skewness of the first \c n elements of \c data given a
    precomputed \c mean and \c stddev

    Computes the running average of <tt>((x_i - mean)/stddev)^3</tt>
    in \c long \c double. \c stddev is used as a divisor without any
    check. With <tt>n==0</tt> the result is 0.

    \param n number of leading elements to include
    \param data container indexed with <tt>operator[]</tt>
    \param mean value subtracted from each element
    \param stddev value each deviation is divided by
    \return the averaged cubed standardized deviation
*/
template<class vec_t>
double vector_skew(const size_t n, vec_t &data, double mean,
                   double stddev) {
  long double acc=0;
  size_t k=0;
  while (k<n) {
    const long double z=(data[k]-mean)/stddev;
    acc+=(z*z*z-acc)/(k+1);
    ++k;
  }
  return acc;
}
00277 
00278   /// Skewness
00279   template<class vec_t>
00280     double vector_skew(const size_t n, vec_t &data) {
00281     double mean=vector_mean<vec_t>(n,data);
00282     double sd=vector_stddev<vec_t>(n,data,mean);
00283     return vector_skew(n,data,mean,sd);
00284   }
00285 
/** \brief Excess kurtosis of the first \c n elements of \c data
    given a precomputed \c mean and \c stddev

    Computes the running average of <tt>((x_i - mean)/stddev)^4</tt>
    in \c long \c double and subtracts 3. \c stddev is used as a
    divisor without any check. With <tt>n==0</tt> the result is -3.

    \param n number of leading elements to include
    \param data container indexed with <tt>operator[]</tt>
    \param mean value subtracted from each element
    \param stddev value each deviation is divided by
    \return the averaged fourth power of the standardized deviation,
    minus 3
*/
template<class vec_t>
double vector_kurtosis(const size_t n, vec_t &data, double mean,
                       double stddev) {
  long double fourth=0;
  size_t k=0;
  while (k<n) {
    const long double z=(data[k]-mean)/stddev;
    fourth+=(z*z*z*z-fourth)/(k+1);
    ++k;
  }
  return fourth-3.0;
}
00297 
00298   /// Kurtosis
00299   template<class vec_t>
00300     double vector_kurtosis(const size_t n, vec_t &data) {
00301     double mean=vector_mean<vec_t>(n,data);
00302     double sd=vector_stddev<vec_t>(n,data,mean);
00303     return vector_kurtosis(n,data,mean,sd);
00304   }
00305 
/** \brief Lag-1 autocorrelation of the first \c n elements of
    \c data given a precomputed \c mean

    Maintains two running averages in \c long \c double: \c q over
    the products of consecutive deviations
    <tt>(x_{i-1}-mean)(x_i-mean)</tt> and \c v over the squared
    deviations <tt>(x_i-mean)^2</tt>, and returns <tt>q/v</tt>.

    The accumulator \c v is seeded from <tt>data[0]</tt>, so an
    empty range is handled up front and returns 0 instead of reading
    a nonexistent element. The final division is not guarded: if
    every deviation is zero, \c v is zero and the result is not
    finite.

    \param n number of leading elements to include
    \param data container indexed with <tt>operator[]</tt>
    \param mean value subtracted from each element
    \return <tt>q/v</tt>, or 0 when <tt>n==0</tt>
*/
template<class vec_t>
double vector_lag1_autocorr(const size_t n, vec_t &data, double mean) {
  // data[0] is read unconditionally below; bail out on an empty range.
  if (n==0) {
    return 0.0;
  }
  long double q=0;
  long double v=(data[0]-mean)*(data[0]-mean);
  for(size_t i=1;i<n;i++) {
    const long double delta0=data[i-1]-mean;
    const long double delta1=data[i]-mean;
    q+=(delta0*delta1-q)/(i+1);
    v+=(delta1*delta1-v)/(i+1);
  }
  return q/v;
}
00319 
00320   /// Lag1 autocorrelation
00321   template<class vec_t>
00322     double vector_lag1_autocorr(const size_t n, vec_t &data) {
00323     double mean=vector_mean<vec_t>(n,data);
00324     return vector_lag1_autocorr(n,data,mean);
00325   }
00326 
/** \brief Covariance of the first \c n elements of \c data1 and
    \c data2 given precomputed means

    Computes the running average of
    <tt>(x_i - mean1)(y_i - mean2)</tt> in \c double. The result is
    therefore normalized by \c n; no <tt>n/(n-1)</tt> factor is
    applied, unlike vector_variance().
    NOTE(review): confirm that the \c n normalization is intended.
    With <tt>n==0</tt> the result is 0.

    \param n number of leading element pairs to include
    \param data1 first container, indexed with <tt>operator[]</tt>
    \param data2 second container, indexed with <tt>operator[]</tt>
    \param mean1 value subtracted from each element of \c data1
    \param mean2 value subtracted from each element of \c data2
    \return the averaged product of deviations
*/
template<class vec_t>
double vector_covariance(const size_t n, vec_t &data1, vec_t &data2,
                         double mean1, double mean2) {
  double acc=0;
  size_t k=0;
  while (k<n) {
    const double dev1=(data1[k]-mean1);
    const double dev2=(data2[k]-mean2);
    acc+=(dev1*dev2-acc)/(k+1);
    ++k;
  }
  return acc;
}
00339 
00340   /// Covariance
00341   template<class vec_t>
00342     double vector_covariance(const size_t n, vec_t &data1, vec_t &data2) {
00343     double covar=0;
00344     double mean1=vector_mean<vec_t>(n,data1);
00345     double mean2=vector_mean<vec_t>(n,data2);
00346     for(size_t i=0;i<n;i++) {
00347       long double delta1=(data1[i]-mean1);
00348       long double delta2=(data2[i]-mean2);
00349       covar+=(delta1*delta2-covar)/(i+1);
00350     }
00351     return covar;
00352   }
00353   
/** \brief Pearson correlation coefficient of the first \c n
    elements of \c data1 and \c data2

    A single pass updates the running means together with
    <tt>sum_xsq = Sum (x_i - mu_x)^2</tt>,
    <tt>sum_ysq = Sum (y_i - mu_y)^2</tt> and
    <tt>sum_cross = Sum (x_i - mu_x)(y_i - mu_y)</tt>, and the
    result is <tt>sum_cross/(sqrt(sum_xsq)*sqrt(sum_ysq))</tt>.

    The running means are seeded from element 0 of each container,
    so an empty range is handled up front and returns 0 instead of
    reading nonexistent elements. The final division is not guarded:
    if either sum of squares is zero (for instance when
    <tt>n==1</tt>) the result is not finite.

    \param n number of leading element pairs to include
    \param data1 first container, indexed with <tt>operator[]</tt>
    \param data2 second container, indexed with <tt>operator[]</tt>
    \return the correlation coefficient, or 0 when <tt>n==0</tt>
*/
template<class vec_t>
double vector_correlation(const size_t n, vec_t &data1, vec_t &data2) {
  // Element 0 of both inputs is read unconditionally below.
  if (n==0) {
    return 0.0;
  }

  double sum_xsq = 0.0;
  double sum_ysq = 0.0;
  double sum_cross = 0.0;

  double mean_x = data1[0];
  double mean_y = data2[0];

  for (size_t i = 1; i < n; ++i) {
    // Weight that turns a deviation from the old mean into the
    // increment of the centered sums when the (i+1)-th point arrives.
    const double ratio = i / (i + 1.0);
    const double delta_x = data1[i] - mean_x;
    const double delta_y = data2[i] - mean_y;
    sum_xsq += delta_x * delta_x * ratio;
    sum_ysq += delta_y * delta_y * ratio;
    sum_cross += delta_x * delta_y * ratio;
    mean_x += delta_x / (i + 1.0);
    mean_y += delta_y / (i + 1.0);
  }

  return sum_cross / (std::sqrt(sum_xsq) * std::sqrt(sum_ysq));
}
00393 
00394   /// Pooled variance
00395   template<class vec_t>
00396     double vector_pvariance(const size_t n1, vec_t &data1, 
00397                             const size_t n2, vec_t &data2) {
00398     double var1=vector_variance<vec_t>(n1,data1);
00399     double var2=vector_variance<vec_t>(n2,data2);
00400     return (((n1-1)*var1)+((n2-1)*var2))/(n1+n2-2);
00401   }
00402 
/** \brief Quantile of the first \c n elements of \c data by linear
    interpolation

    The fractional position <tt>f*(n-1)</tt> is split into an
    integer index and a remainder, and the result interpolates
    linearly between the two neighbouring elements. The function
    does not sort; the name indicates that \c data is expected to be
    in ascending order already.

    Boundary handling:
    - <tt>n==0</tt> returns 0, checked before any arithmetic on
      <tt>n-1</tt> (which would wrap around for an unsigned zero);
    - <tt>f<=0</tt> returns <tt>data[0]</tt>;
    - a position at or beyond the last element (<tt>f>=1</tt>)
      returns <tt>data[n-1]</tt>.
    The last two also keep the floating-point position inside the
    range that can be converted to an index.

    \param n number of leading elements to consider
    \param data container indexed with <tt>operator[]</tt>
    \param f requested fraction; values outside [0,1] are clamped
    \return the interpolated quantile
*/
template<class vec_t>
double vector_quantile_sorted(const size_t n, vec_t &data,
                              const double f) {
  if (n==0) {
    return 0;
  }
  if (f<=0.0) {
    return data[0];
  }
  const double index=f*(n-1);
  // Compare before converting so an oversized position is never cast.
  if (index>=static_cast<double>(n-1)) {
    return data[n-1];
  }
  const size_t lhs=static_cast<size_t>(index);
  const double delta=index-lhs;
  return (1-delta)*data[lhs]+delta*data[lhs+1];
}
00415 
/** \brief Median of the first \c n elements of \c data

    For odd \c n the middle element is returned; for even \c n the
    two middle elements are averaged. The function does not sort;
    the name indicates that \c data is expected to be in order
    already. With <tt>n==0</tt> the result is 0.

    \param n number of leading elements to consider
    \param data container indexed with <tt>operator[]</tt>
    \return the median as described above
*/
template<class vec_t>
double vector_median_sorted(const size_t n, vec_t &data) {
  if (n==0) {
    return 0;
  }
  const size_t upper=n/2;
  // (n-1)/2 and n/2 coincide exactly when n is odd.
  if (n%2==1) {
    return data[upper];
  }
  const size_t lower=upper-1;
  return (data[lower]+data[upper])/2.0;
}
00429 
00430 #ifndef DOXYGENP
00431 }
00432 #endif
00433 
00434 #endif

Documentation generated with Doxygen and provided under the GNU Free Documentation License. See License Information for details.

Project hosting provided by SourceForge.net Logo, O2scl Sourceforge Project Page