00001 /* 00002 ------------------------------------------------------------------- 00003 00004 Copyright (C) 2006, 2007, Andrew W. Steiner 00005 00006 This file is part of O2scl. 00007 00008 O2scl is free software; you can redistribute it and/or modify 00009 it under the terms of the GNU General Public License as published by 00010 the Free Software Foundation; either version 3 of the License, or 00011 (at your option) any later version. 00012 00013 O2scl is distributed in the hope that it will be useful, 00014 but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00016 GNU General Public License for more details. 00017 00018 You should have received a copy of the GNU General Public License 00019 along with O2scl. If not, see <http://www.gnu.org/licenses/>. 00020 00021 ------------------------------------------------------------------- 00022 */ 00023 #ifndef O2SCL_VEC_STATS_H 00024 #define O2SCL_VEC_STATS_H 00025 00026 /** \file vec_stats.h 00027 \brief File containing statistics template functions 00028 */ 00029 00030 #include <iostream> 00031 #include <cmath> 00032 #include <string> 00033 #include <fstream> 00034 #include <sstream> 00035 #include <o2scl/err_hnd.h> 00036 #include <gsl/gsl_ieee_utils.h> 00037 #include <gsl/gsl_sort.h> 00038 00039 #ifndef DOXYGENP 00040 namespace o2scl 00041 { 00042 #endif 00043 00044 /// Compute the maximum of the first \c n elements of a vector 00045 template<class vec_t> 00046 double vector_max(const size_t n, vec_t &data) { 00047 00048 if (n==0) { 00049 set_err_ret("Sent size=0 to vector_max().",gsl_efailed); 00050 } 00051 double max=data[0]; 00052 for(size_t i=1;i<n;i++) { 00053 if (data[i]>max) { 00054 max=data[i]; 00055 } 00056 } 00057 return max; 00058 } 00059 00060 /// Compute the minimum of the first \c n elements of a vector 00061 template<class vec_t> 00062 double vector_min(const size_t n, vec_t &data) { 00063 00064 if (n==0) { 00065 
set_err_ret("Sent size=0 to vector_min().",gsl_efailed); 00066 } 00067 double min=data[0]; 00068 for(size_t i=1;i<n;i++) { 00069 if (data[i]<min) { 00070 min=data[i]; 00071 } 00072 } 00073 return min; 00074 } 00075 00076 /// Compute the minimum and maximum of the first \c n elements of a vector 00077 template<class vec_t> 00078 int vector_minmax(const size_t n, vec_t &data, double &min, double &max) { 00079 00080 if (n==0) { 00081 set_err_ret("Sent size=0 to vector_min().",gsl_efailed); 00082 } 00083 min=data[0]; 00084 max=min; 00085 for(size_t i=1;i<n;i++) { 00086 if (data[i]<min) { 00087 min=data[i]; 00088 } 00089 if (data[i]>max) { 00090 max=data[i]; 00091 } 00092 } 00093 return 0; 00094 } 00095 00096 /// Compute the maximum of the first \c n elements of a vector 00097 template<class vec_t> 00098 size_t vector_max_index(const size_t n, vec_t &data, double &max) { 00099 00100 if (n==0) { 00101 set_err_ret("Sent size=0 to vector_max().",gsl_efailed); 00102 } 00103 size_t ix=0; 00104 max=data[0]; 00105 for(size_t i=1;i<n;i++) { 00106 if (data[i]>max) { 00107 max=data[i]; 00108 ix=i; 00109 } 00110 } 00111 return ix; 00112 } 00113 00114 /// Compute the minimum of the first \c n elements of a vector 00115 template<class vec_t> 00116 int vector_min_index(const size_t n, vec_t &data, double &min) { 00117 00118 if (n==0) { 00119 set_err_ret("Sent size=0 to vector_min().",gsl_efailed); 00120 } 00121 size_t ix=0; 00122 min=data[0]; 00123 for(size_t i=1;i<n;i++) { 00124 if (data[i]<min) { 00125 min=data[i]; 00126 ix=i; 00127 } 00128 } 00129 return ix; 00130 } 00131 00132 /// Compute the minimum and maximum of the first \c n elements of a vector 00133 template<class vec_t> 00134 int vector_minmax_index(const size_t n, vec_t &data, double &min, 00135 size_t &ix, double &max, size_t &ix2) { 00136 00137 if (n==0) { 00138 set_err_ret("Sent size=0 to vector_min().",gsl_efailed); 00139 } 00140 ix=0; 00141 ix2=0; 00142 min=data[0]; 00143 max=min; 00144 for(size_t i=1;i<n;i++) { 
00145 if (data[i]<min) { 00146 min=data[i]; 00147 ix=i; 00148 } 00149 if (data[i]>max) { 00150 max=data[i]; 00151 ix2=i; 00152 } 00153 } 00154 return 0; 00155 } 00156 00157 /** 00158 \brief Compute the sum of the first \c n elements of a vector 00159 00160 If \c n is zero, this will set \c avg to zero and return 00161 \ref gsl_success. 00162 */ 00163 template<class vec_t> 00164 double vector_sum(const size_t n, vec_t &data) { 00165 00166 double sum=0; 00167 for(size_t i=0;i<n;i++) { 00168 sum+=data[i]; 00169 } 00170 return sum; 00171 } 00172 00173 /** 00174 \brief Compute the mean of the first \c n elements of a vector 00175 00176 If \c n is zero, this will set \c avg to zero and return 00177 \ref gsl_success. 00178 */ 00179 template<class vec_t> 00180 double vector_mean(const size_t n, vec_t &data) { 00181 00182 long double mean=0; 00183 for(size_t i=0;i<n;i++) { 00184 mean+=(data[i]-mean)/(i+1); 00185 } 00186 return mean; 00187 } 00188 00189 /// Variance 00190 template<class vec_t> 00191 double vector_variance_fmean(const size_t n, vec_t &data, 00192 double mean) { 00193 long double var=0; 00194 for(size_t i=0;i<n;i++) { 00195 long double delta=(data[i]-mean); 00196 var+=(delta*delta-var)/(i+1); 00197 } 00198 return var; 00199 } 00200 00201 /// Standard deviation 00202 template<class vec_t> 00203 double vector_stddev_fmean(const size_t n, vec_t &data, 00204 double mean) { 00205 double sd=vector_variance_fmean<vec_t>(n,data,mean); 00206 return sqrt(sd); 00207 } 00208 00209 /** \brief Compute the variance of the first \c n elements of a vector 00210 given the mean \c mean. 00211 00212 If \c n is zero, this will set \c avg to zero and return 00213 \ref gsl_success. 
00214 */ 00215 template<class vec_t> 00216 double vector_variance(const size_t n, vec_t &data, double mean) { 00217 double var=vector_variance_fmean<vec_t>(n,data,mean); 00218 return var*n/(n-1); 00219 } 00220 00221 /// Variance 00222 template<class vec_t> 00223 double vector_variance(const size_t n, vec_t &data) { 00224 00225 double mean=vector_mean<vec_t>(n,data); 00226 double var=vector_variance_fmean<vec_t>(n,data,mean); 00227 return var*n/(n-1); 00228 } 00229 00230 00231 /// Standard deviation 00232 template<class vec_t> 00233 double vector_stddev(const size_t n, vec_t &data) { 00234 00235 double mean=vector_mean<vec_t>(n,data); 00236 double var=vector_variance_fmean<vec_t>(n,data,mean); 00237 return sqrt(var*n/(n-1)); 00238 } 00239 00240 /// Standard deviation 00241 template<class vec_t> 00242 double vector_stddev(const size_t n, vec_t &data, 00243 double mean) { 00244 double sd=vector_variance_fmean<vec_t>(n,data,mean); 00245 return sqrt(sd*n/(n-1)); 00246 } 00247 00248 /// Absolute deviation from the mean 00249 template<class vec_t> 00250 double vector_absdev(const size_t n, vec_t &data, 00251 double mean) { 00252 long double sum=0; 00253 for(size_t i=0;i<n;i++) { 00254 sum+=fabs(data[i]-mean); 00255 } 00256 return sum/n; 00257 } 00258 00259 /// Absolute deviation from the mean 00260 template<class vec_t> 00261 double vector_absdev(const size_t n, vec_t &data) { 00262 double mean=vector_mean<vec_t>(n,data); 00263 return vector_absdev(n,data,mean); 00264 } 00265 00266 /// Skewness 00267 template<class vec_t> 00268 double vector_skew(const size_t n, vec_t &data, double mean, 00269 double stddev) { 00270 long double skew=0; 00271 for(size_t i=0;i<n;i++) { 00272 long double x=(data[i]-mean)/stddev; 00273 skew+=(x*x*x-skew)/(i+1); 00274 } 00275 return skew; 00276 } 00277 00278 /// Skewness 00279 template<class vec_t> 00280 double vector_skew(const size_t n, vec_t &data) { 00281 double mean=vector_mean<vec_t>(n,data); 00282 double 
sd=vector_stddev<vec_t>(n,data,mean); 00283 return vector_skew(n,data,mean,sd); 00284 } 00285 00286 /// Kurtosis 00287 template<class vec_t> 00288 double vector_kurtosis(const size_t n, vec_t &data, double mean, 00289 double stddev) { 00290 long double avg=0; 00291 for(size_t i=0;i<n;i++) { 00292 long double x=(data[i]-mean)/stddev; 00293 avg+=(x*x*x*x-avg)/(i+1); 00294 } 00295 return avg-3.0; 00296 } 00297 00298 /// Kurtosis 00299 template<class vec_t> 00300 double vector_kurtosis(const size_t n, vec_t &data) { 00301 double mean=vector_mean<vec_t>(n,data); 00302 double sd=vector_stddev<vec_t>(n,data,mean); 00303 return vector_kurtosis(n,data,mean,sd); 00304 } 00305 00306 /// Lag1 autocorrelation 00307 template<class vec_t> 00308 double vector_lag1_autocorr(const size_t n, vec_t &data, double mean) { 00309 long double q=0; 00310 long double v=(data[0]-mean)*(data[0]-mean); 00311 for(size_t i=1;i<n;i++) { 00312 long double delta0=data[i-1]-mean; 00313 long double delta1=data[i]-mean; 00314 q+=(delta0*delta1-q)/(i+1); 00315 v+=(delta1*delta1-v)/(i+1); 00316 } 00317 return q/v; 00318 } 00319 00320 /// Lag1 autocorrelation 00321 template<class vec_t> 00322 double vector_lag1_autocorr(const size_t n, vec_t &data) { 00323 double mean=vector_mean<vec_t>(n,data); 00324 return vector_lag1_autocorr(n,data,mean); 00325 } 00326 00327 /// Covariance 00328 template<class vec_t> 00329 double vector_covariance(const size_t n, vec_t &data1, vec_t &data2, 00330 double mean1, double mean2) { 00331 double covar=0; 00332 for(size_t i=0;i<n;i++) { 00333 double delta1=(data1[i]-mean1); 00334 double delta2=(data2[i]-mean2); 00335 covar+=(delta1*delta2-covar)/(i+1); 00336 } 00337 return covar; 00338 } 00339 00340 /// Covariance 00341 template<class vec_t> 00342 double vector_covariance(const size_t n, vec_t &data1, vec_t &data2) { 00343 double covar=0; 00344 double mean1=vector_mean<vec_t>(n,data1); 00345 double mean2=vector_mean<vec_t>(n,data2); 00346 for(size_t i=0;i<n;i++) { 00347 long 
double delta1=(data1[i]-mean1); 00348 long double delta2=(data2[i]-mean2); 00349 covar+=(delta1*delta2-covar)/(i+1); 00350 } 00351 return covar; 00352 } 00353 00354 /// Pearson's correlation 00355 template<class vec_t> 00356 double vector_correlation(const size_t n, vec_t &data1, vec_t &data2) { 00357 size_t i; 00358 00359 double sum_xsq = 0.0; 00360 double sum_ysq = 0.0; 00361 double sum_cross = 0.0; 00362 double ratio; 00363 double delta_x, delta_y; 00364 double mean_x, mean_y; 00365 double r; 00366 00367 /* 00368 * Compute: 00369 * sum_xsq = Sum [ (x_i - mu_x)^2 ], 00370 * sum_ysq = Sum [ (y_i - mu_y)^2 ] and 00371 * sum_cross = Sum [ (x_i - mu_x) * (y_i - mu_y) ] 00372 * using the above relation from Welford's paper 00373 */ 00374 00375 mean_x = data1[0]; 00376 mean_y = data2[0]; 00377 00378 for (i = 1; i < n; ++i) { 00379 ratio = i / (i + 1.0); 00380 delta_x = data1[i] - mean_x; 00381 delta_y = data2[i] - mean_y; 00382 sum_xsq += delta_x * delta_x * ratio; 00383 sum_ysq += delta_y * delta_y * ratio; 00384 sum_cross += delta_x * delta_y * ratio; 00385 mean_x += delta_x / (i + 1.0); 00386 mean_y += delta_y / (i + 1.0); 00387 } 00388 00389 r = sum_cross / (sqrt(sum_xsq) * sqrt(sum_ysq)); 00390 00391 return r; 00392 } 00393 00394 /// Pooled variance 00395 template<class vec_t> 00396 double vector_pvariance(const size_t n1, vec_t &data1, 00397 const size_t n2, vec_t &data2) { 00398 double var1=vector_variance<vec_t>(n1,data1); 00399 double var2=vector_variance<vec_t>(n2,data2); 00400 return (((n1-1)*var1)+((n2-1)*var2))/(n1+n2-2); 00401 } 00402 00403 /// Quantile 00404 template<class vec_t> 00405 double vector_quantile_sorted(const size_t n, vec_t &data, 00406 const double f) { 00407 00408 double index=f*(n-1); 00409 size_t lhs=((size_t)index); 00410 double delta=index-lhs; 00411 if (n==0) return 0; 00412 if (lhs==n-1) return data[lhs]; 00413 return (1-delta)*data[lhs]+delta*data[lhs+1]; 00414 } 00415 00416 /// Quantile 00417 template<class vec_t> 00418 double 
vector_median_sorted(const size_t n, vec_t &data) { 00419 00420 if (n==0) return 0; 00421 00422 size_t lhs=(n-1)/2; 00423 size_t rhs=n/2; 00424 00425 if (lhs==rhs) return data[lhs]; 00426 00427 return (data[lhs]+data[rhs])/2.0; 00428 } 00429 00430 #ifndef DOXYGENP 00431 } 00432 #endif 00433 00434 #endif
/* Documentation generated with Doxygen and provided under the GNU Free
   Documentation License. See License Information for details.
   Project hosting provided by Sourceforge; see the O2scl Sourceforge
   Project Page. */