00001 /* 00002 ------------------------------------------------------------------- 00003 00004 Copyright (C) 2006, 2007, 2008, 2009, Andrew W. Steiner 00005 00006 This file is part of O2scl. 00007 00008 O2scl is free software; you can redistribute it and/or modify 00009 it under the terms of the GNU General Public License as published by 00010 the Free Software Foundation; either version 3 of the License, or 00011 (at your option) any later version. 00012 00013 O2scl is distributed in the hope that it will be useful, 00014 but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00016 GNU General Public License for more details. 00017 00018 You should have received a copy of the GNU General Public License 00019 along with O2scl. If not, see <http://www.gnu.org/licenses/>. 00020 00021 ------------------------------------------------------------------- 00022 */ 00023 #ifndef O2SCL_VEC_STATS_H 00024 #define O2SCL_VEC_STATS_H 00025 00026 /** \file vec_stats.h 00027 \brief File containing statistics template functions 00028 */ 00029 00030 #include <iostream> 00031 #include <cmath> 00032 #include <string> 00033 #include <fstream> 00034 #include <sstream> 00035 #include <o2scl/err_hnd.h> 00036 #include <gsl/gsl_ieee_utils.h> 00037 #include <gsl/gsl_sort.h> 00038 00039 #ifndef DOXYGENP 00040 namespace o2scl 00041 { 00042 #endif 00043 00044 /** 00045 \brief Compute the mean of the first \c n elements of a vector 00046 00047 If \c n is zero, this will set \c avg to zero and return 00048 \ref gsl_success. 
00049 */ 00050 template<class vec_t> 00051 double vector_mean(const size_t n, vec_t &data) { 00052 00053 long double mean=0; 00054 for(size_t i=0;i<n;i++) { 00055 mean+=(data[i]-mean)/(i+1); 00056 } 00057 return mean; 00058 } 00059 00060 /// Variance 00061 template<class vec_t> 00062 double vector_variance_fmean(const size_t n, vec_t &data, 00063 double mean) { 00064 long double var=0; 00065 for(size_t i=0;i<n;i++) { 00066 long double delta=(data[i]-mean); 00067 var+=(delta*delta-var)/(i+1); 00068 } 00069 return var; 00070 } 00071 00072 /// Standard deviation 00073 template<class vec_t> 00074 double vector_stddev_fmean(const size_t n, vec_t &data, 00075 double mean) { 00076 double sd=vector_variance_fmean<vec_t>(n,data,mean); 00077 return std::sqrt(sd); 00078 } 00079 00080 /** \brief Compute the variance of the first \c n elements of a vector 00081 given the mean \c mean. 00082 00083 If \c n is zero, this will set \c avg to zero and return 00084 \ref gsl_success. 00085 */ 00086 template<class vec_t> 00087 double vector_variance(const size_t n, vec_t &data, double mean) { 00088 double var=vector_variance_fmean<vec_t>(n,data,mean); 00089 return var*n/(n-1); 00090 } 00091 00092 /// Variance 00093 template<class vec_t> 00094 double vector_variance(const size_t n, vec_t &data) { 00095 00096 double mean=vector_mean<vec_t>(n,data); 00097 double var=vector_variance_fmean<vec_t>(n,data,mean); 00098 return var*n/(n-1); 00099 } 00100 00101 00102 /// Standard deviation 00103 template<class vec_t> 00104 double vector_stddev(const size_t n, vec_t &data) { 00105 00106 double mean=vector_mean<vec_t>(n,data); 00107 double var=vector_variance_fmean<vec_t>(n,data,mean); 00108 return std::sqrt(var*n/(n-1)); 00109 } 00110 00111 /// Standard deviation 00112 template<class vec_t> 00113 double vector_stddev(const size_t n, vec_t &data, 00114 double mean) { 00115 double sd=vector_variance_fmean<vec_t>(n,data,mean); 00116 return std::sqrt(sd*n/(n-1)); 00117 } 00118 00119 /// Absolute 
deviation from the mean 00120 template<class vec_t> 00121 double vector_absdev(const size_t n, vec_t &data, 00122 double mean) { 00123 long double sum=0; 00124 for(size_t i=0;i<n;i++) { 00125 sum+=fabs(data[i]-mean); 00126 } 00127 return sum/n; 00128 } 00129 00130 /// Absolute deviation from the mean 00131 template<class vec_t> 00132 double vector_absdev(const size_t n, vec_t &data) { 00133 double mean=vector_mean<vec_t>(n,data); 00134 return vector_absdev(n,data,mean); 00135 } 00136 00137 /// Skewness 00138 template<class vec_t> 00139 double vector_skew(const size_t n, vec_t &data, double mean, 00140 double stddev) { 00141 long double skew=0; 00142 for(size_t i=0;i<n;i++) { 00143 long double x=(data[i]-mean)/stddev; 00144 skew+=(x*x*x-skew)/(i+1); 00145 } 00146 return skew; 00147 } 00148 00149 /// Skewness 00150 template<class vec_t> 00151 double vector_skew(const size_t n, vec_t &data) { 00152 double mean=vector_mean<vec_t>(n,data); 00153 double sd=vector_stddev<vec_t>(n,data,mean); 00154 return vector_skew(n,data,mean,sd); 00155 } 00156 00157 /// Kurtosis 00158 template<class vec_t> 00159 double vector_kurtosis(const size_t n, vec_t &data, double mean, 00160 double stddev) { 00161 long double avg=0; 00162 for(size_t i=0;i<n;i++) { 00163 long double x=(data[i]-mean)/stddev; 00164 avg+=(x*x*x*x-avg)/(i+1); 00165 } 00166 return avg-3.0; 00167 } 00168 00169 /// Kurtosis 00170 template<class vec_t> 00171 double vector_kurtosis(const size_t n, vec_t &data) { 00172 double mean=vector_mean<vec_t>(n,data); 00173 double sd=vector_stddev<vec_t>(n,data,mean); 00174 return vector_kurtosis(n,data,mean,sd); 00175 } 00176 00177 /// Lag1 autocorrelation 00178 template<class vec_t> 00179 double vector_lag1_autocorr(const size_t n, vec_t &data, double mean) { 00180 long double q=0; 00181 long double v=(data[0]-mean)*(data[0]-mean); 00182 for(size_t i=1;i<n;i++) { 00183 long double delta0=data[i-1]-mean; 00184 long double delta1=data[i]-mean; 00185 q+=(delta0*delta1-q)/(i+1); 00186 
v+=(delta1*delta1-v)/(i+1); 00187 } 00188 return q/v; 00189 } 00190 00191 /// Lag1 autocorrelation 00192 template<class vec_t> 00193 double vector_lag1_autocorr(const size_t n, vec_t &data) { 00194 double mean=vector_mean<vec_t>(n,data); 00195 return vector_lag1_autocorr(n,data,mean); 00196 } 00197 00198 /// Covariance 00199 template<class vec_t> 00200 double vector_covariance(const size_t n, vec_t &data1, vec_t &data2, 00201 double mean1, double mean2) { 00202 double covar=0; 00203 for(size_t i=0;i<n;i++) { 00204 double delta1=(data1[i]-mean1); 00205 double delta2=(data2[i]-mean2); 00206 covar+=(delta1*delta2-covar)/(i+1); 00207 } 00208 return covar; 00209 } 00210 00211 /// Covariance 00212 template<class vec_t> 00213 double vector_covariance(const size_t n, vec_t &data1, vec_t &data2) { 00214 double covar=0; 00215 double mean1=vector_mean<vec_t>(n,data1); 00216 double mean2=vector_mean<vec_t>(n,data2); 00217 for(size_t i=0;i<n;i++) { 00218 long double delta1=(data1[i]-mean1); 00219 long double delta2=(data2[i]-mean2); 00220 covar+=(delta1*delta2-covar)/(i+1); 00221 } 00222 return covar; 00223 } 00224 00225 /// Pearson's correlation 00226 template<class vec_t> 00227 double vector_correlation(const size_t n, vec_t &data1, vec_t &data2) { 00228 size_t i; 00229 00230 double sum_xsq = 0.0; 00231 double sum_ysq = 0.0; 00232 double sum_cross = 0.0; 00233 double ratio; 00234 double delta_x, delta_y; 00235 double mean_x, mean_y; 00236 double r; 00237 00238 /* 00239 * Compute: 00240 * sum_xsq = Sum [ (x_i - mu_x)^2 ], 00241 * sum_ysq = Sum [ (y_i - mu_y)^2 ] and 00242 * sum_cross = Sum [ (x_i - mu_x) * (y_i - mu_y) ] 00243 * using the above relation from Welford's paper 00244 */ 00245 00246 mean_x = data1[0]; 00247 mean_y = data2[0]; 00248 00249 for (i = 1; i < n; ++i) { 00250 ratio = i / (i + 1.0); 00251 delta_x = data1[i] - mean_x; 00252 delta_y = data2[i] - mean_y; 00253 sum_xsq += delta_x * delta_x * ratio; 00254 sum_ysq += delta_y * delta_y * ratio; 00255 sum_cross += 
delta_x * delta_y * ratio; 00256 mean_x += delta_x / (i + 1.0); 00257 mean_y += delta_y / (i + 1.0); 00258 } 00259 00260 r = sum_cross / (std::sqrt(sum_xsq) * std::sqrt(sum_ysq)); 00261 00262 return r; 00263 } 00264 00265 /// Pooled variance 00266 template<class vec_t> 00267 double vector_pvariance(const size_t n1, vec_t &data1, 00268 const size_t n2, vec_t &data2) { 00269 double var1=vector_variance<vec_t>(n1,data1); 00270 double var2=vector_variance<vec_t>(n2,data2); 00271 return (((n1-1)*var1)+((n2-1)*var2))/(n1+n2-2); 00272 } 00273 00274 /// Quantile 00275 template<class vec_t> 00276 double vector_quantile_sorted(const size_t n, vec_t &data, 00277 const double f) { 00278 00279 double index=f*(n-1); 00280 size_t lhs=((size_t)index); 00281 double delta=index-lhs; 00282 if (n==0) return 0; 00283 if (lhs==n-1) return data[lhs]; 00284 return (1-delta)*data[lhs]+delta*data[lhs+1]; 00285 } 00286 00287 /// Quantile 00288 template<class vec_t> 00289 double vector_median_sorted(const size_t n, vec_t &data) { 00290 00291 if (n==0) return 0; 00292 00293 size_t lhs=(n-1)/2; 00294 size_t rhs=n/2; 00295 00296 if (lhs==rhs) return data[lhs]; 00297 00298 return (data[lhs]+data[rhs])/2.0; 00299 } 00300 00301 #ifndef DOXYGENP 00302 } 00303 #endif 00304 00305 #endif
Documentation generated with Doxygen and provided under the GNU Free Documentation License. See License Information for details.
Project hosting provided by SourceForge; see the O2scl Sourceforge Project Page.