23 #ifndef O2SCL_VEC_STATS_H 24 #define O2SCL_VEC_STATS_H 39 #include <o2scl/err_hnd.h> 40 #include <o2scl/vector.h> 42 #ifndef DOXYGEN_NO_O2NS 55 template<
class vec_t>
double vector_mean(
size_t n,
const vec_t &data) {
57 for(
size_t i=0;i<n;i++) {
58 mean+=(data[i]-mean)/(i+1);
89 for(
size_t i=0;i<n;i++) {
90 long double delta=(data[i]-mean);
91 var+=(delta*delta-var)/(i+1);
108 template<
class vec_t>
127 template<
class vec_t>
131 O2SCL_ERR2(
"Cannot compute variance with less than 2 elements",
135 double var=vector_variance_fmean<vec_t>(n,data,mean);
153 template<
class vec_t>
174 O2SCL_ERR2(
"Cannot compute variance with less than 2 elements",
178 double mean=vector_mean<vec_t>(n,data);
179 double var=vector_variance_fmean<vec_t>(n,data,mean);
214 template<
class vec_t>
217 double sd=vector_variance_fmean<vec_t>(n,data,mean);
218 return std::sqrt(sd);
235 template<
class vec_t>
256 O2SCL_ERR2(
"Cannot compute std. dev. with less than 2 elements",
260 double mean=vector_mean<vec_t>(n,data);
261 double var=vector_variance_fmean<vec_t>(n,data,mean);
262 return std::sqrt(var*n/(n-1));
300 O2SCL_ERR2(
"Cannot compute std. dev. with less than 2 elements",
304 double sd=vector_variance_fmean<vec_t>(n,data,mean);
305 return std::sqrt(sd*n/(n-1));
322 template<
class vec_t>
double vector_stddev(
const vec_t &data,
double mean) {
343 if (n==0)
return 0.0;
346 for(
size_t i=0;i<n;i++) {
347 sum+=fabs(data[i]-mean);
386 template<
class vec_t>
388 double mean=vector_mean<vec_t>(n,data);
407 template<
class vec_t>
428 template<
class vec_t>
double vector_skew(
size_t n,
const vec_t &data,
429 double mean,
double stddev) {
430 long double skew=0.0;
431 for(
size_t i=0;i<n;i++) {
432 long double x=(data[i]-mean)/stddev;
433 skew+=(x*x*x-skew)/(i+1);
455 double mean,
double stddev) {
475 template<
class vec_t>
double vector_skew(
size_t n,
const vec_t &data) {
476 double mean=vector_mean<vec_t>(n,data);
477 double sd=vector_stddev<vec_t>(n,data,mean);
517 template<
class vec_t>
521 for(
size_t i=0;i<n;i++) {
522 long double x=(data[i]-mean)/stddev;
523 avg+=(x*x*x*x-avg)/(i+1);
544 template<
class vec_t>
567 double mean=vector_mean<vec_t>(n,data);
568 double sd=vector_stddev<vec_t>(n,data,mean);
608 template<
class vec_t>
612 O2SCL_ERR2(
"Cannot compute lag1 with less than 2 elements",
617 long double v=(data[0]-mean)*(data[0]-mean);
618 for(
size_t i=1;i<n;i++) {
619 long double delta0=data[i-1]-mean;
620 long double delta1=data[i]-mean;
621 q+=(delta0*delta1-q)/(i+1);
622 v+=(delta1*delta1-v)/(i+1);
644 template<
class vec_t>
666 (
size_t n,
const vec_t &data) {
667 double mean=vector_mean<vec_t>(n,data);
704 template<
class vec_t>
713 long double q=0.0, v=0.0;
714 for(
size_t i=0;i<k;i++) {
716 v+=(data[i]-mean)*(data[i]-mean)/(i+1);
718 for(
size_t i=k;i<n;i++) {
719 long double delta0=data[i-k]-mean;
720 long double delta1=data[i]-mean;
721 q+=(delta0*delta1-q)/(i+1);
722 v+=(delta1*delta1-v)/(i+1);
740 template<
class vec_t>
760 (
size_t n,
const vec_t &data,
size_t k) {
761 double mean=vector_mean<vec_t>(n,data);
779 (
const vec_t &data,
size_t k) {
799 template<
class vec_t,
class vec2_t>
801 double mean1,
double mean2) {
803 for(
size_t i=0;i<n;i++) {
804 double delta1=(data1[i]-mean1);
805 double delta2=(data2[i]-mean2);
806 covar+=(delta1*delta2-covar)/(i+1);
808 return covar*n/(n-1);
827 template<
class vec_t,
class vec2_t>
829 double mean1,
double mean2) {
850 template<
class vec_t,
class vec2_t>
852 const vec2_t &data2) {
854 double mean1=vector_mean<vec_t>(n,data1);
855 double mean2=vector_mean<vec_t>(n,data2);
856 for(
size_t i=0;i<n;i++) {
857 long double delta1=(data1[i]-mean1);
858 long double delta2=(data2[i]-mean2);
859 covar+=(delta1*delta2-covar)/(i+1);
861 return covar*n/(n-1);
881 template<
class vec_t,
class vec2_t>
883 const vec2_t &data2) {
906 template<
class vec_t,
class vec2_t>
908 const vec2_t &data2) {
912 O2SCL_ERR2(
"Cannot compute correlation with no elements",
918 double sum_cross=0.0;
920 double delta_x, delta_y;
921 double mean_x, mean_y;
935 for (i=1; i < n; ++i) {
937 delta_x=data1[i] - mean_x;
938 delta_y=data2[i] - mean_y;
939 sum_xsq += delta_x * delta_x * ratio;
940 sum_ysq += delta_y * delta_y * ratio;
941 sum_cross += delta_x * delta_y * ratio;
942 mean_x += delta_x / (i + 1.0);
943 mean_y += delta_y / (i + 1.0);
946 r=sum_cross / (std::sqrt(sum_xsq) * std::sqrt(sum_ysq));
970 template<
class vec_t,
class vec2_t>
972 const vec2_t &data2) {
993 template<
class vec_t,
class vec2_t>
995 size_t n2,
const vec2_t &data2) {
996 double var1=vector_variance<vec_t>(n1,data1);
997 double var2=vector_variance<vec2_t>(n2,data2);
998 return (((n1-1)*var1)+((n2-1)*var2))/(n1+n2-2);
1018 template<
class vec_t,
class vec2_t>
1020 const vec2_t &data2) {
1043 template<
class vec_t>
1047 double index=f*(n-1);
1048 size_t lhs=((size_t)index);
1049 double delta=index-lhs;
1050 if (n==0)
return 0.0;
1051 if (lhs==n-1)
return data[lhs];
1052 return (1-delta)*data[lhs]+delta*data[lhs+1];
1074 template<
class vec_t>
1076 return vector_quantile_sorted<vec_t>(data.size(),data,f);
1095 template<
class vec_t>
1098 if (n==0)
return 0.0;
1103 if (lhs==rhs)
return data[lhs];
1105 return (data[lhs]+data[rhs])/2.0;
1124 template<
class vec_t>
1126 return vector_median_sorted<vec_t>(data.size(),data);
1140 template<
class vec_t,
class vec2_t,
class vec3_t>
1142 const vec3_t &err) {
1144 for(
size_t i=0;i<n;i++) {
1145 chi2+=pow((obs[i]-exp[i])/err[i],2.0);
1161 template<
class vec_t,
class vec2_t,
class vec3_t>
1163 const vec3_t &err) {
1164 return vector_chi_squared<vec_t,vec2_t,vec3_t>(obs.size(),obs,exp,err);
1185 template<
class vec_t,
class vec2_t>
1188 long double wmean=0.0;
1190 for(
size_t i=0;i<n;i++) {
1191 double wi=weights[i];
1194 wmean+=(data[i]-wmean)*(wi/W);
1216 template<
class vec_t,
class vec2_t>
1218 return wvector_mean<vec_t,vec2_t>(data.size(),data,weights);
1233 for(
size_t i=0;i<n;i++) {
1234 double wi=weights[i];
1252 return wvector_factor<vec_t>(weights.size(),weights);
1268 template<
class vec_t,
class vec2_t>
1270 const vec2_t &weights,
double wmean) {
1271 long double wvariance=0.0;
1273 for(
size_t i=0;i<n;i++) {
1274 double wi=weights[i];
1276 const long double delta=data[i]-wmean;
1278 wvariance+=(delta*delta-wvariance)*(wi/W);
1298 template<
class vec_t,
class vec2_t>
1300 const vec2_t &weights,
double wmean) {
1310 template<
class vec_t,
class vec2_t>
1312 const vec2_t &weights,
double wmean) {
1315 (n,data,weights,wmean);
1317 const double wvar=scale*variance;
1327 template<
class vec_t,
class vec2_t>
1329 const vec2_t &weights,
double wmean) {
1330 return wvector_variance<vec_t,vec2_t>(data.size(),data,weights,wmean);
1339 template<
class vec_t,
class vec2_t>
1341 const vec2_t &weights) {
1344 return wvector_variance<vec_t,vec2_t>(n,data,weights,wmean);
1353 template<
class vec_t,
class vec2_t>
1362 template<
class vec_t,
class vec2_t,
class vec3_t>
1364 const vec3_t &weights) {
1369 for(
size_t i=0;i<n;i++) {
1370 double wi=weights[i];
1373 double delta1=(data1[i]-mean1);
1374 double delta2=(data2[i]-mean2);
1375 covar+=(wi/W)*(delta1*delta2-covar);
1386 template<
class vec_t,
class vec2_t,
class vec3_t>
1388 const vec3_t &weights) {
1389 return wvector_covariance<vec_t,vec2_t,vec3_t>
1390 (data1.size(),data1,data2,weights);
1399 template<
class vec_t,
class vec2_t>
1401 const vec2_t &weights,
double wmean) {
1411 template<
class vec_t,
class vec2_t>
1413 const vec2_t &weights,
double wmean) {
1414 return wvector_stddev_fmean<vec_t,vec2_t>
1415 (data.size(),data,weights,wmean);
1424 template<
class vec_t,
class vec2_t>
1426 const vec2_t &weights) {
1437 template<
class vec_t,
class vec2_t>
1448 template<
class vec_t,
class vec2_t>
1450 const vec2_t &weights,
double wmean) {
1452 (n,data,weights,wmean);
1454 double wvar=scale*variance;
1464 template<
class vec_t,
class vec2_t>
1466 const vec2_t &weights,
double wmean) {
1467 return wvector_stddev<vec_t,vec2_t>(data.size(),data,weights,wmean);
1476 template<
class vec_t,
class vec2_t>
1478 const vec2_t &weights,
double wmean) {
1479 long double wtss=0.0;
1480 for(
size_t i=0;i<n;i++) {
1481 double wi=weights[i];
1483 const long double delta=data[i]-wmean;
1484 wtss+=wi*delta*delta;
1497 template<
class vec_t,
class vec2_t>
1499 const vec2_t &weights,
double wmean) {
1500 return wvector_sumsq<vec_t,vec2_t>(data.size(),data,weights,wmean);
1509 template<
class vec_t,
class vec2_t>
1511 const vec2_t &weights) {
1523 template<
class vec_t,
class vec2_t>
1525 return wvector_sumsq<vec_t,vec2_t>(data.size(),data,weights);
1533 template<
class vec_t,
class vec2_t>
1536 long double wabsdev=0.0;
1538 for(
size_t i=0;i<n;i++) {
1539 double wi=weights[i];
1541 const long double delta=fabs(data[i]-wmean);
1543 wabsdev+=(delta-wabsdev)*(wi/W);
1554 template<
class vec_t,
class vec2_t>
1557 return wvector_absdev<vec_t,vec2_t>(data.size(),data,weights,wmean);
1565 template<
class vec_t,
class vec2_t>
1567 const vec2_t &weights) {
1578 template<
class vec_t,
class vec2_t>
1580 return wvector_absdev<vec_t,vec2_t>(data.size(),data,weights);
1589 template<
class vec_t,
class vec2_t>
1591 double wmean,
double wsd) {
1592 long double wskew=0.0;
1594 for(
size_t i=0;i<n;i++) {
1595 double wi=weights[i];
1597 const long double x=(data[i]-wmean)/wsd;
1599 wskew+=(x*x*x-wskew)*(wi/W);
1611 template<
class vec_t,
class vec2_t>
1613 double wmean,
double wsd) {
1614 return wvector_skew<vec_t,vec2_t>(data.size(),data,weights,wmean,wsd);
1623 template<
class vec_t,
class vec2_t>
1636 template<
class vec_t,
class vec2_t>
1638 return wvector_skew<vec_t,vec2_t>(data.size(),data,weights);
1647 template<
class vec_t,
class vec2_t>
1649 double wmean,
double wsd) {
1650 long double wavg=0.0;
1652 for(
size_t i=0;i<n;i++) {
1653 double wi=weights[i];
1655 const long double x=(data[i]-wmean)/wsd;
1657 wavg+=(x*x*x*x-wavg)*(wi/W);
1669 template<
class vec_t,
class vec2_t>
1671 double wmean,
double wsd) {
1672 return wvector_kurtosis<vec_t,vec2_t>
1673 (data.size(),data,weights,wmean,wsd);
1682 template<
class vec_t,
class vec2_t>
1684 const vec2_t &weights) {
1696 template<
class vec_t,
class vec2_t>
1698 return wvector_kurtosis<vec_t,vec2_t>(data,weights);
1702 #ifndef DOXYGEN_NO_O2NS double vector_lagk_autocorr(size_t n, const vec_t &data, size_t k, double mean)
Lag-k autocorrelation.
double vector_mean(size_t n, const vec_t &data)
Compute the mean of the first n elements of a vector.
double wvector_mean(size_t n, const vec_t &data, const vec2_t &weights)
Compute the mean of weighted data.
The main O$_2$scl names...
double wvector_absdev(size_t n, const vec_t &data, const vec2_t &weights, double wmean)
Compute the absolute deviation of data about a specified mean.
double wvector_variance_fmean(size_t n, const vec_t &data, const vec2_t &weights, double wmean)
Compute the variance of a weighted vector with a mean known in advance.
double vector_correlation(size_t n, const vec_t &data1, const vec2_t &data2)
Pearson's correlation.
double vector_variance_fmean(size_t n, const vec_t &data, double mean)
Compute variance with specified mean known in advance.
invalid argument supplied by user
double vector_skew(size_t n, const vec_t &data, double mean, double stddev)
Skewness with specified mean and standard deviation.
double vector_kurtosis(size_t n, const vec_t &data, double mean, double stddev)
Kurtosis with specified mean and standard deviation.
double vector_absdev(size_t n, const vec_t &data, double mean)
Absolute deviation from the specified mean.
double wvector_kurtosis(size_t n, const vec_t &data, const vec2_t &weights, double wmean, double wsd)
Compute the kurtosis of data with specified mean and standard deviation.
double vector_variance(size_t n, const vec_t &data, double mean)
Compute the variance with specified mean.
double vector_pvariance(size_t n1, const vec_t &data1, size_t n2, const vec2_t &data2)
The pooled variance of two vectors.
double vector_median_sorted(size_t n, const vec_t &data)
Return the median of sorted (ascending or descending) data.
double vector_stddev(size_t n, const vec_t &data)
Standard deviation where the mean is computed automatically.
double wvector_stddev_fmean(size_t n, const vec_t &data, const vec2_t &weights, double wmean)
Compute the standard deviation of a weighted vector with a mean known in advance. ...
double wvector_covariance(size_t n, const vec_t &data1, const vec2_t &data2, const vec3_t &weights)
The weighted covariance of two vectors.
double wvector_stddev(size_t n, const vec_t &data, const vec2_t &weights)
Compute the standard deviation of a weighted vector where mean is computed automatically.
double wvector_variance(size_t n, const vec_t &data, const vec2_t &weights, double wmean)
Compute the variance of a weighted vector with specified mean.
#define O2SCL_ERR2(d, d2, n)
Set an error, two-string version.
double wvector_sumsq(size_t n, const vec_t &data, const vec2_t &weights, double wmean)
Compute the weighted sum of squares of data about the specified weighted mean.
double vector_covariance(size_t n, const vec_t &data1, const vec2_t &data2, double mean1, double mean2)
Compute the covariance of two vectors.
double vector_quantile_sorted(size_t n, const vec_t &data, const double f)
Quantile from sorted data (ascending only).
double vector_lag1_autocorr(size_t n, const vec_t &data, double mean)
Lag-1 autocorrelation.
double vector_chi_squared(size_t n, const vec_t &obs, const vec2_t &exp, const vec3_t &err)
Compute the chi-squared statistic.
double wvector_factor(size_t n, const vec_t &weights)
Compute a normalization factor for weighted data.
double wvector_skew(size_t n, const vec_t &data, const vec2_t &weights, double wmean, double wsd)
Compute the skewness of data with specified mean and standard deviation.
double vector_stddev_fmean(size_t n, const vec_t &data, double mean)
Standard deviation with specified mean known in advance.