| 1 | ;; Mean, standard deviation, and variance |
|---|
| 2 | ;; Liam Healy, Sat Dec 2 2006 - 22:15 |
|---|
| 3 | ;; Time-stamp: <2008-03-09 19:21:47EDT mean-variance.lisp> |
|---|
| 4 | ;; $Id$ |
|---|
| 5 | |
|---|
| 6 | (in-package :gsl) |
|---|
| 7 | |
|---|
| 8 | ;;; To do: stride other than 1 when that information is availble from |
|---|
| 9 | ;;; the vector. |
|---|
| 10 | |
|---|
| 11 | (defmacro defmfun-stats (&rest args) |
|---|
| 12 | "A defmfun for stats of double, single, and fixnum." |
|---|
| 13 | (defmfun-all 'vector '(double-float single-float fixnum) args "stats")) |
|---|
| 14 | |
|---|
| 15 | (defmacro defmfun-stats-ds (&rest args) |
|---|
| 16 | "A defmfun for stats of double and single." |
|---|
| 17 | (defmfun-all 'vector '(double-float single-float) args "stats")) |
|---|
| 18 | |
|---|
| 19 | ;;;;**************************************************************************** |
|---|
| 20 | ;;;; Mean and weighted mean |
|---|
| 21 | ;;;;**************************************************************************** |
|---|
| 22 | |
|---|
| 23 | (defgeneric mean (vector) |
|---|
| 24 | (:documentation ; FDL |
|---|
| 25 | "The arithmetic mean of the vector. |
|---|
| 26 | The arithmetic mean, or sample mean, is denoted by |
|---|
| 27 | \Hat\mu and defined as \Hat\mu = (1/N) \sum x_i. Returns a double-float.")) |
|---|
| 28 | |
|---|
| 29 | (defmfun-stats mean ((vector vector)) |
|---|
| 30 | "gsl_stats_mean" |
|---|
| 31 | (((gsl-array vector) :pointer) (1 :int) ((dim0 vector) size)) |
|---|
| 32 | :c-return :double) |
|---|
| 33 | |
|---|
| 34 | (defgeneric weighted-mean (vector weights) |
|---|
| 35 | (:documentation ; FDL |
|---|
| 36 | "The weighted mean of the dataset, using the set of weights |
|---|
| 37 | The weighted mean is defined as |
|---|
| 38 | \Hat\mu = (\sum w_i x_i) / (\sum w_i).")) |
|---|
| 39 | |
|---|
| 40 | (defmfun-stats-ds weighted-mean ((vector vector) weights) |
|---|
| 41 | "gsl_stats_wmean" |
|---|
| 42 | (((gsl-array weights) :pointer) (1 :int) |
|---|
| 43 | ((gsl-array vector) :pointer) (1 :int) ((dim0 vector) size)) |
|---|
| 44 | :c-return :double) |
|---|
| 45 | |
|---|
| 46 | ;;;;**************************************************************************** |
|---|
| 47 | ;;;; Variance |
|---|
| 48 | ;;;;**************************************************************************** |
|---|
| 49 | |
|---|
| 50 | (defgeneric variance-nom (vector) |
|---|
| 51 | (:documentation ; FDL |
|---|
| 52 | "The estimated, or sample, variance of data. The |
|---|
| 53 | estimated variance is denoted by \Hat\sigma^2 and is defined by |
|---|
| 54 | \Hat\sigma^2 = (1/(N-1)) \sum (x_i - \Hat\mu)^2 |
|---|
| 55 | where x_i are the elements of the dataset data. Note that |
|---|
| 56 | the normalization factor of 1/(N-1) results from the derivation |
|---|
| 57 | of \Hat\sigma^2 as an unbiased estimator of the population |
|---|
| 58 | variance \sigma^2. For samples drawn from a gaussian distribution |
|---|
| 59 | the variance of \Hat\sigma^2 itself is 2 \sigma^4 / N. |
|---|
| 60 | |
|---|
| 61 | This function computes the mean via a call to #'mean. If |
|---|
| 62 | you have already computed the mean then you can pass it directly to |
|---|
| 63 | #'variance-m.")) |
|---|
| 64 | |
|---|
| 65 | (defmfun-stats variance-nom ((vector vector)) |
|---|
| 66 | "gsl_stats_variance" |
|---|
| 67 | (((gsl-array vector) :pointer) (1 :int) ((dim0 vector) size)) |
|---|
| 68 | :index variance |
|---|
| 69 | :export nil |
|---|
| 70 | :c-return :double) |
|---|
| 71 | |
|---|
| 72 | (defgeneric variance-m (vector mean) |
|---|
| 73 | (:documentation ; FDL |
|---|
| 74 | "Compute the variance with the mean known to |
|---|
| 75 | avoid its recomputation.")) |
|---|
| 76 | |
|---|
| 77 | (defmfun-stats variance-m ((vector vector) mean) |
|---|
| 78 | "gsl_stats_variance_m" |
|---|
| 79 | (((gsl-array vector) :pointer) (1 :int) |
|---|
| 80 | ((dim0 vector) size) (mean :double)) |
|---|
| 81 | :index variance |
|---|
| 82 | :export nil |
|---|
| 83 | :c-return :double) |
|---|
| 84 | |
|---|
| 85 | (export 'variance) |
|---|
| 86 | (defun-optionals variance (data &optional mean) |
|---|
| 87 | -nom -m |
|---|
| 88 | ;; FDL |
|---|
| 89 | "The estimated, or sample, variance of data. The |
|---|
| 90 | estimated variance is denoted by \Hat\sigma^2 and is defined by |
|---|
| 91 | \Hat\sigma^2 = (1/(N-1)) \sum (x_i - \Hat\mu)^2 |
|---|
| 92 | where x_i are the elements of the dataset. Note that |
|---|
| 93 | the normalization factor of 1/(N-1) results from the derivation |
|---|
| 94 | of \Hat\sigma^2 as an unbiased estimator of the population |
|---|
| 95 | variance \sigma^2. For samples drawn from a gaussian distribution |
|---|
| 96 | the variance of \Hat\sigma^2 itself is 2 \sigma^4 / N.") |
|---|
| 97 | |
|---|
| 98 | ;;;;**************************************************************************** |
|---|
| 99 | ;;;; Weighted variance |
|---|
| 100 | ;;;;**************************************************************************** |
|---|
| 101 | |
|---|
| 102 | (defgeneric weighted-variance-nom (vector weights) |
|---|
| 103 | (:documentation ; FDL |
|---|
| 104 | "Compute the weighted variance with the mean unknown.")) |
|---|
| 105 | |
|---|
| 106 | (defmfun-stats-ds weighted-variance-nom ((vector vector) weights) |
|---|
| 107 | "gsl_stats_wvariance" |
|---|
| 108 | (((gsl-array weights) :pointer) (1 :int) |
|---|
| 109 | ((gsl-array vector) :pointer) (1 :int) ((dim0 vector) size)) |
|---|
| 110 | :index weighted-variance |
|---|
| 111 | :export nil |
|---|
| 112 | :c-return :double) |
|---|
| 113 | |
|---|
| 114 | (defgeneric weighted-variance-m (vector weights mean) |
|---|
| 115 | (:documentation ; FDL |
|---|
| 116 | "Compute the weighted variance with the mean known to |
|---|
| 117 | avoid its recomputation.")) |
|---|
| 118 | |
|---|
| 119 | (defmfun-stats-ds weighted-variance-m ((vector vector) weights mean) |
|---|
| 120 | "gsl_stats_wvariance_m" |
|---|
| 121 | (((gsl-array weights) :pointer) (1 :int) |
|---|
| 122 | ((gsl-array vector) :pointer) (1 :int) |
|---|
| 123 | ((dim0 vector) size) (mean :double)) |
|---|
| 124 | :index variance |
|---|
| 125 | :export nil |
|---|
| 126 | :c-return :double) |
|---|
| 127 | |
|---|
| 128 | (export 'weighted-variance) |
|---|
| 129 | (defun-optionals weighted-variance (data weights &optional mean) |
|---|
| 130 | -nom -m |
|---|
| 131 | ;; FDL |
|---|
| 132 | "The estimated variance using weights |
|---|
| 133 | The estimated variance of a weighted dataset is defined as |
|---|
| 134 | \Hat\sigma^2 = ((\sum w_i)/((\sum w_i)^2 - \sum (w_i^2))) |
|---|
| 135 | \sum w_i (x_i - \Hat\mu)^2 |
|---|
| 136 | Note that this expression reduces to an unweighted variance with the |
|---|
| 137 | familiar 1/(N-1) factor when there are N equal non-zero weights.") |
|---|
| 138 | |
|---|
| 139 | ;;;;**************************************************************************** |
|---|
| 140 | ;;;; Standard deviation |
|---|
| 141 | ;;;;**************************************************************************** |
|---|
| 142 | |
|---|
| 143 | (defgeneric standard-deviation-nom (vector) |
|---|
| 144 | (:documentation ; FDL |
|---|
| 145 | "The standard deviation, square root of the variance.")) |
|---|
| 146 | |
|---|
| 147 | (defmfun-stats standard-deviation-nom ((vector vector)) |
|---|
| 148 | "gsl_stats_sd" |
|---|
| 149 | (((gsl-array vector) :pointer) (1 :int) ((dim0 vector) size)) |
|---|
| 150 | :index standard-deviation |
|---|
| 151 | :export nil |
|---|
| 152 | :c-return :double) |
|---|
| 153 | |
|---|
| 154 | (defgeneric standard-deviation-m (vector mean) |
|---|
| 155 | (:documentation ; FDL |
|---|
| 156 | "The standard deviation with the mean known to |
|---|
| 157 | avoid its recomputation.")) |
|---|
| 158 | |
|---|
| 159 | (defmfun-stats standard-deviation-m ((vector vector) mean) |
|---|
| 160 | "gsl_stats_sd_m" |
|---|
| 161 | (((gsl-array vector) :pointer) (1 :int) |
|---|
| 162 | ((dim0 vector) size) (mean :double)) |
|---|
| 163 | :c-return :double) |
|---|
| 164 | |
|---|
| 165 | (export 'standard-deviation) |
|---|
| 166 | (defun-optionals standard-deviation (data &optional mean) |
|---|
| 167 | -nom -m |
|---|
| 168 | ;; FDL |
|---|
| 169 | "The standard deviation, square root of the variance.") |
|---|
| 170 | |
|---|
| 171 | ;;;;**************************************************************************** |
|---|
| 172 | ;;;; Weighted standard deviation |
|---|
| 173 | ;;;;**************************************************************************** |
|---|
| 174 | |
|---|
| 175 | (defgeneric weighted-standard-deviation-nom (vector weights)) |
|---|
| 176 | |
|---|
| 177 | (defmfun-stats-ds weighted-standard-deviation-nom |
|---|
| 178 | ((vector vector) weights) |
|---|
| 179 | "gsl_stats_wsd" |
|---|
| 180 | (((gsl-array weights) :pointer) (1 :int) |
|---|
| 181 | ((gsl-array vector) :pointer) (1 :int) ((dim0 vector) size)) |
|---|
| 182 | :index weighted-standard-deviation |
|---|
| 183 | :export nil |
|---|
| 184 | :c-return :double) |
|---|
| 185 | |
|---|
| 186 | (defgeneric weighted-standard-deviation-m (vector weights mean)) |
|---|
| 187 | |
|---|
| 188 | (defmfun-stats-ds weighted-standard-deviation-m |
|---|
| 189 | ((vector vector) weights mean) |
|---|
| 190 | "gsl_stats_wsd_m" |
|---|
| 191 | (((gsl-array weights) :pointer) (1 :int) |
|---|
| 192 | ((gsl-array vector) :pointer) (1 :int) |
|---|
| 193 | ((dim0 vector) size) (mean :double)) |
|---|
| 194 | :c-return :double) |
|---|
| 195 | |
|---|
| 196 | (export 'weighted-standard-deviation) |
|---|
| 197 | (defun-optionals weighted-standard-deviation (data weights &optional mean) |
|---|
| 198 | -nom -m |
|---|
| 199 | ;; FDL |
|---|
| 200 | "The standard deviation, square root of the variance.") |
|---|
| 201 | |
|---|
| 202 | ;;;;**************************************************************************** |
|---|
| 203 | ;;;; Variance with fixed mean |
|---|
| 204 | ;;;;**************************************************************************** |
|---|
| 205 | |
|---|
| 206 | (defgeneric variance-with-fixed-mean (vector mean) |
|---|
| 207 | (:documentation ; FDL |
|---|
| 208 | "An unbiased estimate of the variance of |
|---|
| 209 | data when the population mean mean of the underlying |
|---|
| 210 | distribution is known a priori. In this case the estimator for |
|---|
| 211 | the variance uses the factor 1/N and the sample mean |
|---|
| 212 | \Hat\mu is replaced by the known population mean \mu, |
|---|
| 213 | \Hat\sigma^2 = (1/N) \sum (x_i - \mu)^2.")) |
|---|
| 214 | |
|---|
| 215 | (defmfun-stats variance-with-fixed-mean ((vector vector) mean) |
|---|
| 216 | "gsl_stats_variance_with_fixed_mean" |
|---|
| 217 | (((gsl-array vector) :pointer) (1 :int) ((dim0 vector) size) |
|---|
| 218 | (mean :double)) |
|---|
| 219 | :c-return :double) |
|---|
| 220 | |
|---|
| 221 | (defgeneric standard-deviation-with-fixed-mean (vector mean) |
|---|
| 222 | (:documentation ; FDL |
|---|
| 223 | "The standard deviation of data for a fixed population |
|---|
| 224 | mean. The result is the square root of the |
|---|
| 225 | corresponding variance function.")) |
|---|
| 226 | |
|---|
| 227 | (defmfun-stats standard-deviation-with-fixed-mean |
|---|
| 228 | ((vector vector) mean) |
|---|
| 229 | "gsl_stats_sd_with_fixed_mean" |
|---|
| 230 | (((gsl-array vector) :pointer) (1 :int) ((dim0 vector) size) |
|---|
| 231 | (mean :double)) |
|---|
| 232 | :c-return :double) |
|---|
| 233 | |
|---|
| 234 | ;;;;**************************************************************************** |
|---|
| 235 | ;;;; Weighted variance with fixed mean |
|---|
| 236 | ;;;;**************************************************************************** |
|---|
| 237 | |
|---|
| 238 | (defgeneric weighted-variance-with-fixed-mean (vector weights mean) |
|---|
| 239 | (:documentation ; FDL |
|---|
| 240 | "An unbiased estimate of the variance of weighted |
|---|
| 241 | dataset when the population mean of the underlying |
|---|
| 242 | distribution is known a priori. In this case the estimator for |
|---|
| 243 | the variance replaces the sample mean \Hat\mu by the known |
|---|
| 244 | population mean \mu, |
|---|
| 245 | \Hat\sigma^2 = (\sum w_i (x_i - \mu)^2) / (\sum w_i).")) |
|---|
| 246 | |
|---|
| 247 | (defmfun-stats-ds weighted-variance-with-fixed-mean |
|---|
| 248 | ((vector vector) weights mean) |
|---|
| 249 | "gsl_stats_wvariance_with_fixed_mean" |
|---|
| 250 | (((gsl-array weights) :pointer) (1 :int) |
|---|
| 251 | ((gsl-array vector) :pointer) (1 :int) ((dim0 vector) size) |
|---|
| 252 | (mean :double)) |
|---|
| 253 | :c-return :double) |
|---|
| 254 | |
|---|
| 255 | (defgeneric weighted-standard-deviation-with-fixed-mean |
|---|
| 256 | (vector weights mean) |
|---|
| 257 | (:documentation ; FDL |
|---|
| 258 | "The square root of the corresponding variance |
|---|
| 259 | function #'weighted-variance-with-fixed-mean.")) |
|---|
| 260 | |
|---|
| 261 | (defmfun-stats-ds weighted-standard-deviation-with-fixed-mean |
|---|
| 262 | ((vector vector) weights mean) |
|---|
| 263 | "gsl_stats_wsd_with_fixed_mean" |
|---|
| 264 | (((gsl-array weights) :pointer) (1 :int) |
|---|
| 265 | ((gsl-array vector) :pointer) (1 :int) ((dim0 vector) size) |
|---|
| 266 | (mean :double)) |
|---|
| 267 | :c-return :double) |
|---|
| 268 | |
|---|
| 269 | ;;;;**************************************************************************** |
|---|
| 270 | ;;;; Examples and unit test |
|---|
| 271 | ;;;;**************************************************************************** |
|---|
| 272 | |
|---|
| 273 | #| |
|---|
| 274 | (make-tests mean-variance |
|---|
| 275 | (letm ((vec (vector-double-float #(-3.21d0 1.0d0 12.8d0))) |
|---|
| 276 | (weights (vector-double-float #(3.0d0 1.0d0 2.0d0)))) |
|---|
| 277 | (let ((mean (mean vec)) |
|---|
| 278 | (wmean (weighted-mean vec weights))) |
|---|
| 279 | (list |
|---|
| 280 | mean wmean |
|---|
| 281 | (variance vec) |
|---|
| 282 | (variance vec mean) |
|---|
| 283 | (weighted-variance vec weights) |
|---|
| 284 | (weighted-variance vec weights wmean) |
|---|
| 285 | (standard-deviation vec) |
|---|
| 286 | (standard-deviation vec mean) |
|---|
| 287 | (variance-with-fixed-mean vec 4.0d0) |
|---|
| 288 | (standard-deviation-with-fixed-mean vec 4.0d0)))) |
|---|
| 289 | (letm ((vec (vector-fixnum #(8 4 -2)))) |
|---|
| 290 | (let ((mean (mean vec))) |
|---|
| 291 | (list |
|---|
| 292 | mean |
|---|
| 293 | (variance vec) |
|---|
| 294 | (variance vec mean) |
|---|
| 295 | (standard-deviation vec) |
|---|
| 296 | (standard-deviation vec mean) |
|---|
| 297 | (variance-with-fixed-mean vec 4.0d0) |
|---|
| 298 | (standard-deviation-with-fixed-mean vec 4.0d0))))) |
|---|
| 299 | |# |
|---|
| 300 | |
|---|
| 301 | (LISP-UNIT:DEFINE-TEST MEAN-VARIANCE |
|---|
| 302 | (LISP-UNIT::ASSERT-NUMERICAL-EQUAL |
|---|
| 303 | (LIST |
|---|
| 304 | (LIST 3.5300000000000002d0 2.8283333333333336d0 |
|---|
| 305 | 68.88069999999999d0 68.88069999999999d0 |
|---|
| 306 | 84.98058636363639d0 84.98058636363639d0 |
|---|
| 307 | 8.29943974012704d0 8.29943974012704d0 |
|---|
| 308 | 46.14136666666667d0 6.792743677385941d0)) |
|---|
| 309 | (MULTIPLE-VALUE-LIST |
|---|
| 310 | (LETM |
|---|
| 311 | ((VEC (VECTOR-DOUBLE-FLOAT #(-3.21d0 1.0d0 12.8d0))) |
|---|
| 312 | (WEIGHTS (VECTOR-DOUBLE-FLOAT #(3.0d0 1.0d0 2.0d0)))) |
|---|
| 313 | (LET ((MEAN (MEAN VEC)) |
|---|
| 314 | (WMEAN (WEIGHTED-MEAN VEC WEIGHTS))) |
|---|
| 315 | (LIST MEAN WMEAN (VARIANCE VEC) (VARIANCE VEC MEAN) |
|---|
| 316 | (WEIGHTED-VARIANCE VEC WEIGHTS) |
|---|
| 317 | (WEIGHTED-VARIANCE VEC WEIGHTS WMEAN) |
|---|
| 318 | (STANDARD-DEVIATION VEC) |
|---|
| 319 | (STANDARD-DEVIATION VEC MEAN) |
|---|
| 320 | (VARIANCE-WITH-FIXED-MEAN VEC 4.0d0) |
|---|
| 321 | (STANDARD-DEVIATION-WITH-FIXED-MEAN VEC |
|---|
| 322 | 4.0d0)))))) |
|---|
| 323 | (LISP-UNIT::ASSERT-NUMERICAL-EQUAL |
|---|
| 324 | (LIST |
|---|
| 325 | (LIST 3.3333333333333335d0 25.333333333333336d0 |
|---|
| 326 | 25.333333333333336d0 5.033222956847167d0 |
|---|
| 327 | 5.033222956847167d0 17.333333333333332d0 |
|---|
| 328 | 4.163331998932265d0)) |
|---|
| 329 | (MULTIPLE-VALUE-LIST |
|---|
| 330 | (LETM ((VEC (VECTOR-FIXNUM #(8 4 -2)))) |
|---|
| 331 | (LET ((MEAN (MEAN VEC))) |
|---|
| 332 | (LIST MEAN (VARIANCE VEC) (VARIANCE VEC MEAN) |
|---|
| 333 | (STANDARD-DEVIATION VEC) |
|---|
| 334 | (STANDARD-DEVIATION VEC MEAN) |
|---|
| 335 | (VARIANCE-WITH-FIXED-MEAN VEC 4.0d0) |
|---|
| 336 | (STANDARD-DEVIATION-WITH-FIXED-MEAN VEC |
|---|
| 337 | 4.0d0))))))) |
|---|