| 1 | ;; Median and percentile |
|---|
| 2 | ;; Liam Healy, Sun Dec 31 2006 - 13:19 |
|---|
| 3 | ;; Time-stamp: <2008-03-09 19:19:28EDT median-percentile.lisp> |
|---|
| 4 | ;; $Id$ |
|---|
| 5 | |
|---|
| 6 | (in-package :gsl) |
|---|
| 7 | |
|---|
| 8 | ;;; To do: stride other than 1 when that information is available from |
|---|
| 9 | ;;; the vector. |
|---|
| 10 | |
|---|
;;; Binding for gsl_stats_median_from_sorted_data.  The C arguments
;;; are, in order: pointer to the data, stride, and number of elements.
;;; The stride is fixed at 1 (see the to-do note at the top of the file).
;;; GSL declares both the stride and the count as size_t, so the stride
;;; is passed with SIZE, the same foreign type this form uses for the
;;; count, rather than :int, which is narrower than size_t on LP64
;;; platforms.
(defmfun median (sorted-data)
  "gsl_stats_median_from_sorted_data"
  (((gsl-array sorted-data) :pointer) (1 size) ((dim0 sorted-data) size))
  :c-return :double
  :documentation			; FDL
  "The median value of sorted-data.  The elements of the array
   must be in ascending numerical order.  There are no checks to see
   whether the data are sorted, so the function #'sort should
   always be used first.
   When the dataset has an odd number of elements the median is the value
   of element (n-1)/2.  When the dataset has an even number of
   elements the median is the mean of the two nearest middle values,
   elements (n-1)/2 and n/2.  Since the algorithm for
   computing the median involves interpolation this function always returns
   a floating-point number, even for integer data types.")
|---|
| 26 | |
|---|
;;; Binding for gsl_stats_quantile_from_sorted_data.  The C arguments
;;; are, in order: pointer to the data, stride, number of elements, and
;;; the quantile fraction as a double.  The stride is fixed at 1 (see
;;; the to-do note at the top of the file).  GSL declares both the
;;; stride and the count as size_t, so the stride is passed with SIZE,
;;; the same foreign type this form uses for the count, rather than
;;; :int, which is narrower than size_t on LP64 platforms.
;;;
;;; The docstring spells the interpolation formula in plain text: the
;;; Lisp reader drops a backslash inside a string, so TeX control
;;; sequences would not survive into the documentation.
(defmfun quantile (sorted-data fraction)
  "gsl_stats_quantile_from_sorted_data"
  (((gsl-array sorted-data) :pointer) (1 size) ((dim0 sorted-data) size)
   (fraction :double))
  :c-return :double
  :documentation			; FDL
  "A quantile value of sorted-data, a vector-double-float.  The
   elements of the array must be in ascending numerical order.  The
   quantile is determined by fraction, a number between 0 and 1.  For
   example, to compute the value of the 75th percentile,
   fraction should have the value 0.75.
   There are no checks to see whether the data are sorted, so the function
   #'sort should always be used first.
   The quantile is found by interpolation, using the formula
      quantile = (1 - delta) x_i + delta x_{i+1}
   where i is floor((n - 1) fraction) and delta is (n - 1) fraction - i.
   Thus the minimum value of the array (data[0*stride]) is given by
   fraction equal to zero, the maximum value (data[(n-1)*stride]) is
   given by fraction equal to one, and the median value is given by
   fraction equal to 0.5.  Since the algorithm for computing quantiles
   involves interpolation this function always returns a floating-point
   number, even for integer data types.")
|---|
| 48 | |
|---|
| 49 | ;;; Examples and unit test |
|---|
| 50 | |
|---|
| 51 | #| |
|---|
| 52 | (make-tests median-percentile |
|---|
| 53 | (letm ((vec (vector-double-float #(-3.21d0 1.0d0 12.8d0)))) |
|---|
| 54 | (median vec)) |
|---|
| 55 | (letm ((vec (vector-double-float |
|---|
| 56 | #(-18.0d0 -12.0d0 -3.21d0 0.5d0 1.0d0 2.7d0 12.8d0)))) |
|---|
| 57 | (quantile vec 0.75d0))) |
|---|
| 58 | |# |
|---|
| 59 | |
|---|
;;; Unit test for MEDIAN and QUANTILE.  Each assertion compares the
;;; list of values returned by the form against the expected list.
(lisp-unit:define-test median-percentile
  ;; Three sorted elements: the median is the middle one, 1.0.
  (lisp-unit::assert-numerical-equal
   (list 1.0d0)
   (multiple-value-list
    (letm ((vec (vector-double-float #(-3.21d0 1.0d0 12.8d0))))
      (median vec))))
  ;; Seven sorted elements, fraction 0.75: (n-1)*f = 4.5, so the result
  ;; lies halfway between elements 4 and 5, (1.0 + 2.7)/2 = 1.85.
  (lisp-unit::assert-numerical-equal
   (list 1.85d0)
   (multiple-value-list
    (letm ((vec (vector-double-float
                 #(-18.0d0 -12.0d0 -3.21d0 0.5d0 1.0d0 2.7d0 12.8d0))))
      (quantile vec 0.75d0)))))
|---|
| 72 | |
|---|