Mercurial > hg > octave-max
comparison scripts/statistics/base/quantile.m @ 11889:221d555a5b91 release-3-0-x
Modified statistics to calculate consistent median.
author | Ben Abbott <bpabbott@mac.com> |
---|---|
date | Fri, 21 Nov 2008 16:58:19 +0100 |
parents | |
children | bc982528de11 |
comparison
equal
deleted
inserted
replaced
11888:8dc2fa08600c | 11889:221d555a5b91 |
---|---|
1 ## Copyright (C) 2008 Ben Abbott | |
2 ## | |
3 ## This file is part of Octave. | |
4 ## | |
5 ## Octave is free software; you can redistribute it and/or modify it | |
6 ## under the terms of the GNU General Public License as published by | |
7 ## the Free Software Foundation; either version 3 of the License, or (at | |
8 ## your option) any later version. | |
9 ## | |
10 ## Octave is distributed in the hope that it will be useful, but | |
11 ## WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 ## General Public License for more details. | |
14 ## | |
15 ## You should have received a copy of the GNU General Public License | |
16 ## along with Octave; see the file COPYING. If not, see | |
17 ## <http://www.gnu.org/licenses/>. | |
18 | |
19 ## -*- texinfo -*- | |
20 ## @deftypefn {Function File} {@var{q} =} quantile (@var{x}, @var{p}) | |
21 ## @deftypefnx {Function File} {@var{q} =} quantile (@var{x}, @var{p}, @var{dim}) | |
22 ## @deftypefnx {Function File} {@var{q} =} quantile (@var{x}, @var{p}, @var{dim}, @var{method}) | |
23 ## For a sample, @var{x}, calculate the quantiles, @var{q}, corresponding to | |
24 ## the cumulative probability values in @var{p}. All non-numeric values (NaNs) of | |
25 ## @var{x} are ignored. | |
26 ## | |
27 ## If @var{x} is a matrix, compute the quantiles for each column and | |
28 ## return them in a matrix, such that the i-th row of @var{q} contains | |
29 ## the @var{p}(i)th quantiles of each column of @var{x}. | |
30 ## | |
31 ## The optional argument @var{dim} determines the dimension along which | |
32 ## the quantiles are calculated. If @var{dim} is omitted, and @var{x} is | |
33 ## a vector or matrix, it defaults to 1 (column-wise quantiles). In the | |
34 ## instance that @var{x} is an N-d array, @var{dim} defaults to the first | |
35 ## dimension whose size is greater than unity. | |
36 ## | |
37 ## The methods available to calculate sample quantiles are the nine methods | |
38 ## used by R (http://www.r-project.org/). The default value is METHOD = 5. | |
39 ## | |
40 ## Discontinuous sample quantile methods 1, 2, and 3 | |
41 ## | |
42 ## @enumerate 1 | |
43 ## @item Method 1: Inverse of empirical distribution function. | |
44 ## @item Method 2: Similar to method 1 but with averaging at discontinuities. | |
45 ## @item Method 3: SAS definition: nearest even order statistic. | |
46 ## @end enumerate | |
47 ## | |
48 ## Continuous sample quantile methods 4 through 9, where p(k) is the linear | |
49 ## interpolation function respecting each method's representative cdf. | |
50 ## | |
51 ## @enumerate 4 | |
52 ## @item Method 4: p(k) = k / n. That is, linear interpolation of the empirical cdf. | |
53 ## @item Method 5: p(k) = (k - 0.5) / n. That is, a piecewise linear function where | |
54 ## the knots are the values midway through the steps of the empirical cdf. | |
55 ## @item Method 6: p(k) = k / (n + 1). | |
56 ## @item Method 7: p(k) = (k - 1) / (n - 1). | |
57 ## @item Method 8: p(k) = (k - 1/3) / (n + 1/3). The resulting quantile estimates | |
58 ## are approximately median-unbiased regardless of the distribution of @var{x}. | |
59 ## @item Method 9: p(k) = (k - 3/8) / (n + 1/4). The resulting quantile estimates | |
60 ## are approximately unbiased for the expected order statistics if @var{x} is | |
61 ## normally distributed. | |
62 ## @end enumerate | |
63 ## | |
64 ## Hyndman and Fan (1996) recommend method 8. Maxima, S, and R | |
65 ## (versions prior to 2.0.0) use 7 as their default. Minitab and SPSS | |
66 ## use method 6. Matlab uses method 5. | |
67 ## | |
68 ## References: | |
69 ## | |
70 ## @itemize @bullet | |
71 ## @item Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) The New | |
72 ## S Language. Wadsworth & Brooks/Cole. | |
73 ## | |
74 ## @item Hyndman, R. J. and Fan, Y. (1996) Sample quantiles in | |
75 ## statistical packages, American Statistician, 50, 361-365. | |
76 ## | |
77 ## @item R: A Language and Environment for Statistical Computing; | |
78 ## @url{http://cran.r-project.org/doc/manuals/fullrefman.pdf}. | |
79 ## @end itemize | |
80 ## @end deftypefn | |
81 | |
82 ## Author: Ben Abbott <bpabbott@mac.com> | |
83 ## Description: Matlab style quantile function of a discrete/continuous distribution | |
84 | |
function q = quantile (x, p, dim, method)

  ## Compute the quantiles of X at the cumulative probabilities P along
  ## dimension DIM, using quantile definition METHOD.
  ##
  ## Inputs:
  ##   x      - sample data (vector, matrix, or N-d array).
  ##   p      - probabilities; defaults to [0, 0.25, 0.5, 0.75, 1].
  ##   dim    - dimension to operate along.  When omitted it is 1 for a
  ##            vector or matrix and the first non-singleton dimension
  ##            for an N-d array.
  ##   method - quantile definition passed through to __quantile__;
  ##            defaults to 5.
  ##
  ## Output:
  ##   q      - array shaped like X, except that dimension DIM has
  ##            length numel (P).
  ##
  ## Errors:
  ##   Calls print_usage for a bad argument count and raises
  ##   "quantile: invalid dimension." when DIM is not an integer scalar
  ##   in the range 1 .. ndims (X).
  ##
  ## This wrapper only fills in defaults, validates DIM, and does the
  ## permute/reshape bookkeeping; the numerical work is delegated to
  ## __quantile__, which is handed a 2-d array whose first index is DIM.

  if (nargin < 1 || nargin > 4)
    print_usage ();
  endif

  if (nargin < 2)
    p = [0.00, 0.25, 0.50, 0.75, 1.00];
  endif

  if (nargin < 3)
    if (ndims (x) == 2)
      ## Vectors and matrices always default to column-wise quantiles.
      dim = 1;
    else
      ## N-d arrays default to the first dimension with more than one
      ## element, falling back to 1 if there is none (e.g. empty input).
      nonsingleton = find (size (x) > 1);
      if (isempty (nonsingleton))
        dim = 1;
      else
        dim = nonsingleton(1);
      endif
    endif
  endif

  if (nargin < 4)
    method = 5;
  endif

  ## DIM is used as an index below, so it must be an integer scalar that
  ## names an existing dimension of X.
  if (! (isscalar (dim) && dim == fix (dim) && dim >= 1 && dim <= ndims (x)))
    error ("quantile: invalid dimension.")
  endif

  ## Build a permutation that swaps dimension DIM with dimension 1.
  perm = 1:ndims (x);
  perm(1) = dim;
  perm(dim) = 1;

  ## Bring DIM to the first index.
  x = permute (x, perm);

  ## Remember the permuted size so the result can be reshaped back.
  sx = size (x);

  ## Collapse all trailing dimensions into a single column index.
  x = reshape (x, [sx(1), prod(sx(2:end))]);

  ## Calculate the quantiles on the 2-d array.
  q = __quantile__ (x, p, method);

  ## Restore the trailing dimensions; the leading one now has one entry
  ## per requested probability.
  q = reshape (q, [numel(p), sx(2:end)]);

  ## Swap the first index back to DIM.
  q = ipermute (q, perm);

endfunction
131 | |
%!test
%! ## With 11 sorted samples the default-method median (p = 0.5) is the
%! ## 6th order statistic: checked down columns, then along rows of the
%! ## transposed data.
%! prob = 0.5;
%! data = sort (rand (11));
%! col_median = quantile (data, prob);
%! assert (col_median, data(6,:))
%! data = data.';
%! row_median = quantile (data, prob, 2);
%! assert (row_median, data(:,6));
140 | |
%!test
%! ## Row k of `expected` holds the quantiles of [1; 2; 3; 4] for method k.
%! probs = [0.00, 0.25, 0.50, 0.75, 1.00];
%! data = [1; 2; 3; 4];
%! expected = [1.0000, 1.0000, 2.0000, 3.0000, 4.0000
%!             1.0000, 1.5000, 2.5000, 3.5000, 4.0000
%!             1.0000, 1.0000, 2.0000, 3.0000, 4.0000
%!             1.0000, 1.0000, 2.0000, 3.0000, 4.0000
%!             1.0000, 1.5000, 2.5000, 3.5000, 4.0000
%!             1.0000, 1.2500, 2.5000, 3.7500, 4.0000
%!             1.0000, 1.7500, 2.5000, 3.2500, 4.0000
%!             1.0000, 1.4167, 2.5000, 3.5833, 4.0000
%!             1.0000, 1.4375, 2.5000, 3.5625, 4.0000];
%! tol = 0.0001;
%! for method = 1:9
%!   observed = quantile (data, probs, 1, method);
%!   assert (observed.', expected(method,:), tol)
%! endfor
157 | |
%!test
%! ## Row k of `expected` holds the quantiles of [1; 2; 3; 4; 5] for method k.
%! probs = [0.00, 0.25, 0.50, 0.75, 1.00];
%! data = [1; 2; 3; 4; 5];
%! expected = [1.0000, 2.0000, 3.0000, 4.0000, 5.0000
%!             1.0000, 2.0000, 3.0000, 4.0000, 5.0000
%!             1.0000, 1.0000, 2.0000, 4.0000, 5.0000
%!             1.0000, 1.2500, 2.5000, 3.7500, 5.0000
%!             1.0000, 1.7500, 3.0000, 4.2500, 5.0000
%!             1.0000, 1.5000, 3.0000, 4.5000, 5.0000
%!             1.0000, 2.0000, 3.0000, 4.0000, 5.0000
%!             1.0000, 1.6667, 3.0000, 4.3333, 5.0000
%!             1.0000, 1.6875, 3.0000, 4.3125, 5.0000];
%! tol = 0.0001;
%! for method = 1:9
%!   observed = quantile (data, probs, 1, method);
%!   assert (observed.', expected(method,:), tol)
%! endfor
174 | |
%!test
%! ## Row k of `expected` holds the quantiles of [1; 2; 5; 9] for method k.
%! probs = [0.00, 0.25, 0.50, 0.75, 1.00];
%! data = [1; 2; 5; 9];
%! expected = [1.0000, 1.0000, 2.0000, 5.0000, 9.0000
%!             1.0000, 1.5000, 3.5000, 7.0000, 9.0000
%!             1.0000, 1.0000, 2.0000, 5.0000, 9.0000
%!             1.0000, 1.0000, 2.0000, 5.0000, 9.0000
%!             1.0000, 1.5000, 3.5000, 7.0000, 9.0000
%!             1.0000, 1.2500, 3.5000, 8.0000, 9.0000
%!             1.0000, 1.7500, 3.5000, 6.0000, 9.0000
%!             1.0000, 1.4167, 3.5000, 7.3333, 9.0000
%!             1.0000, 1.4375, 3.5000, 7.2500, 9.0000];
%! tol = 0.0001;
%! for method = 1:9
%!   observed = quantile (data, probs, 1, method);
%!   assert (observed.', expected(method,:), tol)
%! endfor
191 | |
%!test
%! ## Row k of `expected` holds the quantiles of [1; 2; 5; 9; 11] for method k.
%! probs = [0.00, 0.25, 0.50, 0.75, 1.00];
%! data = [1; 2; 5; 9; 11];
%! expected = [1.0000, 2.0000, 5.0000, 9.0000, 11.0000
%!             1.0000, 2.0000, 5.0000, 9.0000, 11.0000
%!             1.0000, 1.0000, 2.0000, 9.0000, 11.0000
%!             1.0000, 1.2500, 3.5000, 8.0000, 11.0000
%!             1.0000, 1.7500, 5.0000, 9.5000, 11.0000
%!             1.0000, 1.5000, 5.0000, 10.0000, 11.0000
%!             1.0000, 2.0000, 5.0000, 9.0000, 11.0000
%!             1.0000, 1.6667, 5.0000, 9.6667, 11.0000
%!             1.0000, 1.6875, 5.0000, 9.6250, 11.0000];
%! tol = 0.0001;
%! for method = 1:9
%!   observed = quantile (data, probs, 1, method);
%!   assert (observed.', expected(method,:), tol)
%! endfor
208 | |
%!test
%! ## Unsorted integer sample with repeated values; row k of `expected`
%! ## holds the quantiles for method k.
%! probs = [0.00, 0.25, 0.50, 0.75, 1.00];
%! data = [16; 11; 15; 12; 15; 8; 11; 12; 6; 10];
%! expected = [6.0000, 10.0000, 11.0000, 15.0000, 16.0000
%!             6.0000, 10.0000, 11.5000, 15.0000, 16.0000
%!             6.0000, 8.0000, 11.0000, 15.0000, 16.0000
%!             6.0000, 9.0000, 11.0000, 13.5000, 16.0000
%!             6.0000, 10.0000, 11.5000, 15.0000, 16.0000
%!             6.0000, 9.5000, 11.5000, 15.0000, 16.0000
%!             6.0000, 10.2500, 11.5000, 14.2500, 16.0000
%!             6.0000, 9.8333, 11.5000, 15.0000, 16.0000
%!             6.0000, 9.8750, 11.5000, 15.0000, 16.0000];
%! tol = 0.0001;
%! for method = 1:9
%!   observed = quantile (data, probs, 1, method);
%!   assert (observed.', expected(method,:), tol)
%! endfor
225 | |
%!test
%! ## Unsorted real-valued sample with negative entries; row k of
%! ## `expected` holds the quantiles for method k.
%! probs = [0.00, 0.25, 0.50, 0.75, 1.00];
%! data = [-0.58851
%!         0.40048
%!         0.49527
%!         -2.551500
%!         -0.52057
%!         -0.17841
%!         0.057322
%!         -0.62523
%!         0.042906
%!         0.12337];
%! expected = [-2.551474, -0.588505, -0.178409, 0.123366, 0.495271
%!             -2.551474, -0.588505, -0.067751, 0.123366, 0.495271
%!             -2.551474, -0.625231, -0.178409, 0.123366, 0.495271
%!             -2.551474, -0.606868, -0.178409, 0.090344, 0.495271
%!             -2.551474, -0.588505, -0.067751, 0.123366, 0.495271
%!             -2.551474, -0.597687, -0.067751, 0.192645, 0.495271
%!             -2.551474, -0.571522, -0.067751, 0.106855, 0.495271
%!             -2.551474, -0.591566, -0.067751, 0.146459, 0.495271
%!             -2.551474, -0.590801, -0.067751, 0.140686, 0.495271];
%! tol = 0.0001;
%! for method = 1:9
%!   observed = quantile (data, probs, 1, method);
%!   assert (observed.', expected(method,:), tol)
%! endfor
243 | |
%!test
%! ## Column medians while NaNs are progressively inserted: each NaN is
%! ## expected to be dropped from its column before the median is taken.
%! prob = 0.5;
%! data = [0.112600, 0.114800, 0.052100, 0.236400, 0.139300
%!         0.171800, 0.727300, 0.204100, 0.453100, 0.158500
%!         0.279500, 0.797800, 0.329600, 0.556700, 0.730700
%!         0.428800, 0.875300, 0.647700, 0.628700, 0.816500
%!         0.933100, 0.931200, 0.963500, 0.779600, 0.846100];
%! tol = 0.00001;
%! data(5,5) = NaN;
%! expected = [0.27950, 0.79780, 0.32960, 0.55670, 0.44460];
%! assert (quantile (data, prob, 1), expected, tol);
%! data(1,1) = NaN;
%! expected = [0.35415, 0.79780, 0.32960, 0.55670, 0.44460];
%! assert (quantile (data, prob, 1), expected, tol);
%! data(3,3) = NaN;
%! expected = [0.35415, 0.79780, 0.42590, 0.55670, 0.44460];
%! assert (quantile (data, prob, 1), expected, tol);
258 | |
%!test
%! ## For an N-d array the 0.5 quantile along a given dimension must
%! ## agree with median along that same dimension.
%! dims = [2, 3, 4];
%! data = rand (dims);
%! along = 2;
%! prob = 0.5;
%! observed = quantile (data, prob, along);
%! expected = median (data, along);
%! assert (observed, expected);
267 |