Mercurial > hg > octave-nkf
view scripts/statistics/base/__quantile__.m @ 11889:221d555a5b91 release-3-0-x
Modified statistics to calculate consistent median.
author | Ben Abbott <bpabbott@mac.com> |
---|---|
date | Fri, 21 Nov 2008 16:58:19 +0100 |
parents | |
children | cf59d542f33e af0adfbd3d16 |
line wrap: on
line source
## Copyright (C) 2008 Ben Abbott, Jaroslav Hajek
##
## This file is part of Octave.
##
## Octave is free software; you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 3 of the License, or (at
## your option) any later version.
##
## Octave is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Octave; see the file COPYING.  If not, see
## <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn {Function File} {@var{q} =} __quantile__ (@var{x}, @var{p})
## @deftypefnx {Function File} {@var{q} =} __quantile__ (@var{x}, @var{p}, @var{method})
## For the cumulative probability values in @var{p}, compute the
## quantiles, @var{q} (the inverse of the cdf), for the sample, @var{x}.
##
## Each column of @var{x} is treated as a separate sample and NaN entries
## are ignored.  The result @var{q} has one row per element of @var{p}
## and one column per column of @var{x}.  Probabilities below 0 yield
## -Inf and probabilities above 1 yield +Inf.  If @var{x} has a single
## row, each column's only sample is returned for every valid probability.
##
## The optional input, @var{method}, refers to nine methods available in R
## (http://www.r-project.org/).  The default is @var{method} = 5.  For more
## detail, see `help quantile'.
## @seealso{prctile, quantile, statistics}
## @end deftypefn

## Author: Ben Abbott <bpabbott@mac.com>
## Vectorized version: Jaroslav Hajek <highegg@gmail.com>
## Description: Quantile function of empirical samples

function inv = __quantile__ (x, p, method = 5)

  if (nargin < 2 || nargin > 3)
    print_usage ();
  endif

  if (! ismatrix (x))
    error ("quantile: x must be a matrix");
  endif

  ## Set shape of quantiles.
  p = p(:);

  ## Sort every column and count its valid samples.  The dimension is
  ## given explicitly so that a single-row input is not sorted across
  ## its columns.  Ascending sort places NaNs last, so the first m(j)
  ## entries of column j are the valid samples.
  x = sort (x, 1);
  m = sum (! isnan (x), 1);
  mx = size (x, 1);
  nx = size (x, 2);

  ## Initialize output values: -Inf for p < 0, +Inf for p > 1 and NaN
  ## (Inf*0) elsewhere; the in-range rows are overwritten below.
  inv = Inf*(-(p < 0) + (p > 1));
  inv = repmat (inv, 1, nx);

  ## Do the work.
  if (any (k = find ((p >= 0) & (p <= 1))))
    n = length (k);
    p = p(k);

    ## Special case: one sample per column.
    if (mx == 1)
      inv(k,:) = repmat (x, n, 1);
      return;
    endif

    ## The column-distribution indices: linear-index offset of each
    ## column of x, replicated for every requested probability.
    pcd = kron (ones (n, 1), mx*(0:nx-1));
    ## Number of valid samples per column, replicated the same way.
    mm = kron (ones (n, 1), m);

    switch method
      case {1, 2, 3}
        ## Discontinuous methods.
        switch method
          case 1
            p = max (ceil (kron (p, m)), 1);
            inv(k,:) = x(p + pcd);

          case 2
            p = kron (p, m);
            p_lr = max (ceil (p), 1);
            ## Clamp to at least 1 so that a column without any valid
            ## sample (m == 0) yields NaN instead of an invalid index.
            p_rl = max (min (floor (p + 1), mm), 1);
            inv(k,:) = (x(p_lr + pcd) + x(p_rl + pcd))/2;

          case 3
            ## Used by SAS, method PCTLDEF=2.
            ## http://support.sas.com/onlinedoc/913/getDoc/en/statug.hlp/stdize_sect14.htm
            t = max (kron (p, m), 1);
            t = roundb (t);
            inv(k,:) = x(t + pcd);
        endswitch

      otherwise
        ## Continuous methods: compute a fractional position first.
        switch method
          case 4
            p = kron (p, m);

          case 5
            ## Used by Matlab.
            p = kron (p, m) + 0.5;

          case 6
            ## Used by Minitab and SPSS.
            p = kron (p, m+1);

          case 7
            ## Used by S and R.
            p = kron (p, m-1) + 1;

          case 8
            ## Median unbiased.
            p = kron (p, m+1/3) + 1/3;

          case 9
            ## Approximately unbiased respecting order statistics.
            p = kron (p, m+0.25) + 0.375;

          otherwise
            error ("quantile: Unknown method, '%d'",method)
        endswitch

        ## Duplicate single values so that interpolation between rows 1
        ## and 2 never touches a NaN.  The mask is per column of x, so it
        ## must be built from m (1-by-nx) and not from the replicated mm.
        single = (m == 1);
        x(2,single) = x(1,single);

        ## Interval indices and interpolation weights.
        idx = max (min (floor (p), mm-1), 1);
        wgt = max (min (p - idx, 1), 0);
        idx += pcd;
        inv(k,:) = (1-wgt) .* x(idx) + wgt .* x(idx+1);
    endswitch
  endif

endfunction

%!test
%! p = 0.5;
%! x = sort (rand (11));
%! q = __quantile__ (x, p);
%! assert (q, x(6,:))

%!test
%! p = [0.00, 0.25, 0.50, 0.75, 1.00];
%! x = [1; 2; 3; 4];
%! a = [1.0000   1.0000   2.0000   3.0000   4.0000
%!      1.0000   1.5000   2.5000   3.5000   4.0000
%!      1.0000   1.0000   2.0000   3.0000   4.0000
%!      1.0000   1.0000   2.0000   3.0000   4.0000
%!      1.0000   1.5000   2.5000   3.5000   4.0000
%!      1.0000   1.2500   2.5000   3.7500   4.0000
%!      1.0000   1.7500   2.5000   3.2500   4.0000
%!      1.0000   1.4167   2.5000   3.5833   4.0000
%!      1.0000   1.4375   2.5000   3.5625   4.0000];
%! for m = (1:9)
%!   q = __quantile__ (x, p, m).';
%!   assert (q, a(m,:), 0.0001)
%! endfor

%!test
%! p = [0.00, 0.25, 0.50, 0.75, 1.00];
%! x = [1; 2; 3; 4; 5];
%! a = [1.0000   2.0000   3.0000   4.0000   5.0000
%!      1.0000   2.0000   3.0000   4.0000   5.0000
%!      1.0000   1.0000   2.0000   4.0000   5.0000
%!      1.0000   1.2500   2.5000   3.7500   5.0000
%!      1.0000   1.7500   3.0000   4.2500   5.0000
%!      1.0000   1.5000   3.0000   4.5000   5.0000
%!      1.0000   2.0000   3.0000   4.0000   5.0000
%!      1.0000   1.6667   3.0000   4.3333   5.0000
%!      1.0000   1.6875   3.0000   4.3125   5.0000];
%! for m = (1:9)
%!   q = __quantile__ (x, p, m).';
%!   assert (q, a(m,:), 0.0001)
%! endfor

%!test
%! p = [0.00, 0.25, 0.50, 0.75, 1.00];
%! x = [1; 2; 5; 9];
%! a = [1.0000   1.0000   2.0000   5.0000   9.0000
%!      1.0000   1.5000   3.5000   7.0000   9.0000
%!      1.0000   1.0000   2.0000   5.0000   9.0000
%!      1.0000   1.0000   2.0000   5.0000   9.0000
%!      1.0000   1.5000   3.5000   7.0000   9.0000
%!      1.0000   1.2500   3.5000   8.0000   9.0000
%!      1.0000   1.7500   3.5000   6.0000   9.0000
%!      1.0000   1.4167   3.5000   7.3333   9.0000
%!      1.0000   1.4375   3.5000   7.2500   9.0000];
%! for m = (1:9)
%!   q = __quantile__ (x, p, m).';
%!   assert (q, a(m,:), 0.0001)
%! endfor

%!test
%! p = [0.00, 0.25, 0.50, 0.75, 1.00];
%! x = [1; 2; 5; 9; 11];
%! a = [1.0000    2.0000    5.0000    9.0000   11.0000
%!      1.0000    2.0000    5.0000    9.0000   11.0000
%!      1.0000    1.0000    2.0000    9.0000   11.0000
%!      1.0000    1.2500    3.5000    8.0000   11.0000
%!      1.0000    1.7500    5.0000    9.5000   11.0000
%!      1.0000    1.5000    5.0000   10.0000   11.0000
%!      1.0000    2.0000    5.0000    9.0000   11.0000
%!      1.0000    1.6667    5.0000    9.6667   11.0000
%!      1.0000    1.6875    5.0000    9.6250   11.0000];
%! for m = (1:9)
%!   q = __quantile__ (x, p, m).';
%!   assert (q, a(m,:), 0.0001)
%! endfor

%!test
%! p = [0.00, 0.25, 0.50, 0.75, 1.00];
%! x = [16; 11; 15; 12; 15;  8; 11; 12;  6; 10];
%! a = [6.0000   10.0000   11.0000   15.0000   16.0000
%!      6.0000   10.0000   11.5000   15.0000   16.0000
%!      6.0000    8.0000   11.0000   15.0000   16.0000
%!      6.0000    9.0000   11.0000   13.5000   16.0000
%!      6.0000   10.0000   11.5000   15.0000   16.0000
%!      6.0000    9.5000   11.5000   15.0000   16.0000
%!      6.0000   10.2500   11.5000   14.2500   16.0000
%!      6.0000    9.8333   11.5000   15.0000   16.0000
%!      6.0000    9.8750   11.5000   15.0000   16.0000];
%! for m = (1:9)
%!   q = __quantile__ (x, p, m).';
%!   assert (q, a(m,:), 0.0001)
%! endfor

%!test
%! p = [0.00, 0.25, 0.50, 0.75, 1.00];
%! x = [-0.58851;  0.40048;  0.49527; -2.551500; -0.52057; ...
%!      -0.17841; 0.057322; -0.62523;  0.042906;  0.12337];
%! a = [-2.551474  -0.588505  -0.178409   0.123366   0.495271
%!      -2.551474  -0.588505  -0.067751   0.123366   0.495271
%!      -2.551474  -0.625231  -0.178409   0.123366   0.495271
%!      -2.551474  -0.606868  -0.178409   0.090344   0.495271
%!      -2.551474  -0.588505  -0.067751   0.123366   0.495271
%!      -2.551474  -0.597687  -0.067751   0.192645   0.495271
%!      -2.551474  -0.571522  -0.067751   0.106855   0.495271
%!      -2.551474  -0.591566  -0.067751   0.146459   0.495271
%!      -2.551474  -0.590801  -0.067751   0.140686   0.495271];
%! for m = (1:9)
%!   q = __quantile__ (x, p, m).';
%!   assert (q, a(m,:), 0.0001)
%! endfor

%!test
%! p = 0.5;
%! x = [0.112600, 0.114800, 0.052100, 0.236400, 0.139300
%!      0.171800, 0.727300, 0.204100, 0.453100, 0.158500
%!      0.279500, 0.797800, 0.329600, 0.556700, 0.730700
%!      0.428800, 0.875300, 0.647700, 0.628700, 0.816500
%!      0.933100, 0.931200, 0.963500, 0.779600, 0.846100];
%! tol = 0.00001;
%! x(5,5) = NaN;
%! assert (__quantile__ (x, p), [0.27950, 0.79780, 0.32960, 0.55670, 0.44460], tol);
%! x(1,1) = NaN;
%! assert (__quantile__ (x, p), [0.35415, 0.79780, 0.32960, 0.55670, 0.44460], tol);
%! x(3,3) = NaN;
%! assert (__quantile__ (x, p), [0.35415, 0.79780, 0.42590, 0.55670, 0.44460], tol);

%!test
%! ## A column with a single valid sample, queried at several probabilities.
%! x = [1, 2; 3, NaN; 5, NaN];
%! p = [0.25; 0.50; 0.75];
%! assert (__quantile__ (x, p), [1.5, 2; 3, 2; 4.5, 2], 10*eps)

%!test
%! ## A single-row input keeps its column order.
%! assert (__quantile__ ([3, 1, 2], [0.25; 0.75]), [3, 1, 2; 3, 1, 2])

%!test
%! ## Method 2 on a column without any valid sample gives NaN.
%! assert (isnan (__quantile__ ([NaN; NaN], 0.5, 2)))