Mercurial > hg > octave-lyh
annotate scripts/statistics/tests/kruskal_wallis_test.m @ 9051:1bf0ce0930be
Grammar check TexInfo in all .m files
Cleanup documentation sources to follow a few consistent rules.
Spellcheck was NOT done. (but will be in another changeset)
author | Rik <rdrider0-list@yahoo.com> |
---|---|
date | Fri, 27 Mar 2009 22:31:03 -0700 |
parents | eb63fbe60fab |
children | f63ee6a20f57 |
rev | line source |
---|---|
7017 | 1 ## Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2002, 2005, 2006, |
8920 | 2 ## 2007, 2008, 2009 Kurt Hornik |
3426 | 3 ## |
3922 | 4 ## This file is part of Octave. |
5 ## | |
6 ## Octave is free software; you can redistribute it and/or modify it | |
7 ## under the terms of the GNU General Public License as published by | |
7016 | 8 ## the Free Software Foundation; either version 3 of the License, or (at |
9 ## your option) any later version. | |
3426 | 10 ## |
3922 | 11 ## Octave is distributed in the hope that it will be useful, but |
3200 | 12 ## WITHOUT ANY WARRANTY; without even the implied warranty of |
13 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
3426 | 14 ## General Public License for more details. |
15 ## | |
3200 | 16 ## You should have received a copy of the GNU General Public License |
7016 | 17 ## along with Octave; see the file COPYING. If not, see |
18 ## <http://www.gnu.org/licenses/>. | |
3200 | 19 |
3454 | 20 ## -*- texinfo -*- |
21 ## @deftypefn {Function File} {[@var{pval}, @var{k}, @var{df}] =} kruskal_wallis_test (@var{x1}, @dots{}) | |
3200 | 22 ## Perform a Kruskal-Wallis one-factor "analysis of variance". |
23 ## | |
3454 | 24 ## Suppose a variable is observed for @var{k} > 1 different groups, and |
25 ## let @var{x1}, @dots{}, @var{xk} be the corresponding data vectors. | |
3200 | 26 ## |
27 ## Under the null hypothesis that the ranks in the pooled sample are not | |
3454 | 28 ## affected by the group memberships, the test statistic @var{k} is |
29 ## approximately chi-square with @var{df} = (number of groups) - 1 degrees of | |
30 ## freedom. | |
3200 | 31 ## |
7485 | 32 ## If the data contains ties (some value appears more than once) |
33 ## @var{k} is divided by | |
34 ## | |
8507 | 35 ## 1 - @var{sum_ties} / (@var{n}^3 - @var{n}) |
7485 | 36 ## |
8507 | 37 ## where @var{sum_ties} is the sum of @var{t}^3 - @var{t} over each group |
7485 | 38 ## of ties where @var{t} is the number of ties in the group and @var{n} |
9051
1bf0ce0930be
Grammar check TexInfo in all .m files
Rik <rdrider0-list@yahoo.com>
parents:
8920
diff
changeset
|
39 ## is the total number of values in the input data. For more info on |
7485 | 40 ## this adjustment see "Use of Ranks in One-Criterion Variance Analysis" |
41 ## in Journal of the American Statistical Association, Vol. 47, | |
42 ## No. 260 (Dec 1952) by William H. Kruskal and W. Allen Wallis. | |
43 ## | |
3454 | 44 ## The p-value (1 minus the CDF of this distribution at @var{k}) is |
45 ## returned in @var{pval}. | |
46 ## | |
47 ## If no output argument is given, the p-value is displayed. | |
48 ## @end deftypefn | |
3200 | 49 |
5428 | 50 ## Author: KH <Kurt.Hornik@wu-wien.ac.at> |
3456 | 51 ## Description: Kruskal-Wallis test |
3426 | 52 |
function [pval, k, df] = kruskal_wallis_test (varargin)

  ## Each input argument is one group's sample; a comparison between
  ## groups needs at least two of them.
  num_groups = nargin;
  if (num_groups < 2)
    print_usage ();
  endif

  ## Validate every sample, remember its size, and append it (as a row)
  ## to the pooled sample so that group i occupies a contiguous slice.
  group_sizes = zeros (1, num_groups);
  pooled = [];
  for idx = 1 : num_groups
    sample = varargin{idx};
    if (! isvector (sample))
      error ("kruskal_wallis_test: all arguments must be vectors");
    endif
    group_sizes(idx) = length (sample);
    pooled = [pooled, (reshape (sample, 1, group_sizes(idx)))];
  endfor

  ## Rank the pooled observations once; the slices below pick out the
  ## ranks belonging to each group.
  pooled_ranks = ranks (pooled);
  slice_end = cumsum (group_sizes);
  slice_start = slice_end - group_sizes + 1;

  ## Accumulate (rank sum of group)^2 / (group size) over all groups.
  k = 0;
  for idx = 1 : num_groups
    rank_sum = sum (pooled_ranks(slice_start(idx) : slice_end(idx)));
    k = k + rank_sum ^ 2 / group_sizes(idx);
  endfor

  ## Turn the accumulated sum into the Kruskal-Wallis statistic.
  total = length (pooled);
  k = 12 * k / (total * (total + 1)) - 3 * (total + 1);

  ## Adjust the result to take ties into account: every run of t equal
  ## values in the sorted pooled sample contributes t^3 - t.
  tie_counts = runlength (sort (pooled));
  sum_ties = sum (tie_counts .^ 3 - tie_counts);
  k = k / (1 - sum_ties / (total ^ 3 - total));

  ## p-value from the chi-square distribution with (groups - 1) degrees
  ## of freedom, evaluated at the statistic.
  df = num_groups - 1;
  pval = 1 - chisquare_cdf (k, df);

  ## With no output requested, display the p-value.
  if (nargout == 0)
    printf ("pval: %g\n", pval);
  endif

endfunction
97 | |
7485 | 98 ## Test with ties |
99 %!assert (abs(kruskal_wallis_test([86 86], [74]) - 0.157299207050285) < 0.0000000000001) |