7017
|
1 ## Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2002, 2005, 2006, |
|
2 ## 2007 Kurt Hornik |
3426
|
3 ## |
3922
|
4 ## This file is part of Octave. |
|
5 ## |
|
6 ## Octave is free software; you can redistribute it and/or modify it |
|
7 ## under the terms of the GNU General Public License as published by |
7016
|
8 ## the Free Software Foundation; either version 3 of the License, or (at |
|
9 ## your option) any later version. |
3426
|
10 ## |
3922
|
11 ## Octave is distributed in the hope that it will be useful, but |
3200
|
12 ## WITHOUT ANY WARRANTY; without even the implied warranty of |
|
13 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
3426
|
14 ## General Public License for more details. |
|
15 ## |
3200
|
16 ## You should have received a copy of the GNU General Public License |
7016
|
17 ## along with Octave; see the file COPYING. If not, see |
|
18 ## <http://www.gnu.org/licenses/>. |
3200
|
19 |
3454
|
20 ## -*- texinfo -*- |
|
21 ## @deftypefn {Function File} {[@var{pval}, @var{k}, @var{df}] =} kruskal_wallis_test (@var{x1}, @dots{}) |
3200
|
22 ## Perform a Kruskal-Wallis one-factor "analysis of variance". |
|
23 ## |
3454
|
24 ## Suppose a variable is observed for @var{k} > 1 different groups, and |
|
25 ## let @var{x1}, @dots{}, @var{xk} be the corresponding data vectors. |
3200
|
26 ## |
|
27 ## Under the null hypothesis that the ranks in the pooled sample are not |
3454
|
28 ## affected by the group memberships, the test statistic @var{k} is |
|
29 ## approximately chi-square with @var{df} = (number of groups) - 1 degrees of |
|
30 ## freedom. |
3200
|
31 ## |
7485
|
32 ## If the data contains ties (some value appears more than once) |
|
33 ## @var{k} is divided by |
|
34 ## |
8507
|
35 ## 1 - @var{sum_ties} / (@var{n}^3 - @var{n}) |
7485
|
36 ## |
8507
|
37 ## where @var{sum_ties} is the sum of @var{t}^3 - @var{t} over each group |
7485
|
38 ## of ties where @var{t} is the number of ties in the group and @var{n} |
|
39 ## is the total number of values in the input data. For more info on |
|
40 ## this adjustment see "Use of Ranks in One-Criterion Variance Analysis" |
|
41 ## in Journal of the American Statistical Association, Vol. 47, |
|
42 ## No. 260 (Dec 1952) by William H. Kruskal and W. Allen Wallis. |
|
43 ## |
3454
|
44 ## The p-value (1 minus the CDF of this distribution at @var{k}) is |
|
45 ## returned in @var{pval}. |
|
46 ## |
|
47 ## If no output argument is given, the p-value is displayed. |
|
48 ## @end deftypefn |
3200
|
49 |
5428
|
50 ## Author: KH <Kurt.Hornik@wu-wien.ac.at> |
3456
|
51 ## Description: Kruskal-Wallis test |
3426
|
52 |
3979
|
function [pval, k, df] = kruskal_wallis_test (varargin)

  ## Kruskal-Wallis one-factor rank test.
  ##
  ## Inputs : two or more data vectors (row or column), one per group.
  ## Outputs: pval - 1 minus the chi-square CDF evaluated at k with df
  ##                 degrees of freedom
  ##          k    - tie-corrected test statistic
  ##          df   - number of groups minus one
  ## When called without output arguments the p-value is also printed.

  n_groups = nargin;
  if (n_groups < 2)
    print_usage ();
  endif

  ## Pool all observations into a single row vector while remembering
  ## how many observations each group contributed.
  group_sizes = [];
  pooled = [];
  for g = 1 : n_groups
    sample = varargin{g};
    if (! isvector (sample))
      error ("kruskal_wallis_test: all arguments must be vectors");
    endif
    len = length (sample);
    group_sizes = [group_sizes, len];
    pooled = [pooled, (reshape (sample, 1, len))];
  endfor

  ## Rank the pooled sample.
  r = ranks (pooled);

  ## Accumulate (rank sum of group)^2 / (group size) over the groups.
  ## Each group occupies a contiguous slice of the pooled ranks.
  last_idx = cumsum (group_sizes);
  first_idx = last_idx - group_sizes + 1;
  k = 0;
  for g = 1 : n_groups
    rank_sum = sum (r (first_idx(g) : last_idx(g)));
    k = k + rank_sum ^ 2 / group_sizes(g);
  endfor

  ## Scale and shift by the total number of observations.
  n_total = length (pooled);
  k = 12 * k / (n_total * (n_total + 1)) - 3 * (n_total + 1);

  ## Adjust the result to take ties into account: every run of t equal
  ## values in the sorted pooled sample contributes t^3 - t.
  t = runlength (sort (pooled));
  sum_ties = sum ((t .* t - 1) .* t);
  k = k / (1 - sum_ties / (n_total^3 - n_total));

  df = n_groups - 1;
  pval = 1 - chisquare_cdf (k, df);

  if (! nargout)
    printf ("pval: %g\n", pval);
  endif

endfunction
|
97 |
7485
|
98 ## Test with ties |
|
99 %!assert (abs(kruskal_wallis_test([86 86], [74]) - 0.157299207050285) < 0.0000000000001) |