3200
|
1 ## Copyright (C) 1995, 1996, 1997 Kurt Hornik |
3426
|
2 ## |
3922
|
3 ## This file is part of Octave. |
|
4 ## |
|
5 ## Octave is free software; you can redistribute it and/or modify it |
|
6 ## under the terms of the GNU General Public License as published by |
3200
|
7 ## the Free Software Foundation; either version 2, or (at your option) |
|
8 ## any later version. |
3426
|
9 ## |
3922
|
10 ## Octave is distributed in the hope that it will be useful, but |
3200
|
11 ## WITHOUT ANY WARRANTY; without even the implied warranty of |
|
12 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
3426
|
13 ## General Public License for more details. |
|
14 ## |
3200
|
15 ## You should have received a copy of the GNU General Public License |
3922
|
16 ## along with Octave; see the file COPYING. If not, write to the Free |
5307
|
17 ## Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
|
18 ## 02110-1301, USA. |
3200
|
19 |
3454
|
20 ## -*- texinfo -*- |
3604
|
21 ## @deftypefn {Function File} {[@var{pval}, @var{ks}, @var{d}] =} kolmogorov_smirnov_test_2 (@var{x}, @var{y}, @var{alt}) |
3454
|
22 ## Perform a 2-sample Kolmogorov-Smirnov test of the null hypothesis |
|
23 ## that the samples @var{x} and @var{y} come from the same (continuous) |
|
24 ## distribution. I.e., if F and G are the CDFs corresponding to the |
|
25 ## @var{x} and @var{y} samples, respectively, then the null is that F == |
|
26 ## G. |
3200
|
27 ## |
3454
|
28 ## With the optional argument string @var{alt}, the alternative of |
|
29 ## interest can be selected. If @var{alt} is @code{"!="} or |
|
30 ## @code{"<>"}, the null is tested against the two-sided alternative F |
|
31 ## != G. In this case, the test statistic @var{ks} follows a two-sided |
|
32 ## Kolmogorov-Smirnov distribution. If @var{alt} is @code{">"}, the |
3457
|
33 ## one-sided alternative F > G is considered. Similarly for @code{"<"}, |
|
34 ## the one-sided alternative F < G is considered. In this case, the |
|
35 ## test statistic @var{ks} has a one-sided Kolmogorov-Smirnov |
|
36 ## distribution. The default is the two-sided case. |
3200
|
37 ## |
3454
|
38 ## The p-value of the test is returned in @var{pval}. |
3200
|
39 ## |
3604
|
40 ## The third returned value, @var{d}, is the test statistic, the maximum |
|
41 ## vertical distance between the two cumulative distribution functions. |
|
42 ## |
3426
|
43 ## If no output argument is given, the p-value is displayed. |
3454
|
44 ## @end deftypefn |
3200
|
45 |
5428
|
46 ## Author: KH <Kurt.Hornik@wu-wien.ac.at> |
3456
|
47 ## Description: Two-sample Kolmogorov-Smirnov test |
3200
|
48 |
3604
|
function [pval, ks, d] = kolmogorov_smirnov_test_2 (x, y, alt)

  ## Two samples are mandatory; the alternative ALT is optional.
  ## print_usage reports the signature from the texinfo header, so the
  ## message cannot drift out of sync with the real argument list.
  if (nargin < 2 || nargin > 3)
    print_usage ();
  endif

  if (! (isvector (x) && isvector (y)))
    error ("kolmogorov_smirnov_test_2: both x and y must be vectors");
  endif

  ## Default to the two-sided alternative when ALT is not supplied.
  if (nargin == 2)
    alt = "!=";
  elseif (! ischar (alt))
    error ("kolmogorov_smirnov_test_2: alt must be a string");
  endif

  n_x = length (x);
  n_y = length (y);
  ## Scaling factor applied to the distance d below: ks = sqrt (n) * d.
  n = n_x * n_y / (n_x + n_y);

  ## Pool both samples as one column and sort it.  An entry of the
  ## permutation i that is <= n_x marks a value that came from x.
  x = reshape (x, n_x, 1);
  y = reshape (y, n_y, 1);
  [s, i] = sort ([x; y]);

  ## Each x observation contributes +1/n_x and each y observation
  ## -1/n_y, so the running sum z is the difference between the two
  ## empirical CDFs evaluated at every sorted pooled value.
  from_x = (i <= n_x);
  count = zeros (n_x + n_y, 1);
  count(from_x) = 1 / n_x;
  count(! from_x) = - 1 / n_y;

  z = cumsum (count);
  ds = diff (s);
  if (any (ds == 0))
    ## There are some ties, so keep only those changes: retain z at the
    ## last position of every run of equal values, plus the final entry.
    warning ("cannot compute correct p-values with ties");
    elems = [find(ds); n_x+n_y];
    z = z(elems);
  endif

  if (strcmp (alt, "!=") || strcmp (alt, "<>"))
    ## Two-sided: largest absolute difference between the CDFs.
    d = max (abs (z));
    ks = sqrt (n) * d;
    pval = 1 - kolmogorov_smirnov_cdf (ks);
  elseif (strcmp (alt, ">"))
    ## One-sided: largest positive difference.
    d = max (z);
    ks = sqrt (n) * d;
    pval = exp (-2 * ks^2);
  elseif (strcmp (alt, "<"))
    ## One-sided: most negative difference.  d keeps the sign of that
    ## minimum; ks negates it.
    d = min (z);
    ks = - sqrt (n) * d;
    pval = exp (-2 * ks^2);
  else
    error ("kolmogorov_smirnov_test_2: option %s not recognized", alt);
  endif

  ## When called without output arguments, display the p-value.
  if (nargout == 0)
    printf (" pval: %g\n", pval);
  endif

endfunction