# HG changeset patch # User jwe # Date 909638589 0 # Node ID 781c930425fdbf07378cea56f4b801129ae48d99 # Parent 9ceefd891930823cb06340ac3d39bf444629c8f2 [project @ 1998-10-29 05:23:08 by jwe] diff --git a/scripts/statistics/base/Makefile.in b/scripts/statistics/base/Makefile.in new file mode 100644 --- /dev/null +++ b/scripts/statistics/base/Makefile.in @@ -0,0 +1,75 @@ +# +# Makefile for octave's scripts/statistics/base directory +# +# John W. Eaton +# jwe@bevo.che.wisc.edu +# University of Wisconsin-Madison +# Department of Chemical Engineering + +TOPDIR = ../../.. + +script_sub_dir = statistics/base + +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ +VPATH = @srcdir@ + +include $(TOPDIR)/Makeconf + +INSTALL = @INSTALL@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_DATA = @INSTALL_DATA@ + +SOURCES = *.m + +DISTFILES = Makefile.in $(SOURCES) + +FCN_FILES = $(wildcard $(srcdir)/*.m) +FCN_FILES_NO_DIR = $(notdir $(FCN_FILES)) + +BINDISTFILES = $(FCN_FILES) + +all: +.PHONY: all + +install install-strip: + $(top_srcdir)/mkinstalldirs $(fcnfiledir)/$(script_sub_dir) + for f in $(FCN_FILES_NO_DIR); do \ + rm -f $(fcnfiledir)/$(script_sub_dir)/$$f; \ + $(INSTALL_DATA) $(srcdir)/$$f $(fcnfiledir)/$(script_sub_dir)/$$f; \ + done +.PHONY: install install-strip + +uninstall: + for f in $(FCN_FILES_NO_DIR); \ + do rm -f $(fcnfiledir)/$(script_sub_dir)/$$f; \ + done +.PHONY: uninstall + +clean: +.PHONY: clean + +tags: $(SOURCES) + ctags $(SOURCES) + +TAGS: $(SOURCES) + etags $(SOURCES) + +mostlyclean: clean +.PHONY: mostlyclean + +distclean: clean + rm -f Makefile +.PHONY: distclean + +maintainer-clean: distclean + rm -f tags TAGS +.PHONY: maintainer-clean + +dist: + ln $(DISTFILES) ../../../`cat ../../../.fname`/scripts/$(script_sub_dir) +.PHONY: dist + +bin-dist: + ln $(BINDISTFILES) ../../../`cat ../../../.fname`/scripts/$(script_sub_dir) +.PHONY: bin-dist diff --git a/scripts/statistics/base/center.m b/scripts/statistics/base/center.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/base/center.m @@ -0,0 +1,39 @@ +## Copyright (C) 1995, 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: center (x) +## +## If x is a vector, subtract its mean. +## If x is a matrix, do the above for each column. + +## Author: KH +## Description: Center by subtracting means + +function retval = center (x) + + if (nargin != 1) + usage ("center (x)"); + endif + + if is_vector (x) + retval = x - mean(x); + elseif is_matrix (x) + retval = x - ones (rows (x), 1) * mean(x); + else + error ("center: x must be a vector or a matrix."); + endif + +endfunction \ No newline at end of file diff --git a/scripts/statistics/base/cloglog.m b/scripts/statistics/base/cloglog.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/base/cloglog.m @@ -0,0 +1,31 @@ +## Copyright (C) 1995, 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## Returns cloglog (x) = - log (- log (x)), the complementary log-log +## function. + +## Author: KH +## Description: Complementary log-log function + +function y = cloglog (x) + + if (nargin != 1) + usage ("cloglog (x)"); + endif + + y = - log (- log (x)); + +endfunction \ No newline at end of file diff --git a/scripts/statistics/base/cor.m b/scripts/statistics/base/cor.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/base/cor.m @@ -0,0 +1,46 @@ +## Copyright (C) 1995, 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: cor (x [, y]) +## +## The (i,j)-th entry of cor (x, y) is the correlation between the i-th +## variable in x and the j-th variable in y. +## +## For matrices, each row is an observation and each column a variable; +## vectors are always observations and may be row or column vectors. +## +## cor (x) is cor (x, x). + +## Author: KH +## Description: Compute correlations + +function retval = cor (x, y) + + if (nargin < 1 || nargin > 2) + usage ("cor (x [, y])"); + endif + + if (nargin == 2) + c = cov (x, y); + s = std (x)' * std (y); + retval = c ./ s; + elseif (nargin == 1) + c = cov (x); + s = reshape (sqrt (diag (c)), 1, columns (c)); + retval = c ./ (s' * s); + endif + +endfunction diff --git a/scripts/statistics/base/corrcoef.m b/scripts/statistics/base/corrcoef.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/base/corrcoef.m @@ -0,0 +1,48 @@ +## Copyright (C) 1996, 1997 John W. Eaton +## +## This file is part of Octave. +## +## Octave is free software; you can redistribute it and/or modify it +## under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## Octave is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with Octave; see the file COPYING. If not, write to the Free +## Software Foundation, 59 Temple Place - Suite 330, Boston, MA +## 02111-1307, USA. + +## usage: corrcoef (x, y) +## +## The (i,j)-th entry of corrcoef (x, y) is the correlation between the +## i-th variable in x and the j-th variable in y. +## For matrices, each row is an observation and each column a variable; +## vectors are always observations and may be row or column vectors. +## corrcoef (x) is corrcoef (x, x). + +## Author: Kurt Hornik +## Created: March 1993 +## Adapted-By: jwe + +function retval = corrcoef (x, y) + + if (nargin < 1 || nargin > 2) + usage ("corrcoef (x, y)"); + endif + + if (nargin == 2) + c = cov (x, y); + s = std (x)' * std (y); + retval = c ./ s; + elseif (nargin == 1) + c = cov (x); + s = reshape (sqrt (diag (c)), 1, columns (c)); + retval = c ./ (s' * s); + endif + +endfunction diff --git a/scripts/statistics/base/cov.m b/scripts/statistics/base/cov.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/base/cov.m @@ -0,0 +1,56 @@ +## Copyright (C) 1995, 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: cov (x [, y]) +## +## The (i,j)-th entry of cov (x, y) is the covariance between the i-th +## variable in x and the j-th variable in y. +## +## For matrices, each row is an observation and each column a variable; +## vectors are always observations and may be row or column vectors. +## +## cov (x) is cov (x, x). + +## Author: KH +## Description: Compute covariances + +function c = cov (x, y) + + if (nargin < 1 || nargin > 2) + usage ("cov (x [, y])"); + endif + + if (rows (x) == 1) + x = x'; + endif + n = rows (x); + + if (nargin == 2) + if (rows (y) == 1) + y = y'; + endif + if (rows (y) != n) + error ("cov: x and y must have the same number of observations."); + endif + x = x - ones (n, 1) * sum (x) / n; + y = y - ones (n, 1) * sum (y) / n; + c = conj (x' * y / (n - 1)); + elseif (nargin == 1) + x = x - ones (n, 1) * sum (x) / n; + c = conj (x' * x / (n - 1)); + endif + +endfunction diff --git a/scripts/statistics/base/cut.m b/scripts/statistics/base/cut.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/base/cut.m @@ -0,0 +1,59 @@ +## Copyright (C) 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: cut (X, BREAKS) +## +## Create categorical data out of numerical or continuous data by +## cutting into intervals. +## +## If BREAKS is a scalar, the data is cut into that many equal-width +## intervals. If BREAKS is a vector of break points, the category has +## length(BREAKS)-1 groups. +## +## Returns a vector of the same size as X telling which group each point +## in X belongs to. Groups are labelled from 1 to the number of groups; +## points outside the range of BREAKS are labelled by NaN. + +## Author: KH +## Description: Cut data into intervals + +function group = cut (X, BREAKS) + + if (nargin != 2) + usage ("cut (X, BREAKS)"); + endif + + if !is_vector (X) + error ("cut: X must be a vector"); + endif + if is_scalar (BREAKS) + BREAKS = linspace (min (X), max (X), BREAKS + 1); + BREAKS(1) = BREAKS(1) - 1; + elseif is_vector (BREAKS) + BREAKS = sort (BREAKS); + else + error ("cut: BREAKS must be a scalar or vector"); + endif + + group = NaN * ones (size (X)); + m = length (BREAKS); + if any (k = find ((X >= min (BREAKS)) & (X <= max (BREAKS)))) + n = length (k); + group(k) = sum ((ones (m, 1) * reshape (X(k), 1, n)) + > (reshape (BREAKS, m, 1) * ones (1, n))); + endif + +endfunction diff --git a/scripts/statistics/base/gls.m b/scripts/statistics/base/gls.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/base/gls.m @@ -0,0 +1,69 @@ +## Copyright (C) 1996, 1997 John W. Eaton +## +## This file is part of Octave. +## +## Octave is free software; you can redistribute it and/or modify it +## under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## Octave is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with Octave; see the file COPYING. If not, write to the Free +## Software Foundation, 59 Temple Place - Suite 330, Boston, MA +## 02111-1307, USA. + +## usage: [BETA, v [,R]] = gls (Y, X, O) +## +## Generalized Least Squares (GLS) estimation for the multivariate model +## +## Y = X*B + E, mean(E) = 0, cov(vec(E)) = (s^2)*O +## +## with Y ... T x p As usual, each row of Y and X is an observation +## X ... T x k and each column a variable. +## B ... k x p +## E ... T x p +## O ... Tp x Tp. +## +## BETA is the GLS estimator for B. +## v is the GLS estimator for s^2. +## R = Y - X*BETA is the matrix of GLS residuals. + +## Author: Teresa Twaroch +## Created: May 1993 +## Adapted-By: jwe + +function [BETA, v, R] = gls (Y, X, O) + + if (nargin != 3) + usage ("[BETA, v [, R]] = gls (Y, X, O)"); + endif + + [rx, cx] = size (X); + [ry, cy] = size (Y); + if (rx != ry) + error ("gls: incorrect matrix dimensions"); + endif + + O = O^(-1/2); + Z = kron (eye (cy), X); + Z = O * Z; + Y1 = O * reshape (Y, ry*cy, 1); + U = Z' * Z; + r = rank (U); + + if (r == cx*cy) + B = inv (U) * Z' * Y1; + else + B = pinv (Z) * Y1; + endif + + BETA = reshape (B, cx, cy); + R = Y - X * BETA; + v = (reshape (R, ry*cy, 1))' * (O^2) * reshape (R, ry*cy, 1) / (rx*cy - r); + +endfunction diff --git a/scripts/statistics/base/iqr.m b/scripts/statistics/base/iqr.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/base/iqr.m @@ -0,0 +1,35 @@ +## Copyright (C) 1995, 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: iqr (x) +## +## If x is a vector, return the interquartile range, i.e., the +## difference between the upper and lower quartile, of the input data. +## +## If x is a matrix, do the above for each column of x. + +## Author KH +## Description: Interquartile range + +function y = iqr (x) + + if (nargin != 1) + usage ("iqr (x)"); + endif + + y = empirical_inv (3/4, x) - empirical_inv (1/4, x); + +endfunction diff --git a/scripts/statistics/base/kendall.m b/scripts/statistics/base/kendall.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/base/kendall.m @@ -0,0 +1,71 @@ +## Copyright (C) 1995, 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: kendall (x [, y]) +## +## Computes Kendall's tau for each of the variables specified by the +## input arguments. +## +## For matrices, each row is an observation and each column a variable; +## vectors are always observations and may be row or column vectors. +## +## kendall (x) is equivalent to kendall (x, x). +## +## For two data vectors x, y of common length n, Kendall's tau is the +## correlation of the signs of all rank differences of x and y; i.e., +## if both x and y have distinct entries, then \tau = \frac{1}{n(n-1)} +## \sum_{i,j} SIGN(q_i-q_j) SIGN(r_i-r_j), where the q_i and r_i are the +## ranks of x and y, respectively. +## +## If x and y are drawn from independent distributions, Kendall's tau is +## asymptotically normal with mean 0 and variance (2 * (2n+5)) / (9 * n +## * (n-1)). + +## Author: KH +## Description: Kendall's rank correlation tau + +function tau = kendall (x, y) + + if ((nargin < 1) || (nargin > 2)) + usage ("kendall (x [, y])"); + endif + + if (rows (x) == 1) + x = x'; + endif + [n, c] = size (x); + + if (nargin == 2) + if (rows (y) == 1) + y = y'; + endif + if (rows (y) != n) + error (["kendall: ", ... + "x and y must have the same number of observations"]); + else + x = [x, y]; + endif + endif + + r = ranks (x); + m = sign (kron (r, ones (n, 1)) - kron (ones (n, 1), r)); + tau = cor (m); + + if (nargin == 2) + tau = tau (1 : c, (c + 1) : columns (x)); + endif + +endfunction \ No newline at end of file diff --git a/scripts/statistics/base/kurtosis.m b/scripts/statistics/base/kurtosis.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/base/kurtosis.m @@ -0,0 +1,59 @@ +## Copyright (C) 1996, 1997 John W. Eaton +## +## This file is part of Octave. +## +## Octave is free software; you can redistribute it and/or modify it +## under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## Octave is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with Octave; see the file COPYING. If not, write to the Free +## Software Foundation, 59 Temple Place - Suite 330, Boston, MA +## 02111-1307, USA. + +## usage: kurtosis (x) +## +## If x is a vector of length N, return the kurtosis +## +## kurtosis(x) = N^(-1) std(x)^(-4) SUM_i (x(i)-mean(x))^4 - 3 +## +## of x. +## +## If x is a matrix, return a row vector containing the kurtosis for each +## column. + +## Author: KH +## Created: 29 July 1994 +## Adapted-By: jwe + +function retval = kurtosis (x) + + if (nargin != 1) + usage ("kurtosis (x)"); + endif + + if (is_vector (x)) + x = x - mean (x); + if (! any (x)) + retval = 0; + else + retval = sum (x .^ 4) / (length (x) * std (x) ^ 4) - 3; + endif + elseif (is_matrix (x)) + [nr, nc] = size (x); + x = x - ones (nr, 1) * mean (x); + retval = zeros (1, nc); + s = std (x); + ind = find (s > 0); + retval (ind) = sum (x (:, ind) .^ 4) ./ (nr * s (ind) .^ 4) - 3; + else + error ("kurtosis: x has to be a matrix or a vector."); + endif + +endfunction diff --git a/scripts/statistics/base/logit.m b/scripts/statistics/base/logit.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/base/logit.m @@ -0,0 +1,30 @@ +## Copyright (C) 1995, 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## For each component of p, return the logit log (p / (1-p)) of p. + +## Author: KH +## Description: Logit transformation + +function y = logit (p) + + if (nargin == 1) + y = logistic_inv (p); + else + usage ("logit (p)"); + endif + +endfunction \ No newline at end of file diff --git a/scripts/statistics/base/mahalanobis.m b/scripts/statistics/base/mahalanobis.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/base/mahalanobis.m @@ -0,0 +1,55 @@ +## Copyright (C) 1996, 1997 John W. Eaton +## +## This file is part of Octave. +## +## Octave is free software; you can redistribute it and/or modify it +## under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## Octave is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with Octave; see the file COPYING. If not, write to the Free +## Software Foundation, 59 Temple Place - Suite 330, Boston, MA +## 02111-1307, USA. + +## usage: mahalanobis (X, Y) +## +## Returns Mahalanobis' D-square distance between the multivariate +## samples X and Y, which must have the same number of components +## (columns), but may have a different number of observations (rows). + +## Author: Friedrich Leisch +## Created: July 1993 +## Adapted-By: jwe + +function retval = mahalanobis (X, Y) + + if (nargin != 2) + usage ("mahalanobis (X, Y)"); + endif + + [xr, xc] = size (X); + [yr, yc] = size (Y); + + if (xc != yc) + error ("mahalanobis: X and Y must have the same number of columns."); + endif + + Xm = sum (X) / xr; + Ym = sum (Y) / yr; + + X = X - ones (xr, 1) * Xm; + Y = Y - ones (yr, 1) * Ym; + + W = (X' * X + Y' * Y) / (xr + yr - 2); + + Winv = inv (W); + + retval = (Xm - Ym) * Winv * (Xm - Ym)'; + +endfunction diff --git a/scripts/statistics/base/mean.m b/scripts/statistics/base/mean.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/base/mean.m @@ -0,0 +1,71 @@ +## Copyright (C) 1995, 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: mean (x [, opt]) +## +## For vector arguments, return the mean the values. +## For matrix arguments, return a row vector containing the mean for +## each column. +## +## With the optional argument opt, the kind of mean computed can be +## selected. +## If opt is "a", the (ordinary) arithmetic mean is computed. This +## is the default. +## If opt is "g", the geometric mean is computed. +## If opt is "h", the harmonic mean is computed. + +## Author: KH +## Description: Compute arithmetic, geometric, and harmonic mean + +function y = mean (x, opt) + + if ((nargin < 1) || (nargin > 2)) + usage ("mean (x [, opt])"); + endif + + if isempty (x) + error ("mean: x must not be empty"); + endif + + if (rows (x) == 1) + x = x'; + endif + + if (nargin == 1) + opt = "a"; + endif + + [r, c] = size (x); + + if (strcmp (opt, "a")) + y = sum (x) / r; + elseif (strcmp (opt, "g")) + y = NaN * ones (1, c); + i = find (all (x > 0)); + if any (i) + y(i) = exp (sum (log (x(:, i))) / r); + endif + elseif (strcmp (opt, "h")) + y = NaN * ones (1, c); + i = find (all (x != 0)); + if any (i) + y(i) = r ./ sum (1 ./ x(:, i)); + endif + else + error (sprintf ("mean: option `%s' not recognized", opt)); + endif + +endfunction diff --git a/scripts/statistics/base/meansq.m b/scripts/statistics/base/meansq.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/base/meansq.m @@ -0,0 +1,34 @@ +## Copyright (C) 1995, 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: meansq (x) +## +## For vector arguments, return the mean square of the values. +## For matrix arguments, return a row vector contaning the mean square +## of each column. + +## Author: KH +## Description: Compute mean square + +function y = meansq (x) + + if (nargin != 1) + usage ("meansq (x)"); + endif + + y = mean (x.^2); + +endfunction diff --git a/scripts/statistics/base/median.m b/scripts/statistics/base/median.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/base/median.m @@ -0,0 +1,59 @@ +## Copyright (C) 1996, 1997 John W. Eaton +## +## This file is part of Octave. +## +## Octave is free software; you can redistribute it and/or modify it +## under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## Octave is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with Octave; see the file COPYING. If not, write to the Free +## Software Foundation, 59 Temple Place - Suite 330, Boston, MA +## 02111-1307, USA. + +## usage: median (a) +## +## For vector arguments, return the median of the values. +## +## For matrix arguments, return a row vector containing the median for +## each column. +## +## See also: std, mean + +## Author: jwe + +function retval = median (a) + + if (nargin != 1) + usage ("median (a)"); + endif + + [nr, nc] = size (a); + s = sort (a); + if (nr == 1 && nc > 0) + if (rem (nc, 2) == 0) + i = nc/2; + retval = (s (i) + s (i+1)) / 2; + else + i = ceil (nc/2); + retval = s (i); + endif + elseif (nr > 0 && nc > 0) + if (rem (nr, 2) == 0) + i = nr/2; + retval = (s (i,:) + s (i+1,:)) / 2; + else + i = ceil (nr/2); + retval = s (i,:); + endif + else + error ("median: invalid matrix argument"); + endif + +endfunction diff --git a/scripts/statistics/base/moment.m b/scripts/statistics/base/moment.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/base/moment.m @@ -0,0 +1,61 @@ +## Copyright (C) 1995, 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: moment (x, p [, opt]) +## +## Computes the p-th moment of x if it is a vector; if x is a matrix, +## return the row vector of the p-th moment of each column. +## +## With the optional string opt, the kind of moment to be computed can +## be specified. If opt contains `c' or `a', central and/or absolute +## moments are returned. I.e., `moment(x, 3, "ac")' computes the third +## central absolute moment of x. + +## Can easily be made to work for continuous distributions (using quad) +## as well, but how does the general case work? + +## Author: KH +## Description: Compute moments + +function m = moment (x, p, opt) + + if ((nargin < 2) || (nargin > 3)) + usage ("moment (x, p [, type]") + endif + + [nr, nc] = size (x); + if (nr == 0 || nc == 0) + error ("moment: x must not be empty"); + elseif (nr == 1) + x = reshape (x, nc, 1); + nr = nc; + endif + + if (nargin == 3) + tmp = implicit_str_to_num_ok; + implicit_str_to_num_ok = "true"; + if any (opt == "c") + x = x - ones (nr, 1) * sum (x) / nr; + endif + if any (opt == "a") + x = abs (x); + endif + implicit_str_to_num_ok = tmp; + endif + + m = sum(x .^ p) / nr; + +endfunction diff --git a/scripts/statistics/base/ols.m b/scripts/statistics/base/ols.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/base/ols.m @@ -0,0 +1,70 @@ +## Copyright (C) 1996, 1997 John W. Eaton +## +## This file is part of Octave. +## +## Octave is free software; you can redistribute it and/or modify it +## under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## Octave is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with Octave; see the file COPYING. If not, write to the Free +## Software Foundation, 59 Temple Place - Suite 330, Boston, MA +## 02111-1307, USA. + +## usage: [BETA, SIGMA [, R]] = ols (Y, X) +## +## Ordinary Least Squares (OLS) estimation for the multivariate model +## +## Y = X*B + E, mean(E) = 0, cov(vec(E)) = kron(S,I) +## +## with Y ... T x p As usual, each row of Y and X is an observation +## X ... T x k and each column a variable. +## B ... k x p +## E ... T x p. +## +## BETA is the OLS estimator for B, i.e. +## +## BETA = pinv(X)*Y, +## +## where pinv(X) denotes the pseudoinverse of X. +## SIGMA is the OLS estimator for the matrix S, i.e. +## +## SIGMA = (Y - X*BETA)'*(Y - X*BETA) / (T - rank(X)). +## +## R = Y - X*BETA is the matrix of OLS residuals. + +## Author: Teresa Twaroch +## Created: May 1993 +## Adapted-By: jwe + +function [BETA, SIGMA, R] = ols (Y, X) + + if (nargin != 2) + error("usage : [BETA, SIGMA [, R]] = ols (Y, X)"); + endif + + [nr, nc] = size (X); + [ry, cy] = size (Y); + if (nr != ry) + error ("ols: incorrect matrix dimensions"); + endif + + Z = X' * X; + r = rank (Z); + + if (r == nc) + BETA = inv (Z) * X' * Y; + else + BETA = pinv (X) * Y; + endif + + R = Y - X * BETA; + SIGMA = R' * R / (nr - r); + +endfunction diff --git a/scripts/statistics/base/ppplot.m b/scripts/statistics/base/ppplot.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/base/ppplot.m @@ -0,0 +1,67 @@ +## Copyright (C) 1995, 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: [p, y] = ppplot (x [, dist [, params]]) +## +## Performs a PP-plot (probability plot). +## +## If F is the CDF of the distribution `dist' with parameters `params' +## and x a sample vector of length n, the PP-plot graphs ordinate y(i) = +## F (i-th largest element of x) versus abscissa p(i) = (i - 0.5)/n. If +## the sample comes from F, the pairs will approximately follow a +## straight line. +## +## The default for `dist' is the standard normal distribution. The +## optional argument `params' contains a list of parameters of +## `dist'. E.g., for a probability plot of the uniform distribution on +## [2,4] ans x, use `ppplot (x, "uniform", 2, 4)'. +## +## If no output arguments are given, the data are plotted directly. + +## Author: KH +## Description: Perform a PP-plot (probability plot) + +function [p, y] = ppplot (x, dist, ...) + + if (nargin < 1) + usage ("ppplot (x [, dist [, params]])"); + endif + + if !is_vector (x) + error ("ppplot: x must be a vector."); + endif + + s = sort (x); + n = length (x); + p = ((1 : n)' - 0.5) / n; + if (nargin == 1) + F = "stdnormal_cdf"; + else + F = sprintf ("%s_cdf", dist); + endif; + if (nargin <= 2) + y = feval (F, s); + else + y = feval (F, s, all_va_args); + endif + + if (nargout == 0) + axis ([0, 1, 0, 1]); + set nokey; + plot (p, y); + endif + +endfunction diff --git a/scripts/statistics/base/probit.m b/scripts/statistics/base/probit.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/base/probit.m @@ -0,0 +1,32 @@ +## Copyright (C) 1995, 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## For each component of p, return the probit (the quantile of the +## standard normal distribution) of p. + +## Written by KH on 1995/02/04 +## Description: Probit transformation + +function y = probit (p) + + if (nargin == 1) + y = stdnormal_inv (p); + else + usage ("probit (p)"); + endif + +endfunction + \ No newline at end of file diff --git a/scripts/statistics/base/qqplot.m b/scripts/statistics/base/qqplot.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/base/qqplot.m @@ -0,0 +1,76 @@ +## Copyright (C) 1995, 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: [q, s] = qqplot (x [, dist [, params]]) +## +## Performs a QQ-plot (quantile plot). +## +## If F is the CDF of the distribution `dist' with parameters `params' +## and G its inverse, and x a sample vector of length n, the QQ-plot +## graphs ordinate s(i) = i-th largest element of x versus abscissa q(i) +## = G((i - 0.5)/n). +## +## If the sample comes from F except for a transformation of location +## and scale, the pairs will approximately follow a straight line. +## +## The default for `dist' is the standard normal distribution. The +## optional argument `params' contains a list of parameters of +## `dist'. E.g., for a quantile plot of the uniform distribution on +## [2,4] and x, use `qqplot (x, "uniform", 2, 4)'. +## +## If no output arguments are given, the data are plotted directly. + +## Author: KH +## Description: Perform a QQ-plot (quantile plot) + +function [q, s] = qqplot (x, dist, ...) + + if (nargin < 1) + usage ("qqplot (x [,dist [,params]])"); + endif + + if !(is_vector(x)) + error ("qqplot: x must be a vector."); + endif + + s = sort (x); + n = length (x); + t = ((1 : n)' - .5) / n; + if (nargin == 1) + f = "stdnormal_inv"; + else + f = sprintf ("%s_inv", dist); + endif; + if (nargin <= 2) + q = feval (f, t); + q_label = f; + else + param_string = sprintf ("%g", va_arg ()); + for k = 2 : (nargin - 2); + param_string = sprintf ("%s, %g", param_string, va_arg ()) + endfor + q = eval (sprintf ("%s (t, %s);", f, param_string)); + q_label = sprintf ("%s with parameter(s) %s", f, param_string); + endif + + if (nargout == 0) + xlabel (q_label); + ylabel ("sample points"); + set nokey; + plot (q, s); + endif + +endfunction diff --git a/scripts/statistics/base/range.m b/scripts/statistics/base/range.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/base/range.m @@ -0,0 +1,34 @@ +## Copyright (C) 1995, 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: range (x) +## +## If x is a vector, return the range, i.e., the difference between the +## maximum and the minimum, of the input data. +## If x is a matrix, do the above for each column of x. + +## Author: KH +## Description: Compute range + +function y = range (x) + + if (nargin != 1) + usage ("range (x)"); + endif + + y = max (x) - min (x); + +endfunction diff --git a/scripts/statistics/base/ranks.m b/scripts/statistics/base/ranks.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/base/ranks.m @@ -0,0 +1,49 @@ +## Copyright (C) 1995, 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: ranks (x) +## +## If x is a vector, return the (column) vector of ranks of x adjusted +## for ties. +## If x is a matrix, do the above for each column of x. + +## Author: KH +## Description: Compute ranks + +## This code is rather ugly, but is there an easy way to get the ranks +## adjusted for ties from sort? + +function y = ranks (x) + + if (nargin != 1) + usage ("ranks (x)"); + endif + + y = []; + + [r, c] = size (x); + if ((r == 1) && (c > 0)) + p = x' * ones (1, c); + y = sum (p < p') + (sum (p == p') + 1) / 2; + elseif (r > 1) + o = ones (1, r); + for i = 1 : c; + p = x (:, i) * o; + y = [y, ( sum (p < p') + (sum (p == p') + 1) / 2 )']; + endfor + endif + +endfunction diff --git a/scripts/statistics/base/run_count.m b/scripts/statistics/base/run_count.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/base/run_count.m @@ -0,0 +1,57 @@ +## Copyright (C) 1995, 1996, 1997 Friedrich Leisch +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: run_count (x, n) +## +## Counts the upward runs in the columns of x of length 1, 2, ... n-1 +## and >= n. + +## Author: FL +## Description: Count upward runs + +function retval = run_count (x, n) + + [xr, xc] = size(x); + + tmp = zeros (xr,xc); + retval = zeros (n, xc); + + for j = 1 : xc + run = 1; + count = 1; + + for k = 2 : xr + + if x(k, j) < x(k-1, j) + tmp(run, j) = count; + run = run + 1; + count = 0; + endif + + count = count + 1; + + endfor + + tmp(run, j) = count; + + endfor + + for k=1 : (n-1) + retval(k, :) = sum (tmp == k); + endfor + retval(n, :) = sum (tmp >= n); + +endfunction diff --git a/scripts/statistics/base/skewness.m b/scripts/statistics/base/skewness.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/base/skewness.m @@ -0,0 +1,59 @@ +## Copyright (C) 1996, 1997 John W. Eaton +## +## This file is part of Octave. +## +## Octave is free software; you can redistribute it and/or modify it +## under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## Octave is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with Octave; see the file COPYING. If not, write to the Free +## Software Foundation, 59 Temple Place - Suite 330, Boston, MA +## 02111-1307, USA. + +## usage: skewness (x) +## +## If x is a vector of length N, return the skewness +## +## skewness (x) = N^(-1) std(x)^(-3) SUM_i (x(i)-mean(x))^3 +## +## of x. +## +## If x is a matrix, return a row vector containing the skewness for each +## column. + +## Author: KH +## Created: 29 July 1994 +## Adapted-By: jwe + +function retval = skewness (x) + + if (nargin != 1) + usage ("skewness (x)"); + endif + + if (is_vector (x)) + x = x - mean (x); + if (! any (x)) + retval = 0; + else + retval = sum (x .^ 3) / (length (x) * std (x) ^ 3); + endif + elseif (is_matrix (x)) + [nr, nc] = size (x); + x = x - ones (nr, 1) * mean (x); + retval = zeros (1, nc); + s = std (x); + ind = find (s > 0); + retval (ind) = sum (x (:, ind) .^ 3) ./ (nr * s (ind) .^ 3); + else + error ("skewness: x has to be a matrix or a vector."); + endif + +endfunction diff --git a/scripts/statistics/base/spearman.m b/scripts/statistics/base/spearman.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/base/spearman.m @@ -0,0 +1,61 @@ +## Copyright (C) 1995, 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: spearman (x [, y]) +## +## Computes Spearman's rank correlation coefficient rho for each of the +## variables specified by the input arguments. +## +## For matrices, each row is an observation and each column a variable; +## vectors are always observations and may be row or column vectors. +## +## spearman (x) is equivalent to spearman (x, x). +## +## For two data vectors x and y, Spearman's rho is the correlation of +## the ranks of x and y. +## +## If x and y are drawn from independent distributions, rho has zero +## mean and variance 1 / (n - 1), and is asymptotically normally +## distributed. + +## Author: KH +## Description: Spearman's rank correlation rho + +function rho = spearman (x, y) + + if ((nargin < 1) || (nargin > 2)) + usage ("spearman (x [, y])"); + endif + + if (rows (x) == 1) + x = x'; + endif + n = rows (x); + + if (nargin == 1) + rho = cor (ranks (x)); + else + if (rows (y) == 1) + y = y'; + endif + if (rows (y) != n) + error (["spearman: ", ... + "x and y must have the same number of observations"]); + endif + rho = cor (ranks (x), ranks (y)); + endif + +endfunction diff --git a/scripts/statistics/base/statistics.m b/scripts/statistics/base/statistics.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/base/statistics.m @@ -0,0 +1,47 @@ +## Copyright (C) 1995, 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: S = statistics (X) +## +## If X is a matrix, return a matrix S with the min, first quartile, +## median, third quartile, max, mean, std, skewness and kurtosis of the +## columns of X as its rows. +## +## If X is a vector, treat it as a column vector. + +## Author: KH +## Description: Compute basic statistics + +function S = statistics (X) + + if (nargin != 1) + usage ("S = statistics (X)"); + endif + + if (prod (size (X)) > 1) + if (is_vector (X)) + X = reshape (X, length (X), 1); + endif + for k=1:columns(X) + S(:,k) = [min (X(:,k)); empirical_inv ([0.25;0.5;0.75], X(:,k)); + max (X(:,k)); mean (X(:,k)); std (X(:,k)); + skewness (X(:,k)); kurtosis (X(:,k))]; + endfor + else + error ("statistics: invalid argument"); + endif + +endfunction diff --git a/scripts/statistics/base/std.m b/scripts/statistics/base/std.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/base/std.m @@ -0,0 +1,49 @@ +## Copyright (C) 1996, 1997 John W. Eaton +## +## This file is part of Octave. +## +## Octave is free software; you can redistribute it and/or modify it +## under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## Octave is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with Octave; see the file COPYING. If not, write to the Free +## Software Foundation, 59 Temple Place - Suite 330, Boston, MA +## 02111-1307, USA. + +## usage: std (a) +## +## For vector arguments, std returns the standard deviation of the +## values. For matrix arguments, std returns a row vector containing +## the standard deviation for each column. +## +## See also: mean, median + +## Author: jwe + +function retval = std (a) + + if (nargin != 1) + usage ("std (a)"); + endif + + nr = rows (a); + nc = columns (a); + if (nc == 1 && nr == 1) + retval = 0; + elseif (nc == 1 || nr == 1) + n = length (a); + retval = sqrt (sumsq (a - mean (a)) / (n - 1)); + elseif (nr > 1 && nc > 0) + retval = sqrt (sumsq (a - ones (nr, 1) * mean (a)) / (nr - 1)); + else + error ("std: invalid matrix argument"); + endif + +endfunction diff --git a/scripts/statistics/base/studentize.m b/scripts/statistics/base/studentize.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/base/studentize.m @@ -0,0 +1,47 @@ +## Copyright (C) 1995, 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: studentize (x) +## +## If x is a vector, subtract its mean and divide by its standard +## deviation. +## +## If x is a matrix, do the above for each column. + +## Author: KH +## Description: Subtract mean and divide by standard deviation + +function t = studentize (x) + + if (nargin != 1) + usage ("studentize (x)"); + endif + + if is_vector (x) + if (std (x) == 0) + t = zeros (size (x)); + else + t = (x - mean (x)) / std (x); + endif + elseif is_matrix (x) + l = ones (rows (x), 1); + t = x - l * mean (x); + t = t ./ (l * max ([std (t); !any (t)])); + else + error ("studentize: x must be a vector or a matrix."); + endif + +endfunction \ No newline at end of file diff --git a/scripts/statistics/base/table.m b/scripts/statistics/base/table.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/base/table.m @@ -0,0 +1,54 @@ +## Copyright (C) 1995, 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: [t, l_x] = table (x) +## [t, l_x, l_y] = table (x, y) +## +## Create a contingency table t from data vectors. The l vectors are +## the corresponding levels. +## +## Currently, only 1- and 2-dimensional tables are supported. + +## Author: KH +## Description: Cross tabulation + +function [t, v, w] = table (x, y) + + if (nargin == 1) + if !(is_vector (x)) + error ("table: x must be a vector"); + endif + v = values (x); + for i = 1 : length (v) + t(i) = sum (x == v(i) | isnan (v(i)) * isnan (x)); + endfor + elseif (nargin == 2) + if !(is_vector (x) && is_vector (y) && (length (x) == length (y))) + error ("table: x and y must be vectors of the same length"); + endif + v = values (x); + w = values (y); + for i = 1 : length (v) + for j = 1 : length (w) + t(i,j) = sum ((x == v(i) | isnan (v(i)) * isnan (x)) & + (y == w(j) | isnan (w(j)) * isnan (y))); + endfor + endfor + else + usage ("[t, l_x, ...] = table (x, ...)"); + endif + +endfunction \ No newline at end of file diff --git a/scripts/statistics/base/values.m b/scripts/statistics/base/values.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/base/values.m @@ -0,0 +1,45 @@ +## Copyright (C) 1995, 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: values (x) +## +## Return the different values in a column vector, arranged in ascending +## order. + +## Author: KH +## Description: Extract unique elements + +function v = values (x) + + if (nargin != 1) + usage ("values (x)"); + endif + + if !(is_vector (x)) + error ("values: x must be a vector"); + endif + + i = any (isnan (x)); + x = x(find(!isnan (x))); # HACK! + n = length (x); + x = reshape (x, n, 1); + s = sort (x); + v = s([1; find (s(2:n) > s(1:n-1)) + 1]); + if (i) + v = [v; NaN]; + endif + +endfunction diff --git a/scripts/statistics/base/var.m b/scripts/statistics/base/var.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/base/var.m @@ -0,0 +1,44 @@ +## Copyright (C) 1995, 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: var (x) +## +## For vector arguments, return the (real) variance of the values. +## For matrix arguments, return a row vector contaning the variance for +## each column. + +## Author: KH +## Description: Compute variance + +function y = var(x) + + if (nargin != 1) + usage ("var (x)"); + endif + + [nr, nc] = size (x); + if (nr == 0 || nc == 0) + error ("var: x must not be empty"); + elseif ((nr == 1) && (nc == 1)) + y = 0; + elseif ((nr == 1) || (nc == 1)) + n = length (x); + y = (sumsq (x) - sum(x)^2 / n) / (n - 1); + else + y = (sumsq (x) - sum(x).^2 / nr) / (nr - 1); + endif + +endfunction diff --git a/scripts/statistics/distributions/Makefile.in b/scripts/statistics/distributions/Makefile.in new file mode 100644 --- /dev/null +++ b/scripts/statistics/distributions/Makefile.in @@ -0,0 +1,75 @@ +# +# Makefile for octave's scripts/statistics/distributions directory +# +# John W. Eaton +# jwe@bevo.che.wisc.edu +# University of Wisconsin-Madison +# Department of Chemical Engineering + +TOPDIR = ../../.. + +script_sub_dir = statistics/distributions + +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ +VPATH = @srcdir@ + +include $(TOPDIR)/Makeconf + +INSTALL = @INSTALL@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_DATA = @INSTALL_DATA@ + +SOURCES = *.m + +DISTFILES = Makefile.in $(SOURCES) + +FCN_FILES = $(wildcard $(srcdir)/*.m) +FCN_FILES_NO_DIR = $(notdir $(FCN_FILES)) + +BINDISTFILES = $(FCN_FILES) + +all: +.PHONY: all + +install install-strip: + $(top_srcdir)/mkinstalldirs $(fcnfiledir)/$(script_sub_dir) + for f in $(FCN_FILES_NO_DIR); do \ + rm -f $(fcnfiledir)/$(script_sub_dir)/$$f; \ + $(INSTALL_DATA) $(srcdir)/$$f $(fcnfiledir)/$(script_sub_dir)/$$f; \ + done +.PHONY: install install-strip + +uninstall: + for f in $(FCN_FILES_NO_DIR); \ + do rm -f $(fcnfiledir)/$(script_sub_dir)/$$f; \ + done +.PHONY: uninstall + +clean: +.PHONY: clean + +tags: $(SOURCES) + ctags $(SOURCES) + +TAGS: $(SOURCES) + etags $(SOURCES) + +mostlyclean: clean +.PHONY: mostlyclean + +distclean: clean + rm -f Makefile +.PHONY: distclean + +maintainer-clean: distclean + rm -f tags TAGS +.PHONY: maintainer-clean + +dist: + ln $(DISTFILES) ../../../`cat ../../../.fname`/scripts/$(script_sub_dir) +.PHONY: dist + +bin-dist: + ln $(BINDISTFILES) ../../../`cat ../../../.fname`/scripts/$(script_sub_dir) +.PHONY: bin-dist diff --git a/scripts/statistics/models/Makefile.in b/scripts/statistics/models/Makefile.in new file mode 100644 --- /dev/null +++ b/scripts/statistics/models/Makefile.in @@ -0,0 +1,75 @@ +# +# Makefile for octave's scripts/statistics/models directory +# +# John W. Eaton +# jwe@bevo.che.wisc.edu +# University of Wisconsin-Madison +# Department of Chemical Engineering + +TOPDIR = ../../.. + +script_sub_dir = statistics/models + +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ +VPATH = @srcdir@ + +include $(TOPDIR)/Makeconf + +INSTALL = @INSTALL@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_DATA = @INSTALL_DATA@ + +SOURCES = *.m + +DISTFILES = Makefile.in $(SOURCES) + +FCN_FILES = $(wildcard $(srcdir)/*.m) +FCN_FILES_NO_DIR = $(notdir $(FCN_FILES)) + +BINDISTFILES = $(FCN_FILES) + +all: +.PHONY: all + +install install-strip: + $(top_srcdir)/mkinstalldirs $(fcnfiledir)/$(script_sub_dir) + for f in $(FCN_FILES_NO_DIR); do \ + rm -f $(fcnfiledir)/$(script_sub_dir)/$$f; \ + $(INSTALL_DATA) $(srcdir)/$$f $(fcnfiledir)/$(script_sub_dir)/$$f; \ + done +.PHONY: install install-strip + +uninstall: + for f in $(FCN_FILES_NO_DIR); \ + do rm -f $(fcnfiledir)/$(script_sub_dir)/$$f; \ + done +.PHONY: uninstall + +clean: +.PHONY: clean + +tags: $(SOURCES) + ctags $(SOURCES) + +TAGS: $(SOURCES) + etags $(SOURCES) + +mostlyclean: clean +.PHONY: mostlyclean + +distclean: clean + rm -f Makefile +.PHONY: distclean + +maintainer-clean: distclean + rm -f tags TAGS +.PHONY: maintainer-clean + +dist: + ln $(DISTFILES) ../../../`cat ../../../.fname`/scripts/$(script_sub_dir) +.PHONY: dist + +bin-dist: + ln $(BINDISTFILES) ../../../`cat ../../../.fname`/scripts/$(script_sub_dir) +.PHONY: bin-dist diff --git a/scripts/statistics/tests/Makefile.in b/scripts/statistics/tests/Makefile.in new file mode 100644 --- /dev/null +++ b/scripts/statistics/tests/Makefile.in @@ -0,0 +1,75 @@ +# +# Makefile for octave's scripts/statistics/tests directory +# +# John W. Eaton +# jwe@bevo.che.wisc.edu +# University of Wisconsin-Madison +# Department of Chemical Engineering + +TOPDIR = ../../.. + +script_sub_dir = statistics/tests + +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ +VPATH = @srcdir@ + +include $(TOPDIR)/Makeconf + +INSTALL = @INSTALL@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_DATA = @INSTALL_DATA@ + +SOURCES = *.m + +DISTFILES = Makefile.in $(SOURCES) + +FCN_FILES = $(wildcard $(srcdir)/*.m) +FCN_FILES_NO_DIR = $(notdir $(FCN_FILES)) + +BINDISTFILES = $(FCN_FILES) + +all: +.PHONY: all + +install install-strip: + $(top_srcdir)/mkinstalldirs $(fcnfiledir)/$(script_sub_dir) + for f in $(FCN_FILES_NO_DIR); do \ + rm -f $(fcnfiledir)/$(script_sub_dir)/$$f; \ + $(INSTALL_DATA) $(srcdir)/$$f $(fcnfiledir)/$(script_sub_dir)/$$f; \ + done +.PHONY: install install-strip + +uninstall: + for f in $(FCN_FILES_NO_DIR); \ + do rm -f $(fcnfiledir)/$(script_sub_dir)/$$f; \ + done +.PHONY: uninstall + +clean: +.PHONY: clean + +tags: $(SOURCES) + ctags $(SOURCES) + +TAGS: $(SOURCES) + etags $(SOURCES) + +mostlyclean: clean +.PHONY: mostlyclean + +distclean: clean + rm -f Makefile +.PHONY: distclean + +maintainer-clean: distclean + rm -f tags TAGS +.PHONY: maintainer-clean + +dist: + ln $(DISTFILES) ../../../`cat ../../../.fname`/scripts/$(script_sub_dir) +.PHONY: dist + +bin-dist: + ln $(BINDISTFILES) ../../../`cat ../../../.fname`/scripts/$(script_sub_dir) +.PHONY: bin-dist diff --git a/scripts/statistics/tests/anova.m b/scripts/statistics/tests/anova.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/tests/anova.m @@ -0,0 +1,105 @@ +## Copyright (C) 1995, 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## Performs a one-way analysis of variance (ANOVA). The goal is to test +## whether the population means of data taken from k different groups +## are all equal. +## +## anova (y, g) provides all data in a single vector y; g is the vector +## of corresponding group labels (e.g., numbers from 1 to k). This is +## the general form which does not impose any restriction on the number +## of data in each group or the group labels (other than that they must +## be scalars). +## +## anova (y), where y is a matrix, treats each column as a group. This +## form is only appropriate for balanced ANOVA where the numbers of +## samples from each group are all equal. +## +## Under the null of constant means, the statistic f follows an F +## distribution with df_b and df_w degrees of freedom. pval is the +## p-value (1 minus the CDF of this distribution at f) of the test. +## +## If no output argument is given, the standard one-way ANOVA table is +## printed. + +## Author: KH +## Description: One-way analysis of variance (ANOVA) + +function [pval, f, df_b, df_w] = anova (y, g) + + if ((nargin < 1) || (nargin > 2)) + usage ("anova (y [, g])"); + elseif (nargin == 1) + if (is_vector (y)) + error ("anova: for `anova (y)', y must not be a vector"); + endif + [group_count, k] = size (y); + n = group_count * k; + group_mean = mean (y); + else + if (! is_vector (y)) + error ("anova: for `anova (y, g)', y must be a vector"); + endif + n = length (y); + if (! is_vector (g) || (length (g) != n)) + error (["anova: for `anova (y, g)', g must be a vector", ... + " of the same length y"]); + endif + s = sort (g); + i = find (s (2 : n) > s(1 : (n-1))); + k = length (i) + 1; + if (k == 1) + error ("anova: there should be at least 2 groups"); + else + group_label = s ([1, reshape (i, 1, k-1) + 1]); + endif + for i = 1 : k; + v = y (find (g == group_label (i))); + group_count (i) = length (v); + group_mean (i) = mean (v); + endfor + + endif + + total_mean = mean (group_mean); + SSB = sum (group_count .* (group_mean - total_mean) .^ 2); + SST = sumsq (reshape (y, n, 1) - total_mean); + SSW = SST - SSB; + df_b = k - 1; + df_w = n - k; + v_b = SSB / df_b; + v_w = SSW / df_w; + f = v_b / v_w; + pval = 1 - f_cdf (f, df_b, df_w); + + if (nargout == 0) + ## This eventually needs to be done more cleanly ... + printf ("\n"); + printf ("One-way ANOVA Table:\n"); + printf ("\n"); + printf ("Source of Variation Sum of Squares df Empirical Var\n"); + printf ("*********************************************************\n"); + printf ("Between Groups %15.4f %4d %13.4f\n", SSB, df_b, v_b); + printf ("Within Groups %15.4f %4d %13.4f\n", SSW, df_w, v_w); + printf ("---------------------------------------------------------\n"); + printf ("Total %15.4f %4d\n", SST, n - 1); + printf ("\n"); + printf ("Test Statistic f %15.4f\n", f); + printf ("p-value %15.4f\n", pval); + printf ("\n"); + endif + +endfunction diff --git a/scripts/statistics/tests/bartlett_test.m b/scripts/statistics/tests/bartlett_test.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/tests/bartlett_test.m @@ -0,0 +1,62 @@ +## Copyright (C) 1995, 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: [pval, chisq, df] = bartlett_test (x1, ...) +## +## Performs a Bartlett test for the homogeneity of variances in the data +## vectors x1, x2, ..., xk, where k > 1. +## +## Under the null of equal variances, the test statistic chisq +## approximately ollows a chi-square distribution with df degrees of +## freedom; pval is the p-value (1 minus the CDF of this distribution at +## chisq) of the test. +## +## If no output argument is given, the p-value is displayed. + +## Author: KH +## Description: Bartlett test for homogeneity of variances + +function [pval, chisq, df] = bartlett_test (...) + + k = nargin; + if (k < 2) + usage ("[pval, chisq, df] = bartlett_test (x1, ...)"); + endif + + f = zeros (k, 1); + v = zeros (k, 1); + va_start (); + for i = 1 : k; + x = va_arg (); + if (! is_vector (x)) + error ("bartlett_test: all arguments must be vectors"); + endif + f(i) = length (x) - 1; + v(i) = var (x); + endfor + + f_tot = sum (f); + v_tot = sum (f .* v) / f_tot; + c = 1 + (sum (1 ./ f) - 1 / f_tot) / (3 * (k - 1)); + chisq = (f_tot * log (v_tot) - sum (f .* log (v))) / c; + df = k; + pval = 1 - chisquare_cdf (chisq, df); + + if (nargout == 0) + printf(" pval: %g\n", pval); + endif + +endfunction diff --git a/scripts/statistics/tests/chisquare_test_homogeneity.m b/scripts/statistics/tests/chisquare_test_homogeneity.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/tests/chisquare_test_homogeneity.m @@ -0,0 +1,63 @@ +## Copyright (C) 1995, 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: [pval, chisq, df] = chisquare_test_homogeneity (x, y, c) +## +## Given two samples x and y, perform a chisquare test for homogeneity +## of the null hypothesis that x and y come from the same distribution, +## based on the partition induced by the (strictly increasing) entries +## of c. +## +## For large samples, the test statistic chisq approximately follows a +## chisquare distribution with df = length(c) degrees pf freedom. pval +## is the p-value (1 minus the CDF of this distribution at chisq) of the +## test. +## +## If no output argument is given, the p-value is displayed. + +## Author: KH +## Description: Chi-square test for homogeneity + +function [pval, chisq, df] = chisquare_test_homogeneity (x, y, c) + + if (nargin != 3) + usage ("[pval, chisq, df] = chisquare_test_homogeneity (x, y, c)"); + endif + + if (! (is_vector(x) && is_vector(y) && is_vector(c))) + error ("chisquare_test_homogeneity: x, y and c must be vectors"); + endif + ## Now test c for strictly increasing entries + df = length (c); + if (any ( (c(2 : df) - c(1 : (df - 1))) <= 0)) + error ("chisquare_test_homogeneity: c must be increasing"); + endif + + c = [reshape (c, 1, df), Inf]; + l_x = length (x); + x = reshape (x, l_x, 1); + n_x = sum (x * ones (1, df+1) < ones (l_x, 1) * c); + l_y = length (y); + y = reshape (y, l_y, 1); + n_y = sum(y * ones (1, df+1) < ones (l_y, 1) * c); + chisq = l_x * l_y * sum ((n_x/l_x - n_y/l_y).^2 ./ (n_x + n_y)); + pval = 1 - chisquare_cdf (chisq, df); + + if (nargout == 0) + printf(" pval: %g\n", pval); + endif + +endfunction diff --git a/scripts/statistics/tests/chisquare_test_independence.m b/scripts/statistics/tests/chisquare_test_independence.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/tests/chisquare_test_independence.m @@ -0,0 +1,49 @@ +## Copyright (C) 1995, 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: [pval, chisq, df] = chisquare_test_independence (X) +## +## Perform a chi-square test for indepence based on the contingency +## table X. +## +## Under the null hypothesis of independence, chisq approximately has a +## chi-square distribution with df degrees of freedom. pval is the +## p-value (1 minus the CDF of this distribution at chisq) of the test. +## +## If no output argument is given, the p-value is displayed. + +## Author: KH +## Description: Chi-square test for independence + +function [pval, chisq, df] = chisquare_test_independence (X) + + if (nargin != 1) + usage ("chisquare_test_independence (X)"); + endif + + [r s] = size (X); + df = (r - 1) * (s - 1); + n = sum (sum (X)); + Y = sum (X')' * sum (X) / n; + X = (X - Y) .^2 ./ Y; + chisq = sum (sum (X)); + pval = 1 - chisquare_cdf (chisq, df); + + if (nargout == 0) + printf(" pval: %g\n", pval); + endif + +endfunction \ No newline at end of file diff --git a/scripts/statistics/tests/cor_test.m b/scripts/statistics/tests/cor_test.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/tests/cor_test.m @@ -0,0 +1,117 @@ +## Copyright (C) 1995, 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: cor_test (X, Y [, ALTERNATIVE [, METHOD]]) +## +## Test whether two samples X and Y come from uncorrelated populations. +## +## The optional argument string ALTERNATIVE describes the alternative +## hypothesis, and can be "!=" or "<>" (non-zero), ">" (greater than 0), +## or "<" (less than 0). The default is the two-sided case. +## +## The optional argument string METHOD specifies on which correlation +## coefficient the test should be based. +## If METHOD is "pearson" (default), the (usual) Pearson's product +## moment correlation coefficient is used. In this case, the data +## should come from a bivariate normal distribution. Otherwise, the +## other two methods offer nonparametric alternatives. +## If METHOD is "kendall", then Kendall's rank correlation tau is used. +## If METHOD is "spearman", then Spearman's rank correlation rho is used. +## Only the first character is necessary. +## +## The output is a structure with the following elements: +## pval The p-value of the test. +## stat The value of the test statistic. +## dist The distribution of the test statistic. +## params The parameters of the null distribution of the +## test statistic. +## alternative The alternative hypothesis. +## method The method used for testing. +## +## If no output argument is given, the pval is displayed. + +## Author: FL +## Adapted-by: KH +## Description: Test for zero correlation + +function t = cor_test (X, Y, ALTERNATIVE, METHOD) + + if ((nargin < 2) || (nargin > 4)) + usage ("cor_test (X, Y [, ALTERNATIVE [, METHOD]])") + endif + + if (!is_vector (X) || !is_vector (Y) || length (X) != length (Y)) + error ("cor_test: X and Y must be vectors of the same length") + endif + + if (nargin < 3) + ALTERNATIVE = "!="; + elseif !isstr (ALTERNATIVE) + error ("cor_test: ALTERNATIVE must be a string"); + endif + + if (nargin < 4) + METHOD = "pearson"; + elseif !isstr (METHOD) + error ("cor_test: METHOD must be a string"); + endif + + n = length (X); + m = METHOD (1); + + if (m == "p") + r = cor (X, Y); + df = n - 2; + t.method = "Pearson's product moment correlation"; + t.params = df; + t.stat = sqrt (df) .* r / sqrt (1 - r.^2); + t.dist = "t"; + cdf = t_cdf (t.stat, df); + elseif (m == "k") + tau = kendall (X, Y); + t.method = "Kendall's rank correlation tau"; + t.params = []; + t.stat = tau / sqrt ((2 * (2*n+5)) / (9*n*(n-1))); + t.dist = "stdnormal"; + cdf = stdnormal_cdf (t.stat); + elseif (m == "s") + rho = spearman (X, Y); + t.method = "Spearman's rank correlation rho"; + t.params = []; + t.stat = sqrt (n-1) * (rho - 6/(n^3-n)); + t.dist = "stdnormal"; + cdf = stdnormal_cdf (t.stat); + else + error ("cor_test: method `%s' not recognized", METHOD) + endif + + if (strcmp (ALTERNATIVE, "!=") || strcmp (ALTERNATIVE, "<>")) + t.pval = 2 * min (cdf, 1 - cdf); + elseif (strcmp (ALTERNATIVE, ">")) + t.pval = 1 - cdf; + elseif (strcmp (ALTERNATIVE, "<")) + t.pval = cdf; + else + error ("cor_test: alternative `%s' not recognized", ALTERNATIVE); + endif + + t.alternative = ALTERNATIVE; + + if (nargout == 0) + printf ("pval: %g\n", t.pval); + endif + +endfunction diff --git a/scripts/statistics/tests/f_test_regression.m b/scripts/statistics/tests/f_test_regression.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/tests/f_test_regression.m @@ -0,0 +1,76 @@ +## Copyright (C) 1995, 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: [pval, f, df_num, df_den] = f_test_regression (y, X, R [, r]) +## +## Performs an F test for the null hypothesis R * b = r in a classical +## normal regression model y = X * b + e. +## +## Under the null, the test statistic f follows an F distribution with +## df_num and df_den degrees of freedom; pval is the p-value (1 minus +## the CDF of this distribution at f) of the test. +## +## If not given explicitly, r = 0. +## +## If no output argument is given, the p-value is displayed. + +## Author: KH +## Description: Test linear hypotheses in linear regression model + +function [pval, f, df_num, df_den] = f_test_regression (y, X, R, r) + + if (nargin < 3 || nargin > 4) + usage (["[pval, f, df_num, df_den] ", ... + "= f_test_regression (y, X, R [, r])"]); + endif + + [T, k] = size (X); + if !( is_vector (y) && (length (y) == T) ) + error (["f_test_regression: ", ... + "y must be a vector of length rows (X)."]); + endif + y = reshape (y, T, 1); + + [q, c_R ] = size (R); + if (c_R != k) + error (["f_test_regression: ", ... + "R must have as many columns as X."]); + endif + + if (nargin == 4) + s_r = size (r); + if ((min (s_r) != 1) || (max (s_r) != q)) + error (["f_test_regression: ", ... + "r must be a vector of length rows (R)."]); + endif + r = reshape (r, q, 1); + else + r = zeros (q, 1); + endif + + df_num = q; + df_den = T - k; + + [b, v] = ols (y, X); + diff = R * b - r; + f = diff' * inv (R * inv (X' * X) * R') * diff / ( q * v ); + pval = 1 - f_cdf (f, df_num, df_den); + + if (nargout == 0) + printf (" pval: %g\n", pval); + endif + +endfunction diff --git a/scripts/statistics/tests/hotelling_test.m b/scripts/statistics/tests/hotelling_test.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/tests/hotelling_test.m @@ -0,0 +1,70 @@ +## Copyright (C) 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: [pval, Tsq] = hotelling_test (x, m) +## +## For a sample x from a multivariate normal distribution with unknown +## mean and covariance matrix, test the null hypothesis that mean (x) == +## m. +## +## Tsq is Hotelling's T^2. Under the null, (n-p) T^2 / (p(n-1)) has an +## F distribution with p and n-p degrees of freedom, where n and p are +## the numbers of samples and variables, respectively. +## +## pval is the p-value of the test. +## +## If no output argument is given, the p-value of the test is displayed. + +## Author: KH +## Description: Test for mean of a multivariate normal + +function [pval, Tsq] = hotelling_test (x, m) + + if (nargin != 2) + usage ("hotelling_test (x, m)"); + endif + + if (is_vector (x)) + if (! is_scalar (m)) + error ("hotelling_test: If x is a vector, m must be a scalar."); + endif + n = length (x); + p = 1; + elseif (is_matrix (x)) + [n, p] = size (x); + if (n <= p) + error ("hotelling_test: x must have more rows than columns."); + endif + if (is_vector (m) && length (m) == p) + m = reshape (m, 1, p); + else + error (strcat ("hotelling_test: ", + "If x is a matrix, m must be a vector\n", + "\tof length columns (x)")); + endif + else + error ("hotelling_test: x must be a matrix or vector"); + endif + + d = mean (x) - m; + Tsq = n * d * (cov (x) \ d'); + pval = 1 - f_cdf ((n-p) * Tsq / (p * (n-1)), p, n-p); + + if (nargout == 0) + printf (" pval: %g\n", pval); + endif + +endfunction diff --git a/scripts/statistics/tests/hotelling_test_2.m b/scripts/statistics/tests/hotelling_test_2.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/tests/hotelling_test_2.m @@ -0,0 +1,71 @@ +## Copyright (C) 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: [pval, Tsq] = hotelling_test_2 (x, y) +## +## For two samples x from multivariate normal distributions with the +## same number of variables (columns), unknown means and unknown equal +## covariance matrices, test the null hypothesis mean (x) == mean (y). +## +## Tsq is Hotelling's two-sample T^2. Under the null, +## (n_x+n_y-p-1) T^2 / (p(n_x+n_y-2)) +## has an F distribution with p and n_x+n_y-p-1 degrees of freedom, +## where n_x and n_y are the sample sizes and p is the number of +## variables. +## +## pval is the p-value of the test. +## +## If no output argument is given, the p-value of the test is displayed. + +## Author: KH +## Description: Compare means of two multivariate normals + +function [pval, Tsq] = hotelling_test_2 (x, y) + + if (nargin != 2) + usage ("hotelling_test_2 (x, y)"); + endif + + if (is_vector (x)) + n_x = length (x); + if (! is_vector (y)) + error ("hotelling_test_2: If x is a vector, y must be too."); + else + n_y = length (y); + p = 1; + endif + elseif (is_matrix (x)) + [n_x, p] = size (x); + [n_y, q] = size (y); + if (p != q) + error (strcat ("hotelling_test_2: ", + "x and y must have the same number of columns")); + endif + else + error ("hotelling_test_2: x and y must be matrices (or vectors)"); + endif + + d = mean (x) - mean (y); + S = ((n_x - 1) * cov (x) + (n_y - 1) * cov (y)) / (n_x + n_y - 2); + Tsq = (n_x * n_y / (n_x + n_y)) * d * (S \ d'); + pval = 1 - f_cdf ((n_x + n_y - p - 1) * Tsq / (p * (n_x + n_y - 2)), + p, n_x + n_y - p - 1); + + if (nargout == 0) + printf (" pval: %g\n", pval); + endif + +endfunction \ No newline at end of file diff --git a/scripts/statistics/tests/kolmogorov_smirnov_test.m b/scripts/statistics/tests/kolmogorov_smirnov_test.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/tests/kolmogorov_smirnov_test.m @@ -0,0 +1,97 @@ +## Copyright (C) 1995, 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: +## [pval, ks] = kolmogorov_smirnov_test (x, dist [, params] [, alt]) +## +## +## Performs a Kolmogorov-Smirnov test of the null hypothesis that the +## sample x comes from the (continuous) distribution dist. I.e., if F +## and G are the CDFs corresponding to the sample and dist, respectively, +## then the null is that F == G. +## +## The optional argument params contains a list of parameters of dist. +## E.g., to test whether a sample x comes from a uniform distribution on +## [2,4], use `kolmogorov_smirnov_test(x, "uniform", 2, 4)'. +## +## With the optional argument string alt, the alternative of interest +## can be selected. If alt is "!=" or "<>", the null is tested against +## the two-sided alternative F != G. In this case, the test statistic +## ks follows a two-sided Kolmogorov-Smirnov distribution. +## If alt is ">", the one-sided alternative F > G is considered, +## similarly for "<". In this case, the test statistic ks has a +## one-sided Kolmogorov-Smirnov distribution. +## The default is the two-sided case. +## +## pval is the p-value of the test. +## +## If no output argument is given, the p-value is displayed. + +## Author: KH +## Description: One-sample Kolmogorov-Smirnov test + +function [pval, ks] = kolmogorov_smirnov_test (x, dist, ...) + + if (nargin < 2) + error (sprintf (["usage:\n\t", ... + "[pval, ks] = ", ... + "kolmogorov_smirnov_test (x, dist, [, params] [, alt])"])); + endif + + if (! is_vector (x)) + error ("kolmogorov_smirnov_test: x must be a vector."); + endif + + n = length (x); + s = sort (x); + f = sprintf ("%s_cdf", dist); + + alt = "!="; + + if (nargin == 2) + z = reshape (feval (f, s), 1, n); + else + args = ""; + for k = 1 : (nargin-2); + tmp = va_arg (); + if isstr (tmp) + alt = tmp; + else + args = sprintf ("%s, %g", args, tmp); + endif + endfor + z = reshape (eval (sprintf ("%s(s%s);", f, args)), 1, n); + endif + + if (strcmp (alt, "!=") || strcmp (alt, "<>")) + ks = sqrt(n) * max(max([abs(z - (0:(n-1))/n); abs(z - (1:n)/n)])); + pval = 1 - kolmogorov_smirnov_cdf (ks); + elseif (strcmp (alt, ">")) + ks = sqrt(n) * max (max ([z - (0:(n-1))/n; z - (1:n)/n])); + pval = exp(- 2 * ks^2); + elseif (strcmp (alt, "<")) + ks = - sqrt(n) * min (min ([z - (0:(n-1))/n; z - (1:n)/n])); + pval = exp(- 2 * ks^2); + else + error (sprintf (["kolmogorov_smirnov_test: ", ... + "alternative %s not recognized"], alt)); + endif + + if (nargout == 0) + printf ("pval: %g\n", pval); + endif + +endfunction diff --git a/scripts/statistics/tests/kolmogorov_smirnov_test_2.m b/scripts/statistics/tests/kolmogorov_smirnov_test_2.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/tests/kolmogorov_smirnov_test_2.m @@ -0,0 +1,86 @@ +## Copyright (C) 1995, 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: [pval, ks] = kolmogorov_smirnov_test_2 (x, y [, alt]) +## +## Performs a 2-sample Kolmogorov-Smirnov test of the null hypothesis +## that the samples x and y come from the same (continuous) distribution. +## I.e., if F and G are the CDFs corresponding to the x and y samples, +## respectively, then the null is that F == G. +## +## With the optional argument string alt, the alternative of interest +## can be selected. +## If alt is "!=" or "<>", the null is tested against the two-sided +## alternative F != G. In this case, the test statistic ks follows a +## two-sided Kolmogorov-Smirnov distribution. +## If alt is ">", the one-sided alternative F > G is considered, +## similarly for "<". In this case, the test statistic ks has a +## one-sided Kolmogorov-Smirnov distribution. +## The default is the two-sided case. +## +## pval is the p-value of the test. +## +## If no output argument is given, the p-value is displayed. + +## Author: KH +## Description: Two-sample Kolmogorov-Smirnov test + +function [pval, ks] = kolmogorov_smirnov_test_2 (x, y, alt) + + if (nargin < 2 || nargin > 3) + usage (strcat ("[pval, ks] = ", + "kolmogorov_smirnov_test_2 (x, y [, tol])")); + endif + + if !( is_vector (x) && is_vector (y)) + error ("kolmogorov_smirnov_test_2: both x and y must be vectors."); + endif + + if (nargin == 2) + alt = "!="; + else + if (! isstr (alt)) + error ("kolmogorov_smirnov_test_2: alt must be a string."); + endif + endif + + n_x = length (x); + n_y = length (y); + n = n_x * n_y / (n_x + n_y); + x = reshape (x, n_x, 1); + y = reshape (y, n_y, 1); + [s, i] = sort ([x; y]); + count (find (i <= n_x)) = 1 / n_x; + count (find (i > n_x)) = - 1 / n_y; + if (strcmp (alt, "!=") || strcmp (alt, "<>")) + ks = sqrt (n) * max (abs (cumsum (count))); + pval = 1 - kolmogorov_smirnov_cdf (ks); + elseif (strcmp (alt, ">")) + ks = sqrt (n) * max (cumsum (count)); + pval = exp(- 2 * ks^2); + elseif (strcmp(alt, "<")) + ks = - sqrt (n) * min (cumsum (count)); + pval = exp(- 2 * ks^2); + else + error (sprintf (["kolmogorov_smirnov_test_2: ", ... + "option %s not recognized"], alt)); + endif + + if (nargout == 0) + printf (" pval: %g\n", pval); + endif + +endfunction diff --git a/scripts/statistics/tests/kruskal_wallis_test.m b/scripts/statistics/tests/kruskal_wallis_test.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/tests/kruskal_wallis_test.m @@ -0,0 +1,74 @@ +## Copyright (C) 1995, 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: [pval, k, df] = kruskal_wallis_test (x1, ...) +## +## Perform a Kruskal-Wallis one-factor "analysis of variance". +## +## Suppose a variable is observed for k > 1 different groups, and let +## x1, ..., xk be the corresponding data vectors. +## +## Under the null hypothesis that the ranks in the pooled sample are not +## affected by the group memberships, the test statistic k is +## approximately chi-square with df = k - 1 degrees of freedom. pval is +## the p-value (1 minus the CDF of this distribution at k) of this test. +## +## If no output argument is given, the pval is displayed. + +## Author: KH +## Description: Kruskal-Wallis test + +function [pval, k, df] = kruskal_wallis_test (...) + + m = nargin; + if (m < 2) + usage ("[pval, k, df] = kruskal_wallis_test (x1, ...)"); + endif + + n = []; + p = []; + va_start; + for i = 1 : m; + x = va_arg (); + if (! is_vector (x)) + error ("kruskal_wallis_test: all arguments must be vectors"); + endif + l = length (x); + n = [n, l]; + p = [p, reshape (x, 1, l)]; + endfor + + r = ranks (p); + + k = 0; + j = 0; + for i = 1 : m; + k = k + (sum (r ((j + 1) : (j + n(i))))) ^ 2 / n(i); + j = j + n(i); + endfor + + n = length (p); + k = 12 * k / (n * (n + 1)) - 3 * (n + 1); + df = m - 1; + pval = 1 - chisquare_cdf (k, df); + + if (nargout == 0) + printf ("pval: %g\n", pval); + endif + +endfunction + + diff --git a/scripts/statistics/tests/manova.m b/scripts/statistics/tests/manova.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/tests/manova.m @@ -0,0 +1,154 @@ +## Copyright (C) 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: manova (Y, g) +## +## Performs a one-way multivariate analysis of variance (MANOVA). The +## goal is to test whether the p-dimensional population means of data +## taken from k different groups are all equal. All data are assumed +## drawn independently from p-dimensional normal distributions with the +## same covariance matrix. +## +## Y is the data matrix. As usual, rows are observations and columns +## are variables. g is the vector of corresponding group labels (e.g., +## numbers from 1 to k), so that necessarily, length (g) must be the +## same as rows (Y). +## +## The LR test statistic (Wilks' Lambda) and approximate p-values are +## computed and displayed. + +## Three test statistics (Wilks, Hotelling-Lawley, and Pillai-Bartlett) +## and corresponding approximate p-values are calculated and displayed. +## (Currently NOT because the f_cdf respectively betai code is too bad.) + +## Author: TF +## Adapted-By: KH +## Description: One-way multivariate analysis of variance (MANOVA) + +function manova (Y, g) + + if (nargin != 2) + usage ("manova (Y, g)"); + endif + + if (is_vector (Y)) + error ("manova: Y must not be a vector"); + endif + + [n, p] = size (Y); + + if (!is_vector (g) || (length (g) != n)) + error ("manova: g must be a vector of length rows (Y)"); + endif + + s = sort (g); + i = find (s (2:n) > s(1:(n-1))); + k = length (i) + 1; + + if (k == 1) + error ("manova: there should be at least 2 groups"); + else + group_label = s ([1, reshape (i, 1, k - 1) + 1]); + endif + + Y = Y - ones (n, 1) * mean (Y); + SST = Y' * Y; + + s = zeros (1, p); + SSB = zeros (p, p); + for i = 1 : k; + v = Y (find (g == group_label (i)), :); + s = sum (v); + SSB = SSB + s' * s / rows (v); + endfor + n_b = k - 1; + + SSW = SST - SSB; + n_w = n - k; + + l = real (eig (SSB / SSW)); + l (l < eps) = 0; + + ## Wilks' Lambda + ## ============= + + Lambda = prod (1 ./ (1 + l)); + + delta = n_w + n_b - (p + n_b + 1) / 2 + df_num = p * n_b + W_pval_1 = 1 - chisquare_cdf (- delta * log (Lambda), df_num); + + if (p < 3) + eta = p; + else + eta = sqrt ((p^2 * n_b^2 - 4) / (p^2 + n_b^2 - 5)) + endif + + df_den = delta * eta - df_num / 2 + 1 + + WT = exp (- log (Lambda) / eta) - 1 + W_pval_2 = 1 - f_cdf (WT * df_den / df_num, df_num, df_den); + + if (0) + + ## Hotelling-Lawley Test + ## ===================== + + HL = sum (l); + + theta = min (p, n_b); + u = (abs (p - n_b) - 1) / 2; + v = (n_w - p - 1) / 2; + + df_num = theta * (2 * u + theta + 1); + df_den = 2 * (theta * v + 1); + + HL_pval = 1 - f_cdf (HL * df_den / df_num, df_num, df_den); + + ## Pillai-Bartlett + ## =============== + + PB = sum (l ./ (1 + l)); + + df_den = theta * (2 * v + theta + 1); + PB_pval = 1 - f_cdf (PB * df_den / df_num, df_num, df_den); + + printf ("\n"); + printf ("One-way MANOVA Table:\n"); + printf ("\n"); + printf ("Test Test Statistic Approximate p\n"); + printf ("**************************************************\n"); + printf ("Wilks %10.4f %10.9f \n", Lambda, W_pval_1); + printf (" %10.9f \n", W_pval_2); + printf ("Hotelling-Lawley %10.4f %10.9f \n", HL, HL_pval); + printf ("Pillai-Bartlett %10.4f %10.9f \n", PB, PB_pval); + printf ("\n"); + + endif + + printf ("\n"); + printf ("MANOVA Results:\n"); + printf ("\n"); + printf ("# of groups: %d\n", k); + printf ("# of samples: %d\n", n); + printf ("# of variables: %d\n", p); + printf ("\n"); + printf ("Wilks' Lambda: %5.4f\n", Lambda); + printf ("Approximate p: %10.9f (chisquare approximation)\n", W_pval_1); + printf (" %10.9f (F approximation)\n", W_pval_2); + printf ("\n"); + +endfunction diff --git a/scripts/statistics/tests/mcnemar_test.m b/scripts/statistics/tests/mcnemar_test.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/tests/mcnemar_test.m @@ -0,0 +1,64 @@ +## Copyright (C) 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: [pval, chisq, df] = mcnemar_test (x) +## +## For a square contingency table x of data cross-classified on the row +## and column variables, McNemar's test can be used for testing the null +## hypothesis of symmetry of the classification probabilities. +## +## Under the null, chisq is approximately distributed as chisquare with +## df degrees of freedom, and pval is the p-value (1 minus the CDF of +## this distribution at chisq) of the test. +## +## If no output argument is given, the p-value of the test is displayed. + +## Author: KH +## Description: McNemar's test for symmetry + +function [pval, chisq, df] = mcnemar_test (x) + + if (nargin != 1) + usage ("mcnemar_test (x)"); + endif + + if (! (min (size (x)) > 1) && is_square (x)) + error (strcat ("mcnemar_test: ", + "x must be a square matrix of size > 1.")); + elseif (! (all (all (x >= 0)) && all (all (x == round (x))))) + error (strcat ("mcnemar_test: ", + "all entries of x must be nonnegative integers.")); + endif + + r = rows (x); + df = r * (r - 1) / 2; + if (r == 2) + num = max (abs (x - x') - 1, 0) .^ 2; + else + num = abs (x - x') .^ 2; + endif + + chisq = sum (sum (triu (num ./ (x + x'), 1))); + pval = 1 - chisquare_cdf (chisq, df); + + if (nargout == 0) + printf (" pval: %g\n", pval); + endif + +endfunction + + + diff --git a/scripts/statistics/tests/prop_test_2.m b/scripts/statistics/tests/prop_test_2.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/tests/prop_test_2.m @@ -0,0 +1,77 @@ +## Copyright (C) 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: [pval, z] = prop_test_2 (x1, n1, x2, n2 [, alt]) +## +## If x1 and n1 are the counts of successes and trials in one sample, +## and x2 and n2 those in a second one, test the null hypothesis that +## the success probabilities p1 and p2 are the same. +## Under the null, the test statistic z approximately follows a +## standard normal distribution. +## +## With the optional argument string alt, the alternative of interest +## can be selected. +## If alt is "!=" or "<>", the null is tested against the two-sided +## alternative p1 != p2. +## If alt is ">", the one-sided alternative p1 > p2 is used, similarly +## for "<". +## The default is the two-sided case. +## +## pval is the p-value of the test. +## +## If no output argument is given, the p-value of the test is displayed. + +## Author: KH +## Description: Compare two proportions + +function [pval, z] = prop_test_2 (x1, n1, x2, n2, alt) + + if ((nargin < 4) || (nargin > 5)) + usage ("[pval, z] = prop_test_2 (x1, n1, x2, n2 [, alt])"); + endif + + ## Could do sanity checking on x1, n1, x2, n2 here + + p1 = x1 / n1; + p2 = x2 / n2; + pc = (x1 + x2) / (n1 + n2); + + z = (p1 - p2) / sqrt (pc * (1 - pc) * (1/n1 + 1/n2)); + + cdf = stdnormal_cdf (z); + + if (nargin == 4) + alt = "!="; + endif + + if !isstr (alt) + error ("prop_test_2: alt must be a string"); + endif + if (strcmp (alt, "!=") || strcmp (alt, "<>")) + pval = 2 * min (cdf, 1 - cdf); + elseif strcmp (alt, ">") + pval = 1 - cdf; + elseif strcmp (alt, "<") + pval = cdf; + else + error (sprintf ("prop_test_2: option %s not recognized", alt)); + endif + + if (nargout == 0) + printf (" pval: %g\n", pval); + endif + +endfunction diff --git a/scripts/statistics/tests/run_test.m b/scripts/statistics/tests/run_test.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/tests/run_test.m @@ -0,0 +1,53 @@ +## Copyright (C) 1995, 1996, 1997 Friedrich Leisch +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: [pval, chisq] = run_test (x) +## +## Performs a chi-square test with 6 degrees of freedom based on the +## upward runs in the columns of x. Can be used to test whether x +## contains independent data. +## +## If no output argument is given, the pval is displayed. + +## Author: FL +## Description: Run test for independence + +function [pval, chisq] = run_test (x) + + if (nargin != 1) + usage ("run_test (x)"); + endif + + A = [ 4529.4 9044.9 13568 18091 22615 27892 + 9044.4 18097 27139 36187 45234 55789 + 13568 27139 40721 54281 67852 83685 + 18091 36187 54281 72414 90470 111580 + 22615 45234 67852 90470 113262 139476 + 27892 55789 83685 111580 139476 172860 ]; + + b = [1/6; 5/24; 11/120; 19/720; 29/5040; 1/840]; + + n = rows (x); + r = run_count (x, 6) - n * b * ones (1, columns(x)); + + chisq = diag (r' * A * r)' / n; + pval = chisquare_cdf (chisq, 6); + + if (nargout == 0) + printf("pval: %g\n", pval); + endif + +endfunction diff --git a/scripts/statistics/tests/sign_test.m b/scripts/statistics/tests/sign_test.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/tests/sign_test.m @@ -0,0 +1,77 @@ +## Copyright (C) 1995, 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: [pval, b, n] = sign_test (x, y [, alt]) +## +## For two matched-pair samples x and y, perform a sign test of the +## null hypothesis PROB(x > y) == PROB(x < y) == 1/2. +## Under the null, the test statistic b roughly follows a binomial +## distribution with parameters n = sum (x != y) and p = 1/2. +## +## With the optional argument alt, the alternative of interest can be +## selected. +## If alt is "!=" or "<>", the null hypothesis is tested against the +## two-sided alternative PROB(x < y) != 1/2. +## If alt is ">", the one-sided alternative PROB(x > y) > 1/2 ("x is +## stochastically greater than y") is considered, similarly for "<". +## The default is the two-sided case. +## +## pval is the p-value of the test. +## +## If no output argument is given, the p-value of the test is displayed. + +## Author: KH +## Description: Sign test + +function [pval, b, n] = sign_test (x, y, alt) + + if ((nargin < 2) || (nargin > 3)) + usage ("[pval, b, n] = sign_test (x, y [, alt])"); + endif + + if (! (is_vector (x) && is_vector (y) && (length (x) == length (y)))) + error ("sign_test: x and y must be vectors of the same length"); + endif + + n = length (x); + x = reshape (x, 1, n); + y = reshape (y, 1, n); + n = sum (x != y); + b = sum (x > y); + cdf = binomial_cdf (b, n, 1/2); + + if (nargin == 2) + alt = "!="; + endif + + if (! isstr (alt)) + error ("sign_test: alt must be a string"); + endif + if (strcmp (alt, "!=") || strcmp (alt, "<>")) + pval = 2 * min (cdf, 1 - cdf); + elseif strcmp (alt, ">") + pval = 1 - cdf; + elseif strcmp (alt, "<") + pval = cdf; + else + error (sprintf ("sign_test: option %s not recognized", alt)); + endif + + if (nargout == 0) + printf (" pval: %g\n", pval); + endif + +endfunction diff --git a/scripts/statistics/tests/t_test.m b/scripts/statistics/tests/t_test.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/tests/t_test.m @@ -0,0 +1,78 @@ +## Copyright (C) 1995, 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: [pval, t, df] = t_test (x, m [, alt]) +## +## For a sample x from a normal distribution with unknown mean and +## variance, perform a t-test of the null hypothesis mean(x) == m. +## Under the null, the test statistic t follows a Student distribution +## with df = length (x) - 1 degrees of freedom. +## +## With the optional argument string alt, the alternative of interest +## can be selected. +## If alt is "!=" or "<>", the null is tested against the two-sided +## alternative mean(x) != m. +## If alt is ">", the one-sided alternative mean(x) > m is considered, +## similarly for "<". +## The default is the two-sided case. +## +## pval is the p-value of the test. +## +## If no output argument is given, the p-value of the test is displayed. + +## Author: KH +## Description: Student's one-sample t test + +function [pval, t, df] = t_test (x, m, alt) + + if ((nargin < 2) || (nargin > 3)) + usage ("[pval, t, df] = t_test (x, m [, alt])"); + endif + + if (! is_vector (x)) + error ("t_test: x must be a vector."); + endif + if (! is_scalar (m)) + error ("t_test: m must be a scalar."); + endif + + n = length (x); + df = n - 1; + t = sqrt (n) * (sum (x) / n - m) / std (x); + cdf = t_cdf (t, df); + + if (nargin == 2) + alt = "!="; + endif + + if (! isstr (alt)) + error ("t_test: alt must be a string"); + endif + if (strcmp (alt, "!=") || strcmp (alt, "<>")) + pval = 2 * min (cdf, 1 - cdf); + elseif strcmp (alt, ">") + pval = 1 - cdf; + elseif strcmp (alt, "<") + pval = cdf; + else + error (sprintf ("t_test: option %s not recognized", alt)); + endif + + if (nargout == 0) + printf (" pval: %g\n", pval); + endif + +endfunction diff --git a/scripts/statistics/tests/t_test_2.m b/scripts/statistics/tests/t_test_2.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/tests/t_test_2.m @@ -0,0 +1,80 @@ +## Copyright (C) 1995, 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: [pval, t, df] = t_test_2 (x, y [, alt]) +## +## For two samples x and y from normal distributions with unknown means +## and unknown equal variances, perform a two-sample t-test of the null +## hypothesis of equal means. +## Under the null, the test statistic t follows a Student distribution +## with df degrees of freedom. +## +## With the optional argument string alt, the alternative of interest +## can be selected. +## If alt is "!=" or "<>", the null is tested against the two-sided +## alternative mean(x) != mean(y). +## If alt is ">", the one-sided alternative mean(x) > mean(y) is used, +## similarly for "<". +## The default is the two-sided case. +## +## pval is the p-value of the test. +## +## If no output argument is given, the p-value of the test is displayed. + +## Author: KH +## Description: Student's two-sample t test + +function [pval, t, df] = t_test_2 (x, y, alt) + + if ((nargin < 2) || (nargin > 3)) + usage ("[pval, t, df] = t_test_2 (x, y [, alt])"); + endif + + if (! (is_vector (x) && is_vector (y))) + error ("t_test_2: both x and y must be vectors"); + endif + + n_x = length (x); + n_y = length (y); + df = n_x + n_y - 2; + mu_x = sum (x) / n_x; + mu_y = sum (y) / n_y; + v = sumsq (x - mu_x) + sumsq (y - mu_y); + t = (mu_x - mu_y) * sqrt ( (n_x * n_y * df) / (v * (n_x + n_y)) ); + cdf = t_cdf (t, df); + + if (nargin == 2) + alt = "!="; + endif + + if (! isstr (alt)) + error ("t_test_2: alt must be a string"); + endif + if (strcmp (alt, "!=") || strcmp (alt, "<>")) + pval = 2 * min (cdf, 1 - cdf); + elseif strcmp (alt, ">") + pval = 1 - cdf; + elseif strcmp (alt, "<") + pval = cdf; + else + error (sprintf ("t_test_2: option %s not recognized", alt)); + endif + + if (nargout == 0) + printf (" pval: %g\n", pval); + endif + +endfunction diff --git a/scripts/statistics/tests/t_test_regression.m b/scripts/statistics/tests/t_test_regression.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/tests/t_test_regression.m @@ -0,0 +1,97 @@ +## Copyright (C) 1995, 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: [pval, t, df] = t_test_regression (y, X, R [, r] [, alt]) +## +## Performs an t test for the null hypothesis R * b = r in a classical +## normal regression model y = X * b + e. +## Under the null, the test statistic t follows a t distribution with +## df degrees of freedom. +## +## r is taken as 0 if not given explicitly. +## +## With the optional argument string alt, the alternative of interest +## can be selected. +## If alt is "!=" or "<>", the null is tested against the two-sided +## alternative R * b != r. +## If alt is ">", the one-sided alternative R * b > r is used, +## similarly for "<". +## The default is the two-sided case. +## +## pval is the p-value of the test. +## +## If no output argument is given, the p-value of the test is displayed. + +## Author: KH +## Description: Test one linear hypothesis in linear regression model + +function [pval, t, df] = t_test_regression (y, X, R, r, alt) + + if (nargin == 3) + r = 0; + alt = "!="; + elseif (nargin == 4) + if (isstr (r)) + alt = r; + r = 0; + else + alt = "!="; + endif + elseif !(nargin == 5) + usage (["[pval, t, df] ", ... + "= t_test_regression (y, X, R [, r] [, alt]"]); + endif + + if (! is_scalar (r)) + error ("t_test_regression: r must be a scalar"); + elseif (! isstr (alt)) + error ("t_test_regression: alt must be a string"); + endif + + [T, k] = size (X); + if !(is_vector (y) && (length (y) == T)) + error (["t_test_regression: ", ... + "y must be a vector of length rows (X)"]); + endif + s = size (R); + if !((max (s) == k) && (min (s) == 1)) + error (["t_test_regression: ", ... + "R must be a vector of length columns (X)"]); + endif + + R = reshape (R, 1, k); + y = reshape (y, T, 1); + [b, v] = ols (y, X); + df = T - k; + t = (R * b - r) / sqrt (v * R * inv (X' * X) * R'); + cdf = t_cdf (t, df); + + if (strcmp (alt, "!=") || strcmp (alt, "<>")) + pval = 2 * min (cdf, 1 - cdf); + elseif strcmp (alt, ">") + pval = 1 - cdf; + elseif strcmp (alt, "<") + pval = cdf; + else + error (["t_test_regression: ", ... + sprintf ("the value %s for alt is not possible", alt)]); + endif + + if (nargout == 0) + printf ("pval: %g\n", pval); + endif + +endfunction diff --git a/scripts/statistics/tests/u_test.m b/scripts/statistics/tests/u_test.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/tests/u_test.m @@ -0,0 +1,82 @@ +## Copyright (C) 1995, 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: [pval, z] = u_test (x, y [, alt]) +## +## For two samples x and y, perform a Mann-Whitney U-test of the null +## hypothesis PROB(x > y) == 1/2 == PROB(x < y). Under the null, the +## test statistic z approximately follows a standard normal +## distribution. Note that this test is equivalent to the Wilcoxon +## rank-sum test. +## +## With the optional argument string alt, the alternative of interest +## can be selected. +## If alt is "!=" or "<>", the null is tested against the two-sided +## alternative PROB(x > y) != 1/2. +## If alt is ">", the one-sided alternative PROB(x > y) > 1/2 is +## considered, similarly for "<". +## The default is the two-sided case. +## +## pval is the p-value of the test. +## +## If no output argument is given, the p-value of the test is displayed. + +## This implementation is still incomplete---for small sample sizes, +## the normal approximation is rather bad ... + +## Author: KH +## Description: Mann-Whitney U-test + +function [pval, z] = u_test (x, y, alt) + + if ((nargin < 2) || (nargin > 3)) + usage ("[pval, z] = u_test (x, y [, alt])"); + endif + + if (! (is_vector (x) && is_vector (y))) + error ("u_test: both x and y must be vectors"); + endif + + n_x = length (x); + n_y = length (y); + r = ranks ([reshape (x, 1, n_x), reshape (y, 1, n_y)]); + z = (sum (r(1 : n_x)) - n_x * (n_x + n_y + 1) / 2) ... + / sqrt (n_x * n_y * (n_x + n_y + 1) / 12); + + cdf = stdnormal_cdf (z); + + if (nargin == 2) + alt = "!="; + endif + + if (! isstr (alt)) + error("u_test: alt must be a string"); + endif + if (strcmp (alt, "!=") || strcmp (alt, "<>")) + pval = 2 * min (cdf, 1 - cdf); + elseif (strcmp (alt, ">")) + pval = cdf; + elseif (strcmp (alt, "<")) + pval = 1 - cdf; + else + error (sprintf ("u_test: option %s not recognized", alt)); + endif + + if (nargout == 0) + printf (" pval: %g\n", pval); + endif + +endfunction diff --git a/scripts/statistics/tests/var_test.m b/scripts/statistics/tests/var_test.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/tests/var_test.m @@ -0,0 +1,76 @@ +## Copyright (C) 1995, 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: [pval, f, df_num, df_den] = var_test (x, y [, alt]) +## +## For two samples x and y from normal distributions with unknown +## means and unknown variances, perform an F-test of the null +## hypothesis of equal variances. +## Under the null, the test statistic f follows an F-distribution +## with df_num and df_den degrees of freedom. +## +## With the optional argument string alt, the alternative of interest +## can be selected. +## If alt is "!=" or "<>", the null is tested against the two-sided +## alternative var(x) != var(y). +## If alt is ">", the one-sided alternative var(x) > var(y) is used, +## similarly for "<". +## The default is the two-sided case. +## +## pval is the p-value of the test. +## +## If no output argument is given, the p-value of the test is displayed. + +## Author: KH +## Description: F test to compare two variances + +function [pval, f, df_num, df_den] = var_test (x, y, alt) + + if ((nargin < 2) || (nargin > 3)) + usage ("[pval, f, df_num, df_den] = var_test (x, y [, alt])"); + endif + + if (! (is_vector (x) && is_vector (y))) + error ("var_test: both x and y must be vectors"); + endif + + df_num = length (x) - 1; + df_den = length (y) - 1; + f = var (x) / var (y); + cdf = f_cdf (f, df_num, df_den); + + if (nargin == 2) + alt = "!="; + endif + + if (! isstr (alt)) + error ("var_test: alt must be a string"); + endif + if (strcmp (alt, "!=") || strcmp (alt, "<>")) + pval = 2 * min (cdf, 1 - cdf); + elseif (strcmp (alt, ">")) + pval = 1 - cdf; + elseif (strcmp (alt, "<")) + pval = cdf; + else + error (sprintf ("var_test: option %s not recognized", alt)); + endif + + if (nargout == 0) + printf ("pval: %g\n", pval); + endif + +endfunction diff --git a/scripts/statistics/tests/welch_test.m b/scripts/statistics/tests/welch_test.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/tests/welch_test.m @@ -0,0 +1,82 @@ +## Copyright (C) 1995, 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: [pval, t, df] = welch_test (x, y [, alt]) +## +## For two samples x and y from normal distributions with unknown means +## and unknown and not necessarily equal variances, perform a Welch test +## of the null hypothesis of equal means. +## Under the null, the test statistic t approximately follows a Student +## distribution with df degrees of freedom. +## +## With the optional argument string alt, the alternative of interest +## can be selected. +## If alt is "!=" or "<>", the null is tested against the two-sided +## alternative mean(x) != m. +## If alt is ">", the one-sided alternative mean(x) > m is considered, +## similarly for "<". +## The default is the two-sided case. +## +## pval is the p-value of the test. +## +## If no output argument is given, the p-value of the test is displayed. + +## Author: KH +## Description: Welch two-sample t test + +function [pval, t, df] = welch_test (x, y, alt) + + if ((nargin < 2) || (nargin > 3)) + usage ("[pval, t, df] = welch_test (x, y [, alt])"); + endif + + if (! (is_vector (x) && is_vector (y))) + error ("welch_test: both x and y must be vectors"); + endif + + n_x = length (x); + n_y = length (y); + mu_x = sum (x) / n_x; + mu_y = sum (y) / n_y; + v_x = sumsq (x - mu_x) / (n_x * (n_x - 1)); + v_y = sumsq (x - mu_y) / (n_y * (n_y - 1)); + c = v_x / (v_x + v_y); + df = 1 / ( c^2 / (n_x - 1) + (1 - c)^2 / (n_y - 1)); + t = (mu_x - mu_y) / sqrt (v_x + v_y); + cdf = t_cdf (t, df); + + if (nargin == 2) + alt = "!="; + endif + + if (! isstr (alt)) + error ("welch_test: alt must be a string"); + endif + if (strcmp (alt, "!=") || strcmp (alt, "<>")) + pval = 2 * min (cdf, 1 - cdf); + elseif (strcmp (alt, ">")) + pval = 1 - cdf; + elseif (strcmp (alt, "<")) + pval = cdf; + else + error (sprintf ("welch_test: option %s not recognized", alt)); + endif + + if (nargout == 0) + printf (" pval: %g\n", pval); + endif + +endfunction diff --git a/scripts/statistics/tests/wilcoxon_test.m b/scripts/statistics/tests/wilcoxon_test.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/tests/wilcoxon_test.m @@ -0,0 +1,85 @@ +## Copyright (C) 1995, 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: [pval, z] = wilcoxon_test (x, y [, alt]) +## +## For two matched-pair sample vectors x and y, perform a Wilcoxon +## signed-rank test of the null hypothesis PROB(x > y) == 1/2. +## Under the null, the test statistic z approximately follows a +## standard normal distribution. +## +## With the optional argument string alt, the alternative of interest +## can be selected. +## If alt is "!=" or "<>", the null is tested against the two-sided +## alternative PROB(x > y) != 1/2. +## If alt is ">", the one-sided alternative PROB(x > y) > 1/2 is +## considered, similarly for "<". +## The default is the two-sided case. +## +## pval is the p-value of the test. +## +## If no output argument is given, the p-value of the test is displayed. + +## Author: KH +## Description: Wilcoxon signed-rank test + +function [pval, z] = wilcoxon_test (x, y, alt) + + if ((nargin < 2) || (nargin > 3)) + usage ("[pval, z] = wilcoxon_test (x, y [, alt])"); + endif + + if (! (is_vector (x) && is_vector (y) && (length (x) == length (y)))) + error ("wilcoxon_test: x and y must be vectors of the same length"); + endif + + n = length (x); + x = reshape (x, 1, n); + y = reshape (y, 1, n); + d = x - y; + d = d (find (d != 0)); + n = length (d); + if (n > 0) + r = ranks (abs (d)); + z = sum (r (find (d > 0))); + z = ((z - n * (n + 1) / 4) / sqrt (n * (n + 1) * (2 * n + 1) / 24)); + else + z = 0; + endif + + cdf = stdnormal_cdf (z); + + if (nargin == 2) + alt = "!="; + endif + + if (! isstr (alt)) + error("wilcoxon_test: alt must be a string"); + elseif (strcmp (alt, "!=") || strcmp (alt, "<>")) + pval = 2 * min (cdf, 1 - cdf); + elseif (strcmp (alt, ">")) + pval = 1 - cdf; + elseif (strcmp (alt, "<")) + pval = cdf; + else + error (sprintf ("wilcoxon_test: option %s not recognized", alt)); + endif + + if (nargout == 0) + printf (" pval: %g\n", pval); + endif + +endfunction diff --git a/scripts/statistics/tests/z_test.m b/scripts/statistics/tests/z_test.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/tests/z_test.m @@ -0,0 +1,83 @@ +## Copyright (C) 1995, 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: [pval, z] = z_test (x, m, v [, alt]) +## +## Perform a Z-test of the null hypothesis mean(x) == m for a sample x +## from a normal distribution with unknown mean and known variance v. +## Under the null, the test statistic z follows a standard normal +## distribution. +## +## With the optional argument string alt, the alternative of interest +## can be selected. +## If alt is "!=" or "<>", the null is tested against the two-sided +## alternative mean(x) != m. +## If alt is ">", the one-sided alternative mean(x) > m is considered, +## similarly for "<". +## The default is the two-sided case. +## +## pval is the p-value of the test. +## +## If no output argument is given, the p-value of the test is displayed +## along with some information. + +## Author: KH +## Description: Test for mean of a normal sample with known variance + +function [pval, z] = z_test (x, m, v, alt) + + if ((nargin < 3) || (nargin > 4)) + usage ("[pval, z] = z_test (x, m, v [, alt])"); + endif + + if (! is_vector (x)) + error ("z_test: x must be a vector."); + endif + if (! is_scalar (m)) + error ("z_test: m must be a scalar."); + endif + if (! (is_scalar (v) && (v > 0))) + error ("z_test: v must be a positive scalar."); + endif + + n = length (x); + z = sqrt (n/v) * (sum (x) / n - m); + cdf = stdnormal_cdf (z); + + if (nargin == 3) + alt = "!="; + endif + + if (! isstr (alt)) + error ("z_test: alt must be a string"); + elseif (strcmp (alt, "!=") || strcmp (alt, "<>")) + pval = 2 * min (cdf, 1 - cdf); + elseif (strcmp (alt, ">")) + pval = 1 - cdf; + elseif (strcmp (alt, "<")) + pval = cdf; + else + error (sprintf ("z_test: option %s not recognized", alt)); + endif + + if (nargout == 0) + s = strcat ("Z-test of mean(x) == %g against mean(x) %s %g,\n", + "with known var(x) == %g:\n", + " pval = %g\n"); + printf (s, m, alt, m, v, pval); + endif + +endfunction diff --git a/scripts/statistics/tests/z_test_2.m b/scripts/statistics/tests/z_test_2.m new file mode 100644 --- /dev/null +++ b/scripts/statistics/tests/z_test_2.m @@ -0,0 +1,85 @@ +## Copyright (C) 1995, 1996, 1997 Kurt Hornik +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, but +## WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +## General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this file. If not, write to the Free Software Foundation, +## 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +## usage: [pval, z] = z_test_2 (x, y, v_x, v_y [, alt]) +## +## For two samples x and y from normal distributions with unknown +## means and known variances v_x and v_y, perform a Z-test of the +## hypothesis of equal means. +## Under the null, the test statistic z follows a standard normal +## distribution. +## +## With the optional argument string alt, the alternative of interest +## can be selected. +## If alt is "!=" or "<>", the null is tested against the two-sided +## alternative mean(x) != mean(y). +## If alt is ">", the one-sided alternative mean(x) > mean(y) is +## used, similarly for "<". +## The default is the two-sided case. +## +## pval is the p-value of the test. +## +## If no output argument is given, the p-value of the test is displayed +## along with some information. + +## Author: KH +## Description: Compare means of two normal samples with known variances + +function [pval, z] = z_test_2 (x, y, v_x, v_y, alt) + + if ((nargin < 4) || (nargin > 5)) + usage ("[pval, z] = z_test_2 (x, y, v_x, v_y [, alt])"); + endif + + if (! (is_vector (x) && is_vector (y))) + error("z_test_2: both x and y must be vectors"); + elseif (! (is_scalar (v_x) && (v_x > 0) + && is_scalar (v_y) && (v_y > 0))) + error ("z_test_2: both v_x and v_y must be positive scalars."); + endif + + n_x = length (x); + n_y = length (y); + mu_x = sum (x) / n_x; + mu_y = sum (y) / n_y; + z = (mu_x - mu_y) / sqrt (v_x / n_x + v_y / n_y); + cdf = stdnormal_cdf (z); + + if (nargin == 4) + alt = "!="; + endif + + if (! isstr (alt)) + error ("z_test_2: alt must be a string"); + elseif (strcmp (alt, "!=") || strcmp (alt, "<>")) + pval = 2 * min (cdf, 1 - cdf); + elseif (strcmp (alt, ">")) + pval = 1 - cdf; + elseif (strcmp (alt, "<")) + pval = cdf; + else + error (sprintf ("z_test_2: option %s not recognized", alt)); + endif + + if (nargout == 0) + s = strcat ("Two-sample Z-test of mean(x) == mean(y) against ", + "mean(x) %s mean(y),\n", + "with known var(x) == %g and var(y) == %g:\n", + " pval = %g\n"); + printf (s, alt, v_x, v_y, pval); + endif + +endfunction