# HG changeset patch # User jwe # Date 1083298893 0 # Node ID 28ab079d8f0e531f3c585775bc8cac6895335e75 # Parent a9f67193e3a0d2b6123da674b58999a34c4ec39b [project @ 2004-04-30 04:21:33 by jwe] diff --git a/scripts/ChangeLog b/scripts/ChangeLog --- a/scripts/ChangeLog +++ b/scripts/ChangeLog @@ -1,3 +1,18 @@ +2004-04-29 David Bateman + + * statistics/base/ranks.m, statistics/base/run_count.m, + statistics/base/studentize.m, statistics/base/kurtosis.m + statistics/base/statistics.m, statistics/base/skewness.m + statistics/base/iqr.m: + Make N-d array aware. Allow optional argument to define the + dimension along which to operate. Update the documentation. + + * statistics/base/ranks.m: Change algorithm to use sort, + and adjust for the ties after. + + * statistics/base/run_counts.m: Change algorithm to use + the a combination of diff and find, rather than a for-loop. + 2004-04-23 Paul Kienzle * plot/hist.m: Correctly determine cutoffs. New tests. diff --git a/scripts/statistics/base/iqr.m b/scripts/statistics/base/iqr.m --- a/scripts/statistics/base/iqr.m +++ b/scripts/statistics/base/iqr.m @@ -18,31 +18,60 @@ ## 02111-1307, USA. ## -*- texinfo -*- -## @deftypefn {Function File} {} iqr (@var{x}) +## @deftypefn {Function File} {} iqr (@var{x}, @var{dim}) ## If @var{x} is a vector, return the interquartile range, i.e., the ## difference between the upper and lower quartile, of the input data. ## -## If @var{x} is a matrix, do the above for each column of @var{x}. +## If @var{x} is a matrix, do the above for first non singleton +## dimension of @var{x}.. If the option @var{dim} argument is given, +## then operate along this dimension. ## @end deftypefn ## Author KH ## Description: Interquartile range -function y = iqr (x) +function y = iqr (x, dim) - if (nargin != 1) - usage ("iqr (x)"); + if (nargin != 1 && nargin != 2) + usage ("iqr (x, dim)"); endif - if (rows (x) == 1) - x = x.'; + nd = ndims (x); + sz = size (x); + nel = numel (x); + if (nargin != 2) + %% Find the first non-singleton dimension + dim = 1; + while (dim < nd + 1 && sz (dim) == 1) + dim = dim + 1; + endwhile + if (dim > nd) + dim = 1; + endif + else + if (! (isscalar (dim) && dim == round (dim)) && dim > 0 && + dim < (nd + 1)) + error ("iqr: dim must be an integer and valid dimension"); + endif endif - [r, c] = size (x); - y = zeros (1, c); + ## This code is a bit heavy, but is needed until empirical_inv + ## takes other than vector arguments. + c = sz (dim); + sz (dim) = 1; + y = zeros (sz); + stride = prod (sz (1:dim-1)); + for i = 1 : nel / c; + offset = i; + offset2 = 0; + while (offset > stride) + offset -= stride; + offset2++; + endwhile + offset += offset2 * stride * c; + rng = [0 : c-1] * stride + offset; - for i = 1:c; - y(i) = empirical_inv (3/4, x(:,i)) - empirical_inv (1/4, x(:,i)); + y (i) = empirical_inv (3/4, x(rng)) - empirical_inv (1/4, x(rng)); endfor endfunction diff --git a/scripts/statistics/base/kurtosis.m b/scripts/statistics/base/kurtosis.m --- a/scripts/statistics/base/kurtosis.m +++ b/scripts/statistics/base/kurtosis.m @@ -18,7 +18,7 @@ ## 02111-1307, USA. ## -*- texinfo -*- -## @deftypefn {Function File} {} kurtosis (@var{x}) +## @deftypefn {Function File} {} kurtosis (@var{x}, @var{dim}) ## If @var{x} is a vector of length @math{N}, return the kurtosis ## @iftex ## @tex @@ -35,36 +35,53 @@ ## @end ifinfo ## ## @noindent -## of @var{x}. If @var{x} is a matrix, return the row vector containing -## the kurtosis of each column. +## of @var{x}. If @var{x} is a matrix, return the kurtosis over the +## first non-singleton dimension. The optional argument @var{dim} +## can be given to force the kurtosis to be given over that +## dimension. ## @end deftypefn ## Author: KH ## Created: 29 July 1994 ## Adapted-By: jwe -function retval = kurtosis (x) +function retval = kurtosis (x, dim) - if (nargin != 1) - usage ("kurtosis (x)"); + if (nargin != 1 && nargin != 2) + usage ("kurtosis (x, dim)"); endif - if (isvector (x)) - x = x - mean (x); - if (! any (x)) - retval = 0; - else - retval = sum (x .^ 4) / (length (x) * std (x) ^ 4) - 3; + nd = ndims (x); + sz = size (x); + if (nargin != 2) + %% Find the first non-singleton dimension + dim = 1; + while (dim < nd + 1 && sz (dim) == 1) + dim = dim + 1; + endwhile + if (dim > nd) + dim = 1; endif - elseif (ismatrix (x)) - [nr, nc] = size (x); - x = x - ones (nr, 1) * mean (x); - retval = zeros (1, nc); - s = std (x); - ind = find (s > 0); - retval (ind) = sum (x (:, ind) .^ 4) ./ (nr * s (ind) .^ 4) - 3; else + if (! (isscalar (dim) && dim == round (dim)) && dim > 0 && + dim < (nd + 1)) + error ("kurtosis: dim must be an integer and valid dimension"); + endif + endif + + if (! ismatrix (x)) error ("kurtosis: x has to be a matrix or a vector"); endif + c = sz (dim); + sz (dim) = 1; + idx = ones (1, nd); + idx (dim) = c; + x = x - repmat (mean (x, dim), idx); + retval = zeros (sz); + s = std (x, [], dim); + x = sum(x.^4, dim); + ind = find (s > 0); + retval (ind) = x (ind) ./ (c * s (ind) .^ 4) - 3; + endfunction diff --git a/scripts/statistics/base/ranks.m b/scripts/statistics/base/ranks.m --- a/scripts/statistics/base/ranks.m +++ b/scripts/statistics/base/ranks.m @@ -18,37 +18,78 @@ ## 02111-1307, USA. ## -*- texinfo -*- -## @deftypefn {Function File} {} ranks (@var{x}) +## @deftypefn {Function File} {} ranks (@var{x}, @var{dim}) ## If @var{x} is a vector, return the (column) vector of ranks of ## @var{x} adjusted for ties. ## -## If @var{x} is a matrix, do the above for each column of @var{x}. +## If @var{x} is a matrix, do the above for along the first +## non-singleton dimension. If the optional argument @var{dim} is +## given, operate along this dimension. ## @end deftypefn ## Author: KH ## Description: Compute ranks -## This code is rather ugly, but is there an easy way to get the ranks -## adjusted for ties from sort? +## This code was rather ugly, since it didn't use sort due to the +## fact of how to deal with ties. Now it does use sort and its +## even uglier!!! At least it handles NDArrays.. + +function y = ranks (x, dim) + + if (nargin != 1 && nargin != 2) + usage ("ranks (x, dim)"); + endif -function y = ranks (x) - - if (nargin != 1) - usage ("ranks (x)"); + nd = ndims (x); + sz = size (x); + if (nargin != 2) + %% Find the first non-singleton dimension + dim = 1; + while (dim < nd + 1 && sz (dim) == 1) + dim = dim + 1; + endwhile + if (dim > nd) + dim = 1; + endif + else + if (! (isscalar (dim) && dim == round (dim)) && dim > 0 && + dim < (nd + 1)) + error ("ranks: dim must be an integer and valid dimension"); + endif endif - y = []; - - [r, c] = size (x); - if ((r == 1) && (c > 0)) - p = x' * ones (1, c); - y = sum (p < p') + (sum (p == p') + 1) / 2; - elseif (r > 1) - o = ones (1, r); - for i = 1 : c; - p = x (:, i) * o; - y = [y, (sum (p < p') + (sum (p == p') + 1) / 2)']; - endfor + if (sz (dim) == 1) + y = ones(sz); + else + ## The algorithm works only on dim=1, so permute if necesary + if (dim != 1) + perm = [1 : nd]; + perm (1) = dim; + perm (dim) = 1; + x = permute (x, perm); + endif + sz = size (x); + infvec = -Inf * ones ([1, sz(2 : end)]); + [xs, y] = sort (x); + eq_el = find (diff ([xs; infvec]) == 0); + if (isempty (eq_el)) + [eq_el, y] = sort (y); + else + runs = complement (eq_el+1, eq_el); + runs = reshape (y (runs), size (runs)) + + floor (runs ./ sz (1)) * sz(1); + len = diff (find (diff ([Inf; eq_el; -Inf]) != 1)) + 1; + [eq_el, y] = sort (y); + for i = 1 : length(runs) + p = y (runs (i)) + (len (i) - 1) / 2; + for j = 0 : len (i) - 1 + y (runs (i) + j) = p; + endfor + endfor + endif + if (dim != 1) + y = permute (y, perm); + endif endif endfunction diff --git a/scripts/statistics/base/run_count.m b/scripts/statistics/base/run_count.m --- a/scripts/statistics/base/run_count.m +++ b/scripts/statistics/base/run_count.m @@ -19,43 +19,75 @@ ## -*- texinfo -*- ## @deftypefn {Function File} {} run_count (@var{x}, @var{n}) -## Count the upward runs in the columns of @var{x} of length 1, 2, ..., -## @var{n}-1 and greater than or equal to @var{n}. +## Count the upward runs along the first non-singleton dimension of +## @var{x} of length 1, 2, ..., @var{n}-1 and greater than or equal +## to @var{n}. If the optional argument @var{dim} is given operate +## along this dimension ## @end deftypefn ## Author: FL ## Description: Count upward runs -function retval = run_count (x, n) +function retval = run_count (x, n, dim) - [xr, xc] = size(x); + if (nargin != 2 && nargin != 3) + usage ("run_count (x, n) or run_count (x, n, dim)"); + endif - tmp = zeros (xr,xc); - retval = zeros (n, xc); - - for j = 1 : xc - run = 1; - count = 1; - - for k = 2 : xr + nd = ndims (x); + sz = size (x); + if (nargin != 3) + %% Find the first non-singleton dimension + dim = 1; + while (dim < nd + 1 && sz (dim) == 1) + dim = dim + 1; + endwhile + if (dim > nd) + dim = 1; + endif + else + if (! (isscalar (dim) && dim == round (dim)) && dim > 0 && + dim < (nd + 1)) + error ("run_count: dim must be an integer and valid dimension"); + endif + endif - if x(k, j) < x(k-1, j) - tmp(run, j) = count; - run = run + 1; - count = 0; - endif - - count = count + 1; + if (! (isscalar (n) && n == round (n)) && n > 0 ) + error ("run_count: n must be a positive integer"); + endif + + nd = ndims (x); + if (dim != 1) + perm = [1 : nd]; + perm (1) = dim; + perm (dim) = 1; + x = permute (x, perm); + endif - endfor - - tmp(run, j) = count; - + sz = size (x); + idx = cell (); + for i = 1 : nd + idx {i} = 1 : sz(i); endfor + c = sz (1); + tmp = zeros ([c + 1, sz(2 : end)]); + infvec = Inf * ones ([1, sz(2 : end)]); + ind = find (diff ([infvec; x; -infvec]) < 0); + tmp (ind (2 : end) - 1) = diff (ind); + tmp = tmp (idx {:}); + + sz (1) = n; + retval = zeros (sz); for k=1 : (n-1) - retval(k, :) = sum (tmp == k); + idx {1} = k; + retval (idx {:}) = sum (tmp == k); endfor - retval(n, :) = sum (tmp >= n); + idx {1} = n; + retval (idx {:}) = sum (tmp >= n); + + if (dim != 1) + retval = ipermute (retval, perm); + endif endfunction diff --git a/scripts/statistics/base/skewness.m b/scripts/statistics/base/skewness.m --- a/scripts/statistics/base/skewness.m +++ b/scripts/statistics/base/skewness.m @@ -18,7 +18,7 @@ ## 02111-1307, USA. ## -*- texinfo -*- -## @deftypefn {Function File} {} skewness (@var{x}) +## @deftypefn {Function File} {} skewness (@var{x}, @var{dim}) ## If @var{x} is a vector of length @math{n}, return the skewness ## @iftex ## @tex @@ -35,36 +35,52 @@ ## @end ifinfo ## ## @noindent -## of @var{x}. If @var{x} is a matrix, return the row vector containing -## the skewness of each column. +## of @var{x}. If @var{x} is a matrix, return the skewness along the +## first non-singleton dimension of the matrix. If the optional +## @var{dim} argument is given, operate along this dimension. ## @end deftypefn ## Author: KH ## Created: 29 July 1994 ## Adapted-By: jwe -function retval = skewness (x) +function retval = skewness (x, dim) - if (nargin != 1) - usage ("skewness (x)"); + if (nargin != 1 && nargin != 2) + usage ("skewness (x, dim)"); endif - if (isvector (x)) - x = x - mean (x); - if (! any (x)) - retval = 0; - else - retval = sum (x .^ 3) / (length (x) * std (x) ^ 3); + nd = ndims (x); + sz = size (x); + if (nargin != 2) + %% Find the first non-singleton dimension + dim = 1; + while (dim < nd + 1 && sz (dim) == 1) + dim = dim + 1; + endwhile + if (dim > nd) + dim = 1; endif - elseif (ismatrix (x)) - [nr, nc] = size (x); - x = x - ones (nr, 1) * mean (x); - retval = zeros (1, nc); - s = std (x); - ind = find (s > 0); - retval (ind) = sum (x (:, ind) .^ 3) ./ (nr * s (ind) .^ 3); else + if (! (isscalar (dim) && dim == round (dim)) && dim > 0 && + dim < (nd + 1)) + error ("skewness: dim must be an integer and valid dimension"); + endif + endif + + if (! ismatrix (x)) error ("skewness: x has to be a matrix or a vector"); endif + c = sz (dim); + idx = ones (1, nd); + idx (dim) = c; + x = x - repmat (mean (x, dim), idx); + sz (dim) = 1; + retval = zeros (sz); + s = std (x, [], dim); + ind = find (s > 0); + x = sum (x .^ 3, dim); + retval (ind) = x (ind) ./ (c * s (ind) .^ 3); + endfunction diff --git a/scripts/statistics/base/statistics.m b/scripts/statistics/base/statistics.m --- a/scripts/statistics/base/statistics.m +++ b/scripts/statistics/base/statistics.m @@ -29,27 +29,54 @@ ## Author: KH ## Description: Compute basic statistics -function S = statistics (X) +function S = statistics (X, dim) - if (nargin != 1) - usage ("S = statistics (X)"); + if (nargin != 1 && nargin != 2) + usage ("S = statistics (X, dim)"); endif - if (prod (size (X)) > 1) - if (isvector (X)) - X = reshape (X, length (X), 1); + nd = ndims (X); + sz = size (X); + nel = numel (X); + if (nargin != 2) + %% Find the first non-singleton dimension + dim = 1; + while (dim < nd + 1 && sz (dim) == 1) + dim = dim + 1; + endwhile + if (dim > nd) + dim = 1; + endif + else + if (! (isscalar (dim) && dim == round (dim)) && dim > 0 && + dim < (nd + 1)) + error ("statistics: dim must be an integer and valid dimension"); endif - for k=1:columns(X) - S(:,k) = [(min (X(:,k))); - (empirical_inv ([0.25;0.5;0.75], X(:,k))); - (max (X(:,k))); - (mean (X(:,k))); - (std (X(:,k))); - (skewness (X(:,k))); - (kurtosis (X(:,k)))]; - endfor - else + endif + + if (! ismatrix (X) || sz (dim) < 2) error ("statistics: invalid argument"); - endif + endif + + ## This code is a bit heavy, but is needed until empirical_inv + ## takes other than vector arguments. + c = sz (dim); + stride = prod (sz (1:dim-1)); + sz (dim) = 3; + emp_inv = zeros (sz); + for i = 1 : nel / c; + offset = i; + offset2 = 0; + while (offset > stride) + offset -= stride; + offset2++; + endwhile + rng = [0 : c-1] * stride + offset + offset2 * stride * c; + rng2 = [0 : 2] * stride + offset + offset2 * stride * 3; + emp_inv(rng2) = empirical_inv ([0.25; 0.5; 0.75], X(rng)); + endfor + + S = cat (dim, min (X, [], dim), emp_inv, max (X, [], dim), mean (X, dim), + std (X, [], dim), skewness (X, dim), kurtosis (X, dim)); endfunction diff --git a/scripts/statistics/base/studentize.m b/scripts/statistics/base/studentize.m --- a/scripts/statistics/base/studentize.m +++ b/scripts/statistics/base/studentize.m @@ -18,34 +18,50 @@ ## 02111-1307, USA. ## -*- texinfo -*- -## @deftypefn {Function File} {} studentize (@var{x}) +## @deftypefn {Function File} {} studentize (@var{x}, @var{dim}) ## If @var{x} is a vector, subtract its mean and divide by its standard ## deviation. ## -## If @var{x} is a matrix, do the above for each column. +## If @var{x} is a matrix, do the above along the first non-singleton +## dimension. If the optional argument @var{dim} is given then operate +## along this dimension. ## @end deftypefn ## Author: KH ## Description: Subtract mean and divide by standard deviation -function t = studentize (x) +function t = studentize (x, dim) - if (nargin != 1) - usage ("studentize (x)"); + if (nargin != 1 && nargin != 2) + usage ("studentize (x, dim)"); endif - if isvector (x) - if (std (x) == 0) - t = zeros (size (x)); - else - t = (x - mean (x)) / std (x); + nd = ndims (x); + sz = size (x); + if (nargin != 2) + %% Find the first non-singleton dimension + dim = 1; + while (dim < nd + 1 && sz (dim) == 1) + dim = dim + 1; + endwhile + if (dim > nd) + dim = 1; endif - elseif ismatrix (x) - l = ones (rows (x), 1); - t = x - l * mean (x); - t = t ./ (l * max ([(std (t)); (! any (t))])); else + if (! (isscalar (dim) && dim == round (dim)) && dim > 0 && + dim < (nd + 1)) + error ("studentize: dim must be an integer and valid dimension"); + endif + endif + + if (! ismatrix (x)) error ("studentize: x must be a vector or a matrix"); endif -endfunction \ No newline at end of file + c = sz (dim); + idx = ones (1, nd); + idx (dim) = c; + t = x - repmat (mean (x, dim), idx); + t = t ./ repmat (max (cat (dim, std(t, [], dim), ! any (t, dim)), [], dim), idx); + +endfunction