changeset 11922:746f13936eee release-3-0-x

improve set functions for Matlab compatibility
author Jaroslav Hajek <highegg@gmail.com>
date Fri, 16 Jan 2009 08:10:28 +0100
parents 166a195399f7
children 304855b33b67
files scripts/ChangeLog scripts/set/intersect.m scripts/set/setdiff.m scripts/set/setxor.m scripts/set/union.m scripts/set/unique.m
diffstat 6 files changed, 216 insertions(+), 73 deletions(-) [+]
line wrap: on
line diff
--- a/scripts/ChangeLog
+++ b/scripts/ChangeLog
@@ -1,3 +1,12 @@
+2008-07-10  Jaroslav Hajek <highegg@gmail.com>
+
+	* set/unique.m: Implement 'first' and 'last', some simplifications.
+	* set/union.m: Implement output indices.
+	* set/intersect.m: Implement 'rows'.
+	* set/setdiff.m: Implement output indices.
+	* set/setxor.m: Implement 'rows' and output indices.
+	Add tests and adjust docs in all of the above.
+
 2008-01-04  Soren Hauberg   <hauberg@gmail.com>
 
 	* set/create_set.m, set/union.m: Accept "rows" argument.
--- a/scripts/set/intersect.m
+++ b/scripts/set/intersect.m
@@ -1,4 +1,5 @@
 ## Copyright (C) 2000, 2006, 2007 Paul Kienzle
+## Copyright (C) 2008 Jaroslav Hajek
 ##
 ## This file is part of Octave.
 ##
@@ -30,33 +31,50 @@
 ## @end deftypefn
 ## @seealso{unique, union, setxor, setdiff, ismember}
 
-function [c, ia, ib] = intersect (a, b)
-  if (nargin != 2)
+function [c, ia, ib] = intersect (a, b, varargin)
+
+  if (nargin < 2 || nargin > 3)
     print_usage ();
   endif
 
+  if (nargin == 3 && ! strcmpi (varargin{1}, "rows"))
+    error ("intersect: if a third input argument is present, it must be the string 'rows'");
+  endif
+
+
   if (isempty (a) || isempty (b))
     c = ia = ib = [];
   else
     ## form a and b into sets
-    [a, ja] = unique (a);
-    [b, jb] = unique (b);
-
-    c = [a(:); b(:)];
-    [c, ic] = sort (c);               ## [a(:);b(:)](ic) == c
-
-    if (iscellstr (c))
-      ii = find (strcmp (c(1:end-1), c(2:end)));
-    else
-      ii = find (c(1:end-1) == c(2:end));
+    if (nargout > 1)
+      [a, ja] = unique (a, varargin{:});
+      [b, jb] = unique (b, varargin{:});
     endif
 
-    c  = c(ii);                       ## The answer
-    ia = ja(ic(ii));                  ## a(ia) == c
-    ib = jb(ic(ii+1) - length (a));   ## b(ib) == c
+    if (nargin > 2)
+      c = [a; b];
+      [c, ic] = sortrows (c);
+      ii = find (all (c(1:end-1,:) == c(2:end,:), 2));
+      c = c(ii,:);
+    else
+      c = [a(:); b(:)];
+      [c, ic] = sort (c);               ## [a(:);b(:)](ic) == c
+      if (iscellstr (c))
+	ii = find (strcmp (c(1:end-1), c(2:end)));
+      else
+	ii = find (c(1:end-1) == c(2:end));
+      endif
+      c = c(ii);
+    endif
 
 
-    if (size (b, 1) == 1 || size (a, 1) == 1)
+    if (nargout > 1)
+      ia = ja(ic(ii));                  ## a(ia) == c
+      ib = jb(ic(ii+1) - length (a));   ## b(ib) == c
+    endif
+
+
+    if (nargin == 2 && (size (b, 1) == 1 || size (a, 1) == 1))
       c = c.';
     endif
   endif
@@ -74,3 +92,12 @@
 %! assert(ib,[5 1 2 6]);
 %! assert(a(ia),c);
 %! assert(b(ib),c);
+%!test
+%! a = [1,1,2;1,4,5;2,1,7];
+%! b = [1,4,5;2,3,4;1,1,2;9,8,7];
+%! [c,ia,ib] = intersect(a,b,'rows');
+%! assert(c,[1,1,2;1,4,5]);
+%! assert(ia,[1;2]);
+%! assert(ib,[3;1]);
+%! assert(a(ia,:),c);
+%! assert(b(ib,:),c);
--- a/scripts/set/setdiff.m
+++ b/scripts/set/setdiff.m
@@ -1,4 +1,5 @@
 ## Copyright (C) 2000, 2005, 2006, 2007 Paul Kienzle
+## Copyright (C) 2008 Jaroslav Hajek
 ##
 ## This file is part of Octave.
 ##
@@ -19,19 +20,22 @@
 ## -*- texinfo -*-
 ## @deftypefn {Function File} {} setdiff (@var{a}, @var{b})
 ## @deftypefnx {Function File} {} setdiff (@var{a}, @var{b}, "rows")
+## @deftypefnx {Function File} {[@var{c}, @var{i}] = } setdiff (@var{a}, @var{b})
 ## Return the elements in @var{a} that are not in @var{b}, sorted in
 ## ascending order.  If @var{a} and @var{b} are both column vectors
 ## return a column vector, otherwise return a row vector.
 ##
 ## Given the optional third argument @samp{"rows"}, return the rows in
 ## @var{a} that are not in @var{b}, sorted in ascending order by rows.
+##
+## If requested, return @var{i} such that @code{c = a(i)}.
 ## @seealso{unique, union, intersect, setxor, ismember}
 ## @end deftypefn
 
 ## Author: Paul Kienzle
 ## Adapted-by: jwe
 
-function c = setdiff (a, b, byrows_arg)
+function [c, i] = setdiff (a, b, byrows_arg)
 
   if (nargin < 2 || nargin > 3)
     print_usage ();
@@ -50,7 +54,11 @@
   endif
 
   if (byrows)
-    c = unique (a, "rows");
+    if (nargout > 1)
+      [c, i] = unique (a, "rows");
+    else
+      c = unique (a, "rows");
+    endif
     if (! isempty (c) && ! isempty (b))
       ## Form a and b into combined set.
       b = unique (b, "rows");
@@ -58,9 +66,16 @@
       ## Eliminate those elements of a that are the same as in b.
       dups = find (all (dummy(1:end-1,:) == dummy(2:end,:), 2));
       c(idx(dups),:) = [];
+      if (nargout > 1)
+	i(idx(dups),:) = [];
+      endif
     endif
   else
-    c = unique (a);
+    if (nargout > 1)
+      [c, i] = unique (a);
+    else
+      c = unique (a);
+    endif
     if (! isempty (c) && ! isempty (b))
       ## Form a and b into combined set.
       b = unique (b);
@@ -72,6 +87,9 @@
 	dups = find (dummy(1:end-1) == dummy(2:end));
       endif
       c(idx(dups)) = [];
+      if (nargout > 1)
+	i(idx(dups)) = [];
+      endif
       ## Reshape if necessary.
       if (size (c, 1) != 1 && size (b, 1) == 1)
 	c = c.';
@@ -88,3 +106,9 @@
 %!assert(setdiff([1; 2; 3; 4], [1; 2; 4], "rows"), 3)
 %!assert(setdiff([1, 2; 3, 4], [1, 2; 3, 6], "rows"), [3, 4])
 %!assert(setdiff({"one","two";"three","four"},{"one","two";"three","six"}), {"four"})
+
+%!test
+%! a = [3, 1, 4, 1, 5]; b = [1, 2, 3, 4];
+%! [y, i] = setdiff (a, b.');
+%! assert(y, [5]);
+%! assert(y, a(i));
--- a/scripts/set/setxor.m
+++ b/scripts/set/setxor.m
@@ -1,4 +1,5 @@
 ## Copyright (C) 2000, 2006, 2007 Paul Kienzle
+## Copyright (C) 2008 Jaroslav Hajek
 ##
 ## This file is part of Octave.
 ##
@@ -18,22 +19,38 @@
 
 ## -*- texinfo -*-
 ## @deftypefn {Function File} {} setxor (@var{a}, @var{b})
+## @deftypefnx {Function File} {} setxor (@var{a}, @var{b}, 'rows')
 ##
 ## Return the elements exclusive to @var{a} or @var{b}, sorted in ascending
 ## order. If @var{a} and @var{b} are both column vectors return a column
 ## vector, otherwise return a row vector.
 ##
+## @deftypefnx {Function File} {[@var{c}, @var{ia}, @var{ib}] =} setxor (@var{a}, @var{b})
+##
+## Return index vectors @var{ia} and @var{ib} such that @code{a==c(ia)} and
+## @code{b==c(ib)}.
+## 
 ## @seealso{unique, union, intersect, setdiff, ismember}
 ## @end deftypefn
 
-function c = setxor (a, b)
-  if (nargin != 2)
+function [c, ia, ib] = setxor (a, b, varargin)
+
+  if (nargin < 2 || nargin > 3)
     print_usage ();
   endif
 
+  if (nargin == 3 && ! strcmpi (varargin{1}, "rows"))
+    error ("setxor: if a third input argument is present, it must be the string 'rows'");
+  endif
+
   ## Form A and B into sets.
-  a = unique (a);
-  b = unique (b);
+  if (nargout > 1)
+    [a, ia] = unique (a, varargin{:});
+    [b, ib] = unique (b, varargin{:});
+  else
+    a = unique (a, varargin{:});
+    b = unique (b, varargin{:});
+  endif
 
   if (isempty (a))
     c = b;
@@ -41,16 +58,39 @@
     c = a;
   else
     ## Reject duplicates.
-    c = sort ([a(:); b(:)]);
-    n = length (c);
-    idx = find (c(1:n-1) == c(2:n));
-    if (! isempty (idx))
-      c([idx, idx+1]) = [];
-    endif
-    if (size (a, 1) == 1 || size (b, 1) == 1)
-      c = c.';
+    if (nargin > 2)
+      na = rows (a); nb = rows (b);
+      [c, i] = sortrows ([a; b]);
+      n = rows (c);
+      idx = find (all (c(1:n-1) == c(2:n), 2));
+      if (! isempty (idx))
+	c([idx, idx+1],:) = [];
+	i([idx, idx+1],:) = [];
+      endif
+    else
+      na = numel (a); nb = numel (b);
+      [c, i] = sort ([a(:); b(:)]);
+      n = length (c);
+      idx = find (c(1:n-1) == c(2:n));
+      if (! isempty (idx))
+	c([idx, idx+1]) = [];
+	i([idx, idx+1]) = [];
+      endif
+      if (size (a, 1) == 1 || size (b, 1) == 1)
+	c = c.';
+      endif
     endif
   endif
+  if (nargout > 1)
+    ia = ia(i(i <= na));
+    ib = ib(i(i > na) - na);
+  endif
+
 endfunction
 
 %!assert(setxor([1,2,3],[2,3,4]),[1,4])
+%!test
+%! a = [3, 1, 4, 1, 5]; b = [1, 2, 3, 4];
+%! [y, ia, ib] = setxor (a, b.');
+%! assert(y, [2, 5]);
+%! assert(y, sort([a(ia), b(ib)]));
--- a/scripts/set/union.m
+++ b/scripts/set/union.m
@@ -1,5 +1,6 @@
 ## Copyright (C) 1994, 1996, 1997, 1999, 2000, 2003, 2004, 2005, 2006,
 ##               2007, 2008 John W. Eaton
+## Copyright (C) 2008 Jaroslav Hajek
 ##
 ## This file is part of Octave.
 ##
@@ -18,10 +19,10 @@
 ## <http://www.gnu.org/licenses/>.
 
 ## -*- texinfo -*-
-## @deftypefn {Function File} {} union (@var{x}, @var{y})
-## @deftypefnx{Function File} {} union (@var{x}, @var{y}, "rows")
-## Return the set of elements that are in either of the sets @var{x} and
-## @var{y}.  For example,
+## @deftypefn {Function File} {} union (@var{a}, @var{b})
+## @deftypefnx{Function File} {} union (@var{a}, @var{b}, "rows")
+## Return the set of elements that are in either of the sets @var{a} and
+## @var{b}.  For example,
 ##
 ## @example
 ## @group
@@ -31,7 +32,7 @@
 ## @end example
 ##
 ## If the optional third input argument is the string "rows" each row of
-## the matrices @var{x} and @var{y} will be considered an element of sets.
+## the matrices @var{a} and @var{b} will be considered an element of sets.
 ## For example,
 ## @example
 ## @group
@@ -41,12 +42,18 @@
 ##     3   4
 ## @end group
 ## @end example
+##
+## @deftypefnx {Function File} {[@var{c}, @var{ia}, @var{ib}] =} union (@var{a}, @var{b})
+##
+## Return index vectors @var{ia} and @var{ib} such that @code{a==c(ia)} and
+## @code{b==c(ib)}.
+## 
 ## @seealso{create_set, intersection, complement}
 ## @end deftypefn
 
 ## Author: jwe
 
-function y = union (a, b, varargin)
+function [y, ia, ib] = union (a, b, varargin)
 
   if (nargin < 2 || nargin > 3)
     print_usage ();
@@ -56,21 +63,30 @@
     error ("union: if a third input argument is present, it must be the string 'rows'");
   endif
 
-  if (isempty (a))
-    y = create_set (b, varargin{:});
-  elseif (isempty (b))
-    y = create_set (a, varargin{:});
-  elseif (nargin == 2)
-    y = create_set ([a(:); b(:)]);
+  if (nargin == 2)
+    y = [a(:); b(:)];
+    na = numel (a); nb = numel (b);
     if (size (a, 1) == 1 || size (b, 1) == 1)
-      y = y(:).';
-    else
-      y = y(:);
+      y = y.';
     endif
   elseif (ndims (a) == 2 && ndims (b) == 2 && columns (a) == columns (b))
-    y = create_set ([a; b], "rows");
+    y = [a; b];
+    na = rows (a); nb = rows (b);
   else
     error ("union: input arguments must contain the same number of columns when \"rows\" is specified");
   endif
 
+  if (nargout == 1)
+    y = unique (y, varargin{:});
+  else
+    [y, i] = unique (y, varargin{:});
+    ia = i(i <= na);
+    ib = i(i > na) - na;
+  endif
+
+%!test
+%! a = [3, 1, 4, 1, 5]; b = [1, 2, 3, 4];
+%! [y, ia, ib] = union (a, b.');
+%! assert(y, [1, 2, 3, 4, 5]);
+%! assert(y, sort([a(ia), b(ib)]));
 endfunction
--- a/scripts/set/unique.m
+++ b/scripts/set/unique.m
@@ -1,4 +1,5 @@
 ## Copyright (C) 2000, 2001, 2005, 2006, 2007 Paul Kienzle
+## Copyright (C) 2008 Jaroslav Hajek
 ##
 ## This file is part of Octave.
 ##
@@ -31,19 +32,45 @@
 ##
 ## Return index vectors @var{i} and @var{j} such that @code{x(i)==y} and
 ## @code{y(j)==x}.
+## 
+## Additionally, one of 'first' or 'last' can be given as an argument.
+## 'last' (default) specifies that the highest possible indices are returned
+## in @var{i}, while 'first' means the lowest.
 ## @seealso{union, intersect, setdiff, setxor, ismember}
 ## @end deftypefn
 
-function [y, i, j] = unique (x, r)
+function [y, i, j] = unique (x, varargin)
 
-  if (nargin < 1 || nargin > 2 || (nargin == 2 && ! strcmp (r, "rows")))
+  if (nargin < 1)
     print_usage ();
   endif
 
-  if (nargin == 1)
+  ## parse options
+  if (iscellstr (varargin))
+    optfirst = strmatch ('first', varargin) > 0;
+    optlast = strmatch ('last', varargin) > 0;
+    optrows = strmatch ('rows', varargin) > 0 && size (x, 2) > 1;
+    if (optfirst && optlast)
+      error ("unique: cannot specify both 'last' and 'first'.");
+    elseif (optfirst + optlast + optrows != nargin-1)
+      error ("unique: invalid option.");
+    endif
+    optlast = ! optfirst;
+  else
+    error ("unique: options must be strings");
+  endif
+
+  if (iscell (x))
+    if (optrows)
+      warning ("unique: 'rows' is ignored for cell arrays");
+      optrows = false;
+    endif
+  endif
+
+  if (optrows)
+    n = size (x, 1);
+  else
     n = numel (x);
-  else
-    n = size (x, 1);
   endif
 
   y = x;
@@ -55,20 +82,9 @@
     return;
   endif
 
-  if (ischar (x))
-    y = toascii (y);
-  endif
-
-  if (nargin == 2 && size (y, 2) > 1)
+  if (optrows)
     [y, i] = sortrows (y);
-    if (iscell (y))
-      match = cellfun ("size", y(1:n-1,:), 1) == cellfun ("size", y(2:n,:), 1);
-      idx = find (match);
-      match(idx) = all (char (y(idx)) == char (y(idx+1)), 2);
-      match = all (match');
-    else
-      match = all ([y(1:n-1,:) == y(2:n,:)]');
-    endif
+    match = all (y(1:n-1,:) == y(2:n,:), 2);
     idx = find (match);
     y(idx,:) = [];
   else
@@ -77,11 +93,9 @@
     endif
     [y, i] = sort (y);
     if (iscell (y))
-      match = cellfun ("length", y(1:n-1)) == cellfun ("length", y(2:n));
-      idx = find(match);
-      match(idx) = all (char (y(idx)) == char (y(idx+1)), 2);
+      match = strcmp (y(1:n-1), y(2:n));
     else
-      match = [y(1:n-1) == y(2:n)];
+      match = (y(1:n-1) == y(2:n));
     endif
     idx = find (match);
     y(idx) = [];
@@ -93,11 +107,12 @@
     j = i;
     j(i) = cumsum (prepad (! match, n, 1));
   endif
-  i(idx) = [];
+  if (optfirst)
+    i(idx+1) = [];
+  else
+    i(idx) = [];
+  endif
 
-  if (ischar (x))
-    y = char (y);
-  endif
 
 endfunction
 
@@ -110,3 +125,15 @@
 %!assert(unique([1,NaN,Inf,NaN,Inf]),[1,Inf,NaN,NaN])
 %!assert(unique({'Foo','Bar','Foo'}),{'Bar','Foo'})
 %!assert(unique({'Foo','Bar','FooBar'}),{'Bar','Foo','FooBar'})
+
+%!test
+%! [a,i,j] = unique([1,1,2,3,3,3,4]);
+%! assert(a,[1,2,3,4])
+%! assert(i,[2,3,6,7])
+%! assert(j,[1,1,2,3,3,3,4])
+%!
+%!test
+%! [a,i,j] = unique([1,1,2,3,3,3,4],'first');
+%! assert(a,[1,2,3,4])
+%! assert(i,[1,3,4,7])
+%! assert(j,[1,1,2,3,3,3,4])