changeset 8877:2c8b2399247b

implement strsplit; deprecate split
author Jaroslav Hajek <highegg@gmail.com>
date Thu, 26 Feb 2009 10:29:59 +0100
parents a017b920530e
children ebb8c1dcf4d3
files scripts/ChangeLog scripts/deprecated/Makefile.in scripts/deprecated/split.m scripts/general/int2str.m scripts/general/num2str.m scripts/help/__makeinfo__.m scripts/help/lookfor.m scripts/miscellaneous/compare_versions.m scripts/miscellaneous/tar.m scripts/miscellaneous/unpack.m scripts/miscellaneous/what.m scripts/miscellaneous/zip.m scripts/pkg/pkg.m scripts/strings/Makefile.in scripts/strings/split.m scripts/strings/strsplit.m scripts/strings/strtok.m scripts/testfun/rundemos.m
diffstat 17 files changed, 103 insertions(+), 50 deletions(-) [+]
line wrap: on
line diff
--- a/scripts/ChangeLog
+++ b/scripts/ChangeLog
@@ -1,3 +1,23 @@
+2009-02-26  Jaroslav Hajek  <highegg@gmail.com>
+
+	* strings/strsplit.m: New function.
+	* strings/split.m: Move to deprecated/.
+	* strings/Makefile.in: Update.
+	* deprecated/Makefile.in: Update.
+
+	* general/int2str.m: Use strsplit instead of split.
+	* general/num2str.m: Ditto.
+	* help/__makeinfo__.m: Ditto.
+	* help/lookfor.m: Ditto.
+	* miscellaneous/compare_versions.m: Ditto.
+	* miscellaneous/tar.m: Ditto.
+	* miscellaneous/unpack.m: Ditto.
+	* miscellaneous/what.m: Ditto.
+	* miscellaneous/zip.m: Ditto.
+	* pkg/pkg.m: Ditto.
+	* strings/strtok.m: Ditto.
+	* testfun/rundemos.m: Ditto.
+
 2009-02-25  John W. Eaton  <jwe@octave.org>
 
 	* Makefile.in (distclean maintainer-clean): Remove tags and TAGS
--- a/scripts/deprecated/Makefile.in
+++ b/scripts/deprecated/Makefile.in
@@ -50,7 +50,7 @@
   pascal_rnd.m poisson_cdf.m poisson_inv.m poisson_pdf.m \
   poisson_rnd.m polyinteg.m setstr.m spatan2.m spchol2inv.m \
   spcholinv.m spcumprod.m spcumsum.m spchol.m spdet.m spdiag.m \
-  spfind.m spinv.m spkron.m splchol.m splu.m spmax.m spmin.m \
+  spfind.m spinv.m spkron.m splchol.m split.m splu.m spmax.m spmin.m \
   spprod.m spqr.m spsum.m spsumsq.m struct_contains.m \
   struct_elements.m t_cdf.m t_inv.m t_pdf.m t_rnd.m uniform_cdf.m \
   uniform_inv.m uniform_pdf.m uniform_rnd.m unmark_command.m \
rename from scripts/strings/split.m
rename to scripts/deprecated/split.m
--- a/scripts/strings/split.m
+++ b/scripts/deprecated/split.m
@@ -18,6 +18,10 @@
 
 ## -*- texinfo -*-
 ## @deftypefn {Function File} {} split (@var{s}, @var{t}, @var{n})
+## This function has been deprecated.  Use @code{char (strsplit (@var{s}, @var{t}))}
+## instead; @code{strsplit} has no counterpart of the optional @var{n} argument.
+## @end deftypefn
+
 ## Divides the string @var{s} into pieces separated by @var{t}, returning
 ## the result in a string array (padded with blanks to form a valid
 ## matrix).  If the optional input @var{n} is supplied, split @var{s}
--- a/scripts/general/int2str.m
+++ b/scripts/general/int2str.m
@@ -70,7 +70,7 @@
     endif
     tmp = sprintf (fmt, permute (x, [2, 1, 3 : nd]));
     tmp(end) = "";
-    retval = split (tmp, "\n");
+    retval = char (strsplit (tmp, "\n"));
   else
     print_usage ();
   endif
--- a/scripts/general/num2str.m
+++ b/scripts/general/num2str.m
@@ -142,7 +142,7 @@
     endwhile
 
     tmp(length (tmp)) = "";
-    retval = strtrim (split (tmp, "\n"));
+    retval = strtrim (char (strsplit (tmp, "\n")));  # trim as a matrix so rows stay aligned
   else
     if (nargin == 2)
       if (ischar (arg))
--- a/scripts/help/__makeinfo__.m
+++ b/scripts/help/__makeinfo__.m
@@ -107,7 +107,7 @@
         endif
       endif
       see_also_args = text (bracket_start+1:(stop-1));
-      see_also_args = strtrim (cellstr (split (see_also_args, ",")));
+      see_also_args = strtrim (strsplit (see_also_args, ","));
       expanded = see_also (see_also_args);
       text = strcat (text (1:start-1), expanded, text (stop+1:end));
     endif
--- a/scripts/help/lookfor.m
+++ b/scripts/help/lookfor.m
@@ -63,10 +63,10 @@
   endif
   
   ## Search functions in new path dirs.
-  orig_path = split_str (__pathorig__ (), pathsep ());
+  orig_path = strsplit (__pathorig__ (), pathsep ());
 
   ## ditto for path.
-  new_path = split_str (path (), pathsep ());
+  new_path = strsplit (path (), pathsep ());
 
   ## scratch out directories already covered by orig_path.
   if (had_core_cache)
@@ -183,30 +183,3 @@
   endif
 endfunction
 
-## split string using a separator (or more separators)
-## FIXME: maybe this function should be available to users?
-function s = split_str (p, sep)
-  if (isempty (p))
-    s = cell (size (p));
-  else
-    ## split p according to delimiter.
-    if (isscalar (sep))
-      ## single separator
-      idx = find (p == sep);
-    else
-      ## multiple separators
-      idx = strchr (p, sep);
-    endif
-
-    ## get substring sizes.
-    if (isempty (idx))
-      sizes = numel (p);
-    else
-      sizes = [idx(1)-1, diff(idx)-1, numel(p)-idx(end)];
-    endif
-    ## remove separators.
-    p(idx) = []; 
-    ## convert!
-    s = mat2cell (p, 1, sizes);
-  endif
-endfunction
--- a/scripts/miscellaneous/compare_versions.m
+++ b/scripts/miscellaneous/compare_versions.m
@@ -109,8 +109,8 @@
     v2nochar = v2;
   endif
 
-  v1n = str2num (split (v1nochar, "."));
-  v2n = str2num (split (v2nochar, "."));
+  v1n = str2num (char (strsplit (v1nochar, ".")));
+  v2n = str2num (char (strsplit (v2nochar, ".")));
   if ((isempty (v1n) && isempty (v1c)) || (isempty (v2n) && isempty(v2c)))
     error ("compare_versions: given version strings are not valid: %s %s",
 	   v1, v2);
--- a/scripts/miscellaneous/tar.m
+++ b/scripts/miscellaneous/tar.m
@@ -56,7 +56,7 @@
 	  if (output(end) == "\n")
 	    output(end) = [];
 	  endif
-          entries = cellstr (split (output, "\n"));
+          entries = strsplit (output, "\n")(:);  # column, as cellstr gave, so the transpose below yields a row
 	  entries = entries';
 	endif
       else
--- a/scripts/miscellaneous/unpack.m
+++ b/scripts/miscellaneous/unpack.m
@@ -194,7 +194,7 @@
     if (output(length (output)) == "\n")
       output(length (output)) = [];
     endif
-    files = parser (cellstr (split (output, "\n")))';
+    files = parser (strsplit (output, "\n"))';
 
     ## Move files if necessary
     if (needmove)
--- a/scripts/miscellaneous/what.m
+++ b/scripts/miscellaneous/what.m
@@ -33,9 +33,7 @@
     d = pwd ();
   elseif (isempty (strfind (d, filesep ())))
     ## Find the appropriate directory on the path.
-    p = split (path (), pathsep());
-    p = cellfun (@(x) deblank (x), mat2cell (p, ones (1, size (p, 1)), ...
-		size (p, 2)), "UniformOutput", false);
+    p = strtrim (strsplit (path (), pathsep()));
     d = p{find (cellfun (@(x) ! isempty (strfind (x, d)), p))(end)};
   else
     [status, msg, msgid] = fileattrib (d);
--- a/scripts/miscellaneous/zip.m
+++ b/scripts/miscellaneous/zip.m
@@ -56,7 +56,7 @@
 	    if (entries(end) == "\n")
 	      entries(end) = [];
 	    endif
-            entries = cellstr (split (entries, "\n"))';
+            entries = strsplit (entries, "\n");
 	  else
 	    error ("zip: zipinfo failed with exit status = %d", status);
 	  endif
--- a/scripts/pkg/pkg.m
+++ b/scripts/pkg/pkg.m
@@ -1754,12 +1754,7 @@
 ## Split the text into a cell array of strings by sep.
 ## Example: "A, B" => {"A", "B"} (with sep = ",")
 function out = split_by (text, sep)
-  text_matrix = split (text, sep);
-  num_words = size (text_matrix, 1);
-  out = cell (num_words, 1);
-  for i = 1:num_words
-    out{i} = strip (text_matrix(i, :));
-  endfor
+  out = strtrim (strsplit (text, sep));
 endfunction
 
 ## Create an INDEX file for a package that doesn't provide one.
--- a/scripts/strings/Makefile.in
+++ b/scripts/strings/Makefile.in
@@ -35,7 +35,7 @@
 
 SOURCES = base2dec.m bin2dec.m blanks.m deblank.m dec2base.m \
   dec2bin.m dec2hex.m findstr.m hex2dec.m index.m isletter.m isstrprop.m \
-  mat2str.m regexptranslate.m rindex.m split.m str2double.m \
+  mat2str.m regexptranslate.m rindex.m strsplit.m str2double.m \
   str2num.m strcat.m cstrcat.m strcmpi.m strchr.m strfind.m strjust.m strmatch.m \
   strncmpi.m strrep.m strtok.m strtrim.m strtrunc.m \
   substr.m validatestring.m
new file mode 100644
--- /dev/null
+++ b/scripts/strings/strsplit.m
@@ -0,0 +1,63 @@
+## Copyright (C) 2009 Jaroslav Hajek
+##
+## This program is free software; you can redistribute it and/or modify it
+## under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 3 of the License, or (at
+## your option) any later version.
+##
+## This program is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+## General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with this program; see the file COPYING.  If not, see
+## <http://www.gnu.org/licenses/>.
+
+## -*- texinfo -*-
+## @deftypefn {Function File} {[@var{s}] =} strsplit (@var{p}, @var{sep}, @var{strip_empty})
+## Split the single-row string @var{p} at each occurrence of any character of @var{sep}.
+## The result is returned as a cell array of strings.  Consecutive delimiters
+## and delimiters at boundaries result in empty strings, unless @var{strip_empty} is true.
+## The default value of @var{strip_empty} is false.
+## @end deftypefn
+
+function s = strsplit (p, sep, strip_empty = false)
+  ## Check the argument count first so a missing SEP yields the usage message.
+  if (nargin < 2 || ! ischar (p) || rows (p) > 1
+      || ! ischar (sep) || ! islogical (strip_empty))
+    print_usage ();
+  endif
+
+  if (isempty (p))
+    ## Nothing to split: return an empty cell shaped like the input.
+    s = cell (size (p));
+  else
+    ## Positions of every delimiter character in p.
+    if (isscalar (sep))
+      idx = find (p == sep);
+    else
+      idx = strchr (p, sep);
+    endif
+
+    ## Lengths of the pieces before, between and after the delimiters.
+    if (isempty (idx))
+      sizes = numel (p);
+    else
+      sizes = [idx(1)-1, diff(idx)-1, numel(p)-idx(end)];
+    endif
+    ## Drop the delimiters; the remaining characters total sum (sizes).
+    p(idx) = [];
+    if (strip_empty)
+      ## Zero-length pieces hold no characters, so dropping them keeps the sum.
+      sizes = sizes (sizes != 0);
+    endif
+    s = mat2cell (p, 1, sizes);
+  endif
+endfunction
+
+%!assert (all (strcmp (strsplit ("road to hell", " "), {"road", "to", "hell"})))
+
+%!assert (all (strcmp (strsplit ("road to^hell", " ^"), {"road", "to", "hell"})))
+
+%!assert (all (strcmp (strsplit ("road   to--hell", " -", true), {"road", "to", "hell"})))
--- a/scripts/strings/strtok.m
+++ b/scripts/strings/strtok.m
@@ -36,7 +36,7 @@
 ##         rem = *27+31
 ## @end group
 ## @end example
-## @seealso{index, split}
+## @seealso{index, strsplit}
 ## @end deftypefn
 
 ## FIXME: check what to do for a null delimiter
--- a/scripts/testfun/rundemos.m
+++ b/scripts/testfun/rundemos.m
@@ -25,7 +25,7 @@
 function rundemos (directory)
 
   if (nargin == 0)
-    dirs = cellstr (split (path (), pathsep ()));
+    dirs = strsplit (path (), pathsep ());
   elseif (nargin == 1)
     if (is_absolute_filename (directory))
       dirs = {directory};