changeset 16556:03a28487fa9d

Collect collapsed delimiters in strsplit(). * scripts/strings/strsplit.m: Use regular expression to collect collapsed delimiters for DELIMITERTYPE = "regularexpression" and "simple". Use loop to collect collapsed delimiters for "legacy".
author Ben Abbott <bpabbott@mac.com>
date Tue, 23 Apr 2013 08:14:54 -0400
parents 8fc1f6535380
children 04fb96f4bea1
files scripts/strings/strsplit.m
diffstat 1 files changed, 41 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/scripts/strings/strsplit.m
+++ b/scripts/strings/strsplit.m
@@ -246,6 +246,16 @@
     if (nargout > 1)
       ## Grab the separators
       matches = num2cell (str(idx)(:)).';
+      if (args.collapsedelimiters)
+        ## Collapse the consecutive delimiters
+        ## TODO - is there a vectorized way?
+        for m = numel(matches):-1:2
+          if (strlens(m) == 0)
+            matches{m-1} = [matches{m-1:m}];
+            matches(m) = [];
+          endif
+        end
+      endif
     endif
     ## Remove separators.
     str(idx) = [];
@@ -262,10 +272,10 @@
       del = sprintf ('%s|', del{:});
       del(end) = [];
     endif
+    if (args.collapsedelimiters)
+      del = ["(", del, ")+"];
+    endif
     [result, ~, ~, ~, matches] = regexp (str, del, "split");
-    if (args.collapsedelimiters)
-      result(cellfun (@isempty, result)) = [];
-    endif
     if (strncmpi (args.delimitertype, "simple", length_deltype))
       matches = cellfun (@(x) regexp2simple (x, true), matches,
         "uniformoutput", false);
@@ -312,7 +322,7 @@
 %!test
 %! [s, m] = strsplit (str, {"\\s", "ain"}, true, "delimitertype", "r");
 %! assert (s, {"The", "r", "in", "Sp", "stays", "m", "ly", "in", "the", "pl", "."})
-%! assert (m, {" ", "ain", " ", " ", "ain", " ", " ", "ain", " ", " ", " ", "ain"})
+%! assert (m, {" ", "ain ", " ", "ain ", " ", "ain", " ", " ", " ", "ain"})
 % Split on " " and "ain", and treat multiple delimiters separately.
 %!test
 %! [s, m] = strsplit (str, {" ", "ain"}, "collapsedelimiters", false);
@@ -327,10 +337,6 @@
 %!assert (strsplit (["a,bc,,de"], ",", false), {"a", "bc", char(ones(1,0)), "de"})
 %!assert (strsplit (["a,bc,de"], ",", true), {"a", "bc", "de"})
 %!assert (strsplit (["a,bc,de"], {","," "}, true), {"a", "bc", "de"})
-%!test
-%! [s, m] = strsplit ("hello \t world", 1);
-%! assert (s, {"hello", "world"});
-%! assert (m, {" ", "\t", " "});
 
 %!assert (strsplit ("road to hell", " ", "delimitertype", "r"), {"road", "to", "hell"})
 %!assert (strsplit ("road to^hell", '\^| ', "delimitertype", "r"), {"road", "to", "hell"})
@@ -349,6 +355,33 @@
 %!assert (strsplit (["a,bc";",de"], ",", true, "delimitertype", "l"), {"a", "bc", "de "})
 %!assert (strsplit (["a,bc";",de"], ", ", true, "delimitertype", "l"), {"a", "bc", "de"})
 
+## Test "match" for consecutive delimiters
+%!test
+%! [a, m] = strsplit ("a\t \nb", '\s', 'delimitertype', 'regularexpression',
+%!   'collapsedelimiters', false);
+%! assert (a, {"a", "", "", "b"})
+%! assert (m, {"\t", " ", "\n"})
+%!test
+%! [a, m] = strsplit ("a\t \nb", '\s', false, 'delimitertype', 'regularexpression');
+%! assert (a, {"a", "", "", "b"})
+%! assert (m, {"\t", " ", "\n"})
+%!test
+%! [a, m] = strsplit ("a\t \nb", '\s', "delimitertype", "regularexpression");
+%! assert (a, {"a", "b"})
+%! assert (m, {"\t \n"})
+%!test
+%! [a, m] = strsplit ("a\t \nb", {"\t", " ", "\n"}, "delimitertype", "simple");
+%! assert (a, {"a", "b"})
+%! assert (m, {"\t \n"})
+%!test
+%! [a, m] = strsplit ("a123b", "123", "delimitertype", "legacy");
+%! assert (a, {"a", "b"})
+%! assert (m, {"123"})
+%!test
+%! [s, m] = strsplit ("hello \t world", 1);
+%! assert (s, {"hello", "world"});
+%! assert (m, {" \t "});
+
 %% Test input validation
 %!error strsplit ()
 %!error strsplit ("abc", "b", true, 4)