Mercurial > hg > octave-nkf
changeset 16554:03a28487fa9d
Collect collapsed delimiters in strsplit().
* scripts/strings/strsplit.m: Use regular expression to collect collapsed
delimiters for DELIMITERTYPE = "regularexpression" and "simple". Use loop
to collect collapsed delimiters for "legacy".
author | Ben Abbott <bpabbott@mac.com> |
---|---|
date | Tue, 23 Apr 2013 08:14:54 -0400 |
parents | 8fc1f6535380 |
children | 04fb96f4bea1 |
files | scripts/strings/strsplit.m |
diffstat | 1 files changed, 41 insertions(+), 8 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/scripts/strings/strsplit.m
+++ b/scripts/strings/strsplit.m
@@ -246,6 +246,16 @@
   if (nargout > 1)
     ## Grab the separators
     matches = num2cell (str(idx)(:)).';
+    if (args.collapsedelimiters)
+      ## Collapse the consequtive delimiters
+      ## TODO - is there a vectorized way?
+      for m = numel(matches):-1:2
+        if (strlens(m) == 0)
+          matches{m-1} = [matches{m-1:m}];
+          matches(m) = [];
+        endif
+      end
+    endif
   endif
   ## Remove separators.
   str(idx) = [];
@@ -262,10 +272,10 @@
       del = sprintf ('%s|', del{:});
       del(end) = [];
     endif
+    if (args.collapsedelimiters)
+      del = ["(", del, ")+"];
+    endif
     [result, ~, ~, ~, matches] = regexp (str, del, "split");
-    if (args.collapsedelimiters)
-      result(cellfun (@isempty, result)) = [];
-    endif
     if (strncmpi (args.delimitertype, "simple", length_deltype))
       matches = cellfun (@(x) regexp2simple (x, true), matches,
                          "uniformoutput", false);
@@ -312,7 +322,7 @@
 %!test
 %! [s, m] = strsplit (str, {"\\s", "ain"}, true, "delimitertype", "r");
 %! assert (s, {"The", "r", "in", "Sp", "stays", "m", "ly", "in", "the", "pl", "."})
-%! assert (m, {" ", "ain", " ", " ", "ain", " ", " ", "ain", " ", " ", " ", "ain"})
+%! assert (m, {" ", "ain ", " ", "ain ", " ", "ain", " ", " ", " ", "ain"})
 % Split on " " and "ain", and treat multiple delimiters separately.
 %!test
 %! [s, m] = strsplit (str, {" ", "ain"}, "collapsedelimiters", false);
@@ -327,10 +337,6 @@
 %!assert (strsplit (["a,bc,,de"], ",", false), {"a", "bc", char(ones(1,0)), "de"})
 %!assert (strsplit (["a,bc,de"], ",", true), {"a", "bc", "de"})
 %!assert (strsplit (["a,bc,de"], {","," "}, true), {"a", "bc", "de"})
-%!test
-%! [s, m] = strsplit ("hello \t world", 1);
-%! assert (s, {"hello", "world"});
-%! assert (m, {" ", "\t", " "});
 
 %!assert (strsplit ("road to hell", " ", "delimitertype", "r"), {"road", "to", "hell"})
 %!assert (strsplit ("road to^hell", '\^| ', "delimitertype", "r"), {"road", "to", "hell"})
@@ -349,6 +355,33 @@
 %!assert (strsplit (["a,bc";",de"], ",", true, "delimitertype", "l"), {"a", "bc", "de "})
 %!assert (strsplit (["a,bc";",de"], ", ", true, "delimitertype", "l"), {"a", "bc", "de"})
 
+## Test "match" for consecutive delmiters
+%!test
+%! [a, m] = strsplit ("a\t \nb", '\s', 'delimitertype', 'regularexpression',
+%! 'collapsedelimiters', false);
+%! assert (a, {"a", "", "", "b"})
+%! assert (m, {"\t", " ", "\n"})
+%!test
+%! [a, m] = strsplit ("a\t \nb", '\s', false, 'delimitertype', 'regularexpression');
+%! assert (a, {"a", "", "", "b"})
+%! assert (m, {"\t", " ", "\n"})
+%!test
+%! [a, m] = strsplit ("a\t \nb", '\s', "delimitertype", "regularexpression");
+%! assert (a, {"a", "b"})
+%! assert (m, {"\t \n"})
+%!test
+%! [a, m] = strsplit ("a\t \nb", {"\t", " ", "\n"}, "delimitertype", "simple");
+%! assert (a, {"a", "b"})
+%! assert (m, {"\t \n"})
+%!test
+%! [a, m] = strsplit ("a123b", "123", "delimitertype", "legacy");
+%! assert (a, {"a", "b"})
+%! assert (m, {"123"})
+%!test
+%! [s, m] = strsplit ("hello \t world", 1);
+%! assert (s, {"hello", "world"});
+%! assert (m, {" \t "});
+
 %% Test input validation
 %!error strsplit ()
 %!error strsplit ("abc", "b", true, 4)
```