# HG changeset patch # User Rik # Date 1318612501 25200 # Node ID 46e68badedb8636a62869c8535b3c745f8d024c8 # Parent 782dc237a02d3f93a9f9fb3375b01153cbe42d2a strsplit.m: Expand to accept 2-D character arrays. Improve input validation. * strsplit.m: Expand to accept 2-D character arrays. Improve input validation. Add tests. Document new feature. diff --git a/scripts/strings/strsplit.m b/scripts/strings/strsplit.m --- a/scripts/strings/strsplit.m +++ b/scripts/strings/strsplit.m @@ -17,53 +17,100 @@ ## <http://www.gnu.org/licenses/>. ## -*- texinfo -*- -## @deftypefn {Function File} {[@var{s}] =} strsplit (@var{p}, @var{sep}, @var{strip_empty}) -## Split a single string using one or more delimiters and return a cell +## @deftypefn {Function File} {[@var{cstr}] =} strsplit (@var{p}, @var{sep}, @var{strip_empty}) +## Split a string using one or more delimiters and return a cell ## array of strings. Consecutive delimiters and delimiters at ## boundaries result in empty strings, unless @var{strip_empty} is true. ## The default value of @var{strip_empty} is false. +## +## 2-D character arrays are split at delimiters and at the original column +## boundaries. +## +## Example: +## @example +## strsplit ("a,b,c", ",") +## @result{} +## @{ +## [1,1] = a +## [1,2] = b +## [1,3] = c +## @} +## +## strsplit (["a,b" ; "cde"], ",") +## @result{} +## @{ +## [1,1] = a +## [1,2] = b +## [1,3] = cde +## @} +## @group +## @end group +## @end example ## @seealso{strtok} ## @end deftypefn function s = strsplit (p, sep, strip_empty = false) - if (nargin < 2 || nargin > 3 || ! ischar (p) || rows (p) > 1 - || ! ischar (sep) || ! isscalar (strip_empty)) + if (nargin < 2 || nargin > 3) print_usage (); + elseif (! ischar (p) || ! ischar (sep)) + error ("strsplit: P and SEP must be string values"); + elseif (! isscalar (strip_empty)) + error ("strsplit: STRIP_EMPTY must be a scalar value"); endif if (isempty (p)) s = cell (size (p)); else - ## Split p according to delimiter. 
+ if (rows (p) > 1) + ## For 2-D arrays, add separator character at line boundaries + ## and transform to single string + p(:, end+1) = sep(1); + p = reshape (p.', 1, numel (p)); + p(end) = []; + endif + + ## Split p according to delimiter if (isscalar (sep)) - ## Single separator. + ## Single separator idx = find (p == sep); else - ## Multiple separators. + ## Multiple separators idx = strchr (p, sep); endif - ## Get substring sizes. + ## Get substring lengths. if (isempty (idx)) - sizes = numel (p); + strlens = length (p); else - sizes = [idx(1)-1, diff(idx)-1, numel(p)-idx(end)]; + strlens = [idx(1)-1, diff(idx)-1, numel(p)-idx(end)]; endif ## Remove separators. p(idx) = []; if (strip_empty) ## Omit zero lengths. - sizes = sizes (sizes != 0); + strlens = strlens(strlens != 0); endif + ## Convert! - s = mat2cell (p, 1, sizes); + s = mat2cell (p, 1, strlens); endif endfunction -%!assert (all (strcmp (strsplit ("road to hell", " "), {"road", "to", "hell"}))) + +%!assert (strsplit ("road to hell", " "), {"road", "to", "hell"}) +%!assert (strsplit ("road to^hell", " ^"), {"road", "to", "hell"}) +%!assert (strsplit ("road to--hell", " -", true), {"road", "to", "hell"}) +%!assert (strsplit (["a,bc";",de"], ","), {"a", "bc", ones(1,0), "de "}) +%!assert (strsplit (["a,bc";",de"], ",", true), {"a", "bc", "de "}) +%!assert (strsplit (["a,bc";",de"], ", ", true), {"a", "bc", "de"}) -%!assert (all (strcmp (strsplit ("road to^hell", " ^"), {"road", "to", "hell"}))) +%% Test input validation +%!error strsplit () +%!error strsplit ("abc") +%!error strsplit ("abc", "b", true, 4) +%!error

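<P and SEP must be string values> strsplit (123, "b") +%!error <P and SEP must be string values> strsplit ("abc", 1) +%!error <STRIP_EMPTY must be a scalar value> strsplit ("abc", "def", ones(3,3)) -%!assert (all (strcmp (strsplit ("road to--hell", " -", true), {"road", "to", "hell"})))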