view scripts/strings/strsplit.m @ 20368:7503499a252b stable

doc: Update docstrings to have one sentence summary as first line. Update scripts in audio, elfun, general, geometry, and image directories. * scripts/audio/@audioplayer/__get_properties__.m, scripts/audio/@audioplayer/audioplayer.m, scripts/audio/@audioplayer/get.m, scripts/audio/@audioplayer/isplaying.m, scripts/audio/@audioplayer/play.m, scripts/audio/@audioplayer/playblocking.m, scripts/audio/@audioplayer/set.m, scripts/audio/@audioplayer/subsasgn.m, scripts/audio/@audioplayer/subsref.m, scripts/audio/@audiorecorder/audiorecorder.m, scripts/audio/@audiorecorder/get.m, scripts/audio/@audiorecorder/getaudiodata.m, scripts/audio/@audiorecorder/getplayer.m, scripts/audio/@audiorecorder/isrecording.m, scripts/audio/@audiorecorder/play.m, scripts/audio/@audiorecorder/record.m, scripts/audio/@audiorecorder/recordblocking.m, scripts/audio/@audiorecorder/set.m, scripts/audio/@audiorecorder/stop.m, scripts/audio/@audiorecorder/subsasgn.m, scripts/audio/@audiorecorder/subsref.m, scripts/audio/lin2mu.m, scripts/audio/mu2lin.m, scripts/audio/record.m, scripts/audio/sound.m, scripts/audio/soundsc.m, scripts/audio/wavread.m, scripts/audio/wavwrite.m, scripts/elfun/cosd.m, scripts/elfun/sind.m, scripts/elfun/tand.m, scripts/general/accumarray.m, scripts/general/accumdim.m, scripts/general/bitcmp.m, scripts/general/bitget.m, scripts/general/bitset.m, scripts/general/blkdiag.m, scripts/general/cart2pol.m, scripts/general/cart2sph.m, scripts/general/cell2mat.m, scripts/general/celldisp.m, scripts/general/chop.m, scripts/general/circshift.m, scripts/general/common_size.m, scripts/general/cplxpair.m, scripts/general/cumtrapz.m, scripts/general/dblquad.m, scripts/general/deal.m, scripts/general/del2.m, scripts/general/display.m, scripts/general/divergence.m, scripts/general/fieldnames.m, scripts/general/flip.m, scripts/general/flipdim.m, scripts/general/fliplr.m, scripts/general/flipud.m, scripts/general/gradient.m, scripts/general/interp3.m, scripts/general/interpft.m, scripts/general/interpn.m, scripts/general/loadobj.m, scripts/general/logspace.m, scripts/general/methods.m, scripts/general/nargchk.m, scripts/general/narginchk.m, scripts/general/nargoutchk.m, scripts/general/nextpow2.m, scripts/general/nthargout.m, scripts/general/num2str.m, scripts/general/pol2cart.m, scripts/general/polyarea.m, scripts/general/postpad.m, scripts/general/prepad.m, scripts/general/profile.m, scripts/general/quadgk.m, scripts/general/quadl.m, scripts/general/quadv.m, scripts/general/randi.m, scripts/general/rat.m, scripts/general/repmat.m, scripts/general/rot90.m, scripts/general/rotdim.m, scripts/general/saveobj.m, scripts/general/shift.m, scripts/general/shiftdim.m, scripts/general/sortrows.m, scripts/general/sph2cart.m, scripts/general/structfun.m, scripts/general/subsindex.m, scripts/general/trapz.m, scripts/general/triplequad.m, scripts/geometry/delaunayn.m, scripts/geometry/dsearch.m, scripts/geometry/dsearchn.m, scripts/geometry/griddata.m, scripts/geometry/griddata3.m, scripts/geometry/griddatan.m, scripts/geometry/inpolygon.m, scripts/geometry/rectint.m, scripts/geometry/tsearchn.m, scripts/geometry/voronoi.m, scripts/geometry/voronoin.m, scripts/help/__unimplemented__.m, scripts/help/doc.m, scripts/help/doc_cache_create.m, scripts/help/get_first_help_sentence.m, scripts/help/help.m, scripts/help/lookfor.m, scripts/help/print_usage.m, scripts/help/type.m, scripts/help/which.m, scripts/image/autumn.m, scripts/image/bone.m, scripts/image/brighten.m, scripts/image/cmpermute.m, scripts/image/colorcube.m, scripts/image/contrast.m, scripts/image/cool.m, scripts/image/copper.m, scripts/image/cubehelix.m, scripts/image/flag.m, scripts/image/gmap40.m, scripts/image/gray.m, scripts/image/gray2ind.m, scripts/image/hot.m, scripts/image/hsv.m, scripts/image/image.m, scripts/image/imagesc.m, scripts/image/imfinfo.m, scripts/image/imformats.m, scripts/image/imread.m, scripts/image/imshow.m, scripts/image/imwrite.m, scripts/image/iscolormap.m, scripts/image/jet.m, scripts/image/lines.m, scripts/image/ntsc2rgb.m, scripts/image/ocean.m, scripts/image/pink.m, scripts/image/prism.m, scripts/image/rainbow.m, scripts/image/rgb2ntsc.m, scripts/image/spinmap.m, scripts/image/spring.m, scripts/image/summer.m, scripts/image/white.m, scripts/image/winter.m: Update docstrings to have one sentence summary as first line. Re-structure to have line lengths <= 80 chars.
author Rik <rik@octave.org>
date Sun, 03 May 2015 09:36:20 -0700
parents 21dbaf135667
children df437a52bcaf
line wrap: on
line source

## Copyright (C) 2009-2015 Jaroslav Hajek
##
## This file is part of Octave.
##
## Octave is free software; you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 3 of the License, or (at
## your option) any later version.
##
## Octave is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Octave; see the file COPYING.  If not, see
## <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn  {Function File} {[@var{cstr}] =} strsplit (@var{str})
## @deftypefnx {Function File} {[@var{cstr}] =} strsplit (@var{str}, @var{del})
## @deftypefnx {Function File} {[@var{cstr}] =} strsplit (@dots{}, @var{name}, @var{value})
## @deftypefnx {Function File} {[@var{cstr}, @var{matches}] =} strsplit (@dots{})
## Split the string @var{str} using the delimiters specified by @var{del}
## and return a cell string array of substrings.
##
## If a delimiter is not specified the string is split at whitespace
## @code{@{" ", "\f", "\n", "\r", "\t", "\v"@}}.  Otherwise, the delimiter,
## @var{del} must be a string or cell array of strings.  By default,
## consecutive delimiters in the input string @var{s} are collapsed into one
## resulting in a single split.
##
## Supported @var{name}/@var{value} pair arguments are:
##
## @itemize
## @item @var{collapsedelimiters} which may take the value of @code{true}
## (default) or @code{false}.
##
## @item @var{delimitertype} which may take the value of @qcode{"simple"}
## (default) or @nospell{@qcode{"regularexpression"}}.  A simple delimiter
## matches the text exactly as written.  Otherwise, the syntax for regular
## expressions outlined in @code{regexp} is used.
## @end itemize
##
## The optional second output, @var{matches}, returns the delimiters which were
## matched in the original string.
##
## Examples with simple delimiters:
##
## @example
## strsplit ("a b c")
##       @result{}
##           @{
##             [1,1] = a
##             [1,2] = b
##             [1,3] = c
##           @}
##
## strsplit ("a,b,c", ",")
##       @result{}
##           @{
##             [1,1] = a
##             [1,2] = b
##             [1,3] = c
##           @}
##
## strsplit ("a foo b,bar c", @{" ", ",", "foo", "bar"@})
##       @result{}
##           @{
##             [1,1] = a
##             [1,2] = b
##             [1,3] = c
##           @}
##
## strsplit ("a,,b, c", @{",", " "@}, "collapsedelimiters", false)
##       @result{}
##           @{
##             [1,1] = a
##             [1,2] =
##             [1,3] = b
##             [1,4] =
##             [1,5] = c
##           @}
##
## @end example
##
## Examples with @nospell{regularexpression} delimiters:
##
## @smallexample
## strsplit ("a foo b,bar c", ',|\s|foo|bar', "delimitertype", "regularexpression")
##       @result{}
##           @{
##             [1,1] = a
##             [1,2] = b
##             [1,3] = c
##           @}
##
## strsplit ("a,,b, c", '[, ]', "collapsedelimiters", false, "delimitertype", "regularexpression")
##       @result{}
##           @{
##             [1,1] = a
##             [1,2] =
##             [1,3] = b
##             [1,4] =
##             [1,5] = c
##           @}
##
## strsplit ("a,\t,b, c", @{',', '\s'@}, "delimitertype", "regularexpression")
##       @result{}
##           @{
##             [1,1] = a
##             [1,2] = b
##             [1,3] = c
##           @}
##
## strsplit ("a,\t,b, c", @{',', ' ', '\t'@}, "collapsedelimiters", false)
##       @result{}
##           @{
##             [1,1] = a
##             [1,2] =
##             [1,3] =
##             [1,4] = b
##             [1,5] =
##             [1,6] = c
##           @}
## @end smallexample
##
## @seealso{ostrsplit, strjoin, strtok, regexp}
## @end deftypefn

function [cstr, matches] = strsplit (str, del, varargin)

  args.collapsedelimiters = true;
  args.delimitertype = "simple";

  [reg, params] = parseparams (varargin);

  if (nargin < 1)
    print_usage ();
  elseif (numel (reg) > 1)
    print_usage ();
  elseif (numel (reg) == 1)
    ## This is undocumented behavior to accept a logical 3rd arg.
    if (islogical (reg{1}))
      args.collapsedelimiters = reg{1};
    else
      print_usage ();
    endif
  endif
  fields = fieldnames (args);
  for n = 1:2:numel (params)
    if (any (strcmpi (params{n}, fields)))
      args.(tolower (params{n})) = params{n+1};
    elseif (ischar (varargin{n}))
      error ("strsplit:invalid_parameter_name",
             "strsplit: invalid parameter name, '%s'", varargin{n});
    else
      print_usage ();
    endif
  endfor

  ## Save the length of the "delimitertype" parameter
  length_deltype = length (args.delimitertype);

  if (nargin == 1 || (nargin > 1 && islogical (del)))
    if (nargin > 1)
      ## Second input is the "collapsedelimiters" parameter
      args.collapsedelimiters = del;
    endif
    ## Set proper default for the delimiter type
    if (strncmpi (args.delimitertype, "simple", length_deltype))
      del = {" ", "\f", "\n", "\r", "\t", "\v"};
    else
      del = '\s';
    endif
  endif

  if (! ischar (str) || (! ischar (del) && ! iscellstr (del)))
    error ("strsplit: S and DEL must be string values");
  elseif (! isscalar (args.collapsedelimiters))
    error ("strsplit: COLLAPSEDELIMITERS must be a scalar value");
  endif

  if (strncmpi (args.delimitertype, "simple", length_deltype))
    is_simple = true; 
  elseif (strncmpi (args.delimitertype, "regularexpression", length_deltype))
    is_simple = false; 
  else
    error ("strsplit:invalid_delimitertype", "strsplit: Invalid DELIMITERTYPE");
  endif

  if (is_simple)
    if (iscellstr (del))
      del = cellfun (@do_string_escapes, del, "uniformoutput", false);
    else
      del = do_string_escapes (del);
    endif
    ## Escape characters which have a special meaning in regexp.
    del = regexprep (del, '([{}()[\]^$.*?|\\])', '\\$1');
  endif

  if (isempty (str))
    cstr = {str};
  else
    if (iscellstr (del))
      del = sprintf ("%s|", del{:});
      del(end) = [];
    endif
    if (args.collapsedelimiters)
      del = [ "(" del ")+" ];
    endif
    [cstr, matches] = regexp (str, del, "split", "match");
  endif

endfunction


%!shared str
%! str = "The rain in Spain stays mainly in the plain.";

## Split on all whitespace.
%!assert (strsplit (str), {"The", "rain", "in", "Spain", "stays", ...
%!                         "mainly", "in", "the", "plain."})
## Split on "ain".
%!assert (strsplit (str, "ain"), {"The r", " in Sp", " stays m", ...
%!                                "ly in the pl", "."})
## Split on " " and "ain" (treating multiple delimiters as one).
%!test
%! s = strsplit (str, '\s|ain', true, "delimitertype", "r");
%! assert (s, {"The", "r", "in", "Sp", "stays", ...
%!             "m", "ly", "in", "the", "pl", "."});
%!test
%! s = strsplit (str, '\s|ain', true, "delimitertype", "r");
%! assert (s, {"The", "r", "in", "Sp", "stays", ...
%!             "m", "ly", "in", "the", "pl", "."});
%!test
%! [s, m] = strsplit (str, {'\s', 'ain'}, true, "delimitertype", "r");
%! assert (s, {"The", "r", "in", "Sp", "stays", ...
%!             "m", "ly", "in", "the", "pl", "."});
%! assert (m, {" ", "ain ", " ", "ain ", " ", "ain", " ", " ", " ", "ain"});
## Split on " " and "ain", and treat multiple delimiters separately.
%!test
%! [s, m] = strsplit (str, {" ", "ain"}, "collapsedelimiters", false);
%! assert (s, {"The", "r", "", "in", "Sp", "", "stays", ...
%!             "m", "ly", "in", "the", "pl", "."});
%! assert (m, {" ", "ain", " ", " ", "ain", " ", " ", "ain", ...
%!             " ", " ", " ", "ain"});

%!assert (strsplit ("road to hell"), {"road", "to", "hell"})
%!assert (strsplit ("road to hell", " "), {"road", "to", "hell"})
%!assert (strsplit ("road to^hell", {" ","^"}), {"road", "to", "hell"})
%!assert (strsplit ("road   to--hell", {" ","-"}, true), {"road", "to", "hell"})
%!assert (strsplit (["a,bc,,de"], ",", false, "delimitertype", "s"), {"a", "bc", "", "de"})
%!assert (strsplit (["a,bc,,de"], ",", false), {"a", "bc", "", "de"})
%!assert (strsplit (["a,bc,de"], ",", true), {"a", "bc", "de"})
%!assert (strsplit (["a,bc,de"], {","," "}, true), {"a", "bc", "de"})

%!assert (strsplit ("road to hell", " ", "delimitertype", "r"), {"road", "to", "hell"})
%!assert (strsplit ("road to^hell", '\^| ', "delimitertype", "r"), {"road", "to", "hell"})
%!assert (strsplit ("road to^hell", "[ ^]", "delimitertype", "r"), {"road", "to", "hell"})
%!assert (strsplit ("road   to--hell", "[ -]", false, "delimitertype", "r"), {"road", "", "", "to", "", "hell"})
%!assert (strsplit (["a,bc,de"], ",", "delimitertype", "r"), {"a", "bc", "de"})
%!assert (strsplit (["a,bc,,de"], ",", false, "delimitertype", "r"), {"a", "bc", "", "de"})
%!assert (strsplit (["a,bc,de"], ",", true, "delimitertype", "r"), {"a", "bc", "de"})
%!assert (strsplit (["a,bc,de"], "[, ]", true, "delimitertype", "r"), {"a", "bc", "de"})
%!assert (strsplit ("hello \t world", true, "delimitertype", "r"), {"hello", "world"});

%!assert (strsplit ("foo\tbar", '\t', "delimitertype", "r"), {"foo", "bar"})
%!assert (strsplit ("foo\tbar", '\t', "delimitertype", "s"), {"foo", "bar"})

## Test "match" for consecutive delmiters
%!test
%! [a, m] = strsplit ("a\t \nb", '\s', "delimitertype", "regularexpression",
%!   "collapsedelimiters", false);
%! assert (a, {"a", "", "", "b"})
%! assert (m, {"\t", " ", "\n"})
%!test
%! [a, m] = strsplit ("a\t \nb", '\s', false, "delimitertype", "regularexpression");
%! assert (a, {"a", "", "", "b"})
%! assert (m, {"\t", " ", "\n"})
%!test
%! [a, m] = strsplit ("a\t \nb", '\s', "delimitertype", "regularexpression");
%! assert (a, {"a", "b"})
%! assert (m, {"\t \n"})
%!test
%! [a, m] = strsplit ("a\t \nb", {"\t", " ", "\n"}, "delimitertype", "simple");
%! assert (a, {"a", "b"})
%! assert (m, {"\t \n"})
%!test
%! [s, m] = strsplit ("hello \t world", true);
%! assert (s, {"hello", "world"});
%! assert (m, {" \t "});

## Compatibility
%! assert (strsplit ("", "a"), {""})
%! assert (strsplit ("a", "a"), {"", ""})
%! assert (strsplit ("aa", "a"), {"", ""})
%! assert (strsplit ("aaa", "a"), {"", ""})

## Bug #44641
%!assert (strsplit ("xxx<yyy", "<"), {"xxx", "yyy"}) 
%!assert (strsplit ('xxx\yyy', '\'), {"xxx", "yyy"}) 

## Test input validation
%!error strsplit ()
%!error strsplit ("abc", "b", true, 4)
%!error <invalid parameter name, 'foo'> strsplit ("abc", "b", "foo", "true")
%!error <S and DEL must be string values> strsplit (123, "b")
%!error <COLLAPSEDELIMITERS must be a scalar value> strsplit ("abc", "def", "collapsedelimiters", ones (3,3))
%!error <Invalid DELIMITERTYPE> strsplit ("abc", "b", "delimitertype", "foobar")