diff scripts/strings/strtok.m @ 5827:1fe78adb91bc

[project @ 2006-05-22 06:25:14 by jwe]
author jwe
date Mon, 22 May 2006 06:25:14 +0000
parents
children 93c65f2a5668
line wrap: on
line diff
new file mode 100644
--- /dev/null
+++ b/scripts/strings/strtok.m
@@ -0,0 +1,141 @@
+## Copyright (C) 2000 Paul Kienzle
+##
+## This file is part of Octave.
+##
+## Octave is free software; you can redistribute it and/or modify it
+## under the terms of the GNU General Public License as published by
+## the Free Software Foundation; either version 2, or (at your option)
+## any later version.
+##
+## Octave is distributed in the hope that it will be useful, but
+## WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+## General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with Octave; see the file COPYING.  If not, write to the Free
+## Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+## 02110-1301, USA.
+
+## -*- texinfo -*-
+## @deftypefn {Function File} {[@var{tok}, @var{rem}] =} strtok (@var{str}, @var{delim})
+## 
+## Find all characters up to but not including the first character which
+## is in the string @var{delim}.  If @var{rem} is requested, it contains the
+## remainder of the string, starting at the first delimiter after the token.
+## Leading delimiters are ignored.  If @var{delim} is omitted, space is assumed.
+##
+## @end deftypefn
+
+## TODO: check what to do for a null delimiter
+
+function [tok, rem] = strtok (str, delim)
+
+  if (nargin < 1 || nargin > 2)
+    print_usage ();
+  endif
+
+  ## A missing or empty delimiter set means "split on a single space".
+  if (nargin < 2 || isempty (delim))
+    delim = " ";
+  endif
+
+  ## Nothing to scan: both outputs are the empty string.
+  if (isempty (str))
+    tok = "";
+    rem = "";
+    return;
+  endif
+  n = length (str);
+  if (length (delim) > 3)
+    ## Many delimiters: walk the string one character at a time.
+    first = 1;
+    while (first <= n)
+      if (all (str(first) != delim)), break; endif
+      first = first + 1;
+    endwhile
+    last = first;
+    while (last <= n)
+      if (any (str(last) == delim)), break; endif
+      last = last + 1;
+    endwhile
+    tok = str(first:last-1);
+    rem = str(last:n);
+    return;
+  endif
+  ## Up to three delimiters: locate every delimiter position at once.
+  switch (length (delim))
+    case 1
+      hits = find (str == delim);
+    case 2
+      hits = find (str == delim(1) | str == delim(2));
+    otherwise
+      hits = find (str == delim(1) | str == delim(2) | str == delim(3));
+  endswitch
+  tok = str; rem = "";   # result when str holds no delimiter at all
+  if (! isempty (hits))
+    gap = find (hits(:)' != 1:length (hits));   # first non-leading delimiter
+    if (isempty (gap))
+      tok = str(hits(length (hits))+1:n);   # only leading delimiters
+    else
+      tok = str(gap(1):hits(gap(1))-1);
+      rem = str(hits(gap(1)):n);
+    endif
+  endif
+
+endfunction
+
+%!demo
+%! strtok("this is the life")
+%! % no delimiter given, so the split is at the first space, returning "this"
+
+%!demo
+%! s = "14*27+31"
+%! while 1
+%!   [t,s] = strtok(s, "+-*/");
+%!   printf("<%s>", t);
+%!   if isempty(s), break; endif
+%!   printf("<%s>", s(1));
+%! endwhile
+%! printf("\n");
+%! % ----------------------------------------------------
+%! % Tokenizes a whole string by calling strtok repeatedly
+%! % with several delimiters.  Each token, and the delimiter
+%! % that ended it, is printed in angle brackets.  The
+%! % string is:
+
+%!# token returned for each delimiter position, using the default delimiter
+%!assert(strtok(""), "");             # no string
+%!assert(strtok("this"), "this");     # no delimiter in string
+%!assert(strtok("this "), "this");    # delimiter at end
+%!assert(strtok("this is"), "this");  # delimiter in middle
+%!assert(strtok(" this"), "this");    # delimiter at start
+%!assert(strtok(" this "), "this");   # delimiter at start and end
+%!assert(strtok(" "), ""(1:0));            # delimiter only
+
+%!# remainder returned for the same inputs, again using the default delimiter
+%!test [t,r] = strtok(""); assert(r, "");
+%!test [t,r] = strtok("this"); assert(r, "");
+%!test [t,r] = strtok("this "); assert(r, " ");
+%!test [t,r] = strtok("this is"); assert(r, " is");
+%!test [t,r] = strtok(" this"); assert(r, "");
+%!test [t,r] = strtok(" this "); assert(r, " ");
+%!test [t,r] = strtok(" "); assert(r, "");
+
+%!# simple check with 2 and 3 delimiters
+%!assert(strtok("this is", "i "), "th");
+%!assert(strtok("this is", "ij "), "th");
+
+%!# repeat every token case with 4 delimiters, because strtok switches
+%!# to a different algorithm when given more than 3 delimiters
+%!assert(strtok("","jkl "), "");
+%!assert(strtok("this","jkl "), "this");
+%!assert(strtok("this ","jkl "), "this");
+%!assert(strtok("this is","jkl "), "this");
+%!assert(strtok(" this","jkl "), "this");
+%!assert(strtok(" this ","jkl "), "this");
+%!assert(strtok(" ","jkl "), ""(1:0));
+
+%!# transposed (column) strings must give a transposed (column) token
+%!assert(strtok(" this "'), "this"');   # delimiter at start and end
+%!assert(strtok(" this "',"jkl "), "this"');