Mercurial > hg > octave-lyh
annotate scripts/strings/strtok.m @ 13304:e1524d82f8e0
strtrunc.m: Fix errors in strread, textscan introduced by changeset 86d18a3cc911
* strtrunc.m: Fix errors in strread, textscan introduced by changeset 86d18a3cc911
author | Rik <octave@nomad.inbox5.com> |
---|---|
date | Sat, 08 Oct 2011 20:14:35 -0700 |
parents | c792872f8942 |
children | fa94d6a93d45 |
rev | line source |
---|---|
11523 | 1 ## Copyright (C) 2000-2011 Paul Kienzle |
5827 | 2 ## |
3 ## This file is part of Octave. | |
4 ## | |
5 ## Octave is free software; you can redistribute it and/or modify it | |
6 ## under the terms of the GNU General Public License as published by | |
7016 | 7 ## the Free Software Foundation; either version 3 of the License, or (at |
8 ## your option) any later version. | |
5827 | 9 ## |
10 ## Octave is distributed in the hope that it will be useful, but | |
11 ## WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 ## General Public License for more details. | |
14 ## | |
15 ## You should have received a copy of the GNU General Public License | |
7016 | 16 ## along with Octave; see the file COPYING. If not, see |
17 ## <http://www.gnu.org/licenses/>. | |
5827 | 18 |
19 ## -*- texinfo -*- | |
20 ## @deftypefn {Function File} {[@var{tok}, @var{rem}] =} strtok (@var{str}, @var{delim}) | |
11587
c792872f8942
all script files: untabify and strip trailing whitespace
John W. Eaton <jwe@octave.org>
parents:
11523
diff
changeset
|
21 ## |
5827 | 22 ## Find all characters in the string @var{str} up to, but not including, the first |
23 ## character which is in the string @var{delim}. If @var{rem} is requested, it contains the | |
9036
58604c45ca74
Cleanup of data types related documentation
Rik <rdrider0-list@yahoo.com>
parents:
8920
diff
changeset
|
24 ## remainder of the string, starting at the first delimiter that follows the token. Leading |
8442
502e58a0d44f
Fix docstrings, add examples, references and tests to string functions
Thorsten Meyer <thorsten.meyier@gmx.de>
parents:
8202
diff
changeset
|
25 ## delimiters are ignored. If @var{delim} is not specified, space is |
11587
c792872f8942
all script files: untabify and strip trailing whitespace
John W. Eaton <jwe@octave.org>
parents:
11523
diff
changeset
|
26 ## assumed. For example: |
8442
502e58a0d44f
Fix docstrings, add examples, references and tests to string functions
Thorsten Meyer <thorsten.meyier@gmx.de>
parents:
8202
diff
changeset
|
27 ## |
502e58a0d44f
Fix docstrings, add examples, references and tests to string functions
Thorsten Meyer <thorsten.meyier@gmx.de>
parents:
8202
diff
changeset
|
28 ## @example |
502e58a0d44f
Fix docstrings, add examples, references and tests to string functions
Thorsten Meyer <thorsten.meyier@gmx.de>
parents:
8202
diff
changeset
|
29 ## @group |
502e58a0d44f
Fix docstrings, add examples, references and tests to string functions
Thorsten Meyer <thorsten.meyier@gmx.de>
parents:
8202
diff
changeset
|
30 ## strtok ("this is the life") |
502e58a0d44f
Fix docstrings, add examples, references and tests to string functions
Thorsten Meyer <thorsten.meyier@gmx.de>
parents:
8202
diff
changeset
|
31 ## @result{} "this" |
5827 | 32 ## |
8442
502e58a0d44f
Fix docstrings, add examples, references and tests to string functions
Thorsten Meyer <thorsten.meyier@gmx.de>
parents:
8202
diff
changeset
|
33 ## [tok, rem] = strtok ("14*27+31", "+-*/") |
502e58a0d44f
Fix docstrings, add examples, references and tests to string functions
Thorsten Meyer <thorsten.meyier@gmx.de>
parents:
8202
diff
changeset
|
34 ## @result{} |
502e58a0d44f
Fix docstrings, add examples, references and tests to string functions
Thorsten Meyer <thorsten.meyier@gmx.de>
parents:
8202
diff
changeset
|
35 ## tok = 14 |
502e58a0d44f
Fix docstrings, add examples, references and tests to string functions
Thorsten Meyer <thorsten.meyier@gmx.de>
parents:
8202
diff
changeset
|
36 ## rem = *27+31 |
502e58a0d44f
Fix docstrings, add examples, references and tests to string functions
Thorsten Meyer <thorsten.meyier@gmx.de>
parents:
8202
diff
changeset
|
37 ## @end group |
502e58a0d44f
Fix docstrings, add examples, references and tests to string functions
Thorsten Meyer <thorsten.meyier@gmx.de>
parents:
8202
diff
changeset
|
38 ## @end example |
8877
2c8b2399247b
implement strsplit; deprecate split
Jaroslav Hajek <highegg@gmail.com>
parents:
8442
diff
changeset
|
39 ## @seealso{index, strsplit} |
5827 | 40 ## @end deftypefn |
41 | |
8202
cf59d542f33e
replace all TODOs and XXXs with FIXMEs
Jaroslav Hajek <highegg@gmail.com>
parents:
8002
diff
changeset
|
42 ## FIXME: check what to do for a null delimiter |
5827 | 43 |
44 function [tok, rem] = strtok (str, delim) | |
## Split STR into its first token TOK and the remainder REM.
## Leading delimiter characters are skipped; REM begins at the first
## delimiter found after the token and is empty when there is none.
45 | |
## Exactly one or two input arguments are accepted.
46 if (nargin<1 || nargin > 2) | |
47 print_usage (); | |
48 endif | |
49 | |
## A missing or empty DELIM falls back to the default set:
## TAB, LF, VT, FF, CR and space.
50 if (nargin < 2 || isempty (delim)) | |
11587
c792872f8942
all script files: untabify and strip trailing whitespace
John W. Eaton <jwe@octave.org>
parents:
11523
diff
changeset
|
51 delim = "\t\n\v\f\r "; |
5827 | 52 endif |
53 | |
54 if (isempty (str)) | |
55 tok = rem = ""; | |
## More than three delimiters: walk the string one character at a time.
56 elseif (length (delim) > 3) | |
57 start = 1; | |
58 len = length (str); | |
## Advance START past leading delimiters; it ends at the first
## non-delimiter character, or at len+1 if STR has none.
59 while (start <= len) | |
60 if (all (str(start) != delim)) | |
11587
c792872f8942
all script files: untabify and strip trailing whitespace
John W. Eaton <jwe@octave.org>
parents:
11523
diff
changeset
|
61 break; |
5827 | 62 endif |
63 start++; | |
64 endwhile | |
## Advance STOP from START to the next delimiter, or to len+1 if
## the token runs to the end of STR.
65 stop = start; | |
66 while (stop <= len) | |
67 if (any (str(stop) == delim)) | |
11587
c792872f8942
all script files: untabify and strip trailing whitespace
John W. Eaton <jwe@octave.org>
parents:
11523
diff
changeset
|
68 break; |
5827 | 69 endif |
70 stop++; | |
71 endwhile | |
72 tok = str(start:stop-1); | |
73 rem = str(stop:len); | |
## One to three delimiters: locate every delimiter position at once
## with element-wise comparisons instead of scanning.
74 else | |
75 if (length (delim) == 1) | |
76 idx = find (str == delim); | |
77 elseif (length (delim) == 2) | |
78 idx = find (str == delim(1) | str == delim(2)); | |
79 else | |
80 idx = find (str == delim(1) | str == delim(2) | str == delim(3)); | |
81 endif | |
## No delimiter anywhere: the whole string is the token.
82 if (isempty (idx)) | |
83 tok = str; | |
84 rem = ""; | |
85 else | |
86 ## Find first non-leading delimiter: leading delimiters occupy positions 1, 2, ..., so idx(j) == j holds only for them. | |
87 skip = find (idx(:)' != 1:length(idx)); | |
## Empty SKIP means every delimiter is a leading one, so the token
## is everything after the last delimiter and nothing remains.
88 if (isempty (skip)) | |
10549 | 89 tok = str(idx(length(idx))+1:length(str)); |
90 rem = ""; | |
5827 | 91 else |
## skip(1)-1 leading delimiters precede the token, so it starts at
## position skip(1) and ends just before delimiter idx(skip(1)).
10549 | 92 tok = str(skip(1):idx(skip(1))-1); |
93 rem = str(idx(skip(1)):length(str)); | |
5827 | 94 endif |
95 endif | |
96 endif | |
97 | |
98 endfunction | |
99 | |
100 %!demo | |
101 %! strtok("this is the life") | |
102 %! % split at the first space, returning "this" | |
103 | |
104 %!demo | |
105 %! s = "14*27+31" | |
106 %! while 1 | |
107 %! [t,s] = strtok(s, "+-*/"); | |
108 %! printf("<%s>", t); | |
109 %! if isempty(s), break; endif | |
110 %! printf("<%s>", s(1)); | |
111 %! endwhile | |
112 %! printf("\n"); | |
113 %! % ---------------------------------------------------- | |
114 %! % Demonstrates processing of an entire string split on | |
11587
c792872f8942
all script files: untabify and strip trailing whitespace
John W. Eaton <jwe@octave.org>
parents:
11523
diff
changeset
|
115 %! % a variety of delimiters. Tokens and delimiters are |
5827 | 116 %! % printed one after another in angle brackets. The |
117 %! % string is: | |
118 | |
119 %!# test the tokens for all cases | |
120 %!assert(strtok(""), ""); # no string | |
121 %!assert(strtok("this"), "this"); # no delimiter in string | |
122 %!assert(strtok("this "), "this"); # delimiter at end | |
123 %!assert(strtok("this is"), "this"); # delimiter in middle | |
124 %!assert(strtok(" this"), "this"); # delimiter at start | |
125 %!assert(strtok(" this "), "this"); # delimiter at start and end | |
126 %!assert(strtok(" "), ""(1:0)); # delimiter only | |
127 | |
128 %!# test the remainder for all cases | |
129 %!test [t,r] = strtok(""); assert(r, ""); | |
8002
30f560a5fbc3
strtok.m: include TAB, LF, VT, FF, and CR in default set of delim characters
John W. Eaton <jwe@octave.org>
parents:
7017
diff
changeset
|
130 %!test [t,r] = strtok("this"); assert(r, char (zeros (1, 0))); |
5827 | 131 %!test [t,r] = strtok("this "); assert(r, " "); |
132 %!test [t,r] = strtok("this is"); assert(r, " is"); | |
8002
30f560a5fbc3
strtok.m: include TAB, LF, VT, FF, and CR in default set of delim characters
John W. Eaton <jwe@octave.org>
parents:
7017
diff
changeset
|
133 %!test [t,r] = strtok(" this"); assert(r, char (zeros (1, 0))); |
5827 | 134 %!test [t,r] = strtok(" this "); assert(r, " "); |
8002
30f560a5fbc3
strtok.m: include TAB, LF, VT, FF, and CR in default set of delim characters
John W. Eaton <jwe@octave.org>
parents:
7017
diff
changeset
|
135 %!test [t,r] = strtok(" "); assert(r, char (zeros (1, 0))); |
5827 | 136 |
137 %!# simple check with 2 and 3 delimiters | |
138 %!assert(strtok("this is", "i "), "th"); | |
139 %!assert(strtok("this is", "ij "), "th"); | |
140 | |
11587
c792872f8942
all script files: untabify and strip trailing whitespace
John W. Eaton <jwe@octave.org>
parents:
11523
diff
changeset
|
141 %!# test all cases for 4 delimiters since a different |
5827 | 142 %!# algorithm is used when more than 3 delimiters |
143 %!assert(strtok("","jkl "), ""); | |
144 %!assert(strtok("this","jkl "), "this"); | |
145 %!assert(strtok("this ","jkl "), "this"); | |
146 %!assert(strtok("this is","jkl "), "this"); | |
147 %!assert(strtok(" this","jkl "), "this"); | |
148 %!assert(strtok(" this ","jkl "), "this"); | |
149 %!assert(strtok(" ","jkl "), ""(1:0)); | |
150 | |
151 %!# test 'bad' string orientations | |
152 %!assert(strtok(" this "'), "this"'); # delimiter at start and end | |
153 %!assert(strtok(" this "',"jkl "), "this"'); | |
8002
30f560a5fbc3
strtok.m: include TAB, LF, VT, FF, and CR in default set of delim characters
John W. Eaton <jwe@octave.org>
parents:
7017
diff
changeset
|
154 |
30f560a5fbc3
strtok.m: include TAB, LF, VT, FF, and CR in default set of delim characters
John W. Eaton <jwe@octave.org>
parents:
7017
diff
changeset
|
155 %!# test with TAB, LF, VT, FF, and CR |
30f560a5fbc3
strtok.m: include TAB, LF, VT, FF, and CR in default set of delim characters
John W. Eaton <jwe@octave.org>
parents:
7017
diff
changeset
|
156 %!test |
30f560a5fbc3
strtok.m: include TAB, LF, VT, FF, and CR in default set of delim characters
John W. Eaton <jwe@octave.org>
parents:
7017
diff
changeset
|
157 %! for ch = "\t\n\v\f\r" |
30f560a5fbc3
strtok.m: include TAB, LF, VT, FF, and CR in default set of delim characters
John W. Eaton <jwe@octave.org>
parents:
7017
diff
changeset
|
158 %! [t, r] = strtok (cstrcat ("beg", ch, "end")); |
30f560a5fbc3
strtok.m: include TAB, LF, VT, FF, and CR in default set of delim characters
John W. Eaton <jwe@octave.org>
parents:
7017
diff
changeset
|
159 %! assert (t, "beg"); |
30f560a5fbc3
strtok.m: include TAB, LF, VT, FF, and CR in default set of delim characters
John W. Eaton <jwe@octave.org>
parents:
7017
diff
changeset
|
160 %! assert (r, cstrcat (ch, "end")) |
30f560a5fbc3
strtok.m: include TAB, LF, VT, FF, and CR in default set of delim characters
John W. Eaton <jwe@octave.org>
parents:
7017
diff
changeset
|
161 %! endfor |