Mercurial > hg > octave-lyh
annotate scripts/strings/findstr.m @ 10453:7d23c19249d6
NEWS: style fixes
author | John W. Eaton <jwe@octave.org> |
---|---|
date | Wed, 24 Mar 2010 16:26:33 -0400 |
parents | eb63fbe60fab |
children | 95c3e38098bf |
rev | line source |
---|---|
8920 | 1 ## Copyright (C) 1996, 1999, 2000, 2002, 2003, 2005, 2006, 2007, 2008, 2009 |
7017 | 2 ## Kurt Hornik |
2325 | 3 ## |
2313 | 4 ## This file is part of Octave. |
5 ## | |
6 ## Octave is free software; you can redistribute it and/or modify it | |
7 ## under the terms of the GNU General Public License as published by | |
7016 | 8 ## the Free Software Foundation; either version 3 of the License, or (at |
9 ## your option) any later version. | |
2313 | 10 ## |
11 ## Octave is distributed in the hope that it will be useful, but | |
12 ## WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 ## General Public License for more details. | |
15 ## | |
16 ## You should have received a copy of the GNU General Public License | |
7016 | 17 ## along with Octave; see the file COPYING. If not, see |
18 ## <http://www.gnu.org/licenses/>. | |
2272 | 19 |
3361 | 20 ## -*- texinfo -*- |
21 ## @deftypefn {Function File} {} findstr (@var{s}, @var{t}, @var{overlap}) | |
22 ## Return the vector of all positions in the longer of the two strings | |
23 ## @var{s} and @var{t} where an occurrence of the shorter of the two starts. | |
24 ## If the optional argument @var{overlap} is nonzero, the returned vector | |
25 ## can include overlapping positions (this is the default). For example, | |
3426 | 26 ## |
3361 | 27 ## @example |
8442
502e58a0d44f
Fix docstrings, add examples, references and tests to string functions
Thorsten Meyer <thorsten.meyier@gmx.de>
parents:
7411
diff
changeset
|
28 ## @group |
3361 | 29 ## findstr ("ababab", "a") |
8442
502e58a0d44f
Fix docstrings, add examples, references and tests to string functions
Thorsten Meyer <thorsten.meyier@gmx.de>
parents:
7411
diff
changeset
|
30 ## @result{} [1, 3, 5] |
3361 | 31 ## findstr ("abababa", "aba", 0) |
8442
502e58a0d44f
Fix docstrings, add examples, references and tests to string functions
Thorsten Meyer <thorsten.meyier@gmx.de>
parents:
7411
diff
changeset
|
32 ## @result{} [1, 5] |
502e58a0d44f
Fix docstrings, add examples, references and tests to string functions
Thorsten Meyer <thorsten.meyier@gmx.de>
parents:
7411
diff
changeset
|
33 ## @end group |
3361 | 34 ## @end example |
8442
502e58a0d44f
Fix docstrings, add examples, references and tests to string functions
Thorsten Meyer <thorsten.meyier@gmx.de>
parents:
7411
diff
changeset
|
35 ## @seealso{strfind, strmatch, strcmp, strncmp, strcmpi, strncmpi, find} |
3361 | 36 ## @end deftypefn |
2272 | 37 |
3891 | 38 ## Note that this implementation swaps the strings if the second one is longer |
39 ## than the first, so try to put the longer one first. | |
40 ## | |
5428 | 41 ## Author: Kurt Hornik <Kurt.Hornik@wu-wien.ac.at> |
2355 | 42 ## Adapted-By: jwe |
2314 | 43 |
2311 | 44 function v = findstr (s, t, overlap) |
2275 | 45
46 if (nargin < 2 || nargin > 3) | |
6046 | 47 print_usage (); |
2275 | 48 endif |
49 | |
5348 | 50 if (all (size (s) > 1) || all (size (t) > 1)) |
51 error ("findstr: arguments must have only one non-singleton dimension"); | |
3891 | 52 endif |
53 | |
2275 | 54 if (nargin == 2) |
55 overlap = 1; | |
2272 | 56 endif |
57 | |
3891 | 58 ## Make S the longer string and T the shorter one (the target to find). |
59 if (length (s) < length (t)) | |
60 tmp = s; | |
61 s = t; | |
62 t = tmp; | |
63 endif | |
64 | |
65 l_s = length (s); | |
66 l_t = length (t); | |
67 | |
68 if (l_t == 0) | |
4321 | 69 ## zero length target: return an empty matrix |
70 v = []; | |
3891 | 71
72 elseif (l_t == 1) | |
73 ## length one target: simple find | |
74 v = find (s == t); | |
75 | |
76 elseif (l_t == 2) | |
77 ## length two target: find first at i and second at i+1 | |
78 v = find (s(1:l_s-1) == t(1) & s(2:l_s) == t(2)); | |
79 | |
80 else | |
81 ## length three or more: match the first three by find then go through | |
82 ## the much smaller list to determine which of them are real matches | |
83 limit = l_s - l_t + 1; | |
84 v = find (s(1:limit) == t(1) | |
85 & s(2:limit+1) == t(2) | |
86 & s (3:limit+2) == t(3)); | |
87 endif | |
3759 | 88
3891 | 89 ## Need to search the index vector if our find was too short |
90 ## (target length >= 3, per the test below), or if we don't allow overlaps. Note though | |
91 ## that there cannot be any overlaps if the first character in the | |
92 ## target is different from the remaining characters in the target, | |
93 ## so a single character, two different characters, or first character | |
94 ## different from the second two don't need to be searched. | |
95 if (l_t >= 3 || (! overlap && l_t > 1 && any (t(1) == t(2:l_t)))) | |
96 ## force strings to be both row vectors or both column vectors | |
97 if (all (size (s) != size (t))) | |
98 t = t.'; | |
99 endif | |
100 | |
101 ## determine which candidate positions in V to keep | |
102 keep = zeros (size (v)); | |
103 ind = 0:l_t-1; | |
104 if (overlap) | |
105 for idx = 1:length (v) | |
106 keep(idx) = all (s(v(idx) + ind) == t); | |
107 endfor | |
2272 | 108 else |
8506 | 109 ## First possible position for next non-overlapping match. |
110 next = 1; | |
3891 | 111 for idx = 1:length (v) |
112 if (v(idx) >= next && s(v(idx) + ind) == t) | |
113 keep(idx) = 1; | |
8506 | 114 ## Skip to the next possible match position. |
115 next = v(idx) + l_t; | |
3891 | 116 else |
117 keep(idx) = 0; | |
118 endif | |
119 endfor | |
2272 | 120 endif |
3891 | 121 if (! isempty (v)) |
122 v = v(find (keep)); | |
123 endif | |
124 endif | |
5400 | 125
126 if (isempty (v)) | |
127 v = []; | |
128 endif | |
129 | |
8506 | 130 ## Always return a row vector, because that's what the old one did. |
3891 | 131 if (rows (v) > 1) |
132 v = v.'; | |
2272 | 133 endif |
134 | |
135 endfunction | |
7411 | 136 |
8442
502e58a0d44f
Fix docstrings, add examples, references and tests to string functions
Thorsten Meyer <thorsten.meyier@gmx.de>
parents:
7411
diff
changeset
|
137 %!assert ((findstr ("abababa", "a") == [1, 3, 5, 7] |
7411 | 138 %! && findstr ("abababa", "aba") == [1, 3, 5] |
139 %! && findstr ("abababa", "aba", 0) == [1, 5])); | |
140 | |
141 %!error findstr (); | |
142 | |
143 %!error findstr ("foo", "bar", 3, 4); | |
144 |