Mercurial > hg > octave-lyh
changeset 10053:830986c43dee
implement compiled strrep
author | Jaroslav Hajek <highegg@gmail.com> |
---|---|
date | Mon, 04 Jan 2010 12:05:23 +0100 |
parents | 5813ec5077b5 |
children | 8f7f325fa678 |
files | scripts/ChangeLog scripts/strings/module.mk scripts/strings/strrep.m src/ChangeLog src/DLD-FUNCTIONS/strfind.cc |
diffstat | 5 files changed, 137 insertions(+), 123 deletions(-) [+] |
line wrap: on
line diff
--- a/scripts/ChangeLog +++ b/scripts/ChangeLog @@ -1,3 +1,8 @@ +2010-01-04 Jaroslav Hajek <highegg@gmail.com> + + * strings/strrep.m: Remove. + * strings/module.mk: Update. + 2010-01-02 Jaroslav Hajek <highegg@gmail.com> * optimization/fsolve.m: Support old style jacobian passing.
--- a/scripts/strings/module.mk +++ b/scripts/strings/module.mk @@ -26,7 +26,6 @@ strings/strjust.m \ strings/strmatch.m \ strings/strncmpi.m \ - strings/strrep.m \ strings/strtok.m \ strings/strtrim.m \ strings/strtrunc.m \
deleted file mode 100644 --- a/scripts/strings/strrep.m +++ /dev/null @@ -1,107 +0,0 @@ -## Copyright (C) 1995, 1996, 1998, 1999, 2000, 2005, 2006, 2007, 2008, -## 2009 Kurt Hornik -## -## This file is part of Octave. -## -## Octave is free software; you can redistribute it and/or modify it -## under the terms of the GNU General Public License as published by -## the Free Software Foundation; either version 3 of the License, or (at -## your option) any later version. -## -## Octave is distributed in the hope that it will be useful, but -## WITHOUT ANY WARRANTY; without even the implied warranty of -## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -## General Public License for more details. -## -## You should have received a copy of the GNU General Public License -## along with Octave; see the file COPYING. If not, see -## <http://www.gnu.org/licenses/>. - -## -*- texinfo -*- -## @deftypefn {Function File} {} strrep (@var{s}, @var{x}, @var{y}) -## Replace all occurrences of the substring @var{x} of the string @var{s} -## with the string @var{y} and return the result. For example, -## -## @example -## @group -## strrep ("This is a test string", "is", "&%$") -## @result{} "Th&%$ &%$ a test string" -## @end group -## @end example -## @seealso{regexprep, strfind, findstr} -## @end deftypefn - -## Author: Kurt Hornik <Kurt.Hornik@wu-wien.ac.at> -## Created: 11 November 1994 -## Adapted-By: jwe - -function t = strrep (s, x, y) - - if (nargin != 3) - print_usage (); - endif - - if (! (ischar (s) && ischar (x) && ischar (y))) - error ("strrep: all arguments must be strings"); - endif - - if (length (x) > length (s) || isempty (x)) - t = s; - return; - endif - - ind = findstr (s, x, 0); - - if (length(ind) == 0) - t = s; - elseif (length(y) > 0) - ## Replacement. - ## - ## Copy the parts of s that aren't being replaced. This is done - ## with an index vector, with jumps where each search string - ## is found. For a jump of 0 (target length == replacement length) - ## the index is just cumsum (ones (length (s))). For non-zero - ## jumps, add the jump size to the ones vector at each found position. - jump = length(y) - length(x); - if (jump > 0) - ## S expands. - di = ones (size (s)); - di(ind) = 1 + jump * ones (length (ind), 1); - t(cumsum (di)) = s; - elseif (jump < 0) - ## S contracts. - di = ones (jump * length (ind) + length (s), 1); - di (ind + jump * [0:length(ind)-1]) = 1 - jump * ones (length (ind), 1); - t = s (cumsum (di)); - else - ## S stays the same length. - t = s; - endif - ## Now, substitute a copy of the replacement string whereever the - ## search string was found. Note that we must first update the - ## target positions to account for any expansion or contraction - ## of s that may have occurred. - ind = ind + jump * [0:length(ind)-1]; - repeat = [1:length(y)]' * ones (1, length (ind)); - dest = ones (length (y), 1) * ind + repeat - 1; - t(dest) = y(repeat); - else - ## Deletion. - ## - ## Build an index vector of all locations where the target was found - ## in the search string, and zap them. - t = toascii (s); - repeat = [1:length(x)]' * ones (1, length (ind)); - delete = ones (length (x), 1) * ind + repeat - 1; - t(delete) = []; - t = char (t); - endif - -endfunction - -%!assert(strcmp (strrep ("This is a test string", "is", "&%$"), -%! "Th&%$ &%$ a test string")); - -%!error strrep (); - -%!error strrep ("foo", "bar", 3, 4);
--- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,10 @@ +2010-01-04 Jaroslav Hajek <highegg@gmail.com> + + * DLD-FUNCTIONS/strfind.cc (qs_search): Optionally discard overlaps. + Return result as Array<octave_idx_type>. + (Fstrfind): Use octave_value (Array<octave_idx_type>) constructor. + (Fstrrep): New function. + 2010-01-04 John W. Eaton <jwe@octave.org> * lex.ll (can_be_command): New function.
--- a/src/DLD-FUNCTIONS/strfind.cc +++ b/src/DLD-FUNCTIONS/strfind.cc @@ -27,6 +27,7 @@ #include <string> #include <climits> #include <algorithm> +#include <deque> #include "Cell.h" #include "ov.h" @@ -54,34 +55,59 @@ } -static octave_value +static Array<octave_idx_type> qs_search (const Array<char>& needle, const Array<char>& haystack, - const octave_idx_type table[UCHAR_MAX]) + const octave_idx_type table[UCHAR_MAX], + bool overlaps = true) { const char *x = needle.data (); octave_idx_type m = needle.numel (); const char *y = haystack.data (); octave_idx_type n = haystack.numel (); - std::vector<octave_idx_type> accum; + // We'll use deque because it typically has the most favorable properties for + // the operation we need. + std::deque<octave_idx_type> accum; if (n >= m) { octave_idx_type j = 0; - while (j < n - m) { - if (std::equal (x, x + m, y + j)) - accum.push_back (j); - j += table[ORD(y[j + m])]; - } - if (std::equal (x, x + m, y + n - m)) - accum.push_back (n - m); + if (overlaps) + { + while (j < n - m) + { + if (std::equal (x, x + m, y + j)) + accum.push_back (j); + j += table[ORD(y[j + m])]; + } + } + else + { + while (j < n - m) + { + if (std::equal (x, x + m, y + j)) + { + accum.push_back (j); + j += m; + } + else + j += table[ORD(y[j + m])]; + } + } + + if (j == n - m && std::equal (x, x + m, y + j)) + accum.push_back (j); } octave_idx_type nmatch = accum.size (); - NoAlias<Matrix> result (std::min (1, nmatch), nmatch); - for (octave_idx_type i = 0; i < nmatch; i++) - result(i) = accum[i] + 1; + Array<octave_idx_type> result (dim_vector (std::min (1, nmatch), nmatch)); + octave_idx_type k = 0; + for (std::deque<octave_idx_type>::const_iterator iter = accum.begin (); + iter != accum.end (); iter++) + { + result.xelem (k++) = *iter; + } return result; } @@ -132,7 +158,8 @@ qs_preprocess (needle, table); if (argstr.is_string ()) - retval = qs_search (needle, argstr.char_array_value (), table); + retval = octave_value (qs_search (needle, argstr.char_array_value (), table), + true, true); else if (argstr.is_cell ()) { const Cell argsc = argstr.cell_value (); @@ -143,7 +170,8 @@ { octave_value argse = argsc(i); if (argse.is_string ()) - retc(i) = qs_search (needle, argse.char_array_value (), table); + retc(i) = octave_value (qs_search (needle, argse.char_array_value (), table), + true, true); else { error ("strfind: each cell element must be a string"); @@ -177,3 +205,85 @@ %!assert (strfind ("Linux _is_ user-friendly. It just isn't ignorant-friendly or idiot-friendly.", "friendly"), [17, 50, 68]); */ + +DEFUN_DLD (strrep, args, , + "-*- texinfo -*-\n\ +@deftypefn {Loadable Function} {} strrep (@var{s}, @var{x}, @var{y})\n\ +Replace all occurrences of the substring @var{x} of the string @var{s}\n\ +with the string @var{y} and return the result. For example,\n\ +\n\ +@example\n\ +@group\n\ +strrep (\"This is a test string\", \"is\", \"&%$\")\n\ + @result{} \"Th&%$ &%$ a test string\"\n\ +@end group\n\ +@end example\n\ +@seealso{regexprep, strfind, findstr}\n\ +@end deftypefn") +{ + octave_value retval; + int nargin = args.length (); + + if (nargin == 3) + { + octave_value argstr = args(0), argp = args(1), argr = args(2); + if (argp.is_string () && argp.is_string () && argr.is_string ()) + { + const Array<char> str = argstr.char_array_value (); + const Array<char> pat = argp.char_array_value (); + const Array<char> rep = argr.char_array_value (); + Array<char> ret = str; + + octave_idx_type siz = str.numel (), psiz = pat.numel (), rsiz = rep.numel (); + + if (psiz != 0) + { + OCTAVE_LOCAL_BUFFER (octave_idx_type, table, UCHAR_MAX); + qs_preprocess (pat, table); + + // Look up matches, without overlaps. + const Array<octave_idx_type> idx = qs_search (pat, str, table, false); + octave_idx_type nidx = idx.numel (); + + if (nidx) + { + // Compute result size. + octave_idx_type retsiz = siz + nidx * (rsiz - psiz); + ret.clear (dim_vector (1, retsiz)); + const char *src = str.data (), *reps = rep.data (); + char *dest = ret.fortran_vec (); + + octave_idx_type k = 0; + for (octave_idx_type i = 0; i < nidx; i++) + { + octave_idx_type j = idx(i); + dest = std::copy (src + k, src + j, dest); + dest = std::copy (reps, reps + rsiz, dest); + k = j + psiz; + } + + std::copy (src + k, src + siz, dest); + } + } + + retval = ret; + } + else + error ("strrep: all arguments must be strings"); + } + else + print_usage (); + + return retval; +} + +/* + +%!assert(strcmp (strrep ("This is a test string", "is", "&%$"), +%! "Th&%$ &%$ a test string")); + +%!error strrep (); + +%!error strrep ("foo", "bar", 3, 4); + +*/