Mercurial > hg > octave-nkf
view src/DLD-FUNCTIONS/regexp.cc @ 8619:930a8114197b
For zero length matches in regexp, advance index by one and try again
author | David Bateman <dbateman@free.fr> |
---|---|
date | Wed, 28 Jan 2009 23:43:27 +0100 |
parents | 433e788a015b |
children | eb63fbe60fab |
line wrap: on
line source
/* Copyright (C) 2005, 2006, 2007 David Bateman Copyright (C) 2002, 2003, 2004, 2005 Paul Kienzle This file is part of Octave. Octave is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. Octave is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with Octave; see the file COPYING. If not, see <http://www.gnu.org/licenses/>. */ #ifdef HAVE_CONFIG_H #include <config.h> #endif #include <algorithm> #include <sstream> #include "defun-dld.h" #include "error.h" #include "gripes.h" #include "oct-obj.h" #include "utils.h" #include "Cell.h" #include "oct-map.h" #include "str-vec.h" #include "quit.h" #include "parse.h" #include "oct-locbuf.h" #if defined (HAVE_PCRE) #include <pcre.h> #elif defined (HAVE_REGEX) #if defined (__MINGW32__) #define __restrict #endif #if defined (HAVE_SYS_TYPES_H) #include <sys/types.h> #endif #include <regex.h> #endif // Define the maximum number of retries for a pattern that // possibly results in an infinite recursion. #define PCRE_MATCHLIMIT_MAX 10 // The regexp is constructed as a linked list to avoid resizing the // return values in arrays at each new match. // FIXME don't bother collecting and composing return values the user // doesn't want. class regexp_elem { public: regexp_elem (const string_vector& _named_token, const Cell& _t, const std::string& _m, const Matrix& _te, double _s, double _e) : named_token (_named_token), t (_t), m (_m), te (_te), s (_s), e (_e) { } regexp_elem (const regexp_elem &a) : named_token (a.named_token), t (a.t), m (a.m), te (a.te), s (a.s), e (a.e) { } string_vector named_token; Cell t; std::string m; Matrix te; double s; double e; }; typedef std::list<regexp_elem>::const_iterator const_iterator; #define MAXLOOKBEHIND 10 static bool lookbehind_warned = false; static int octregexp_list (const octave_value_list &args, const std::string &nm, bool case_insensitive, std::list<regexp_elem> &lst, string_vector &named, int &nopts, bool &once) { int sz = 0; #if defined (HAVE_REGEX) || defined (HAVE_PCRE) int nargin = args.length(); bool lineanchors = false; bool dotexceptnewline = false; bool freespacing = false; nopts = nargin - 2; once = false; std::string buffer = args(0).string_value (); size_t max_length = (buffer.length () > MAXLOOKBEHIND ? MAXLOOKBEHIND: buffer.length ()); if (error_state) { gripe_wrong_type_arg (nm.c_str(), args(0)); return 0; } std::string pattern = args(1).string_value (); if (error_state) { gripe_wrong_type_arg (nm.c_str(), args(1)); return 0; } for (int i = 2; i < nargin; i++) { std::string str = args(i).string_value(); if (error_state) { error ("%s: optional arguments must be strings", nm.c_str()); break; } std::transform (str.begin (), str.end (), str.begin (), tolower); if (str.find("once", 0) == 0) { once = true; nopts--; } else if (str.find("matchcase", 0) == 0) { case_insensitive = false; nopts--; } else if (str.find("ignorecase", 0) == 0) { case_insensitive = true; nopts--; } else if (str.find("dotall", 0) == 0) { dotexceptnewline = false; nopts--; } else if (str.find("stringanchors", 0) == 0) { lineanchors = false; nopts--; } else if (str.find("literalspacing", 0) == 0) { freespacing = false; nopts--; } #if HAVE_PCRE // Only accept these options with pcre else if (str.find("dotexceptnewline", 0) == 0) { dotexceptnewline = true; nopts--; } else if (str.find("lineanchors", 0) == 0) { lineanchors = true; nopts--; } else if (str.find("freespacing", 0) == 0) { freespacing = true; nopts--; } else if (str.find("start", 0) && str.find("end", 0) && str.find("tokenextents", 0) && str.find("match", 0) && str.find("tokens", 0) && str.find("names", 0)) error ("%s: unrecognized option", nm.c_str()); #else else if (str.find("names", 0) == 0 || str.find("dotexceptnewline", 0) == 0 || str.find("lineanchors", 0) == 0 || str.find("freespacing", 0) == 0) error ("%s: %s not implemented in this version", str.c_str(), nm.c_str()); else if (str.find("start", 0) && str.find("end", 0) && str.find("tokenextents", 0) && str.find("match", 0) && str.find("tokens", 0)) error ("%s: unrecognized option", nm.c_str()); #endif } if (!error_state) { Cell t; std::string m; double s, e; // named tokens "(?<name>...)" are only treated with PCRE not regex. #if HAVE_PCRE size_t pos = 0; size_t new_pos; int nnames = 0; int inames = 0; std::ostringstream buf; Array<int> named_idx; while ((new_pos = pattern.find ("(?",pos)) != std::string::npos) { if (pattern.at (new_pos + 2) == '<' && !(pattern.at (new_pos + 3) == '=' || pattern.at (new_pos + 3) == '!')) { // The syntax of named tokens in pcre is "(?P<name>...)" while // we need a syntax "(?<name>...)", so fix that here. Also an // expression like // "(?<first>\w+)\s+(?<last>\w+)|(?<last>\w+),\s+(?<first>\w+)" // should be perfectly legal, while pcre does not allow the same // named token name on both sides of the alternative. Also fix // that here by replacing name tokens by dummy names, and dealing // with the dummy names later. size_t tmp_pos = pattern.find_first_of ('>',new_pos); if (tmp_pos == std::string::npos) { error ("syntax error in pattern"); break; } std::string tmp_name = pattern.substr(new_pos+3,tmp_pos-new_pos-3); bool found = false; for (int i = 0; i < nnames; i++) if (named(i) == tmp_name) { named_idx.resize(inames+1); named_idx(inames) = i; found = true; break; } if (! found) { named_idx.resize(inames+1); named_idx(inames) = nnames; named.append(tmp_name); nnames++; } if (new_pos - pos > 0) buf << pattern.substr(pos,new_pos-pos); if (inames < 10) buf << "(?P<n00" << inames++; else if (inames < 100) buf << "(?P<n0" << inames++; else buf << "(?P<n" << inames++; pos = tmp_pos; } else if (pattern.at (new_pos + 2) == '<') { // Find lookbehind operators of arbitrary length (ie like // "(?<=[a-z]*)") and replace with a maximum length operator // as PCRE can not yet handle arbitrary length lookahead // operators. Use the string length as the maximum length to // avoid issues. int brackets = 1; size_t tmp_pos1 = new_pos + 2; size_t tmp_pos2 = tmp_pos1; while (tmp_pos1 <= pattern.length () && brackets > 0) { char ch = pattern.at (tmp_pos1); if (ch == '(') brackets++; else if (ch == ')') { if (brackets > 1) tmp_pos2 = tmp_pos1; brackets--; } tmp_pos1++; } if (brackets != 0) { buf << pattern.substr (pos, new_pos - pos) << "(?"; pos = new_pos + 2; } else { size_t tmp_pos3 = pattern.find_first_of ("*+", tmp_pos2); if (tmp_pos3 != std::string::npos && tmp_pos3 < tmp_pos1) { if (!lookbehind_warned) { lookbehind_warned = true; warning ("%s: arbitrary length lookbehind patterns are only support up to length %d", nm.c_str(), MAXLOOKBEHIND); } buf << pattern.substr (pos, new_pos - pos) << "("; size_t i; if (pattern.at (tmp_pos3) == '*') i = 0; else i = 1; for (; i < max_length + 1; i++) { buf << pattern.substr(new_pos, tmp_pos3 - new_pos) << "{" << i << "}"; buf << pattern.substr(tmp_pos3 + 1, tmp_pos1 - tmp_pos3 - 1); if (i != max_length) buf << "|"; } buf << ")"; } else buf << pattern.substr (pos, tmp_pos1 - pos); pos = tmp_pos1; } } else { buf << pattern.substr (pos, new_pos - pos) << "(?"; pos = new_pos + 2; } } buf << pattern.substr(pos); if (error_state) return 0; // Compile expression pcre *re; const char *err; int erroffset; std::string buf_str = buf.str (); re = pcre_compile (buf_str.c_str (), (case_insensitive ? PCRE_CASELESS : 0) | (dotexceptnewline ? 0 : PCRE_DOTALL) | (lineanchors ? PCRE_MULTILINE : 0) | (freespacing ? PCRE_EXTENDED : 0), &err, &erroffset, 0); if (re == 0) { error("%s: %s at position %d of expression", nm.c_str(), err, erroffset); return 0; } int subpatterns; int namecount; int nameentrysize; char *nametable; int idx = 0; pcre_fullinfo(re, 0, PCRE_INFO_CAPTURECOUNT, &subpatterns); pcre_fullinfo(re, 0, PCRE_INFO_NAMECOUNT, &namecount); pcre_fullinfo(re, 0, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize); pcre_fullinfo(re, 0, PCRE_INFO_NAMETABLE, &nametable); OCTAVE_LOCAL_BUFFER(int, ovector, (subpatterns+1)*3); OCTAVE_LOCAL_BUFFER(int, nidx, namecount); for (int i = 0; i < namecount; i++) { // Index of subpattern in first two bytes MSB first of name. // Extract index. nidx[i] = (static_cast<int>(nametable[i*nameentrysize])) << 8 | static_cast<int>(nametable[i*nameentrysize+1]); } while(true) { OCTAVE_QUIT; int matches = pcre_exec(re, 0, buffer.c_str(), buffer.length(), idx, (idx ? PCRE_NOTBOL : 0), ovector, (subpatterns+1)*3); if (matches == PCRE_ERROR_MATCHLIMIT) { // try harder; start with default value for MATCH_LIMIT and increase it warning("Your pattern caused PCRE to hit its MATCH_LIMIT.\nTrying harder now, but this will be slow."); pcre_extra pe; pcre_config(PCRE_CONFIG_MATCH_LIMIT, static_cast <void *> (&pe.match_limit)); pe.flags = PCRE_EXTRA_MATCH_LIMIT; int i = 0; while (matches == PCRE_ERROR_MATCHLIMIT && i++ < PCRE_MATCHLIMIT_MAX) { OCTAVE_QUIT; pe.match_limit *= 10; matches = pcre_exec(re, &pe, buffer.c_str(), buffer.length(), idx, (idx ? PCRE_NOTBOL : 0), ovector, (subpatterns+1)*3); } } if (matches < 0 && matches != PCRE_ERROR_NOMATCH) { error ("%s: internal error calling pcre_exec\nError code from pcre_exec is %i", nm.c_str(), matches); pcre_free(re); return 0; } else if (matches == PCRE_ERROR_NOMATCH) break; else if (ovector[1] <= ovector[0]) { // FIXME: Zero sized match!! Is this the right thing to do? idx = ovector[0] + 1; continue; } else { int pos_match = 0; Matrix te(matches-1,2); for (int i = 1; i < matches; i++) { if (ovector[2*i] >= 0 && ovector[2*i+1] > 0) { te(pos_match,0) = double (ovector[2*i]+1); te(pos_match++,1) = double (ovector[2*i+1]); } } te.resize(pos_match,2); s = double (ovector[0]+1); e = double (ovector[1]); const char **listptr; int status = pcre_get_substring_list(buffer.c_str(), ovector, matches, &listptr); if (status == PCRE_ERROR_NOMEMORY) { error("%s: cannot allocate memory in pcre_get_substring_list", nm.c_str()); pcre_free(re); return 0; } Cell cell_t (dim_vector(1,pos_match)); pos_match = 0; for (int i = 1; i < matches; i++) if (ovector[2*i] >= 0 && ovector[2*i+1] > 0) cell_t(pos_match++) = std::string(*(listptr+i)); m = std::string(*listptr); t = cell_t; string_vector named_tokens(nnames); if (namecount > 0) for (int i = 1; i < matches; i++) { if (ovector[2*i] >= 0 && ovector[2*i+1] > 0) { named_tokens(named_idx(i-1)) = std::string(*(listptr+nidx[i-1])); } } pcre_free_substring_list(listptr); regexp_elem new_elem (named_tokens, t, m, te, s, e); lst.push_back (new_elem); idx = ovector[1]; sz++; if (once) break; } } pcre_free(re); #else regex_t compiled; int err=regcomp(&compiled, pattern.c_str(), REG_EXTENDED | (case_insensitive ? REG_ICASE : 0)); if (err) { int len = regerror(err, &compiled, 0, 0); OCTAVE_LOCAL_BUFFER (char, errmsg, len); regerror(err, &compiled, errmsg, len); error("%s: %s in pattern (%s)", nm.c_str(), errmsg, pattern.c_str()); regfree(&compiled); return 0; } int subexpr = 1; int idx = 0; for (unsigned int i=0; i < pattern.length(); i++) subexpr += ( pattern[i] == '(' ? 1 : 0 ); OCTAVE_LOCAL_BUFFER (regmatch_t, match, subexpr ); while(true) { OCTAVE_QUIT; if (regexec(&compiled, buffer.c_str() + idx, subexpr, match, (idx ? REG_NOTBOL : 0)) == 0) { // Count actual matches int matches = 0; while (matches < subexpr && match[matches].rm_so >= 0) matches++; if (matches == 0 || match[0].rm_eo == 0) break; s = double (match[0].rm_so+1+idx); e = double (match[0].rm_eo+idx); Matrix te(matches-1,2); for (int i = 1; i < matches; i++) { te(i-1,0) = double (match[i].rm_so+1+idx); te(i-1,1) = double (match[i].rm_eo+idx); } m = buffer.substr (match[0].rm_so+idx, match[0].rm_eo-match[0].rm_so); Cell cell_t (dim_vector(1,matches-1)); for (int i = 1; i < matches; i++) cell_t(i-1) = buffer.substr (match[i].rm_so+idx, match[i].rm_eo-match[i].rm_so); t = cell_t; idx += match[0].rm_eo; string_vector sv; regexp_elem new_elem (sv, t, m, te, s, e); lst.push_back (new_elem); sz++; if (once) break; } else break; } regfree(&compiled); #endif } #else error ("%s: not available in this version of Octave", nm.c_str()); #endif return sz; } static octave_value_list octregexp (const octave_value_list &args, int nargout, const std::string &nm, bool case_insensitive) { octave_value_list retval; int nargin = args.length(); std::list<regexp_elem> lst; string_vector named; int nopts; bool once; int sz = octregexp_list (args, nm, case_insensitive, lst, named, nopts, once); if (! error_state) { // Converted the linked list in the correct form for the return values octave_idx_type i = 0; #ifdef HAVE_PCRE Octave_map nmap; if (sz == 1) { for (int j = 0; j < named.length(); j++) nmap.assign (named(j), lst.begin()->named_token(j)); retval(5) = nmap; } else { for (int j = 0; j < named.length (); j++) { i = 0; Cell tmp(dim_vector (1, sz)); for (const_iterator p = lst.begin(); p != lst.end(); p++) tmp(i++) = p->named_token(j); nmap.assign (named(j), octave_value (tmp)); } retval(5) = nmap; } #else retval(5) = Octave_map(); #endif if (once) retval(4) = sz ? lst.front ().t : Cell(); else { Cell t (dim_vector(1, sz)); i = 0; for (const_iterator p = lst.begin(); p != lst.end(); p++) t(i++) = p->t; retval(4) = t; } if (once) retval(3) = sz ? lst.front ().m : std::string(); else { Cell m (dim_vector(1, sz)); i = 0; for (const_iterator p = lst.begin(); p != lst.end(); p++) m(i++) = p->m; retval(3) = m; } if (once) retval(2) = sz ? lst.front ().te : Matrix(); else { Cell te (dim_vector(1, sz)); i = 0; for (const_iterator p = lst.begin(); p != lst.end(); p++) te(i++) = p->te; retval(2) = te; } if (once) { if (sz) retval(1) = lst.front ().e; else retval(1) = Matrix(); } else { NDArray e (dim_vector(1, sz)); i = 0; for (const_iterator p = lst.begin(); p != lst.end(); p++) e(i++) = p->e; retval(1) = e; } if (once) { if (sz) retval(0) = lst.front ().s; else retval(0) = Matrix(); } else { NDArray s (dim_vector(1, sz)); i = 0; for (const_iterator p = lst.begin(); p != lst.end(); p++) s(i++) = p->s; retval(0) = s; } // Alter the order of the output arguments if (nopts > 0) { int n = 0; octave_value_list new_retval; new_retval.resize(nargout); OCTAVE_LOCAL_BUFFER (int, arg_used, 6); for (int j = 0; j < 6; j++) arg_used[j] = false; for (int j = 2; j < nargin; j++) { int k = 0; std::string str = args(j).string_value(); std::transform (str.begin (), str.end (), str.begin (), tolower); if (str.find("once", 0) == 0 || str.find("stringanchors", 0) == 0 || str.find("lineanchors", 0) == 0 || str.find("matchcase", 0) == 0 || str.find("ignorecase", 0) == 0 || str.find("dotall", 0) == 0 || str.find("dotexceptnewline", 0) == 0 || str.find("literalspacing", 0) == 0 || str.find("freespacing", 0) == 0 ) continue; else if (str.find("start", 0) == 0) k = 0; else if (str.find("end", 0) == 0) k = 1; else if (str.find("tokenextents", 0) == 0) k = 2; else if (str.find("match", 0) == 0) k = 3; else if (str.find("tokens", 0) == 0) k = 4; else if (str.find("names", 0) == 0) k = 5; new_retval(n++) = retval(k); arg_used[k] = true; if (n == nargout) break; } // Fill in the rest of the arguments if (n < nargout) { for (int j = 0; j < 6; j++) { if (! arg_used[j]) new_retval(n++) = retval(j); } } retval = new_retval; } } return retval; } static octave_value_list octcellregexp (const octave_value_list &args, int nargout, const std::string &nm, bool case_insensitive) { octave_value_list retval; if (args(0).is_cell()) { OCTAVE_LOCAL_BUFFER (Cell, newretval, nargout); octave_value_list new_args = args; Cell cellstr = args(0).cell_value(); if (args(1).is_cell()) { Cell cellpat = args(1).cell_value(); if (cellpat.numel() == 1) { for (int j = 0; j < nargout; j++) newretval[j].resize(cellstr.dims()); new_args(1) = cellpat(0); for (octave_idx_type i = 0; i < cellstr.numel (); i++) { new_args(0) = cellstr(i); octave_value_list tmp = octregexp (new_args, nargout, nm, case_insensitive); if (error_state) break; for (int j = 0; j < nargout; j++) newretval[j](i) = tmp(j); } } else if (cellstr.numel() == 1) { for (int j = 0; j < nargout; j++) newretval[j].resize(cellpat.dims()); new_args(0) = cellstr(0); for (octave_idx_type i = 0; i < cellpat.numel (); i++) { new_args(1) = cellpat(i); octave_value_list tmp = octregexp (new_args, nargout, nm, case_insensitive); if (error_state) break; for (int j = 0; j < nargout; j++) newretval[j](i) = tmp(j); } } else if (cellstr.numel() == cellpat.numel()) { if (cellstr.dims() != cellpat.dims()) error ("%s: Inconsistent cell array dimensions", nm.c_str()); else { for (int j = 0; j < nargout; j++) newretval[j].resize(cellstr.dims()); for (octave_idx_type i = 0; i < cellstr.numel (); i++) { new_args(0) = cellstr(i); new_args(1) = cellpat(i); octave_value_list tmp = octregexp (new_args, nargout, nm, case_insensitive); if (error_state) break; for (int j = 0; j < nargout; j++) newretval[j](i) = tmp(j); } } } else error ("regexp: cell array arguments must be scalar or equal size"); } else { for (int j = 0; j < nargout; j++) newretval[j].resize(cellstr.dims()); for (octave_idx_type i = 0; i < cellstr.numel (); i++) { new_args(0) = cellstr(i); octave_value_list tmp = octregexp (new_args, nargout, nm, case_insensitive); if (error_state) break; for (int j = 0; j < nargout; j++) newretval[j](i) = tmp(j); } } if (!error_state) for (int j = 0; j < nargout; j++) retval(j) = octave_value (newretval[j]); } else if (args(1).is_cell()) { OCTAVE_LOCAL_BUFFER (Cell, newretval, nargout); octave_value_list new_args = args; Cell cellpat = args(1).cell_value(); for (int j = 0; j < nargout; j++) newretval[j].resize(cellpat.dims()); for (octave_idx_type i = 0; i < cellpat.numel (); i++) { new_args(1) = cellpat(i); octave_value_list tmp = octregexp (new_args, nargout, nm, case_insensitive); if (error_state) break; for (int j = 0; j < nargout; j++) newretval[j](i) = tmp(j); } if (!error_state) for (int j = 0; j < nargout; j++) retval(j) = octave_value (newretval[j]); } else retval = octregexp (args, nargout, nm, case_insensitive); return retval; } DEFUN_DLD (regexp, args, nargout, "-*- texinfo -*-\n\ @deftypefn {Loadable Function} {[@var{s}, @var{e}, @var{te}, @var{m}, @var{t}, @var{nm}] =} regexp (@var{str}, @var{pat})\n\ @deftypefnx {Loadable Function} {[@dots{}] =} regexp (@var{str}, @var{pat}, @var{opts}, @dots{})\n\ \n\ Regular expression string matching. Matches @var{pat} in @var{str} and\n\ returns the position and matching substrings or empty values if there are\n\ none.\n\ \n\ The matched pattern @var{pat} can include any of the standard regex\n\ operators, including:\n\ \n\ @table @code\n\ @item .\n\ Match any character\n\ @item * + ? @{@}\n\ Repetition operators, representing\n\ @table @code\n\ @item *\n\ Match zero or more times\n\ @item +\n\ Match one or more times\n\ @item ?\n\ Match zero or one times\n\ @item @{@}\n\ Match range operator, which is of the form @code{@{@var{n}@}} to match exactly\n\ @var{n} times, @code{@{@var{m},@}} to match @var{m} or more times,\n\ @code{@{@var{m},@var{n}@}} to match between @var{m} and @var{n} times.\n\ @end table\n\ @item [@dots{}] [^@dots{}]\n\ List operators, where for example @code{[ab]c} matches @code{ac} and @code{bc}\n\ @item ()\n\ Grouping operator\n\ @item |\n\ Alternation operator. Match one of a choice of regular expressions. The\n\ alternatives must be delimited by the grouping operator @code{()} above\n\ @item ^ $\n\ Anchoring operator. @code{^} matches the start of the string @var{str} and\n\ @code{$} the end\n\ @end table\n\ \n\ In addition the following escaped characters have special meaning. It should\n\ be noted that it is recommended to quote @var{pat} in single quotes rather\n\ than double quotes, to avoid the escape sequences being interpreted by Octave\n\ before being passed to @code{regexp}.\n\ \n\ @table @code\n\ @item \\b\n\ Match a word boundary\n\ @item \\B\n\ Match within a word\n\ @item \\w\n\ Matches any word character\n\ @item \\W\n\ Matches any non word character\n\ @item \\<\n\ Matches the beginning of a word\n\ @item \\>\n\ Matches the end of a word\n\ @item \\s\n\ Matches any whitespace character\n\ @item \\S\n\ Matches any non whitespace character\n\ @item \\d\n\ Matches any digit\n\ @item \\D\n\ Matches any non-digit\n\ @end table\n\ \n\ The outputs of @code{regexp} by default are in the order as given below\n\ \n\ @table @asis\n\ @item @var{s}\n\ The start indices of each of the matching substrings\n\ \n\ @item @var{e}\n\ The end indices of each matching substring\n\ \n\ @item @var{te}\n\ The extents of each of the matched token surrounded by @code{(@dots{})} in\n\ @var{pat}.\n\ \n\ @item @var{m}\n\ A cell array of the text of each match.\n\ \n\ @item @var{t}\n\ A cell array of the text of each token matched.\n\ \n\ @item @var{nm}\n\ A structure containing the text of each matched named token, with the name\n\ being used as the fieldname. A named token is denoted as\n\ @code{(?<name>@dots{})}\n\ @end table\n\ \n\ Particular output arguments or the order of the output arguments can be\n\ selected by additional @var{opts} arguments. These are strings and the\n\ correspondence between the output arguments and the optional argument\n\ are\n\ \n\ @multitable @columnfractions 0.2 0.3 0.3 0.2\n\ @item @tab 'start' @tab @var{s} @tab\n\ @item @tab 'end' @tab @var{e} @tab\n\ @item @tab 'tokenExtents' @tab @var{te} @tab\n\ @item @tab 'match' @tab @var{m} @tab\n\ @item @tab 'tokens' @tab @var{t} @tab\n\ @item @tab 'names' @tab @var{nm} @tab\n\ @end multitable\n\ \n\ A further optional argument is 'once', that limits the number of returned\n\ matches to the first match. Additional arguments are\n\ \n\ @table @asis\n\ @item matchcase\n\ Make the matching case sensitive.\n\ @item ignorecase\n\ Make the matching case insensitive.\n\ @item stringanchors\n\ Match the anchor characters at the beginning and end of the string.\n\ @item lineanchors\n\ Match the anchor characters at the beginning and end of the line.\n\ @item dotall\n\ The character @code{.} matches the newline character.\n\ @item dotexceptnewline\n\ The character @code{.} matches all but the newline character.\n\ @item freespacing\n\ The pattern can include arbitrary whitespace and comments starting with\n\ @code{#}.\n\ @item literalspacing\n\ The pattern is taken literally.\n\ @end table\n\ @seealso{regexpi, regexprep}\n\ @end deftypefn") { octave_value_list retval; int nargin = args.length(); if (nargin < 2) print_usage (); else if (args(0).is_cell() || args(1).is_cell()) retval = octcellregexp (args, nargout, "regexp", false); else retval = octregexp (args, nargout, "regexp", false); return retval; } /* ## PCRE_ERROR_MATCHLIMIT test %!test %! s=sprintf('\t4\n0000\t-0.00\t-0.0000\t4\t-0.00\t-0.0000\t4\n0000\t-0.00\t-0.0000\t0\t-0.00\t-'); %! ws = warning("query"); %! unwind_protect %! warning("off"); %! regexp(s, '(\s*-*\d+[.]*\d*\s*)+\n'); %! unwind_protect_cleanup %! warning(ws); %! end_unwind_protect ## seg-fault test %!assert(regexp("abcde","."),[1,2,3,4,5]) ## Check that anchoring of pattern works correctly %!assert(regexp('abcabc','^abc'),1); %!assert(regexp('abcabc','abc$'),4); %!assert(regexp('abcabc','^abc$'),zeros(1,0)); %!test %! [s, e, te, m, t] = regexp(' No Match ', 'f(.*)uck'); %! assert (s,zeros(1,0)) %! assert (e,zeros(1,0)) %! assert (te,cell(1,0)) %! assert (m, cell(1,0)) %! assert (t, cell(1,0)) %!test %! [s, e, te, m, t] = regexp(' FiRetrUck ', 'f(.*)uck'); %! assert (s,zeros(1,0)) %! assert (e,zeros(1,0)) %! assert (te,cell(1,0)) %! assert (m, cell(1,0)) %! assert (t, cell(1,0)) %!test %! [s, e, te, m, t] = regexp(' firetruck ', 'f(.*)uck'); %! assert (s,2) %! assert (e,10) %! assert (te{1},[3,7]) %! assert (m{1}, 'firetruck') %! assert (t{1}{1}, 'iretr') %!test %! [s, e, te, m, t] = regexp('short test string','\w*r\w*'); %! assert (s,[1,12]) %! assert (e,[5,17]) %! assert (size(te), [1,2]) %! assert (isempty(te{1})) %! assert (isempty(te{2})) %! assert (m{1},'short') %! assert (m{2},'string') %! assert (size(t), [1,2]) %! assert (isempty(t{1})) %! assert (isempty(t{2})) %!test %! [s, e, te, m, t] = regexp('short test string','\w*r\w*','once'); %! assert (s,1) %! assert (e,5) %! assert (isempty(te)) %! assert (m,'short') %! assert (isempty(t)) %!test %! [m, te, e, s, t] = regexp('short test string','\w*r\w*','once', 'match', 'tokenExtents', 'end', 'start', 'tokens'); %! assert (s,1) %! assert (e,5) %! assert (isempty(te)) %! assert (m,'short') %! assert (isempty(t)) %!testif HAVE_PCRE %! ## This test is expected to fail if PCRE is not installed %! [s, e, te, m, t, nm] = regexp('short test string','(?<word1>\w*t)\s*(?<word2>\w*t)'); %! assert (s,1) %! assert (e,10) %! assert (size(te), [1,1]) %! assert (te{1}, [1 5; 7, 10]) %! assert (m{1},'short test') %! assert (size(t),[1,1]) %! assert (t{1}{1},'short') %! assert (t{1}{2},'test') %! assert (size(nm), [1,1]) %! assert (!isempty(fieldnames(nm))) %! assert (sort(fieldnames(nm)),{'word1';'word2'}) %! assert (nm.word1,'short') %! assert (nm.word2,'test') %!testif HAVE_PCRE %! ## This test is expected to fail if PCRE is not installed %! [nm, m, te, e, s, t] = regexp('short test string','(?<word1>\w*t)\s*(?<word2>\w*t)', 'names', 'match', 'tokenExtents', 'end', 'start', 'tokens'); %! assert (s,1) %! assert (e,10) %! assert (size(te), [1,1]) %! assert (te{1}, [1 5; 7, 10]) %! assert (m{1},'short test') %! assert (size(t),[1,1]) %! assert (t{1}{1},'short') %! assert (t{1}{2},'test') %! assert (size(nm), [1,1]) %! assert (!isempty(fieldnames(nm))) %! assert (sort(fieldnames(nm)),{'word1';'word2'}) %! assert (nm.word1,'short') %! assert (nm.word2,'test') %!testif HAVE_PCRE %! ## This test is expected to fail if PCRE is not installed %! [t, nm] = regexp("John Davis\nRogers, James",'(?<first>\w+)\s+(?<last>\w+)|(?<last>\w+),\s+(?<first>\w+)','tokens','names'); %! assert (size(t), [1,2]); %! assert (t{1}{1},'John'); %! assert (t{1}{2},'Davis'); %! assert (t{2}{1},'Rogers'); %! assert (t{2}{2},'James'); %! assert (size(nm), [1,1]); %! assert (nm.first{1},'John'); %! assert (nm.first{2},'James'); %! assert (nm.last{1},'Davis'); %! assert (nm.last{2},'Rogers'); %!assert(regexp("abc\nabc",'.'),[1:7]) %!assert(regexp("abc\nabc",'.','dotall'),[1:7]) %!testif HAVE_PCRE %! assert(regexp("abc\nabc",'(?s).'),[1:7]) %! assert(regexp("abc\nabc",'.','dotexceptnewline'),[1,2,3,5,6,7]) %! assert(regexp("abc\nabc",'(?-s).'),[1,2,3,5,6,7]) %!assert(regexp("caseCaSe",'case'),1) %!assert(regexp("caseCaSe",'case',"matchcase"),1) %!assert(regexp("caseCaSe",'case',"ignorecase"),[1,5]) %!testif HAVE_PCRE %! assert(regexp("caseCaSe",'(?-i)case'),1) %! assert(regexp("caseCaSe",'(?i)case'),[1,5]) %!assert (regexp("abc\nabc",'c$'),7) %!assert (regexp("abc\nabc",'c$',"stringanchors"),7) %!testif HAVE_PCRE %! assert (regexp("abc\nabc",'(?-m)c$'),7) %! assert (regexp("abc\nabc",'c$',"lineanchors"),[3,7]) %! assert (regexp("abc\nabc",'(?m)c$'),[3,7]) %!assert (regexp("this word",'s w'),4) %!assert (regexp("this word",'s w','literalspacing'),4) %!testif HAVE_PCRE %! assert (regexp("this word",'(?-x)s w','literalspacing'),4) %! assert (regexp("this word",'s w','freespacing'),zeros(1,0)) %! assert (regexp("this word",'(?x)s w'),zeros(1,0)) %!error regexp('string', 'tri', 'BadArg'); %!error regexp('string'); %!assert(regexp({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'},'-'),{6;[1,5,9];zeros(1,0)}) %!assert(regexp({'asdfg-dfd','-dfd-dfd-','qasfdfdaq'},'-'),{6,[1,5,9],zeros(1,0)}) %!assert(regexp({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'},{'-';'f';'q'}),{6;[3,7];[1,9]}) %!assert(regexp('Strings',{'t','s'}),{2,7}) ## Test case for lookaround operators %!assert(regexp('Iraq','q(?!u)'),4) %!assert(regexp('quit','q(?!u)'), zeros(1,0)) %!assert(regexp('quit','q(?=u)','match'), {'q'}) %!assert(regexp("quit",'q(?=u+)','match'), {'q'}) %!assert(regexp("qit",'q(?=u+)','match'), cell(1,0)) %!assert(regexp("qit",'q(?=u*)','match'), {'q'}) %!assert(regexp('thingamabob','(?<=a)b'), 9) */ DEFUN_DLD (regexpi, args, nargout, "-*- texinfo -*-\n\ @deftypefn {Loadable Function} {[@var{s}, @var{e}, @var{te}, @var{m}, @var{t}, @var{nm}] =} regexpi (@var{str}, @var{pat})\n\ @deftypefnx {Loadable Function} {[@dots{}] =} regexpi (@var{str}, @var{pat}, @var{opts}, @dots{})\n\ \n\ Case insensitive regular expression string matching. Matches @var{pat} in\n\ @var{str} and returns the position and matching substrings or empty values\n\ if there are none. @xref{doc-regexp,,regexp}, for more details\n\ @end deftypefn") { octave_value_list retval; int nargin = args.length(); if (nargin < 2) print_usage (); else if (args(0).is_cell() || args(1).is_cell()) retval = octcellregexp (args, nargout, "regexpi", true); else retval = octregexp (args, nargout, "regexpi", true); return retval; } /* ## seg-fault test %!assert(regexpi("abcde","."),[1,2,3,4,5]) ## Check that anchoring of pattern works correctly %!assert(regexpi('abcabc','^abc'),1); %!assert(regexpi('abcabc','abc$'),4); %!assert(regexpi('abcabc','^abc$'),zeros(1,0)); %!test %! [s, e, te, m, t] = regexpi(' No Match ', 'f(.*)uck'); %! assert (s,zeros(1,0)) %! assert (e,zeros(1,0)) %! assert (te,cell(1,0)) %! assert (m, cell(1,0)) %! assert (t, cell(1,0)) %!test %! [s, e, te, m, t] = regexpi(' FiRetrUck ', 'f(.*)uck'); %! assert (s,2) %! assert (e,10) %! assert (te{1},[3,7]) %! assert (m{1}, 'FiRetrUck') %! assert (t{1}{1}, 'iRetr') %!test %! [s, e, te, m, t] = regexpi(' firetruck ', 'f(.*)uck'); %! assert (s,2) %! assert (e,10) %! assert (te{1},[3,7]) %! assert (m{1}, 'firetruck') %! assert (t{1}{1}, 'iretr') %!test %! [s, e, te, m, t] = regexpi('ShoRt Test String','\w*r\w*'); %! assert (s,[1,12]) %! assert (e,[5,17]) %! assert (size(te), [1,2]) %! assert (isempty(te{1})) %! assert (isempty(te{2})) %! assert (m{1},'ShoRt') %! assert (m{2},'String') %! assert (size(t), [1,2]) %! assert (isempty(t{1})) %! assert (isempty(t{2})) %!test %! [s, e, te, m, t] = regexpi('ShoRt Test String','\w*r\w*','once'); %! assert (s,1) %! assert (e,5) %! assert (isempty(te)) %! assert (m,'ShoRt') %! assert (isempty(t)) %!test %! [m, te, e, s, t] = regexpi('ShoRt Test String','\w*r\w*','once', 'match', 'tokenExtents', 'end', 'start', 'tokens'); %! assert (s,1) %! assert (e,5) %! assert (isempty(te)) %! assert (m,'ShoRt') %! assert (isempty(t)) %!testif HAVE_PCRE %! ## This test is expected to fail if PCRE is not installed %! [s, e, te, m, t, nm] = regexpi('ShoRt Test String','(?<word1>\w*t)\s*(?<word2>\w*t)'); %! assert (s,1) %! assert (e,10) %! assert (size(te), [1,1]) %! assert (te{1}, [1 5; 7, 10]) %! assert (m{1},'ShoRt Test') %! assert (size(t),[1,1]) %! assert (t{1}{1},'ShoRt') %! assert (t{1}{2},'Test') %! assert (size(nm), [1,1]) %! assert (!isempty(fieldnames(nm))) %! assert (sort(fieldnames(nm)),{'word1';'word2'}) %! assert (nm.word1,'ShoRt') %! assert (nm.word2,'Test') %!testif HAVE_PCRE %! ## This test is expected to fail if PCRE is not installed %! [nm, m, te, e, s, t] = regexpi('ShoRt Test String','(?<word1>\w*t)\s*(?<word2>\w*t)', 'names', 'match', 'tokenExtents', 'end', 'start', 'tokens'); %! assert (s,1) %! assert (e,10) %! assert (size(te), [1,1]) %! assert (te{1}, [1 5; 7, 10]) %! assert (m{1},'ShoRt Test') %! assert (size(t),[1,1]) %! assert (t{1}{1},'ShoRt') %! assert (t{1}{2},'Test') %! assert (size(nm), [1,1]) %! assert (!isempty(fieldnames(nm))) %! assert (sort(fieldnames(nm)),{'word1';'word2'}) %! assert (nm.word1,'ShoRt') %! assert (nm.word2,'Test') %!assert(regexpi("abc\nabc",'.'),[1:7]) %!assert(regexpi("abc\nabc",'.','dotall'),[1:7]) %!testif HAVE_PCRE %! assert(regexpi("abc\nabc",'(?s).'),[1:7]) %! assert(regexpi("abc\nabc",'.','dotexceptnewline'),[1,2,3,5,6,7]) %! assert(regexpi("abc\nabc",'(?-s).'),[1,2,3,5,6,7]) %!assert(regexpi("caseCaSe",'case'),[1,5]) %!assert(regexpi("caseCaSe",'case',"matchcase"),1) %!assert(regexpi("caseCaSe",'case',"ignorecase"),[1,5]) %!testif HAVE_PCRE %! assert(regexpi("caseCaSe",'(?-i)case'),1) %! assert(regexpi("caseCaSe",'(?i)case'),[1,5]) %!assert (regexpi("abc\nabc",'c$'),7) %!assert (regexpi("abc\nabc",'c$',"stringanchors"),7) %!testif HAVE_PCRE %! assert (regexpi("abc\nabc",'(?-m)c$'),7) %! assert (regexpi("abc\nabc",'c$',"lineanchors"),[3,7]) %! assert (regexpi("abc\nabc",'(?m)c$'),[3,7]) %!assert (regexpi("this word",'s w'),4) %!assert (regexpi("this word",'s w','literalspacing'),4) %!testif HAVE_PCRE %! assert (regexpi("this word",'(?-x)s w','literalspacing'),4) %! assert (regexpi("this word",'s w','freespacing'),zeros(1,0)) %! assert (regexpi("this word",'(?x)s w'),zeros(1,0)) %!error regexpi('string', 'tri', 'BadArg'); %!error regexpi('string'); %!assert(regexpi({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'},'-'),{6;[1,5,9];zeros(1,0)}) %!assert(regexpi({'asdfg-dfd','-dfd-dfd-','qasfdfdaq'},'-'),{6,[1,5,9],zeros(1,0)}) %!assert(regexpi({'asdfg-dfd';'-dfd-dfd-';'qasfdfdaq'},{'-';'f';'q'}),{6;[3,7];[1,9]}) %!assert(regexpi('Strings',{'t','s'}),{2,[1,7]}) */ static octave_value octregexprep (const octave_value_list &args, const std::string &nm) { octave_value retval; int nargin = args.length(); // Make sure we have string,pattern,replacement const std::string buffer = args(0).string_value (); if (error_state) return retval; const std::string pattern = args(1).string_value (); if (error_state) return retval; const std::string replacement = args(2).string_value (); if (error_state) return retval; // Pack options excluding 'tokenize' and various output // reordering strings into regexp arg list octave_value_list regexpargs(nargin-1,octave_value()); regexpargs(0) = args(0); regexpargs(1) = args(1); int len=2; for (int i = 3; i < nargin; i++) { const std::string opt = args(i).string_value(); if (opt != "tokenize" && opt != "start" && opt != "end" && opt != "tokenextents" && opt != "match" && opt != "tokens" && opt != "names" && opt != "warnings") { regexpargs(len++) = args(i); } } regexpargs.resize(len); // Identify replacement tokens; build a vector of group numbers in // the replacement string so that we can quickly calculate the size // of the replacement. int tokens = 0; for (size_t i=1; i < replacement.size(); i++) { if (replacement[i-1]=='$' && isdigit(replacement[i])) { tokens++, i++; } } std::vector<int> token(tokens); int kk = 0; for (size_t i = 1; i < replacement.size(); i++) { if (replacement[i-1]=='$' && isdigit(replacement[i])) { token[kk++] = replacement[i]-'0'; i++; } } // Perform replacement std::string rep; if (tokens > 0) { std::list<regexp_elem> lst; string_vector named; int nopts; bool once; int sz = octregexp_list (regexpargs, nm , false, lst, named, nopts, once); if (error_state) return retval; if (sz == 0) { retval = args(0); return retval; } // Determine replacement length const size_t replen = replacement.size() - 2*tokens; int delta = 0; const_iterator p = lst.begin(); for (int i = 0; i < sz; i++) { OCTAVE_QUIT; const Matrix pairs(p->te); size_t pairlen = 0; for (int j = 0; j < tokens; j++) { if (token[j] == 0) pairlen += static_cast<size_t>(p->e - p->s) + 1; else if (token[j] <= pairs.rows()) pairlen += static_cast<size_t>(pairs(token[j]-1,1) - pairs(token[j]-1,0)) + 1; } delta += static_cast<int>(replen + pairlen) - static_cast<int>(p->e - p->s + 1); p++; } // Build replacement string rep.reserve(buffer.size()+delta); size_t from = 0; p = lst.begin(); for (int i=0; i < sz; i++) { OCTAVE_QUIT; const Matrix pairs(p->te); rep.append(&buffer[from], static_cast<size_t>(p->s - 1) - from); from = static_cast<size_t>(p->e - 1) + 1; for (size_t j = 1; j < replacement.size(); j++) { if (replacement[j-1]=='$' && isdigit(replacement[j])) { int k = replacement[j]-'0'; if (k == 0) { // replace with entire match rep.append(&buffer[static_cast<size_t>(p->e - 1)], static_cast<size_t>(p->e - p->s) + 1); } else if (k <= pairs.rows()) { // replace with group capture rep.append(&buffer[static_cast<size_t>(pairs(k-1,0)-1)], static_cast<size_t>(pairs(k-1,1) - pairs(k-1,0))+1); } else { // replace with nothing } j++; } else { rep.append(1,replacement[j-1]); } if (j+1 == replacement.size()) { rep.append(1,replacement[j]); } } p++; } rep.append(&buffer[from],buffer.size()-from); } else { std::list<regexp_elem> lst; string_vector named; int nopts; bool once; int sz = octregexp_list (regexpargs, nm, false, lst, named, nopts, once); if (error_state) return retval; if (sz == 0) { retval = args(0); return retval; } // Determine replacement length const size_t replen = replacement.size(); int delta = 0; const_iterator p = lst.begin(); for (int i = 0; i < sz; i++) { OCTAVE_QUIT; delta += static_cast<int>(replen) - static_cast<int>(p->e - p->s + 1); p++; } // Build replacement string rep.reserve(buffer.size()+delta); size_t from = 0; p = lst.begin(); for (int i=0; i < sz; i++) { OCTAVE_QUIT; rep.append(&buffer[from], static_cast<size_t>(p->s - 1) - from); from = static_cast<size_t>(p->e - 1) + 1; rep.append(replacement); p++; } rep.append(&buffer[from],buffer.size()-from); } retval = rep; return retval; } DEFUN_DLD (regexprep, args, , "-*- texinfo -*-\n\ @deftypefn {Loadable Function} {@var{string} =} regexprep (@var{string}, @var{pat}, @var{repstr}, @var{options})\n\ Replace matches of @var{pat} in @var{string} with @var{repstr}.\n\ \n\ \n\ The replacement can contain @code{$i}, which substitutes\n\ for the ith set of parentheses in the match string. E.g.,\n\ @example\n\ \n\ regexprep(\"Bill Dunn\",'(\\w+) (\\w+)','$2, $1')\n\ \n\ @end example\n\ returns \"Dunn, Bill\"\n\ \n\ @var{options} may be zero or more of\n\ @table @samp\n\ \n\ @item once\n\ Replace only the first occurrence of @var{pat} in the result.\n\ \n\ @item warnings\n\ This option is present for compatibility but is ignored.\n\ \n\ @item ignorecase or matchcase\n\ Ignore case for the pattern matching (see @code{regexpi}).\n\ Alternatively, use (?i) or (?-i) in the pattern.\n\ \n\ @item lineanchors and stringanchors\n\ Whether characters ^ and $ match the beginning and ending of lines.\n\ Alternatively, use (?m) or (?-m) in the pattern.\n\ \n\ @item dotexceptnewline and dotall\n\ Whether . matches newlines in the string.\n\ Alternatively, use (?s) or (?-s) in the pattern.\n\ \n\ @item freespacing or literalspacing\n\ Whether whitespace and # comments can be used to make the regular expression more readable.\n\ Alternatively, use (?x) or (?-x) in the pattern.\n\ \n\ @end table\n\ @seealso{regexp,regexpi,strrep}\n\ @end deftypefn") { octave_value_list retval; int nargin = args.length(); if (nargin < 3) { print_usage (); return retval; } if (args(0).is_cell() || args(1).is_cell() || args(2).is_cell()) { Cell str; Cell pat; Cell rep; dim_vector dv0; dim_vector dv1(1,1); if (args(0).is_cell()) str = args(0).cell_value(); else str = Cell (args(0)); if (args(1).is_cell()) pat = args(1).cell_value(); else pat = Cell (args(1)); if (args(2).is_cell()) rep = args(2).cell_value(); else rep = Cell (args(2)); dv0 = str.dims(); if (pat.numel() != 1) { dv1 = pat.dims(); if (rep.numel() != 1 && dv1 != rep.dims()) error ("regexprep: Inconsistent cell array dimensions"); } else if (rep.numel() != 1) dv1 = rep.dims(); if (!error_state) { Cell ret (dv0); octave_value_list new_args = args; for (octave_idx_type i = 0; i < dv0.numel(); i++) { new_args(0) = str(i); if (pat.numel() == 1) new_args(1) = pat(0); if (rep.numel() == 1) new_args(2) = rep(0); for (octave_idx_type j = 0; j < dv1.numel(); j++) { if (pat.numel() != 1) new_args(1) = pat(j); if (rep.numel() != 1) new_args(2) = rep(j); new_args(0) = octregexprep (new_args, "regexprep"); if (error_state) break; } if (error_state) break; ret(i) = new_args(0); } if (!error_state) retval = octave_value (ret); } } else retval = octregexprep (args, "regexprep"); return retval; } /* %!test # Replace with empty %! xml = '<!-- This is some XML --> <tag v="hello">some stuff<!-- sample tag--></tag>'; %! t = regexprep(xml,'<[!?][^>]*>',''); %! assert(t,' <tag v="hello">some stuff</tag>') %!test # Replace with non-empty %! xml = '<!-- This is some XML --> <tag v="hello">some stuff<!-- sample tag--></tag>'; %! t = regexprep(xml,'<[!?][^>]*>','?'); %! assert(t,'? <tag v="hello">some stuff?</tag>') %!test # Check that 'tokenize' is ignored %! xml = '<!-- This is some XML --> <tag v="hello">some stuff<!-- sample tag--></tag>'; %! t = regexprep(xml,'<[!?][^>]*>','','tokenize'); %! assert(t,' <tag v="hello">some stuff</tag>') %!testif HAVE_PCRE # Capture replacement %! data = "Bob Smith\nDavid Hollerith\nSam Jenkins"; %! result = "Smith, Bob\nHollerith, David\nJenkins, Sam"; %! t = regexprep(data,'(?m)^(\w+)\s+(\w+)$','$2, $1'); %! assert(t,result) # Return the original if no match %!assert(regexprep('hello','world','earth'),'hello') ## Test a general replacement %!assert(regexprep("a[b]c{d}e-f=g", "[^A-Za-z0-9_]", "_"), "a_b_c_d_e_f_g"); ## Make sure it works at the beginning and end %!assert(regexprep("a[b]c{d}e-f=g", "a", "_"), "_[b]c{d}e-f=g"); %!assert(regexprep("a[b]c{d}e-f=g", "g", "_"), "a[b]c{d}e-f=_"); ## Options %!assert(regexprep("a[b]c{d}e-f=g", "[^A-Za-z0-9_]", "_", "once"), "a_b]c{d}e-f=g"); %!assert(regexprep("a[b]c{d}e-f=g", "[^A-Z0-9_]", "_", "ignorecase"), "a_b_c_d_e_f_g"); ## Option combinations %!assert(regexprep("a[b]c{d}e-f=g", "[^A-Z0-9_]", "_", "once", "ignorecase"), "a_b]c{d}e-f=g"); ## End conditions on replacement %!assert(regexprep("abc","(b)",".$1"),"a.bc"); %!assert(regexprep("abc","(b)","$1"),"abc"); %!assert(regexprep("abc","(b)","$1."),"ab.c"); %!assert(regexprep("abc","(b)","$1.."),"ab..c"); ## Test cell array arguments %!assert(regexprep("abc",{"b","a"},"?"),{"??c"}) %!assert(regexprep({"abc","cba"},"b","?"),{"a?c","c?a"}) %!assert(regexprep({"abc","cba"},{"b","a"},{"?","!"}),{"!?c","c?!"}) # Nasty lookbehind expression %!assert(regexprep('x^(-1)+y(-1)+z(-1)=0','(?<=[a-z]+)\(\-[1-9]*\)','_minus1'),'x^(-1)+y_minus1+z_minus1=0') */ /* ;;; Local Variables: *** ;;; mode: C++ *** ;;; End: *** */