changeset 12462:e4dbfe3019b1

Use PCRE regular expressions throughout Octave.
author Rik <octave@nomad.inbox5.com>
date Sat, 19 Feb 2011 18:21:58 -0800
parents cc14943e6254
children 189baf055143
files doc/ChangeLog doc/interpreter/install.txi scripts/ChangeLog scripts/io/dlmwrite.m scripts/pkg/get_forge_pkg.m scripts/plot/__gnuplot_ginput__.m scripts/plot/__go_draw_axes__.m scripts/testfun/runtests.m src/ChangeLog src/DLD-FUNCTIONS/regexp.cc test/ChangeLog test/fntests.m
diffstat 12 files changed, 57 insertions(+), 54 deletions(-) [+]
line wrap: on
line diff
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -1,3 +1,7 @@
+2011-02-19  Rik  <octave@nomad.inbox5.com>
+
+	* interpreter/install.txi: Remove reference to POSIX regex library.
+
 2010-02-14  Rik  <octave@nomad.inbox5.com>
 
 	* interpreter/arith.txi, interpreter/io.txi, interpreter/oop.txi:
--- a/doc/interpreter/install.txi
+++ b/doc/interpreter/install.txi
@@ -561,7 +561,7 @@
 please submit a bug report.
 
 @itemize @minus
-@item pcre or regex
+@item pcre
 
 @item zlib
 
--- a/scripts/ChangeLog
+++ b/scripts/ChangeLog
@@ -1,3 +1,9 @@
+2011-02-19  Rik  <octave@nomad.inbox5.com>
+
+	* io/dlmwrite.m, pkg/get_forge_pkg.m, plot/__gnuplot_ginput__.m,
+	plot/__go_draw_axes__.m, testfun/runtests.m: Use PCRE regular
+	expressions to simplify scripts.
+
 2011-02-19  Ben Abbott  <bpabbott@mac.com>
 
 	* plot/__print_parse_opts__.m, plot/private/__ghostscript__.m: Improve
--- a/scripts/io/dlmwrite.m
+++ b/scripts/io/dlmwrite.m
@@ -172,7 +172,7 @@
                repmat ([repmat(delim, 1, c + columns(M)-1), newline], 1, r));
     endif
     if (iscomplex (M))
-      cprecision = regexprep (precision, '^%([-.0-9])','%+$1');
+      cprecision = regexprep (precision, '^%([-\d.])','%+$1');
       template = [precision, cprecision, "i", ...
                   repmat([delim, precision, cprecision, "i"], 1, ...
                   columns(M) - 1), newline ];
--- a/scripts/pkg/get_forge_pkg.m
+++ b/scripts/pkg/get_forge_pkg.m
@@ -43,7 +43,7 @@
     ## Remove blanks for simpler matching.
     html(isspace(html)) = [];
     ## Good. Let's grep for the version.
-    pat = "<tdclass=""package_table"">PackageVersion:</td><td>([0-9.]*)</td>";
+    pat = "<tdclass=""package_table"">PackageVersion:</td><td>([\\d.]*)</td>";
     t = regexp (html, pat, "tokens");
     if (isempty (t) || isempty(t{1}))
       error ("get_forge_pkg: could not read version number from package's page");
--- a/scripts/plot/__gnuplot_ginput__.m
+++ b/scripts/plot/__gnuplot_ginput__.m
@@ -113,7 +113,7 @@
           if (isempty (str))
             sleep (0.05);
           else
-            str = regexp (str, 'OCTAVE:\s+[-+.0-9]+\s+[-+.0-9]+\s+[0-9]*', 'match');
+            str = regexp (str, 'OCTAVE:\s+[-+.\d]+\s+[-+.\d]+\s+\d*', 'match');
           endif
           fclear (istream);
         endwhile
--- a/scripts/plot/__go_draw_axes__.m
+++ b/scripts/plot/__go_draw_axes__.m
@@ -2326,7 +2326,7 @@
 
 function l = length_string (s)
   l = length (s) - length (strfind(s,'{')) - length (strfind(s,'}'));
-  m = regexp (s, '/([a-zA-Z0-9_-]+|[a-zA-Z0-9_-]+=[0-9]+)', 'matches');
+  m = regexp (s, '/([\w-]+|[\w-]+=\d+)', 'matches');
   if (!isempty (m))
     l = l - sum (cellfun (@length, m));
   endif
--- a/scripts/testfun/runtests.m
+++ b/scripts/testfun/runtests.m
@@ -80,9 +80,7 @@
   if (fid >= 0)
     str = fread (fid, "*char")';
     fclose (fid);
-    ## Avoid PCRE 'lineanchors' by searching for newline followed by PTN.
-    ## Equivalent to regexp ('^PTN','lineanchors')
-    retval = ! isempty (regexp (str, "[\r\n]\\s*%!(test|assert|error|warning)", "once"));
+    retval = ! isempty (regexp (str, "^%!(test|assert|error|warning)", "lineanchors", "once"));
   else
     error ("runtests: fopen failed: %s", f);
   endif
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,9 @@
+2011-02-19  Rik  <octave@nomad.inbox5.com>
+
+	* DLD-FUNCTIONS/regexp.cc: Use PCRE regular expressions everywhere
+	now that Octave requires the library for building.  Remove testif PCRE
+	on testing blocks and adjust all documentation.
+
 2011-02-18  Konstantinos Poulios  <logari81@googlemail.com>
 
 	* graphics.cc (axes::properties::sync_positions): Fix a small
--- a/src/DLD-FUNCTIONS/regexp.cc
+++ b/src/DLD-FUNCTIONS/regexp.cc
@@ -885,9 +885,7 @@
 @deftypefnx {Loadable Function} {[@dots{}] =} regexp (@var{str}, @var{pat}, \"@var{opt1}\", @dots{})\n\
 Regular expression string matching.  Search for @var{pat} in @var{str} and\n\
 return the positions and substrings of any matches, or empty values if there\n\
-are none.  Note, some features and extended options are only available when\n\
-Octave is compiled with support for Perl Compatible Regular Expressions\n\
-(PCRE).\n\
+are none.\n\
 \n\
 The matched pattern @var{pat} can include any of the standard regex\n\
 operators, including:\n\
@@ -924,11 +922,9 @@
 and \"]\".  If the first character is \"^\" then the pattern is inverted and\n\
 any character except those listed between brackets will match.\n\
 \n\
-With PCRE support, escape sequences defined below can be used inside list\n\
+Escape sequences defined below can also be used inside list\n\
 operators.  For example, a template for a floating point number might be\n\
-@code{[-+.\\d]+}.  POSIX regular expressions do not use escape sequences\n\
-and any backslash @samp{\\} will be interpreted literally as one\n\
-of the list of characters to match.\n\
+@code{[-+.\\d]+}.\n\
 \n\
 @item ()\n\
 Grouping operator\n\
@@ -975,14 +971,8 @@
 @item \\d\n\
 Match any digit\n\
 \n\
-This sequence is only available with PCRE support.  For POSIX regular\n\
-expressions use the following list operator @code{[0-9]}.\n\
-\n\
 @item \\D\n\
 Match any non-digit\n\
-\n\
-This sequence is only available with PCRE support.  For POSIX regular\n\
-expressions use the following list operator @code{[^0-9]}.\n\
 @end table\n\
 \n\
 The outputs of @code{regexp} default to the order given below\n\
@@ -1007,7 +997,7 @@
 @item nm\n\
 A structure containing the text of each matched named token, with the name\n\
 being used as the fieldname.  A named token is denoted by\n\
-@code{(?<name>@dots{})} and is only available with PCRE support.\n\
+@code{(?<name>@dots{})}.\n\
 @end table\n\
 \n\
 Particular output arguments, or the order of the output arguments, can be\n\
@@ -1033,49 +1023,46 @@
 @item matchcase\n\
 Make the matching case sensitive.  (default)\n\
 \n\
-Alternatively, use (?-i) in the pattern when PCRE is available.\n\
+Alternatively, use (?-i) in the pattern.\n\
 \n\
 @item ignorecase\n\
 Ignore case when matching the pattern to the string.\n\
 \n\
-Alternatively, use (?i) in the pattern when PCRE is available.\n\
+Alternatively, use (?i) in the pattern.\n\
 \n\
 @item stringanchors\n\
 Match the anchor characters at the beginning and end of the string.  \n\
 (default)\n\
 \n\
-Alternatively, use (?-m) in the pattern when PCRE is available.\n\
+Alternatively, use (?-m) in the pattern.\n\
 \n\
 @item lineanchors\n\
 Match the anchor characters at the beginning and end of the line.\n\
-Only available when Octave is compiled with PCRE.\n\
 \n\
-Alternatively, use (?m) in the pattern when PCRE is available.\n\
+Alternatively, use (?m) in the pattern.\n\
 \n\
 @item dotall\n\
 The pattern @code{.} matches all characters including the newline character.\n\
  (default)\n\
 \n\
-Alternatively, use (?s) in the pattern when PCRE is available.\n\
+Alternatively, use (?s) in the pattern.\n\
 \n\
 @item dotexceptnewline\n\
 The pattern @code{.} matches all characters except the newline character.\n\
-Only available when Octave is compiled with PCRE.\n\
 \n\
-Alternatively, use (?-s) in the pattern when PCRE is available.\n\
+Alternatively, use (?-s) in the pattern.\n\
 \n\
 @item literalspacing\n\
 All characters in the pattern, including whitespace, are significant and are\n\
 used in pattern matching.  (default)\n\
 \n\
-Alternatively, use (?-x) in the pattern when PCRE is available.\n\
+Alternatively, use (?-x) in the pattern.\n\
 \n\
 @item freespacing\n\
 The pattern may include arbitrary whitespace and also comments beginning with\n\
 the character @samp{#}.\n\
-Only available when Octave is compiled with PCRE.\n\
 \n\
-Alternatively, use (?x) in the pattern when PCRE is available.\n\
+Alternatively, use (?x) in the pattern.\n\
 \n\
 @end table\n\
 @seealso{regexpi, strfind, regexprep}\n\
@@ -1168,7 +1155,7 @@
 %! assert (m,'short')
 %! assert (isempty(t))
 
-%!testif HAVE_PCRE
+%!test
 %! [s, e, te, m, t, nm] = regexp('short test string','(?<word1>\w*t)\s*(?<word2>\w*t)');
 %! assert (s,1)
 %! assert (e,10)
@@ -1184,7 +1171,7 @@
 %! assert (nm.word1,'short')
 %! assert (nm.word2,'test')
 
-%!testif HAVE_PCRE
+%!test
 %! [nm, m, te, e, s, t] = regexp('short test string','(?<word1>\w*t)\s*(?<word2>\w*t)', 'names', 'match', 'tokenExtents', 'end', 'start', 'tokens');
 %! assert (s,1)
 %! assert (e,10)
@@ -1200,7 +1187,7 @@
 %! assert (nm.word1,'short')
 %! assert (nm.word2,'test')
 
-%!testif HAVE_PCRE
+%!test
 %! [t, nm] = regexp("John Davis\nRogers, James",'(?<first>\w+)\s+(?<last>\w+)|(?<last>\w+),\s+(?<first>\w+)','tokens','names');
 %! assert (size(t), [1,2]);
 %! assert (t{1}{1},'John');
@@ -1213,13 +1200,13 @@
 %! assert (nm.last{1},'Davis');
 %! assert (nm.last{2},'Rogers');
 
-%!testif HAVE_PCRE
+%!test
 %! # Parenthesis in named token (ie (int)) causes a problem
 %! assert (regexp('qwe int asd', ['(?<typestr>(int))'], 'names'), struct ('typestr', 'int'));
 
 %!assert(regexp("abc\nabc",'.'),[1:7])
 %!assert(regexp("abc\nabc",'.','dotall'),[1:7])
-%!testif HAVE_PCRE
+%!test
 %! assert(regexp("abc\nabc",'(?s).'),[1:7])
 %! assert(regexp("abc\nabc",'.','dotexceptnewline'),[1,2,3,5,6,7])
 %! assert(regexp("abc\nabc",'(?-s).'),[1,2,3,5,6,7])
@@ -1227,20 +1214,20 @@
 %!assert(regexp("caseCaSe",'case'),1)
 %!assert(regexp("caseCaSe",'case',"matchcase"),1)
 %!assert(regexp("caseCaSe",'case',"ignorecase"),[1,5])
-%!testif HAVE_PCRE
+%!test
 %! assert(regexp("caseCaSe",'(?-i)case'),1)
 %! assert(regexp("caseCaSe",'(?i)case'),[1,5])
 
 %!assert (regexp("abc\nabc",'c$'),7)
 %!assert (regexp("abc\nabc",'c$',"stringanchors"),7)
-%!testif HAVE_PCRE
+%!test
 %! assert (regexp("abc\nabc",'(?-m)c$'),7)
 %! assert (regexp("abc\nabc",'c$',"lineanchors"),[3,7])
 %! assert (regexp("abc\nabc",'(?m)c$'),[3,7])
 
 %!assert (regexp("this word",'s w'),4)
 %!assert (regexp("this word",'s w','literalspacing'),4)
-%!testif HAVE_PCRE
+%!test
 %! assert (regexp("this word",'(?-x)s w','literalspacing'),4)
 %! assert (regexp("this word",'s w','freespacing'),zeros(1,0))
 %! assert (regexp("this word",'(?x)s w'),zeros(1,0))
@@ -1254,7 +1241,7 @@
 %!assert(regexp('Strings',{'t','s'}),{2,7})
 
 ## Test case for lookaround operators
-%!testif HAVE_PCRE
+%!test
 %! assert(regexp('Iraq','q(?!u)'),4)
 %! assert(regexp('quit','q(?!u)'), zeros(1,0))
 %! assert(regexp('quit','q(?=u)','match'), {'q'})
@@ -1353,7 +1340,7 @@
 %! assert (m,'ShoRt')
 %! assert (isempty(t))
 
-%!testif HAVE_PCRE
+%!test
 %! [s, e, te, m, t, nm] = regexpi('ShoRt Test String','(?<word1>\w*t)\s*(?<word2>\w*t)');
 %! assert (s,1)
 %! assert (e,10)
@@ -1369,7 +1356,7 @@
 %! assert (nm.word1,'ShoRt')
 %! assert (nm.word2,'Test')
 
-%!testif HAVE_PCRE
+%!test
 %! [nm, m, te, e, s, t] = regexpi('ShoRt Test String','(?<word1>\w*t)\s*(?<word2>\w*t)', 'names', 'match', 'tokenExtents', 'end', 'start', 'tokens');
 %! assert (s,1)
 %! assert (e,10)
@@ -1387,7 +1374,7 @@
 
 %!assert(regexpi("abc\nabc",'.'),[1:7])
 %!assert(regexpi("abc\nabc",'.','dotall'),[1:7])
-%!testif HAVE_PCRE
+%!test
 %! assert(regexpi("abc\nabc",'(?s).'),[1:7])
 %! assert(regexpi("abc\nabc",'.','dotexceptnewline'),[1,2,3,5,6,7])
 %! assert(regexpi("abc\nabc",'(?-s).'),[1,2,3,5,6,7])
@@ -1395,20 +1382,20 @@
 %!assert(regexpi("caseCaSe",'case'),[1,5])
 %!assert(regexpi("caseCaSe",'case',"matchcase"),1)
 %!assert(regexpi("caseCaSe",'case',"ignorecase"),[1,5])
-%!testif HAVE_PCRE
+%!test
 %! assert(regexpi("caseCaSe",'(?-i)case'),1)
 %! assert(regexpi("caseCaSe",'(?i)case'),[1,5])
 
 %!assert (regexpi("abc\nabc",'C$'),7)
 %!assert (regexpi("abc\nabc",'C$',"stringanchors"),7)
-%!testif HAVE_PCRE
+%!test
 %! assert (regexpi("abc\nabc",'(?-m)C$'),7)
 %! assert (regexpi("abc\nabc",'C$',"lineanchors"),[3,7])
 %! assert (regexpi("abc\nabc",'(?m)C$'),[3,7])
 
 %!assert (regexpi("this word",'S w'),4)
 %!assert (regexpi("this word",'S w','literalspacing'),4)
-%!testif HAVE_PCRE
+%!test
 %! assert (regexpi("this word",'(?-x)S w','literalspacing'),4)
 %! assert (regexpi("this word",'S w','freespacing'),zeros(1,0))
 %! assert (regexpi("this word",'(?x)S w'),zeros(1,0))
@@ -1746,7 +1733,7 @@
 %! assert(t,' <tag v="hello">some stuff</tag>')
 
 ## Test capture replacement
-%!testif HAVE_PCRE
+%!test
 %! data = "Bob Smith\nDavid Hollerith\nSam Jenkins";
 %! result = "Smith, Bob\nHollerith, David\nJenkins, Sam";
 %! t = regexprep(data,'(?m)^(\w+)\s+(\w+)$','$2, $1');
@@ -1781,7 +1768,7 @@
 %!assert(regexprep({"abc","cba"},{"b","a"},{"?","!"}),{"!?c","c?!"})
 
 # Nasty lookbehind expression
-%!testif HAVE_PCRE
+%!test
 %! assert(regexprep('x^(-1)+y(-1)+z(-1)=0','(?<=[a-z]+)\(\-[1-9]*\)','_minus1'),'x^(-1)+y_minus1+z_minus1=0')
 
 */
--- a/test/ChangeLog
+++ b/test/ChangeLog
@@ -1,3 +1,7 @@
+2011-02-19  Rik  <octave@nomad.inbox5.com>
+
+	* fntests.m: Use PCRE regular expressions to simplify script.
+
 2010-02-02  Rik  <octave@nomad.inbox5.com>
 
 	* build_sparse_tests.sh: Use testif to only run some sparse tests when
--- a/test/fntests.m
+++ b/test/fntests.m
@@ -75,7 +75,7 @@
     if (fid >= 0)
       str = fread (fid, "*char")';
       fclose (fid);
-      retval = ! isempty (regexp (str,"[\r\n](DEFUN|DEFUN_DLD)\\b", "once"));
+      retval = ! isempty (regexp (str,'^(DEFUN|DEFUN_DLD)\b', 'lineanchors', 'once'));
     else
       error ("fopen failed: %s", f);
     endif
@@ -91,9 +91,7 @@
   if (fid >= 0)
     str = fread (fid, "*char")';
     fclose (fid);
-    ## Avoid PCRE 'lineanchors' by searching for newline followed by PTN.
-    ## Equivalent to regexp ('^PTN','lineanchors')
-    retval = ! isempty (regexp (str, "[\r\n]\\s*%!(test|assert|error|warning)", "once"));
+    retval = ! isempty (regexp (str, "^%!(test|assert|error|warning)", "lineanchors", "once"));
   else
     error ("fopen failed: %s", f);
   endif