Mercurial > hg > octave-nkf
comparison src/DLD-FUNCTIONS/regexp.cc @ 8140:cdd05e46f6c9
Increase pcre's match_limit for difficult regexps
author | Thomas Weber <thomas.weber.mail@gmail.com> |
---|---|
date | Wed, 24 Sep 2008 11:40:33 -0400 |
parents | dcc31f473596 |
children | 25bc2d31e1bf |
comparison
equal
deleted
inserted
replaced
8139:6b3a965b6c7d | 8140:cdd05e46f6c9 |
---|---|
49 #if defined (HAVE_SYS_TYPES_H) | 49 #if defined (HAVE_SYS_TYPES_H) |
50 #include <sys/types.h> | 50 #include <sys/types.h> |
51 #endif | 51 #endif |
52 #include <regex.h> | 52 #include <regex.h> |
53 #endif | 53 #endif |
54 | |
55 // Maximum number of times pcre_exec is retried, with a tenfold larger | |
56 // match limit each time, after it fails with PCRE_ERROR_MATCHLIMIT. | |
57 #define PCRE_MATCHLIMIT_MAX 10 | |
54 | 58 |
55 // The regexp is constructed as a linked list to avoid resizing the | 59 // The regexp is constructed as a linked list to avoid resizing the |
56 // return values in arrays at each new match. | 60 // return values in arrays at each new match. |
57 | 61 |
58 // FIXME don't bother collecting and composing return values the user | 62 // FIXME don't bother collecting and composing return values the user |
383 int matches = pcre_exec(re, 0, buffer.c_str(), | 387 int matches = pcre_exec(re, 0, buffer.c_str(), |
384 buffer.length(), idx, | 388 buffer.length(), idx, |
385 (idx ? PCRE_NOTBOL : 0), | 389 (idx ? PCRE_NOTBOL : 0), |
386 ovector, (subpatterns+1)*3); | 390 ovector, (subpatterns+1)*3); |
387 | 391 |
392 if (matches == PCRE_ERROR_MATCHLIMIT) | |
393 { | |
394 // try harder; start with default value for MATCH_LIMIT and increase it | |
395 warning("Your pattern caused PCRE to hit its MATCH_LIMIT.\nTrying harder now, but this will be slow."); | |
396 pcre_extra pe; | |
397 pcre_config(PCRE_CONFIG_MATCH_LIMIT, static_cast <void *> (&pe.match_limit)); | |
398 pe.flags = PCRE_EXTRA_MATCH_LIMIT; | |
399 | |
400 int i = 0; | |
401 while (matches == PCRE_ERROR_MATCHLIMIT && | |
402 i++ < PCRE_MATCHLIMIT_MAX) | |
403 { | |
404 OCTAVE_QUIT; | |
405 | |
406 pe.match_limit *= 10; | |
407 matches = pcre_exec(re, &pe, buffer.c_str(), | |
408 buffer.length(), idx, | |
409 (idx ? PCRE_NOTBOL : 0), | |
410 ovector, (subpatterns+1)*3); | |
411 } | |
412 } | |
413 | |
388 if (matches < 0 && matches != PCRE_ERROR_NOMATCH) | 414 if (matches < 0 && matches != PCRE_ERROR_NOMATCH) |
389 { | 415 { |
390 error ("%s: internal error calling pcre_exec", nm.c_str()); | 416 error ("%s: internal error calling pcre_exec\nError code from pcre_exec is %i", nm.c_str(), matches); |
391 pcre_free(re); | 417 pcre_free(re); |
392 return 0; | 418 return 0; |
393 } | 419 } |
394 else if (matches == PCRE_ERROR_NOMATCH) | 420 else if (matches == PCRE_ERROR_NOMATCH) |
395 break; | 421 break; |
981 | 1007 |
982 return retval; | 1008 return retval; |
983 } | 1009 } |
984 | 1010 |
985 /* | 1011 /* |
1012 | |
1013 ## PCRE_ERROR_MATCHLIMIT test | |
1014 %!test | |
1015 %! s=sprintf('\t4\n0000\t-0.00\t-0.0000\t4\t-0.00\t-0.0000\t4\n0000\t-0.00\t-0.0000\t0\t-0.00\t-'); | |
1016 %! ws = warning("query"); | |
1017 %! unwind_protect | |
1018 %! warning("off"); | |
1019 %! regexp(s, '(\s*-*\d+[.]*\d*\s*)+\n'); | |
1020 %! unwind_protect_cleanup | |
1021 %! warning(ws); | |
1022 %! end_unwind_protect | |
986 | 1023 |
987 ## seg-fault test | 1024 ## seg-fault test |
988 %!assert(regexp("abcde","."),[1,2,3,4,5]) | 1025 %!assert(regexp("abcde","."),[1,2,3,4,5]) |
989 | 1026 |
990 ## Check that anchoring of pattern works correctly | 1027 ## Check that anchoring of pattern works correctly |