comparison src/DLD-FUNCTIONS/regexp.cc @ 8140:cdd05e46f6c9

Increase pcre's match_limit for difficult regexps
author Thomas Weber <thomas.weber.mail@gmail.com>
date Wed, 24 Sep 2008 11:40:33 -0400
parents dcc31f473596
children 25bc2d31e1bf
comparison
equal deleted inserted replaced
8139:6b3a965b6c7d 8140:cdd05e46f6c9
49 #if defined (HAVE_SYS_TYPES_H) 49 #if defined (HAVE_SYS_TYPES_H)
50 #include <sys/types.h> 50 #include <sys/types.h>
51 #endif 51 #endif
52 #include <regex.h> 52 #include <regex.h>
53 #endif 53 #endif
54
55 // Maximum number of times pcre_exec is retried, each time with a tenfold
56 // larger match limit, after a pattern hits PCRE's MATCH_LIMIT.
57 #define PCRE_MATCHLIMIT_MAX 10
54 58
55 // The regexp is constructed as a linked list to avoid resizing the 59 // The regexp is constructed as a linked list to avoid resizing the
56 // return values in arrays at each new match. 60 // return values in arrays at each new match.
57 61
58 // FIXME don't bother collecting and composing return values the user 62 // FIXME don't bother collecting and composing return values the user
383 int matches = pcre_exec(re, 0, buffer.c_str(), 387 int matches = pcre_exec(re, 0, buffer.c_str(),
384 buffer.length(), idx, 388 buffer.length(), idx,
385 (idx ? PCRE_NOTBOL : 0), 389 (idx ? PCRE_NOTBOL : 0),
386 ovector, (subpatterns+1)*3); 390 ovector, (subpatterns+1)*3);
387 391
392 if (matches == PCRE_ERROR_MATCHLIMIT)
393 {
394 // try harder; start with default value for MATCH_LIMIT and increase it
395 warning("Your pattern caused PCRE to hit its MATCH_LIMIT.\nTrying harder now, but this will be slow.");
396 pcre_extra pe;
397 pcre_config(PCRE_CONFIG_MATCH_LIMIT, static_cast <void *> (&pe.match_limit));
398 pe.flags = PCRE_EXTRA_MATCH_LIMIT;
399
400 int i = 0;
401 while (matches == PCRE_ERROR_MATCHLIMIT &&
402 i++ < PCRE_MATCHLIMIT_MAX)
403 {
404 OCTAVE_QUIT;
405
406 pe.match_limit *= 10;
407 matches = pcre_exec(re, &pe, buffer.c_str(),
408 buffer.length(), idx,
409 (idx ? PCRE_NOTBOL : 0),
410 ovector, (subpatterns+1)*3);
411 }
412 }
413
388 if (matches < 0 && matches != PCRE_ERROR_NOMATCH) 414 if (matches < 0 && matches != PCRE_ERROR_NOMATCH)
389 { 415 {
390 error ("%s: internal error calling pcre_exec", nm.c_str()); 416 error ("%s: internal error calling pcre_exec\nError code from pcre_exec is %i", nm.c_str(), matches);
391 pcre_free(re); 417 pcre_free(re);
392 return 0; 418 return 0;
393 } 419 }
394 else if (matches == PCRE_ERROR_NOMATCH) 420 else if (matches == PCRE_ERROR_NOMATCH)
395 break; 421 break;
981 1007
982 return retval; 1008 return retval;
983 } 1009 }
984 1010
985 /* 1011 /*
1012
1013 ## PCRE_ERROR_MATCHLIMIT test
1014 %!test
1015 %! s=sprintf('\t4\n0000\t-0.00\t-0.0000\t4\t-0.00\t-0.0000\t4\n0000\t-0.00\t-0.0000\t0\t-0.00\t-');
1016 %! ws = warning("query");
1017 %! unwind_protect
1018 %! warning("off");
1019 %! regexp(s, '(\s*-*\d+[.]*\d*\s*)+\n');
1020 %! unwind_protect_cleanup
1021 %! warning(ws);
1022 %! end_unwind_protect
986 1023
987 ## seg-fault test 1024 ## seg-fault test
988 %!assert(regexp("abcde","."),[1,2,3,4,5]) 1025 %!assert(regexp("abcde","."),[1,2,3,4,5])
989 1026
990 ## Check that anchoring of pattern works correctly 1027 ## Check that anchoring of pattern works correctly