changeset 14452:721be41ea988 stable

Stop segfault when using regular expressions with named capture buffers (bug #35683). liboctave/regexp.cc (match): Check that the current match has a named buffer before assigning it. src/DLD-FUNCTIONS/regexp.cc: Add %!test for a mix of named and unnamed buffers.
author Rik <octave@nomad.inbox5.com>
date Sun, 11 Mar 2012 13:43:00 -0700
parents 245963d3d628
children 8bfa6e2bb4ed af552038cc52
files liboctave/regexp.cc src/DLD-FUNCTIONS/regexp.cc
diffstat 2 files changed, 25 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/liboctave/regexp.cc
+++ b/liboctave/regexp.cc
@@ -371,8 +371,21 @@
                       || ovector[2*i-1] != ovector[2*i+1])
                     {
                       if (namecount > 0)
-                        named_tokens(named_idx(i-pos_offset-1)) =
-                          std::string (*(listptr+nidx[i-pos_offset-1]));
+                        {
+                          // FIXME: Should probably do this with a map()
+                          // rather than a linear search.  However,
+                          // the number of captured, named expressions
+                          // is usually pretty small (< 4)
+                          for (int j = 0; j < namecount; j++)
+                            {
+                              if (nidx[j] == i)
+                                { 
+                                  named_tokens(named_idx(j)) =
+                                    std::string (*(listptr+i-pos_offset));
+                                  break;
+                                }
+                            }
+                        }
 
                       tokens(pos_match++) = std::string (*(listptr+i));
                     }
--- a/src/DLD-FUNCTIONS/regexp.cc
+++ b/src/DLD-FUNCTIONS/regexp.cc
@@ -768,10 +768,20 @@
 %! assert (nm.last{1},'Davis');
 %! assert (nm.last{2},'Rogers');
 
+## Tests for named tokens
 %!test
 %! # Parenthesis in named token (ie (int)) causes a problem
 %! assert (regexp('qwe int asd', ['(?<typestr>(int))'], 'names'), struct ('typestr', 'int'));
 
+%!test
+%! ## Mix of named and unnamed tokens can cause segfault (bug #35683)
+%! str = "abcde";
+%! ptn = '(?<T1>a)(\w+)(?<T2>d\w+)';
+%! tokens = regexp (str, ptn, "names");
+%! assert (isstruct (tokens) && numel (tokens) == 1);
+%! assert (tokens.T1, "a");
+%! assert (tokens.T2, "de");
+
 %!assert(regexp("abc\nabc",'.'),[1:7])
 %!assert(regexp("abc\nabc",'.','dotall'),[1:7])
 %!test