changeset 74:4da3615c731e

GNU text utilities
author Jim Meyering <jim@meyering.net>
date Tue, 05 Oct 1993 18:21:52 +0000
parents d24aeffde572
children b3ff3ce48150
files lib/memchr.c lib/regex.c lib/regex.h
diffstat 3 files changed, 107 insertions(+), 37 deletions(-) [+]
line wrap: on
line diff
--- a/lib/memchr.c
+++ b/lib/memchr.c
@@ -57,15 +57,17 @@
      The 1-bits make sure that carries propagate to the next 0-bit.
      The 0-bits provide holes for carries to fall into.  */
   magic_bits = 0x7efefeff;
-  if (sizeof (longword) > 4)
-    /* 64-bit version of the magic.  */
-    magic_bits = (0x7efefefe << 32) | 0xfefefeff;
+#ifdef LONG_64_BITS
+  /* 64-bit version of the magic.  */
+  magic_bits = (0x7efefefe << 32) | 0xfefefeff;
+#endif /* LONG_64_BITS */
 
   /* Set up a longword, each of whose bytes is C.  */
   charmask = c | (c << 8);
   charmask |= charmask << 16;
-  if (sizeof (longword) > 4)
-    charmask |= charmask << 32;
+#ifdef LONG_64_BITS
+  charmask |= charmask << 32;
+#endif /* LONG_64_BITS */
 
   /* Instead of the traditional loop which tests each character,
      we will test a longword at a time.  The tricky part is testing
@@ -132,17 +134,16 @@
 	    return (char *) &cp[2];
 	  if (cp[3] == c)
 	    return (char *) &cp[3];
-	  if (sizeof (longword) > 4)
-	    {
-	      if (cp[4] == c)
-		return (char *) &cp[4];
-	      if (cp[5] == c)
-		return (char *) &cp[5];
-	      if (cp[6] == c)
-		return (char *) &cp[6];
-	      if (cp[7] == c)
-		return (char *) &cp[7];
-	    }
+#ifdef LONG_64_BITS
+	  if (cp[4] == c)
+	    return (char *) &cp[4];
+	  if (cp[5] == c)
+	    return (char *) &cp[5];
+	  if (cp[6] == c)
+	    return (char *) &cp[6];
+	  if (cp[7] == c)
+	    return (char *) &cp[7];
+#endif /* LONG_64_BITS */
 	}
 
       n -= sizeof (longword);
--- a/lib/regex.c
+++ b/lib/regex.c
@@ -46,6 +46,14 @@
 
 #else  /* not emacs */
 
+#ifdef STDC_HEADERS
+#include <stdlib.h>
+#else
+char *malloc ();
+char *realloc ();
+#endif
+
+
 /* We used to test for `BSTRING' here, but only GCC and Emacs define
    `BSTRING', as far as I know, and neither of them use this code.  */
 #if HAVE_STRING_H || STDC_HEADERS
@@ -63,14 +71,6 @@
 #include <strings.h>
 #endif
 
-#ifdef STDC_HEADERS
-#include <stdlib.h>
-#else
-char *malloc ();
-char *realloc ();
-#endif
-
-
 /* Define the syntax stuff for \<, \>, etc.  */
 
 /* This must be nonzero for the wordchar and notwordchar pattern
@@ -881,13 +881,22 @@
    using the relocating allocator routines, then malloc could cause a
    relocation, which might (if the strings being searched are in the
    ralloc heap) shift the data out from underneath the regexp
-   routines.  */
+   routines.
+
+   Here's another reason to avoid allocation: Emacs insists on
+   processing input from X in a signal handler; processing X input may
+   call malloc; if input arrives while a matching routine is calling
+   malloc, then malloc is re-entered and we're scrod.  But Emacs can't
+   just block input while calling matching routines; then we wouldn't
+   notice interrupts when they come in.  So, Emacs blocks input around
+   all regexp calls except the matching calls, which it leaves
+   unprotected, in the faith that they will not malloc.  */
 
 /* Normally, this is fine.  */
 #define MATCH_MAY_ALLOCATE
 
 /* But under some circumstances, it's not.  */
-#if defined (REL_ALLOC) && defined (C_ALLOCA)
+#if defined (emacs) || (defined (REL_ALLOC) && defined (C_ALLOCA))
 #undef MATCH_MAY_ALLOCATE
 #endif
 
@@ -4130,11 +4139,27 @@
                detect that here, the alternative has put on a dummy
                failure point which is what we will end up popping.  */
 
-	    /* Skip over open/close-group commands.  */
-	    while (p2 + 2 < pend
-		   && ((re_opcode_t) *p2 == stop_memory
-		       || (re_opcode_t) *p2 == start_memory))
-	      p2 += 3;			/* Skip over args, too.  */
+	    /* Skip over open/close-group commands.
+	       If what follows this loop is a ...+ construct,
+	       look at what begins its body, since at least one
+	       iteration of that body will have to match.  */
+	    while (1)
+	      {
+		if (p2 + 2 < pend
+		    && ((re_opcode_t) *p2 == stop_memory
+			|| (re_opcode_t) *p2 == start_memory))
+		  p2 += 3;
+		else if (p2 + 6 < pend
+			 && (re_opcode_t) *p2 == dummy_failure_jump)
+		  p2 += 6;
+		else
+		  break;
+	      }
+
+	    p1 = p + mcnt;
+	    /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
+	       to the `maybe_finalize_jump' of this case.  Examine what 
+	       follows.  */
 
             /* If we're at the end of the pattern, we can change.  */
             if (p2 == pend)
@@ -4152,11 +4177,7 @@
 	      {
 		register unsigned char c
                   = *p2 == (unsigned char) endline ? '\n' : p2[2];
-		p1 = p + mcnt;
-
-                /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
-                   to the `maybe_finalize_jump' of this case.  Examine what 
-                   follows.  */
+
                 if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
                   {
   		    p[-3] = (unsigned char) pop_failure_jump;
@@ -4182,6 +4203,54 @@
                       }
 		  }
 	      }
+            else if ((re_opcode_t) *p2 == charset)
+	      {
+		register unsigned char c
+                  = *p2 == (unsigned char) endline ? '\n' : p2[2];
+
+                if ((re_opcode_t) p1[3] == exactn
+		    && ! (p2[1] * BYTEWIDTH > p1[4]
+			  && (p2[1 + p1[4] / BYTEWIDTH]
+			      & (1 << (p1[4] % BYTEWIDTH)))))
+                  {
+  		    p[-3] = (unsigned char) pop_failure_jump;
+                    DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
+                                  c, p1[5]);
+                  }
+                  
+		else if ((re_opcode_t) p1[3] == charset_not)
+		  {
+		    int idx;
+		    /* We win if the charset_not inside the loop
+		       lists every character listed in the charset after.  */
+		    for (idx = 0; idx < p2[1]; idx++)
+		      if (! (p2[2 + idx] == 0
+			     || (idx < p1[4]
+				 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
+			break;
+
+		    if (idx == p2[1])
+                      {
+  		        p[-3] = (unsigned char) pop_failure_jump;
+                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
+                      }
+		  }
+		else if ((re_opcode_t) p1[3] == charset)
+		  {
+		    int idx;
+		    /* We win if the charset inside the loop
+		       has no overlap with the one after the loop.  */
+		    for (idx = 0; idx < p2[1] && idx < p1[4]; idx++)
+		      if ((p2[2 + idx] & p1[5 + idx]) != 0)
+			break;
+
+		    if (idx == p2[1] || idx == p1[4])
+                      {
+  		        p[-3] = (unsigned char) pop_failure_jump;
+                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
+                      }
+		  }
+	      }
 	  }
 	  p -= 2;		/* Point at relative address again.  */
 	  if ((re_opcode_t) p[-1] != pop_failure_jump)
--- a/lib/regex.h
+++ b/lib/regex.h
@@ -1,7 +1,7 @@
 /* Definitions for data structures and routines for the regular
    expression library, version 0.12.
 
-   Copyright (C) 1985, 1989, 1990, 1991, 1992, 1993 Free Software Foundation, Inc.
+   Copyright (C) 1985, 89, 90, 91, 92, 1993 Free Software Foundation, Inc.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by