changeset 256:8cf333566445

GNU text utilities
author Jim Meyering <jim@meyering.net>
date Sun, 02 Oct 1994 22:50:11 +0000
parents 74478d22a1fe
children a70a20230879
files lib/linebuffer.c lib/long-options.c lib/long-options.h lib/memchr.c lib/regex.c lib/strtol.c lib/strtoul.c
diffstat 7 files changed, 200 insertions(+), 114 deletions(-) [+]
line wrap: on
line diff
--- a/lib/linebuffer.c
+++ b/lib/linebuffer.c
@@ -18,14 +18,7 @@
 /* Written by Richard Stallman. */
 
 #ifdef HAVE_CONFIG_H
-#if defined (CONFIG_BROKETS)
-/* We use <config.h> instead of "config.h" so that a compilation
-   using -I. -I$srcdir will use ./config.h rather than $srcdir/config.h
-   (which it would do because it found this file in $srcdir).  */
 #include <config.h>
-#else
-#include "config.h"
-#endif
 #endif
 
 #include <stdio.h>
new file mode 100644
--- /dev/null
+++ b/lib/long-options.c
@@ -0,0 +1,79 @@
+/* Utility to accept --help and --version options as unobtrusively as possible.
+   Copyright (C) 1993, 1994 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
+
+/* Jim Meyering (meyering@comco.com) */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdio.h>
+#include <getopt.h>
+#include "long-options.h"
+
+static struct option const long_options[] =
+{
+  {"help", no_argument, 0, 'h'},
+  {"version", no_argument, 0, 'v'},
+  {0, 0, 0, 0}
+};
+
+/* Process long options --help and --version, but only if argc == 2.
+   Be careful not to gobble up `--'.  */
+
+void
+parse_long_options (argc, argv, command_name, version_string, usage)
+     int argc;
+     char **argv;
+     const char *command_name;
+     const char *version_string;
+     void (*usage)();
+{
+  int c;
+  int saved_opterr;
+  int saved_optind;
+
+  saved_opterr = opterr;
+  saved_optind = optind;
+
+  /* Don't print an error message for unrecognized options.  */
+  opterr = 0;
+
+  if (argc == 2
+      && (c = getopt_long (argc, argv, "+", long_options, (int *) 0)) != EOF)
+    {
+      switch (c)
+	{
+	case 'h':
+	  (*usage) (0);
+
+	case 'v':
+	  printf ("%s - %s\n", command_name, version_string);
+	  exit (0);
+	
+	default:
+	  /* Don't process any other long-named options.  */
+	  break;
+	}
+    }
+
+  /* Restore previous value.  */
+  opterr = saved_opterr;
+
+  /* Restore optind in case it has advanced past a leading `--'.  */
+  optind = saved_optind;
+}
new file mode 100644
--- /dev/null
+++ b/lib/long-options.h
@@ -0,0 +1,11 @@
+#ifndef __P
+#if defined (__GNUC__) || (defined (__STDC__) && __STDC__)
+#define __P(args) args
+#else
+#define __P(args) ()
+#endif  /* GCC.  */
+#endif  /* Not __P.  */
+
+void
+  parse_long_options __P ((int _argc, char **_argv, const char *_command_name,
+			   const char *_version_string, void (*_usage) (int)));
--- a/lib/memchr.c
+++ b/lib/memchr.c
@@ -21,14 +21,7 @@
 Cambridge, MA 02139, USA.  */
 
 #ifdef HAVE_CONFIG_H
-#if defined (CONFIG_BROKETS)
-/* We use <config.h> instead of "config.h" so that a compilation
-   using -I. -I$srcdir will use ./config.h rather than $srcdir/config.h
-   (which it would do because it found this file in $srcdir).  */
 #include <config.h>
-#else
-#include "config.h"
-#endif
 #endif
 
 #if (SIZEOF_LONG != 4 && SIZEOF_LONG != 8)
--- a/lib/regex.c
+++ b/lib/regex.c
@@ -27,14 +27,7 @@
 #define _GNU_SOURCE
 
 #ifdef HAVE_CONFIG_H
-#if defined (emacs) || defined (CONFIG_BROKETS)
-/* We use <config.h> instead of "config.h" so that a compilation
-   using -I. -I$srcdir will use ./config.h rather than $srcdir/config.h
-   (which it would do because it found this file in $srcdir).  */
 #include <config.h>
-#else
-#include "config.h"
-#endif
 #endif
 
 /* We need this for `regex.h', and perhaps for the Emacs include files.  */
@@ -898,8 +891,8 @@
    ralloc heap) shift the data out from underneath the regexp
    routines.
 
-   Here's another reason to avoid allocation: Emacs insists on
-   processing input from X in a signal handler; processing X input may
+   Here's another reason to avoid allocation: Emacs 
+   processes input from X in a signal handler; processing X input may
    call malloc; if input arrives while a matching routine is calling
    malloc, then we're scrod.  But Emacs can't just block input while
    calling matching routines; then we don't notice interrupts when
@@ -910,8 +903,9 @@
 /* Normally, this is fine.  */
 #define MATCH_MAY_ALLOCATE
 
-/* But under some circumstances, it's not.  */
-#if defined (emacs) || (defined (REL_ALLOC) && defined (C_ALLOCA))
+/* The match routines may not allocate if (1) they would do it with malloc
+   and (2) it's not safe for htem to use malloc.  */
+#if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && (defined (emacs) || defined (REL_ALLOC))
 #undef MATCH_MAY_ALLOCATE
 #endif
 
@@ -1490,6 +1484,10 @@
    The `fastmap' and `newline_anchor' fields are neither
    examined nor set.  */
 
+/* Return, freeing storage we allocated.  */
+#define FREE_STACK_RETURN(value)		\
+  return (free (compile_stack.stack), value)
+
 static reg_errcode_t
 regex_compile (pattern, size, syntax, bufp)
      const char *pattern;
@@ -1596,7 +1594,7 @@
         { /* Caller did not allocate a buffer.  Do it for them.  */
           bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
         }
-      if (!bufp->buffer) return REG_ESPACE;
+      if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
 
       bufp->allocated = INIT_BUF_SIZE;
     }
@@ -1651,7 +1649,7 @@
           if (!laststart)
             {
               if (syntax & RE_CONTEXT_INVALID_OPS)
-                return REG_BADRPT;
+                FREE_STACK_RETURN (REG_BADRPT);
               else if (!(syntax & RE_CONTEXT_INDEP_OPS))
                 goto normal_char;
             }
@@ -1684,7 +1682,7 @@
 
                 else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
                   {
-                    if (p == pend) return REG_EESCAPE;
+                    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
 
                     PATFETCH (c1);
                     if (!(c1 == '+' || c1 == '?'))
@@ -1783,7 +1781,7 @@
           {
             boolean had_char_class = false;
 
-            if (p == pend) return REG_EBRACK;
+            if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
 
             /* Ensure that we have enough space to push a charset: the
                opcode, the length count, and the bitset; 34 bytes in all.  */
@@ -1814,14 +1812,14 @@
             /* Read in characters and ranges, setting map bits.  */
             for (;;)
               {
-                if (p == pend) return REG_EBRACK;
+                if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
 
                 PATFETCH (c);
 
                 /* \ might escape characters inside [...] and [^...].  */
                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
                   {
-                    if (p == pend) return REG_EESCAPE;
+                    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
 
                     PATFETCH (c1);
                     SET_LIST_BIT (c1);
@@ -1837,7 +1835,7 @@
                 /* Look ahead to see if it's a range when the last thing
                    was a character class.  */
                 if (had_char_class && c == '-' && *p != ']')
-                  return REG_ERANGE;
+                  FREE_STACK_RETURN (REG_ERANGE);
 
                 /* Look ahead to see if it's a range when the last thing
                    was a character: if this is a hyphen not at the
@@ -1850,7 +1848,7 @@
                   {
                     reg_errcode_t ret
                       = compile_range (&p, pend, translate, syntax, b);
-                    if (ret != REG_NOERROR) return ret;
+                    if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
                   }
 
                 else if (p[0] == '-' && p[1] != ']')
@@ -1861,7 +1859,7 @@
                     PATFETCH (c1);
                     
                     ret = compile_range (&p, pend, translate, syntax, b);
-                    if (ret != REG_NOERROR) return ret;
+                    if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
                   }
 
                 /* See if we're at the beginning of a possible character
@@ -1875,7 +1873,7 @@
                     c1 = 0;
 
                     /* If pattern is `[[:'.  */
-                    if (p == pend) return REG_EBRACK;
+                    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
 
                     for (;;)
                       {
@@ -1906,29 +1904,34 @@
                         boolean is_upper = STREQ (str, "upper");
                         boolean is_xdigit = STREQ (str, "xdigit");
                         
-                        if (!IS_CHAR_CLASS (str)) return REG_ECTYPE;
+                        if (!IS_CHAR_CLASS (str))
+			  FREE_STACK_RETURN (REG_ECTYPE);
 
                         /* Throw away the ] at the end of the character
                            class.  */
                         PATFETCH (c);					
 
-                        if (p == pend) return REG_EBRACK;
+                        if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
 
                         for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
                           {
+			    /* This was split into 3 if's to
+			       avoid an arbitrary limit in some compiler.  */
                             if (   (is_alnum  && ISALNUM (ch))
                                 || (is_alpha  && ISALPHA (ch))
                                 || (is_blank  && ISBLANK (ch))
-                                || (is_cntrl  && ISCNTRL (ch))
-                                || (is_digit  && ISDIGIT (ch))
+                                || (is_cntrl  && ISCNTRL (ch)))
+			      SET_LIST_BIT (ch);
+			    if (   (is_digit  && ISDIGIT (ch))
                                 || (is_graph  && ISGRAPH (ch))
                                 || (is_lower  && ISLOWER (ch))
-                                || (is_print  && ISPRINT (ch))
-                                || (is_punct  && ISPUNCT (ch))
+                                || (is_print  && ISPRINT (ch)))
+			      SET_LIST_BIT (ch);
+			    if (   (is_punct  && ISPUNCT (ch))
                                 || (is_space  && ISSPACE (ch))
                                 || (is_upper  && ISUPPER (ch))
                                 || (is_xdigit && ISXDIGIT (ch)))
-                            SET_LIST_BIT (ch);
+			      SET_LIST_BIT (ch);
                           }
                         had_char_class = true;
                       }
@@ -1994,7 +1997,7 @@
 
 
         case '\\':
-          if (p == pend) return REG_EESCAPE;
+          if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
 
           /* Do not translate the character after the \, so that we can
              distinguish, e.g., \B from \b, even if we normally would
@@ -2059,7 +2062,7 @@
                 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
                   goto normal_backslash;
                 else
-                  return REG_ERPAREN;
+                  FREE_STACK_RETURN (REG_ERPAREN);
 
             handle_close:
               if (fixup_alt_jump)
@@ -2079,7 +2082,7 @@
                 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
                   goto normal_char;
                 else
-                  return REG_ERPAREN;
+                  FREE_STACK_RETURN (REG_ERPAREN);
 
               /* Since we just checked for an empty stack above, this
                  ``can't happen''.  */
@@ -2186,7 +2189,7 @@
                     if (syntax & RE_NO_BK_BRACES)
                       goto unfetch_interval;
                     else
-                      return REG_EBRACE;
+                      FREE_STACK_RETURN (REG_EBRACE);
                   }
 
                 GET_UNSIGNED_NUMBER (lower_bound);
@@ -2206,12 +2209,12 @@
                     if (syntax & RE_NO_BK_BRACES)
                       goto unfetch_interval;
                     else 
-                      return REG_BADBR;
+                      FREE_STACK_RETURN (REG_BADBR);
                   }
 
                 if (!(syntax & RE_NO_BK_BRACES)) 
                   {
-                    if (c != '\\') return REG_EBRACE;
+                    if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
 
                     PATFETCH (c);
                   }
@@ -2221,7 +2224,7 @@
                     if (syntax & RE_NO_BK_BRACES)
                       goto unfetch_interval;
                     else 
-                      return REG_BADBR;
+                      FREE_STACK_RETURN (REG_BADBR);
                   }
 
                 /* We just parsed a valid interval.  */
@@ -2230,7 +2233,7 @@
                 if (!laststart)
                   {
                     if (syntax & RE_CONTEXT_INVALID_OPS)
-                      return REG_BADRPT;
+                      FREE_STACK_RETURN (REG_BADRPT);
                     else if (syntax & RE_CONTEXT_INDEP_OPS)
                       laststart = b;
                     else
@@ -2397,7 +2400,7 @@
               c1 = c - '0';
 
               if (c1 > regnum)
-                return REG_ESUBREG;
+                FREE_STACK_RETURN (REG_ESUBREG);
 
               /* Can't back reference to a subexpression if inside of it.  */
               if (group_in_compile_stack (compile_stack, c1))
@@ -2469,7 +2472,7 @@
     STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
 
   if (!COMPILE_STACK_EMPTY) 
-    return REG_EPAREN;
+    FREE_STACK_RETURN (REG_EPAREN);
 
   free (compile_stack.stack);
 
@@ -2700,8 +2703,9 @@
 
      We also want to fetch the endpoints without translating them; the 
      appropriate translation is done in the bit-setting loop below.  */
-  range_start = ((unsigned char *) p)[-2];
-  range_end   = ((unsigned char *) p)[0];
+  /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *.  */
+  range_start = ((const unsigned char *) p)[-2];
+  range_end   = ((const unsigned char *) p)[0];
 
   /* Have to increment the pointer into the pattern string, so the
      caller isn't still at the ending character.  */
@@ -2845,22 +2849,25 @@
 
 
         case anychar:
-          /* `.' matches anything ...  */
-	  for (j = 0; j < (1 << BYTEWIDTH); j++)
-            fastmap[j] = 1;
-
-          /* ... except perhaps newline.  */
-          if (!(bufp->syntax & RE_DOT_NEWLINE))
-            fastmap['\n'] = 0;
-
-          /* Return if we have already set `can_be_null'; if we have,
-             then the fastmap is irrelevant.  Something's wrong here.  */
-	  else if (bufp->can_be_null)
-	    return 0;
-
-          /* Otherwise, have to check alternative paths.  */
-	  break;
-
+	  {
+	    int fastmap_newline = fastmap['\n'];
+
+	    /* `.' matches anything ...  */
+	    for (j = 0; j < (1 << BYTEWIDTH); j++)
+	      fastmap[j] = 1;
+
+	    /* ... except perhaps newline.  */
+	    if (!(bufp->syntax & RE_DOT_NEWLINE))
+	      fastmap['\n'] = fastmap_newline;
+
+	    /* Return if we have already set `can_be_null'; if we have,
+	       then the fastmap is irrelevant.  Something's wrong here.  */
+	    else if (bufp->can_be_null)
+	      return 0;
+
+	    /* Otherwise, have to check alternative paths.  */
+	    break;
+	  }
 
 #ifdef emacs
         case syntaxspec:
@@ -3588,17 +3595,27 @@
              longest match, try backtracking.  */
           if (d != end_match_2)
 	    {
+	      /* 1 if this match ends in the same string (string1 or string2)
+		 as the best previous match.  */
+	      boolean same_str_p = (FIRST_STRING_P (match_end) 
+				    == MATCHING_IN_FIRST_STRING);
+	      /* 1 if this match is the best seen so far.  */
+	      boolean best_match_p;
+
+	      /* AIX compiler got confused when this was combined
+		 with the previous declaration.  */
+	      if (same_str_p)
+		best_match_p = d > match_end;
+	      else
+		best_match_p = !MATCHING_IN_FIRST_STRING;
+
               DEBUG_PRINT1 ("backtracking.\n");
               
               if (!FAIL_STACK_EMPTY ())
                 { /* More failure points to try.  */
-                  boolean same_str_p = (FIRST_STRING_P (match_end) 
-	        	                == MATCHING_IN_FIRST_STRING);
 
                   /* If exceeds best match so far, save it.  */
-                  if (!best_regs_set
-                      || (same_str_p && d > match_end)
-                      || (!same_str_p && !MATCHING_IN_FIRST_STRING))
+                  if (!best_regs_set || best_match_p)
                     {
                       best_regs_set = true;
                       match_end = d;
@@ -3614,8 +3631,10 @@
                   goto fail;	       
                 }
 
-              /* If no failure points, don't restore garbage.  */
-              else if (best_regs_set)   
+              /* If no failure points, don't restore garbage.  And if
+                 last match is real best match, don't restore second
+                 best one. */
+              else if (best_regs_set && !best_match_p)
                 {
   	        restore_best_regs:
                   /* Restore best match.  It may happen that `dend ==
@@ -4278,7 +4297,7 @@
 #endif
 
                 if ((re_opcode_t) p1[3] == exactn
-		    && ! (p2[1] * BYTEWIDTH > p1[4]
+		    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4]
 			  && (p2[1 + p1[4] / BYTEWIDTH]
 			      & (1 << (p1[4] % BYTEWIDTH)))))
                   {
@@ -4292,9 +4311,9 @@
 		    int idx;
 		    /* We win if the charset_not inside the loop
 		       lists every character listed in the charset after.  */
-		    for (idx = 0; idx < p2[1]; idx++)
+		    for (idx = 0; idx < (int) p2[1]; idx++)
 		      if (! (p2[2 + idx] == 0
-			     || (idx < p1[4]
+			     || (idx < (int) p1[4]
 				 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
 			break;
 
@@ -4309,7 +4328,9 @@
 		    int idx;
 		    /* We win if the charset inside the loop
 		       has no overlap with the one after the loop.  */
-		    for (idx = 0; idx < p2[1] && idx < p1[4]; idx++)
+		    for (idx = 0;
+			 idx < (int) p2[1] && idx < (int) p1[4];
+			 idx++)
 		      if ((p2[2 + idx] & p1[5 + idx]) != 0)
 			break;
 
--- a/lib/strtol.c
+++ b/lib/strtol.c
@@ -1,30 +1,21 @@
 /* Copyright (C) 1991, 1992 Free Software Foundation, Inc.
-This file is part of the GNU C Library.
 
-The GNU C Library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Library General Public License as
-published by the Free Software Foundation; either version 2 of the
-License, or (at your option) any later version.
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
 
-The GNU C Library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-Library General Public License for more details.
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
 
-You should have received a copy of the GNU Library General Public
-License along with the GNU C Library; see the file COPYING.LIB.  If
-not, write to the Free Software Foundation, Inc., 675 Mass Ave,
-Cambridge, MA 02139, USA.  */
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
 
 #ifdef HAVE_CONFIG_H
-#if defined (CONFIG_BROKETS)
-/* We use <config.h> instead of "config.h" so that a compilation
-   using -I. -I$srcdir will use ./config.h rather than $srcdir/config.h
-   (which it would do because it found this file in $srcdir).  */
 #include <config.h>
-#else
-#include "config.h"
-#endif
 #endif
 
 #include <ctype.h>
--- a/lib/strtoul.c
+++ b/lib/strtoul.c
@@ -1,20 +1,18 @@
 /* Copyright (C) 1991 Free Software Foundation, Inc.
-This file is part of the GNU C Library.
 
-The GNU C Library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Library General Public License as
-published by the Free Software Foundation; either version 2 of the
-License, or (at your option) any later version.
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
 
-The GNU C Library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-Library General Public License for more details.
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
 
-You should have received a copy of the GNU Library General Public
-License along with the GNU C Library; see the file COPYING.LIB.  If
-not, write to the Free Software Foundation, Inc., 675 Mass Ave,
-Cambridge, MA 02139, USA.  */
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
 
 #define	UNSIGNED	1