# HG changeset patch # User Jim Meyering # Date 795749778 0 # Node ID e512303d74b85b2dc169e8d9a4ebac85a9f6c461 # Parent 9d9e8df9d7c7b8b20555c783aa754c1a2702a270 GNU text utilities diff --git a/lib/error.c b/lib/error.c --- a/lib/error.c +++ b/lib/error.c @@ -53,7 +53,9 @@ extern char *program_name; #if HAVE_STRERROR +# ifndef strerror /* On some systems, strerror is a macro */ char *strerror (); +# endif #else static char * private_strerror (errnum) diff --git a/lib/getopt.c b/lib/getopt.c --- a/lib/getopt.c +++ b/lib/getopt.c @@ -59,6 +59,14 @@ #include <stdlib.h> #endif /* GNU C library. */ +/* This is for other GNU distributions with internationalized messages. + The GNU C Library itself does not yet support such messages. */ +#if HAVE_LIBINTL_H +# include <libintl.h> +#else +# define gettext(msgid) (msgid) +#endif + /* This version of `getopt' appears to the caller like standard Unix `getopt' but it behaves differently for the user, since it allows the user to intersperse the options with the other arguments. 
@@ -505,7 +513,7 @@ if (ambig && !exact) { if (opterr) - fprintf (stderr, "%s: option `%s' is ambiguous\n", + fprintf (stderr, gettext ("%s: option `%s' is ambiguous\n"), argv[0], argv[optind]); nextchar += strlen (nextchar); optind++; @@ -525,18 +533,17 @@ else { if (opterr) - { - if (argv[optind - 1][1] == '-') - /* --option */ - fprintf (stderr, - "%s: option `--%s' doesn't allow an argument\n", - argv[0], pfound->name); - else - /* +option or -option */ - fprintf (stderr, - "%s: option `%c%s' doesn't allow an argument\n", - argv[0], argv[optind - 1][0], pfound->name); - } + if (argv[optind - 1][1] == '-') + /* --option */ + fprintf (stderr, + gettext ("%s: option `--%s' doesn't allow an argument\n"), + argv[0], pfound->name); + else + /* +option or -option */ + fprintf (stderr, + gettext ("%s: option `%c%s' doesn't allow an argument\n"), + argv[0], argv[optind - 1][0], pfound->name); + nextchar += strlen (nextchar); return '?'; } @@ -548,8 +555,9 @@ else { if (opterr) - fprintf (stderr, "%s: option `%s' requires an argument\n", - argv[0], argv[optind - 1]); + fprintf (stderr, + gettext ("%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); nextchar += strlen (nextchar); return optstring[0] == ':' ? ':' : '?'; } @@ -576,11 +584,11 @@ { if (argv[optind][1] == '-') /* --option */ - fprintf (stderr, "%s: unrecognized option `--%s'\n", + fprintf (stderr, gettext ("%s: unrecognized option `--%s'\n"), argv[0], nextchar); else /* +option or -option */ - fprintf (stderr, "%s: unrecognized option `%c%s'\n", + fprintf (stderr, gettext ("%s: unrecognized option `%c%s'\n"), argv[0], argv[optind][0], nextchar); } nextchar = (char *) ""; @@ -605,9 +613,11 @@ { if (posixly_correct) /* 1003.2 specifies the format of this message. 
*/ - fprintf (stderr, "%s: illegal option -- %c\n", argv[0], c); + fprintf (stderr, gettext ("%s: illegal option -- %c\n"), + argv[0], c); else - fprintf (stderr, "%s: invalid option -- %c\n", argv[0], c); + fprintf (stderr, gettext ("%s: invalid option -- %c\n"), + argv[0], c); } optopt = c; return '?'; @@ -641,8 +651,9 @@ if (opterr) { /* 1003.2 specifies the format of this message. */ - fprintf (stderr, "%s: option requires an argument -- %c\n", - argv[0], c); + fprintf (stderr, + gettext ("%s: option requires an argument -- %c\n"), + argv[0], c); } optopt = c; if (optstring[0] == ':') diff --git a/lib/linebuffer.h b/lib/linebuffer.h --- a/lib/linebuffer.h +++ b/lib/linebuffer.h @@ -24,6 +24,7 @@ char *buffer; }; +#undef __P #if defined (__STDC__) && __STDC__ #define __P(x) x #else diff --git a/lib/long-options.h b/lib/long-options.h --- a/lib/long-options.h +++ b/lib/long-options.h @@ -1,10 +1,9 @@ -#ifndef __P -#if defined (__GNUC__) || (defined (__STDC__) && __STDC__) +#undef __P +#if defined (__STDC__) && __STDC__ #define __P(args) args #else #define __P(args) () -#endif /* GCC. */ -#endif /* Not __P. */ +#endif void parse_long_options __P ((int _argc, char **_argv, const char *_command_name, diff --git a/lib/memchr.c b/lib/memchr.c --- a/lib/memchr.c +++ b/lib/memchr.c @@ -5,38 +5,58 @@ adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu), and implemented by Roland McGrath (roland@ai.mit.edu). -The GNU C Library is free software; you can redistribute it and/or -modify it under the terms of the GNU Library General Public License as -published by the Free Software Foundation; either version 2 of the -License, or (at your option) any later version. +NOTE: The canonical source of this file is maintained with the GNU C Library. +Bugs can be reported to bug-glibc@prep.ai.mit.edu. 
-The GNU C Library is distributed in the hope that it will be useful, +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Library General Public License for more details. +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. -You should have received a copy of the GNU Library General Public -License along with the GNU C Library; see the file COPYING.LIB. If -not, write to the Free Software Foundation, Inc., 675 Mass Ave, -Cambridge, MA 02139, USA. */ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifdef HAVE_CONFIG_H #include <config.h> #endif -#if (SIZEOF_LONG != 4 && SIZEOF_LONG != 8) - error This function works only on systems for which sizeof(long) is 4 or 8. -/* The previous line would begin with `#error,' but some compilers can't - handle that even when the condition is false. */ +#undef __ptr_t +#if defined (__cplusplus) || (defined (__STDC__) && __STDC__) +# define __ptr_t void * +#else /* Not C++ or ANSI C. */ +# define __ptr_t char * +#endif /* C++ or ANSI C. */ + +#if defined (HAVE_STRING_H) || defined (_LIBC) +# include <string.h> #endif +#if defined (HAVE_LIMIT_H) || defined (_LIBC) +# include <limits.h> +#endif + +#define LONG_MAX_32_BITS 2147483647 + +#ifndef LONG_MAX +#define LONG_MAX LONG_MAX_32_BITS +#endif + +#include <sys/types.h> + + /* Search no more than N bytes of S for C. 
*/ -char * +__ptr_t memchr (s, c, n) - unsigned char *s; + const __ptr_t s; int c; - unsigned n; + size_t n; { const unsigned char *char_ptr; const unsigned long int *longword_ptr; @@ -50,7 +70,7 @@ & (sizeof (longword) - 1)) != 0; --n, ++char_ptr) if (*char_ptr == c) - return (char *) char_ptr; + return (__ptr_t) char_ptr; /* All these elucidatory comments refer to 4-byte longwords, but the theory applies equally well to 8-byte longwords. */ @@ -60,27 +80,28 @@ /* Bits 31, 24, 16, and 8 of this number are zero. Call these bits the "holes." Note that there is a hole just to the left of each byte, with an extra at the end: - + bits: 01111110 11111110 11111110 11111111 - bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD + bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD The 1-bits make sure that carries propagate to the next 0-bit. The 0-bits provide holes for carries to fall into. */ -#if (SIZEOF_LONG == 8) - magic_bits = ((unsigned long int) 0x7efefefe << 32) | 0xfefefeff; + + if (sizeof (longword) != 4 && sizeof (longword) != 8) + abort (); + +#if LONG_MAX <= LONG_MAX_32_BITS + magic_bits = 0x7efefeff; #else - magic_bits = 0x7efefeff; -#endif /* SIZEOF_LONG == 8 */ + magic_bits = ((unsigned long int) 0x7efefefe << 32) | 0xfefefeff; +#endif /* Set up a longword, each of whose bytes is C. */ charmask = c | (c << 8); charmask |= charmask << 16; -#if (SIZEOF_LONG == 8) +#if LONG_MAX > LONG_MAX_32_BITS charmask |= charmask << 32; -#endif /* SIZEOF_LONG == 8 */ - if (sizeof (longword) > 8) - abort (); - +#endif /* Instead of the traditional loop which tests each character, we will test a longword at a time. The tricky part is testing @@ -125,11 +146,11 @@ /* Add MAGIC_BITS to LONGWORD. */ if ((((longword + magic_bits) - - /* Set those bits that were unchanged by the addition. */ + + /* Set those bits that were unchanged by the addition. */ ^ ~longword) - - /* Look at only the hole bits. If any of the hole bits + + /* Look at only the hole bits. 
If any of the hole bits are unchanged, most likely one of the bytes was a zero. */ & ~magic_bits) != 0) @@ -140,23 +161,23 @@ const unsigned char *cp = (const unsigned char *) (longword_ptr - 1); if (cp[0] == c) - return (char *) cp; + return (__ptr_t) cp; if (cp[1] == c) - return (char *) &cp[1]; + return (__ptr_t) &cp[1]; if (cp[2] == c) - return (char *) &cp[2]; + return (__ptr_t) &cp[2]; if (cp[3] == c) - return (char *) &cp[3]; -#if (SIZEOF_LONG == 8) + return (__ptr_t) &cp[3]; +#if LONG_MAX > 2147483647 if (cp[4] == c) - return (char *) &cp[4]; + return (__ptr_t) &cp[4]; if (cp[5] == c) - return (char *) &cp[5]; + return (__ptr_t) &cp[5]; if (cp[6] == c) - return (char *) &cp[6]; + return (__ptr_t) &cp[6]; if (cp[7] == c) - return (char *) &cp[7]; -#endif /* SIZEOF_LONG == 8 */ + return (__ptr_t) &cp[7]; +#endif } n -= sizeof (longword); @@ -167,7 +188,7 @@ while (n-- > 0) { if (*char_ptr == c) - return (char *) char_ptr; + return (__ptr_t) char_ptr; else ++char_ptr; } diff --git a/lib/regex.c b/lib/regex.c --- a/lib/regex.c +++ b/lib/regex.c @@ -33,6 +33,14 @@ /* We need this for `regex.h', and perhaps for the Emacs include files. */ #include <sys/types.h> +/* This is for other GNU distributions with internationalized messages. + The GNU C Library itself does not yet support such messages. */ +#if HAVE_LIBINTL_H +# include <libintl.h> +#else +# define gettext(msgid) (msgid) +#endif + /* The `emacs' switch turns on certain matching commands that make sense only in Emacs. */ #ifdef emacs @@ -41,9 +49,6 @@ #include "buffer.h" #include "syntax.h" -/* Emacs uses `NULL' as a predicate. */ -#undef NULL - #else /* not emacs */ #ifdef STDC_HEADERS @@ -81,6 +86,12 @@ #define Sword 1 #endif +#ifdef SWITCH_ENUM_BUG +#define SWITCH_ENUM_CAST(x) ((int)(x)) +#else +#define SWITCH_ENUM_CAST(x) (x) +#endif + #ifdef SYNTAX_TABLE extern char *re_syntax_table; @@ -267,6 +278,9 @@ { no_op = 0, + /* Succeed right away--no more backtracking. 
*/ + succeed, + /* Followed by one byte giving n, then by n literal bytes. */ exactn, @@ -495,8 +509,6 @@ if (debug) print_double_string (w, s1, sz1, s2, sz2) -extern void printchar (); - /* Print the fastmap in human-readable form. */ void @@ -511,7 +523,7 @@ if (fastmap[i++]) { was_a_range = 0; - printchar (i - 1); + putchar (i - 1); while (i < (1 << BYTEWIDTH) && fastmap[i]) { was_a_range = 1; @@ -520,7 +532,7 @@ if (was_a_range) { printf ("-"); - printchar (i - 1); + putchar (i - 1); } } } @@ -563,7 +575,7 @@ do { putchar ('/'); - printchar (*p++); + putchar (*p++); } while (--mcnt); break; @@ -610,18 +622,18 @@ /* Have we broken a range? */ else if (last + 1 != c && in_range) { - printchar (last); + putchar (last); in_range = 0; } if (! in_range) - printchar (c); + putchar (c); last = c; } if (in_range) - printchar (last); + putchar (last); putchar (']'); @@ -806,13 +818,13 @@ if (FIRST_STRING_P (where)) { for (this_char = where - string1; this_char < size1; this_char++) - printchar (string1[this_char]); + putchar (string1[this_char]); where = string2; } for (this_char = where - string2; this_char < size2; this_char++) - printchar (string2[this_char]); + putchar (string2[this_char]); } } @@ -834,7 +846,9 @@ /* Set by `re_set_syntax' to the current regexp syntax to recognize. Can also be assigned to arbitrarily: each pattern buffer stores its own syntax, so it can be changed between regex compilations. */ -reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS; +/* This has no initializer because initialized variables in Emacs + become read-only after dumping. */ +reg_syntax_t re_syntax_options; /* Specify the precise syntax of regexps for compilation. This provides @@ -855,10 +869,12 @@ } /* This table gives an error message for each of the error codes listed - in regex.h. Obviously the order here has to be same as there. */ - -static const char *re_error_msg[] = - { NULL, /* REG_NOERROR */ + in regex.h. Obviously the order here has to be same as there. 
+ POSIX doesn't require that we do anything for REG_NOERROR, + but why not be nice? */ + +static const char *re_error_msgid[] = + { "Success", /* REG_NOERROR */ "No match", /* REG_NOMATCH */ "Invalid regular expression", /* REG_BADPAT */ "Invalid collation character", /* REG_ECOLLATE */ @@ -1184,6 +1200,7 @@ DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \ } \ \ + set_regs_matched_done = 0; \ DEBUG_STATEMENT (nfailure_points_popped++); \ } /* POP_FAILURE_POINT */ @@ -1227,19 +1244,23 @@ #define SET_REGS_MATCHED() \ do \ { \ - unsigned r; \ - for (r = lowest_active_reg; r <= highest_active_reg; r++) \ - { \ - MATCHED_SOMETHING (reg_info[r]) \ - = EVER_MATCHED_SOMETHING (reg_info[r]) \ - = 1; \ - } \ + if (!set_regs_matched_done) \ + { \ + unsigned r; \ + set_regs_matched_done = 1; \ + for (r = lowest_active_reg; r <= highest_active_reg; r++) \ + { \ + MATCHED_SOMETHING (reg_info[r]) \ + = EVER_MATCHED_SOMETHING (reg_info[r]) \ + = 1; \ + } \ + } \ } \ while (0) - /* Registers are set to a sentinel when they haven't yet matched. */ -#define REG_UNSET_VALUE ((char *) -1) +static char reg_unset_dummy; +#define REG_UNSET_VALUE (&reg_unset_dummy) #define REG_UNSET(e) ((e) == REG_UNSET_VALUE) @@ -1547,7 +1568,7 @@ unsigned debug_count; for (debug_count = 0; debug_count < size; debug_count++) - printchar (pattern[debug_count]); + putchar (pattern[debug_count]); putchar ('\n'); } #endif /* DEBUG */ @@ -2470,6 +2491,11 @@ if (!COMPILE_STACK_EMPTY) FREE_STACK_RETURN (REG_EPAREN); + /* If we don't want backtracking, force success + the first time we reach the end of the compiled pattern. */ + if (syntax & RE_NO_POSIX_BACKTRACKING) + BUF_PUSH (succeed); + free (compile_stack.stack); /* We have succeeded; set the length of the buffer. */ @@ -2772,26 +2798,30 @@ bufp->fastmap_accurate = 1; /* It will be when we're done. 
*/ bufp->can_be_null = 0; - while (p != pend || !FAIL_STACK_EMPTY ()) + while (1) { - if (p == pend) - { - bufp->can_be_null |= path_can_be_null; - - /* Reset for next path. */ - path_can_be_null = true; - - p = fail_stack.stack[--fail_stack.avail]; + if (p == pend || *p == succeed) + { + /* We have reached the (effective) end of pattern. */ + if (!FAIL_STACK_EMPTY ()) + { + bufp->can_be_null |= path_can_be_null; + + /* Reset for next path. */ + path_can_be_null = true; + + p = fail_stack.stack[--fail_stack.avail]; + + continue; + } + else + break; } /* We should never be about to go beyond the end of the pattern. */ assert (p < pend); -#ifdef SWITCH_ENUM_BUG - switch ((int) ((re_opcode_t) *p++)) -#else - switch ((re_opcode_t) *p++) -#endif + switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) { /* I guess the idea here is to simply not bother with a fastmap @@ -3456,6 +3486,9 @@ and need to test it, it's not garbage. */ const char *match_end = NULL; + /* This helps SET_REGS_MATCHED avoid doing redundant work. */ + int set_regs_matched_done = 0; + /* Used when we pop values we don't care about. */ #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ const char **reg_dummy; @@ -3652,6 +3685,7 @@ } } /* d != end_match_2 */ + succeed: DEBUG_PRINT1 ("Accepting match.\n"); /* If caller wants register contents data back, do it. */ @@ -3740,11 +3774,7 @@ } /* Otherwise match next pattern command. */ -#ifdef SWITCH_ENUM_BUG - switch ((int) ((re_opcode_t) *p++)) -#else - switch ((re_opcode_t) *p++) -#endif + switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) { /* Ignore these. Used to ignore the n of succeed_n's which currently have n == 0. */ @@ -3752,6 +3782,9 @@ DEBUG_PRINT1 ("EXECUTING no_op.\n"); break; + case succeed: + DEBUG_PRINT1 ("EXECUTING succeed.\n"); + goto succeed; /* Match the next n pattern characters exactly. 
The following byte in the pattern defines n, and the n bytes after that @@ -3859,6 +3892,9 @@ IS_ACTIVE (reg_info[*p]) = 1; MATCHED_SOMETHING (reg_info[*p]) = 0; + + /* Clear this whenever we change the register activity status. */ + set_regs_matched_done = 0; /* This is the new highest active register. */ highest_active_reg = *p; @@ -3871,6 +3907,7 @@ /* Move past the register number and inner group count. */ p += 2; just_past_start_mem = p; + break; @@ -3896,7 +3933,10 @@ /* This register isn't active anymore. */ IS_ACTIVE (reg_info[*p]) = 0; - + + /* Clear this whenever we change the register activity status. */ + set_regs_matched_done = 0; + /* If this was the only register active, nothing is active anymore. */ if (lowest_active_reg == highest_active_reg) @@ -4064,6 +4104,9 @@ : bcmp (d, d2, mcnt)) goto fail; d += mcnt, d2 += mcnt; + + /* Do this because we've matched some characters. */ + SET_REGS_MATCHED (); } } break; @@ -4930,7 +4973,9 @@ ret = regex_compile (pattern, length, re_syntax_options, bufp); - return re_error_msg[(int) ret]; + if (!ret) + return NULL; + return gettext (re_error_msgid[(int) ret]); } /* Entry points compatible with 4.2 BSD regex library. We don't define @@ -4950,7 +4995,7 @@ if (!s) { if (!re_comp_buf.buffer) - return "No previous regular expression"; + return gettext ("No previous regular expression"); return 0; } @@ -4958,12 +5003,12 @@ { re_comp_buf.buffer = (unsigned char *) malloc (200); if (re_comp_buf.buffer == NULL) - return "Memory exhausted"; + return gettext (re_error_msgid[(int) REG_ESPACE]); re_comp_buf.allocated = 200; re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH); if (re_comp_buf.fastmap == NULL) - return "Memory exhausted"; + return gettext (re_error_msgid[(int) REG_ESPACE]); } /* Since `re_exec' always passes NULL for the `regs' argument, we @@ -4974,8 +5019,11 @@ ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf); - /* Yes, we're discarding `const' here. 
*/ - return (char *) re_error_msg[(int) ret]; + if (!ret) + return NULL; + + /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */ + return (char *) gettext (re_error_msgid[(int) ret]); } @@ -5179,19 +5227,14 @@ size_t msg_size; if (errcode < 0 - || errcode >= (sizeof (re_error_msg) / sizeof (re_error_msg[0]))) + || errcode >= (sizeof (re_error_msgid) / sizeof (re_error_msgid[0]))) /* Only error codes returned by the rest of the code should be passed to this routine. If we are given anything else, or if other regex code generates an invalid error code, then the program has a bug. Dump core so we can fix it. */ abort (); - msg = re_error_msg[errcode]; - - /* POSIX doesn't require that we do anything in this case, but why - not be nice. */ - if (! msg) - msg = "Success"; + msg = gettext (re_error_msgid[errcode]); msg_size = strlen (msg) + 1; /* Includes the null. */ diff --git a/lib/regex.h b/lib/regex.h --- a/lib/regex.h +++ b/lib/regex.h @@ -130,6 +130,10 @@ If not set, then an unmatched ) is invalid. */ #define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1) +/* If this bit is set, succeed as soon as we match the whole pattern, + without further backtracking. */ +#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1) + /* This global variable defines the particular regexp syntax to use (for some interfaces). When a regexp is compiled, the syntax used is stored in the pattern buffer, so changing this does not affect diff --git a/lib/strtol.c b/lib/strtol.c --- a/lib/strtol.c +++ b/lib/strtol.c @@ -20,6 +20,9 @@ #include #include +#ifndef errno +extern int errno; +#endif #if HAVE_LIMITS_H #include @@ -42,7 +45,6 @@ #include #else #define NULL 0 -extern int errno; #endif #ifndef UNSIGNED diff --git a/lib/xmalloc.c b/lib/xmalloc.c --- a/lib/xmalloc.c +++ b/lib/xmalloc.c @@ -35,6 +35,14 @@ void free (); #endif +/* This is for other GNU distributions with internationalized messages. + The GNU C Library itself does not yet support such messages. 
*/ +#if HAVE_LIBINTL_H +# include <libintl.h> +#else +# define gettext(msgid) (msgid) +#endif + #ifndef EXIT_FAILURE #define EXIT_FAILURE 1 #endif @@ -59,7 +67,7 @@ if (n == 0) p = malloc ((size_t) 1); if (p == 0) - error (xmalloc_exit_failure, 0, "memory exhausted"); + error (xmalloc_exit_failure, 0, gettext ("Memory exhausted")); return p; }