changeset 16596:e2c60dd50c61

readtokens: avoid core dumps with unusual calling patterns

Reported by Xu Zhongxing in <http://debbugs.gnu.org/10953>.
* lib/readtokens.c: Include limits.h.
(word, bits_per_word, get_nth_bit, set_nth_bit): New.
(readtoken): Don't cache the delimiters; the cache code was buggy
if !delim && saved_delim, or if the new n_delim differs from the old.
Also, it wasn't thread-safe.
author Paul Eggert <eggert@cs.ucla.edu>
date Tue, 06 Mar 2012 15:19:24 -0800
parents 6d5adeda9a06
children 3d8b0225b6ec
files ChangeLog lib/readtokens.c
diffstat 2 files changed, 34 insertions(+), 32 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+2012-03-06  Paul Eggert  <eggert@cs.ucla.edu>
+
+	readtokens: avoid core dumps with unusual calling patterns
+	Reported by Xu Zhongxing in <http://debbugs.gnu.org/10953>.
+	* lib/readtokens.c: Include limits.h.
+	(word, bits_per_word, get_nth_bit, set_nth_bit): New.
+	(readtoken): Don't cache the delimiters; the cache code was buggy
+	if !delim && saved_delim, or if the new n_delim differs from the old.
+	Also, it wasn't thread-safe.
+
 2012-03-07  Bruno Haible  <bruno@clisp.org>
 
 	quote: Adhere to common module description layout.
--- a/lib/readtokens.c
+++ b/lib/readtokens.c
@@ -26,6 +26,7 @@
 
 #include "readtokens.h"
 
+#include <limits.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -46,6 +47,22 @@
   tokenbuffer->buffer = NULL;
 }
 
+typedef size_t word;
+enum { bits_per_word = sizeof (word) * CHAR_BIT };
+
+static bool /* true iff bit N of BITSET is set */
+get_nth_bit (size_t n, word const *bitset)
+{
+  return (bitset[n / bits_per_word] & ((word) 1 << n % bits_per_word)) != 0;
+}
+
+/* Turn on bit N of BITSET; every other bit is left unchanged.  */
+static void
+set_nth_bit (size_t n, word *bitset)
+{
+  bitset[n / bits_per_word] |= (word) 1 << (n % bits_per_word);
+}
+
 /* Read a token from STREAM into TOKENBUFFER.
    A token is delimited by any of the N_DELIM bytes in DELIM.
    Upon return, the token is in tokenbuffer->buffer and
@@ -68,42 +85,17 @@
   char *p;
   int c;
   size_t i, n;
-  static const char *saved_delim = NULL;
-  static char isdelim[256];
-  bool same_delimiters;
+  word isdelim[(UCHAR_MAX + bits_per_word) / bits_per_word];
 
-  if (delim == NULL && saved_delim == NULL)
-    abort ();
-
-  same_delimiters = false;
-  if (delim != saved_delim && saved_delim != NULL)
+  memset (isdelim, 0, sizeof isdelim);
+  for (i = 0; i < n_delim; i++)
     {
-      same_delimiters = true;
-      for (i = 0; i < n_delim; i++)
-        {
-          if (delim[i] != saved_delim[i])
-            {
-              same_delimiters = false;
-              break;
-            }
-        }
+      unsigned char ch = delim[i];
+      set_nth_bit (ch, isdelim);
     }
 
-  if (!same_delimiters)
-    {
-      size_t j;
-      saved_delim = delim;
-      memset (isdelim, 0, sizeof isdelim);
-      for (j = 0; j < n_delim; j++)
-        {
-          unsigned char ch = delim[j];
-          isdelim[ch] = 1;
-        }
-    }
-
-  /* FIXME: don't fool with this caching.  Use strchr instead.  */
   /* skip over any leading delimiters */
-  for (c = getc (stream); c >= 0 && isdelim[c]; c = getc (stream))
+  for (c = getc (stream); c >= 0 && get_nth_bit (c, isdelim); c = getc (stream))
     {
       /* empty */
     }
@@ -124,7 +116,7 @@
           p[i] = 0;
           break;
         }
-      if (isdelim[c])
+      if (get_nth_bit (c, isdelim))
         {
           p[i] = 0;
           break;