changeset 9566:dda8ef1c4bfd

Treat untyped memory as an 'unsigned char' array.
author Bruno Haible <bruno@clisp.org>
date Fri, 04 Jan 2008 00:24:10 +0100
parents 4422a6d32432
children 17eb10d3ae34
files ChangeLog lib/memmem.c
diffstat 2 files changed, 27 insertions(+), 19 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2008-01-02  Bruno Haible  <bruno@clisp.org>
+
+	* lib/memmem.c (knuth_morris_pratt, memmem): Change all 'char *'
+	variables to 'unsigned char *' type.
+	Reported by Paul Eggert.
+
 2008-01-02  Jim Meyering  <jim@meyering.net>
 
 	* lib/version-etc.c (COPYRIGHT_YEAR): Increase for new year.
--- a/lib/memmem.c
+++ b/lib/memmem.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1991,92,93,94,96,97,98,2000,2004,2007 Free Software Foundation, Inc.
+/* Copyright (C) 1991,92,93,94,96,97,98,2000,2004,2007,2008 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    This program is free software; you can redistribute it and/or modify
@@ -34,9 +34,10 @@
    Return a boolean indicating success.  */
 
 static bool
-knuth_morris_pratt (const char *haystack, const char *last_haystack,
-                    const char *needle, size_t m,
-                    const char **resultp)
+knuth_morris_pratt (const unsigned char *haystack,
+                    const unsigned char *last_haystack,
+                    const unsigned char *needle, size_t m,
+                    const unsigned char **resultp)
 {
   /* Allocate the table.  */
   size_t *table = (size_t *) nmalloca (m, sizeof (size_t));
@@ -70,14 +71,14 @@
 	   The inequality needle[x..i-1] != needle[0..i-1-x] is known to hold
 	   for x < table[i-1], by induction.
 	   Furthermore, if j>0: needle[i-1-j..i-2] = needle[0..j-1].  */
-	unsigned char b = (unsigned char) needle[i - 1];
+	unsigned char b = needle[i - 1];
 
 	for (;;)
 	  {
 	    /* Invariants: The inequality needle[x..i-1] != needle[0..i-1-x]
 	       is known to hold for x < i-1-j.
 	       Furthermore, if j>0: needle[i-1-j..i-2] = needle[0..j-1].  */
-	    if (b == (unsigned char) needle[j])
+	    if (b == needle[j])
 	      {
 		/* Set table[i] := i-1-j.  */
 		table[i] = i - ++j;
@@ -112,8 +113,8 @@
   /* Search, using the table to accelerate the processing.  */
   {
     size_t j;
-    const char *rhaystack;
-    const char *phaystack;
+    const unsigned char *rhaystack;
+    const unsigned char *phaystack;
 
     *resultp = NULL;
     j = 0;
@@ -121,7 +122,7 @@
     phaystack = haystack;
     /* Invariant: phaystack = rhaystack + j.  */
     while (phaystack != last_haystack)
-      if ((unsigned char) needle[j] == (unsigned char) *phaystack)
+      if (needle[j] == *phaystack)
 	{
 	  j++;
 	  phaystack++;
@@ -157,11 +158,12 @@
 memmem (const void *haystack_start, size_t haystack_len,
 	const void *needle_start, size_t needle_len)
 {
-  /* Operating with void * is awkward.  */
-  const char *haystack = (const char *) haystack_start;
-  const char *needle = (const char *) needle_start;
-  const char *last_haystack = haystack + haystack_len;
-  const char *last_needle = needle + needle_len;
+  /* Abstract memory is considered to be an array of 'unsigned char' values,
+     not an array of 'char' values.  See ISO C 99 section 6.2.6.1.  */
+  const unsigned char *haystack = (const unsigned char *) haystack_start;
+  const unsigned char *needle = (const unsigned char *) needle_start;
+  const unsigned char *last_haystack = haystack + haystack_len;
+  const unsigned char *last_needle = needle + needle_len;
 
   if (needle_len == 0)
     /* The first occurrence of the empty string is deemed to occur at
@@ -175,7 +177,7 @@
 
   /* Use optimizations in memchr when possible.  */
   if (__builtin_expect (needle_len == 1, 0))
-    return memchr (haystack, (unsigned char) *needle, haystack_len);
+    return memchr (haystack, *needle, haystack_len);
 
   /* Minimizing the worst-case complexity:
      Let n = haystack_len, m = needle_len.
@@ -198,7 +200,7 @@
 
     /* Speed up the following searches of needle by caching its first
        byte.  */
-    char b = *needle++;
+    unsigned char b = *needle++;
 
     for (;; haystack++)
       {
@@ -217,7 +219,7 @@
 	    if (comparison_count >= needle_len)
 	      {
 		/* Try the Knuth-Morris-Pratt algorithm.  */
-		const char *result;
+		const unsigned char *result;
 		if (knuth_morris_pratt (haystack, last_haystack,
 					needle - 1, needle_len, &result))
 		  return (void *) result;
@@ -230,8 +232,8 @@
 	if (*haystack == b)
 	  /* The first byte matches.  */
 	  {
-	    const char *rhaystack = haystack + 1;
-	    const char *rneedle = needle;
+	    const unsigned char *rhaystack = haystack + 1;
+	    const unsigned char *rneedle = needle;
 
 	    for (;; rhaystack++, rneedle++)
 	      {