changeset 9999:5f559abfabef

Add rawmemchr module, matching glibc. * modules/string (Makefile.am): New indicator. * m4/string_h.m4 (gl_HEADER_STRING_H_DEFAULTS): Set it. * lib/string.in.h (rawmemchr): Declare when appropriate. * modules/rawmemchr: New file. * m4/rawmemchr.m4: Likewise. * lib/rawmemchr.c: Likewise. * modules/rawmemchr-tests: Likewise. * tests/test-rawmemchr.c: Likewise. * doc/glibc-functions/rawmemchr.texi (rawmemchr): Document module. * modules/strchrnul (Depends-on): Add rawmemchr. * lib/strchrnul.c (strchrnul): Optimize a corner case. Signed-off-by: Eric Blake <ebb9@byu.net>
author Eric Blake <ebb9@byu.net>
date Mon, 28 Apr 2008 17:08:43 -0600
parents 3362dfd48604
children 8095c13f76df
files ChangeLog doc/glibc-functions/rawmemchr.texi lib/rawmemchr.c lib/strchrnul.c lib/string.in.h m4/rawmemchr.m4 m4/string_h.m4 modules/rawmemchr modules/rawmemchr-tests modules/strchrnul modules/string tests/test-rawmemchr.c
diffstat 12 files changed, 318 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,19 @@
 2008-04-28  Eric Blake  <ebb9@byu.net>
 
+	Add rawmemchr module, matching glibc.
+	* modules/string (Makefile.am): New indicator.
+	* m4/string_h.m4 (gl_HEADER_STRING_H_DEFAULTS): Set it.
+	* lib/string.in.h (rawmemchr): Declare when appropriate.
+	* modules/rawmemchr: New file.
+	* m4/rawmemchr.m4: Likewise.
+	* lib/rawmemchr.c: Likewise.
+	* modules/rawmemchr-tests: Likewise.
+	* tests/test-rawmemchr.c: Likewise.
+	* doc/glibc-functions/rawmemchr.texi (rawmemchr): Document
+	module.
+	* modules/strchrnul (Depends-on): Add rawmemchr.
+	* lib/strchrnul.c (strchrnul): Optimize a corner case.
+
 	Whitespace cleanup.
 	* tests/test-strchrnul.c: Reindent.
 	* lib/strchrnul.c: Likewise.
--- a/doc/glibc-functions/rawmemchr.texi
+++ b/doc/glibc-functions/rawmemchr.texi
@@ -2,15 +2,15 @@
 @subsection @code{rawmemchr}
 @findex rawmemchr
 
-Gnulib module: ---
+Gnulib module: rawmemchr
 
 Portability problems fixed by Gnulib:
 @itemize
-@end itemize
-
-Portability problems not fixed by Gnulib:
-@itemize
 @item
 This function is missing on all non-glibc platforms:
 MacOS X 10.3, FreeBSD 6.0, NetBSD 3.0, OpenBSD 3.8, AIX 5.1, HP-UX 11, IRIX 6.5, OSF/1 5.1, Solaris 10, Cygwin, mingw, Interix 3.5, BeOS.
 @end itemize
+
+Portability problems not fixed by Gnulib:
+@itemize
+@end itemize
new file mode 100644
--- /dev/null
+++ b/lib/rawmemchr.c
@@ -0,0 +1,136 @@
+/* Searching in a string.
+   Copyright (C) 2008 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+/* Specification.  */
+#include <string.h>
+
+/* Find the first occurrence of C in S.  */
+void *
+rawmemchr (const void *s, int c_in)
+{
+  /* On 32-bit hardware, choosing longword to be a 32-bit unsigned
+     long instead of a 64-bit uintmax_t tends to give better
+     performance.  On 64-bit hardware, unsigned long is generally 64
+     bits already.  Change this typedef to experiment with
+     performance.  */
+  typedef unsigned long int longword;
+
+  const unsigned char *char_ptr;
+  const longword *longword_ptr;
+  longword repeated_one;
+  longword repeated_c;
+  unsigned char c;
+
+  c = (unsigned char) c_in;
+
+  /* Handle the first few bytes by reading one byte at a time.
+     Do this until CHAR_PTR is aligned on a longword boundary.  */
+  for (char_ptr = (const unsigned char *) s;
+       (size_t) char_ptr % sizeof (longword) != 0;
+       ++char_ptr)
+    if (*char_ptr == c)
+      return (void *) char_ptr;
+
+  longword_ptr = (const longword *) char_ptr;
+
+  /* All these elucidatory comments refer to 4-byte longwords,
+     but the theory applies equally well to any size longwords.  */
+
+  /* Compute auxiliary longword values:
+     repeated_one is a value which has a 1 in every byte.
+     repeated_c has c in every byte.  */
+  repeated_one = 0x01010101;
+  repeated_c = c | (c << 8);
+  repeated_c |= repeated_c << 16;
+  if (0xffffffffU < (longword) -1)
+    {
+      repeated_one |= repeated_one << 31 << 1;
+      repeated_c |= repeated_c << 31 << 1;
+      if (8 < sizeof (longword))
+        {
+          size_t i;
+
+          for (i = 64; i < sizeof (longword) * 8; i *= 2)
+            {
+              repeated_one |= repeated_one << i;
+              repeated_c |= repeated_c << i;
+            }
+        }
+    }
+
+  /* Instead of the traditional loop which tests each byte, we will
+     test a longword at a time.  The tricky part is testing if *any of
+     the four* bytes in the longword in question are equal to NUL or
+     c.  We first use an xor with repeated_c.  This reduces the task
+     to testing whether *any of the four* bytes in longword1 is zero.
+
+     We compute tmp =
+       ((longword1 - repeated_one) & ~longword1) & (repeated_one << 7).
+     That is, we perform the following operations:
+       1. Subtract repeated_one.
+       2. & ~longword1.
+       3. & a mask consisting of 0x80 in every byte.
+     Consider what happens in each byte:
+       - If a byte of longword1 is zero, step 1 and 2 transform it into 0xff,
+         and step 3 transforms it into 0x80.  A carry can also be propagated
+         to more significant bytes.
+       - If a byte of longword1 is nonzero, let its lowest 1 bit be at
+         position k (0 <= k <= 7); so the lowest k bits are 0.  After step 1,
+         the byte ends in a single bit of value 0 and k bits of value 1.
+         After step 2, the result is just k bits of value 1: 2^k - 1.  After
+         step 3, the result is 0.  And no carry is produced.
+     So, if longword1 has only non-zero bytes, tmp is zero.
+     Whereas if longword1 has a zero byte, call j the position of the least
+     significant zero byte.  Then the result has a zero at positions 0, ...,
+     j-1 and a 0x80 at position j.  We cannot predict the result at the more
+     significant bytes (positions j+1..3), but it does not matter since we
+     already have a non-zero bit at position 8*j+7.
+
+     The test whether any byte in longword1 is zero is equivalent
+     to testing whether tmp is nonzero.
+
+     This test can read beyond the end of a string, depending on where
+     C_IN is encountered.  However, this is considered safe since the
+     initialization phase ensured that the read will be aligned,
+     therefore, the read will not cross page boundaries and will not
+     cause a fault.  */
+
+  while (1)
+    {
+      longword longword1 = *longword_ptr ^ repeated_c;
+
+      if ((((longword1 - repeated_one) & ~longword1)
+           & (repeated_one << 7)) != 0)
+        break;
+      longword_ptr++;
+    }
+
+  char_ptr = (const unsigned char *) longword_ptr;
+
+  /* At this point, we know that one of the sizeof (longword) bytes
+     starting at char_ptr is == c.  On little-endian machines, we
+     could determine the first such byte without any further memory
+     accesses, just by looking at the tmp result from the last loop
+     iteration.  But this does not work on big-endian machines.
+     Choose code that works in both cases.  */
+
+  char_ptr = (unsigned char *) longword_ptr;
+  while (*char_ptr != c)
+    char_ptr++;
+  return (void *) char_ptr;
+}
--- a/lib/strchrnul.c
+++ b/lib/strchrnul.c
@@ -37,6 +37,8 @@
   unsigned char c;
 
   c = (unsigned char) c_in;
+  if (!c)
+    return rawmemchr (s, 0);
 
   /* Handle the first few bytes by reading one byte at a time.
      Do this until CHAR_PTR is aligned on a longword boundary.  */
--- a/lib/string.in.h
+++ b/lib/string.in.h
@@ -93,6 +93,22 @@
      memrchr (a, b, c))
 #endif
 
+/* Find the first occurrence of C in S.  More efficient than
+   memchr(S,C,N), at the expense of undefined behavior if C does not
+   occur within N bytes.  */
+#if @GNULIB_RAWMEMCHR@
+# if ! @HAVE_RAWMEMCHR@
+extern void *rawmemchr (void const *__s, int __c_in)
+  __attribute__ ((__pure__));
+# endif
+#elif defined GNULIB_POSIXCHECK
+# undef rawmemchr
+# define rawmemchr(a,b) \
+    (GL_LINK_WARNING ("rawmemchr is unportable - " \
+                      "use gnulib module rawmemchr for portability"), \
+     rawmemchr (a, b))
+#endif
+
 /* Copy SRC to DST, returning the address of the terminating '\0' in DST.  */
 #if @GNULIB_STPCPY@
 # if ! @HAVE_STPCPY@
new file mode 100644
--- /dev/null
+++ b/m4/rawmemchr.m4
@@ -0,0 +1,21 @@
+# rawmemchr.m4 serial 1
+dnl Copyright (C) 2003, 2007, 2008 Free Software Foundation, Inc.
+dnl This file is free software; the Free Software Foundation
+dnl gives unlimited permission to copy and/or distribute it,
+dnl with or without modifications, as long as this notice is preserved.
+
+AC_DEFUN([gl_FUNC_RAWMEMCHR],
+[
+  dnl Persuade glibc <string.h> to declare rawmemchr().
+  AC_REQUIRE([AC_USE_SYSTEM_EXTENSIONS])
+
+  AC_REQUIRE([gl_HEADER_STRING_H_DEFAULTS])
+  AC_REPLACE_FUNCS([rawmemchr])
+  if test $ac_cv_func_rawmemchr = no; then
+    HAVE_RAWMEMCHR=0
+    gl_PREREQ_RAWMEMCHR
+  fi
+])
+
+# Prerequisites of lib/strchrnul.c.
+AC_DEFUN([gl_PREREQ_RAWMEMCHR], [:])
--- a/m4/string_h.m4
+++ b/m4/string_h.m4
@@ -35,6 +35,7 @@
   GNULIB_MEMMEM=0;      AC_SUBST([GNULIB_MEMMEM])
   GNULIB_MEMPCPY=0;     AC_SUBST([GNULIB_MEMPCPY])
   GNULIB_MEMRCHR=0;     AC_SUBST([GNULIB_MEMRCHR])
+  GNULIB_RAWMEMCHR=0;   AC_SUBST([GNULIB_RAWMEMCHR])
   GNULIB_STPCPY=0;      AC_SUBST([GNULIB_STPCPY])
   GNULIB_STPNCPY=0;     AC_SUBST([GNULIB_STPNCPY])
   GNULIB_STRCHRNUL=0;   AC_SUBST([GNULIB_STRCHRNUL])
@@ -66,6 +67,7 @@
   HAVE_DECL_MEMMEM=1;		AC_SUBST([HAVE_DECL_MEMMEM])
   HAVE_MEMPCPY=1;		AC_SUBST([HAVE_MEMPCPY])
   HAVE_DECL_MEMRCHR=1;		AC_SUBST([HAVE_DECL_MEMRCHR])
+  HAVE_RAWMEMCHR=1;		AC_SUBST([HAVE_RAWMEMCHR])
   HAVE_STPCPY=1;		AC_SUBST([HAVE_STPCPY])
   HAVE_STPNCPY=1;		AC_SUBST([HAVE_STPNCPY])
   HAVE_STRCHRNUL=1;		AC_SUBST([HAVE_STRCHRNUL])
new file mode 100644
--- /dev/null
+++ b/modules/rawmemchr
@@ -0,0 +1,25 @@
+Description:
+rawmemchr() function: Find the first occurrence of C in S.
+
+Files:
+lib/rawmemchr.c
+m4/rawmemchr.m4
+
+Depends-on:
+extensions
+string
+
+configure.ac:
+gl_FUNC_RAWMEMCHR
+gl_STRING_MODULE_INDICATOR([rawmemchr])
+
+Makefile.am:
+
+Include:
+<string.h>
+
+License:
+LGPL
+
+Maintainer:
+FSF
new file mode 100644
--- /dev/null
+++ b/modules/rawmemchr-tests
@@ -0,0 +1,10 @@
+Files:
+tests/test-rawmemchr.c
+
+Depends-on:
+
+configure.ac:
+
+Makefile.am:
+TESTS += test-rawmemchr
+check_PROGRAMS += test-rawmemchr
--- a/modules/strchrnul
+++ b/modules/strchrnul
@@ -9,6 +9,7 @@
 Depends-on:
 extensions
 string
+rawmemchr
 
 configure.ac:
 gl_FUNC_STRCHRNUL
--- a/modules/string
+++ b/modules/string
@@ -40,6 +40,7 @@
 	      -e 's|@''GNULIB_MEMMEM''@|$(GNULIB_MEMMEM)|g' \
 	      -e 's|@''GNULIB_MEMPCPY''@|$(GNULIB_MEMPCPY)|g' \
 	      -e 's|@''GNULIB_MEMRCHR''@|$(GNULIB_MEMRCHR)|g' \
+	      -e 's|@''GNULIB_RAWMEMCHR''@|$(GNULIB_RAWMEMCHR)|g' \
 	      -e 's|@''GNULIB_STPCPY''@|$(GNULIB_STPCPY)|g' \
 	      -e 's|@''GNULIB_STPNCPY''@|$(GNULIB_STPNCPY)|g' \
 	      -e 's|@''GNULIB_STRCHRNUL''@|$(GNULIB_STRCHRNUL)|g' \
@@ -56,6 +57,7 @@
 	      -e 's|@''HAVE_DECL_MEMMEM''@|$(HAVE_DECL_MEMMEM)|g' \
 	      -e 's|@''HAVE_MEMPCPY''@|$(HAVE_MEMPCPY)|g' \
 	      -e 's|@''HAVE_DECL_MEMRCHR''@|$(HAVE_DECL_MEMRCHR)|g' \
+	      -e 's|@''HAVE_RAWMEMCHR''@|$(HAVE_RAWMEMCHR)|g' \
 	      -e 's|@''HAVE_STPCPY''@|$(HAVE_STPCPY)|g' \
 	      -e 's|@''HAVE_STPNCPY''@|$(HAVE_STPNCPY)|g' \
 	      -e 's|@''HAVE_STRCHRNUL''@|$(HAVE_STRCHRNUL)|g' \
new file mode 100644
--- /dev/null
+++ b/tests/test-rawmemchr.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2008 Free Software Foundation
+ * Written by Eric Blake and Bruno Haible
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+#include <string.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#define ASSERT(expr) \
+  do									     \
+    {									     \
+      if (!(expr))							     \
+	{								     \
+	  fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
+	  fflush (stderr);						     \
+	  abort ();							     \
+	}								     \
+    }									     \
+  while (0)
+
+/* Calculating void * + int is not portable, so this wrapper converts
+   to char * to make the tests easier to write.  */
+#define RAWMEMCHR (char *) rawmemchr
+
+int
+main ()
+{
+  size_t n = 0x100000;
+  char *input = malloc (n + 1);
+  ASSERT (input);
+
+  input[0] = 'a';
+  input[1] = 'b';
+  memset (input + 2, 'c', 1024);
+  memset (input + 1026, 'd', n - 1028);
+  input[n - 2] = 'e';
+  input[n - 1] = 'a';
+  input[n] = '\0';
+
+  /* Basic behavior tests.  */
+  ASSERT (RAWMEMCHR (input, 'a') == input);
+  ASSERT (RAWMEMCHR (input, 'b') == input + 1);
+  ASSERT (RAWMEMCHR (input, 'c') == input + 2);
+  ASSERT (RAWMEMCHR (input, 'd') == input + 1026);
+
+  ASSERT (RAWMEMCHR (input + 1, 'a') == input + n - 1);
+  ASSERT (RAWMEMCHR (input + 1, 'e') == input + n - 2);
+
+  ASSERT (RAWMEMCHR (input, '\0') == input + n);
+
+  /* Alignment tests.  */
+  {
+    int i, j;
+    for (i = 0; i < 32; i++)
+      {
+        for (j = 0; j < 256; j++)
+          input[i + j] = j;
+        for (j = 0; j < 256; j++)
+          {
+            ASSERT (RAWMEMCHR (input + i, j) == input + i + j);
+          }
+      }
+  }
+
+  free (input);
+
+  return 0;
+}