changeset 9997:8e859c0b80c6

Optimize and test strchrnul. * lib/strchrnul.c (strchrnul): Rewrite to do parallel search. * modules/strchrnul-tests: New file. * tests/test-strchrnul.c: Likewise. Signed-off-by: Eric Blake <ebb9@byu.net>
author Eric Blake <ebb9@byu.net>
date Mon, 28 Apr 2008 15:43:04 -0600
parents dd3a38e7cf66
children 3362dfd48604
files ChangeLog lib/strchrnul.c modules/strchrnul-tests tests/test-strchrnul.c
diffstat 4 files changed, 223 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,10 @@
 2008-04-28  Eric Blake  <ebb9@byu.net>
 
+	Optimize and test strchrnul.
+	* lib/strchrnul.c (strchrnul): Rewrite to do parallel search.
+	* modules/strchrnul-tests: New file.
+	* tests/test-strchrnul.c: Likewise.
+
 	Remove intprops dependency.
 	* modules/memchr (Depends-on): Remove intprops.
 	* modules/memrchr (Depends-on): Likewise.
--- a/lib/strchrnul.c
+++ b/lib/strchrnul.c
@@ -1,5 +1,5 @@
 /* Searching in a string.
-   Copyright (C) 2003, 2007 Free Software Foundation, Inc.
+   Copyright (C) 2003, 2007, 2008 Free Software Foundation, Inc.
 
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -23,9 +23,118 @@
 char *
 strchrnul (const char *s, int c_in)
 {
-  char c = c_in;
-  while (*s && (*s != c))
-    s++;
+  /* On 32-bit hardware, choosing longword to be a 32-bit unsigned
+     long instead of a 64-bit uintmax_t tends to give better
+     performance.  On 64-bit hardware, unsigned long is generally 64
+     bits already.  Change this typedef to experiment with
+     performance.  */
+  typedef unsigned long int longword;
+
+  const unsigned char *char_ptr;
+  const longword *longword_ptr;
+  longword repeated_one;
+  longword repeated_c;
+  unsigned char c;
+
+  c = (unsigned char) c_in;
+
+  /* Handle the first few bytes by reading one byte at a time.
+     Do this until CHAR_PTR is aligned on a longword boundary.  */
+  for (char_ptr = (const unsigned char *) s;
+       (size_t) char_ptr % sizeof (longword) != 0;
+       ++char_ptr)
+    if (!*char_ptr || *char_ptr == c)
+      return (char *) char_ptr;
+
+  longword_ptr = (const longword *) char_ptr;
+
+  /* All these elucidatory comments refer to 4-byte longwords,
+     but the theory applies equally well to any size longwords.  */
+
+  /* Compute auxiliary longword values:
+     repeated_one is a value which has a 1 in every byte.
+     repeated_c has c in every byte.  */
+  repeated_one = 0x01010101;
+  repeated_c = c | (c << 8);
+  repeated_c |= repeated_c << 16;
+  if (0xffffffffU < (longword) -1)
+    {
+      repeated_one |= repeated_one << 31 << 1;
+      repeated_c |= repeated_c << 31 << 1;
+      if (8 < sizeof (longword))
+       {
+         size_t i;
+
+         for (i = 64; i < sizeof (longword) * 8; i *= 2)
+           {
+             repeated_one |= repeated_one << i;
+             repeated_c |= repeated_c << i;
+           }
+       }
+    }
+
+  /* Instead of the traditional loop which tests each byte, we will
+     test a longword at a time.  The tricky part is testing if *any of
+     the four* bytes in the longword in question are equal to NUL or
+     c.  We first use an xor with repeated_c.  This reduces the task
+     to testing whether *any of the four* bytes in longword1 or
+     longword2 is zero.
 
-  return (char *) s;
+     Let's consider longword1.  We compute tmp =
+       ((longword1 - repeated_one) & ~longword1) & (repeated_one << 7).
+     That is, we perform the following operations:
+       1. Subtract repeated_one.
+       2. & ~longword1.
+       3. & a mask consisting of 0x80 in every byte.
+     Consider what happens in each byte:
+       - If a byte of longword1 is zero, step 1 and 2 transform it into 0xff,
+         and step 3 transforms it into 0x80.  A carry can also be propagated
+         to more significant bytes.
+       - If a byte of longword1 is nonzero, let its lowest 1 bit be at
+         position k (0 <= k <= 7); so the lowest k bits are 0.  After step 1,
+         the byte ends in a single bit of value 0 and k bits of value 1.
+         After step 2, the result is just k bits of value 1: 2^k - 1.  After
+         step 3, the result is 0.  And no carry is produced.
+     So, if longword1 has only non-zero bytes, tmp is zero.
+     Whereas if longword1 has a zero byte, call j the position of the least
+     significant zero byte.  Then the result has a zero at positions 0, ...,
+     j-1 and a 0x80 at position j.  We cannot predict the result at the more
+     significant bytes (positions j+1..3), but it does not matter since we
+     already have a non-zero bit at position 8*j+7.
+
+     The test whether any byte in longword1 or longword2 is zero is equivalent
+     to testing whether tmp1 is nonzero or tmp2 is nonzero.  We can combine
+     this into a single test, whether (tmp1 | tmp2) is nonzero.
+
+     This test can read more than one byte beyond the end of a string,
+     depending on where the terminating NUL is encountered.  However,
+     this is considered safe since the initialization phase ensured
+     that the read will be aligned, therefore, the read will not cross
+     page boundaries and will not cause a fault.  */
+
+  while (1)
+    {
+      longword longword1 = *longword_ptr ^ repeated_c;
+      longword longword2 = *longword_ptr;
+
+      if (((((longword1 - repeated_one) & ~longword1)
+           | ((longword2 - repeated_one) & ~longword2))
+          & (repeated_one << 7)) != 0)
+       break;
+      longword_ptr++;
+    }
+
+  char_ptr = (const unsigned char *) longword_ptr;
+
+  /* At this point, we know that one of the sizeof (longword) bytes
+     starting at char_ptr is == 0 or == c.  On little-endian machines,
+     we could determine the first such byte without any further memory
+     accesses, just by looking at the tmp result from the last loop
+     iteration.  But this does not work on big-endian machines.
+     Choose code that works in both cases.  */
+
+  char_ptr = (unsigned char *) longword_ptr;
+  while (*char_ptr && (*char_ptr != c))
+    char_ptr++;
+  return (char *) char_ptr;
 }
new file mode 100644
--- /dev/null
+++ b/modules/strchrnul-tests
@@ -0,0 +1,10 @@
+Files:
+tests/test-strchrnul.c
+
+Depends-on:
+
+configure.ac:
+
+Makefile.am:
+TESTS += test-strchrnul
+check_PROGRAMS += test-strchrnul
new file mode 100644
--- /dev/null
+++ b/tests/test-strchrnul.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2008 Free Software Foundation
+ * Written by Eric Blake and Bruno Haible
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+#include <string.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#define ASSERT(expr) \
+  do                                                                        \
+    {                                                                       \
+      if (!(expr))                                                          \
+       {                                                                    \
+         fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
+         fflush (stderr);                                                   \
+         abort ();                                                          \
+       }                                                                    \
+    }                                                                       \
+  while (0)
+
+int
+main ()
+{
+  size_t n = 0x100000;
+  char *input = malloc (n + 1);
+  ASSERT (input);
+
+  input[0] = 'a';
+  input[1] = 'b';
+  memset (input + 2, 'c', 1024);
+  memset (input + 1026, 'd', n - 1028);
+  input[n - 2] = 'e';
+  input[n - 1] = 'a';
+  input[n] = '\0';
+
+  /* Basic behavior tests.  */
+  ASSERT (strchrnul (input, 'a') == input);
+  ASSERT (strchrnul (input, 'b') == input + 1);
+  ASSERT (strchrnul (input, 'c') == input + 2);
+  ASSERT (strchrnul (input, 'd') == input + 1026);
+
+  ASSERT (strchrnul (input + 1, 'a') == input + n - 1);
+  ASSERT (strchrnul (input + 1, 'e') == input + n - 2);
+
+  ASSERT (strchrnul (input, 'f') == input + n);
+  ASSERT (strchrnul (input, '\0') == input + n);
+
+  /* Check that a very long haystack is handled quickly if the byte is
+     found near the beginning.  */
+  {
+    size_t repeat = 10000;
+    for (; repeat > 0; repeat--)
+      {
+       ASSERT (strchrnul (input, 'c') == input + 2);
+      }
+  }
+
+  /* Alignment tests.  */
+  {
+    int i, j;
+    for (i = 0; i < 32; i++)
+      {
+       for (j = 0; j < 256; j++)
+         input[i + j] = (j + 1) & 0xff;
+       for (j = 1; j < 256; j++)
+         {
+           ASSERT (strchrnul (input + i, j) == input + i + j - 1);
+           input[i + j - 1] = (j == 1 ? 2 : 1);
+           ASSERT (strchrnul (input + i, j) == input + i + 255);
+           input[i + j - 1] = j;
+         }
+      }
+  }
+
+  free (input);
+
+  return 0;
+}