# HG changeset patch # User Eric Blake # Date 1209418984 21600 # Node ID 8e859c0b80c6d97bb59d52a0bb5364319f679790 # Parent dd3a38e7cf66357711807619185c1022768beae9 Optimize and test strchrnul. * lib/strchrnul.c (strchrnul): Rewrite to do parallel search. * modules/strchrnul-tests: New file. * tests/test-strchrnul.c: Likewise. Signed-off-by: Eric Blake diff --git a/ChangeLog b/ChangeLog --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,10 @@ 2008-04-28 Eric Blake + Optimize and test strchrnul. + * lib/strchrnul.c (strchrnul): Rewrite to do parallel search. + * modules/strchrnul-tests: New file. + * tests/test-strchrnul.c: Likewise. + Remove intprops dependency. * modules/memchr (Depends-on): Remove intprops. * modules/memrchr (Depends-on): Likewise. diff --git a/lib/strchrnul.c b/lib/strchrnul.c --- a/lib/strchrnul.c +++ b/lib/strchrnul.c @@ -1,5 +1,5 @@ /* Searching in a string. - Copyright (C) 2003, 2007 Free Software Foundation, Inc. + Copyright (C) 2003, 2007, 2008 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -23,9 +23,118 @@ char * strchrnul (const char *s, int c_in) { - char c = c_in; - while (*s && (*s != c)) - s++; + /* On 32-bit hardware, choosing longword to be a 32-bit unsigned + long instead of a 64-bit uintmax_t tends to give better + performance. On 64-bit hardware, unsigned long is generally 64 + bits already. Change this typedef to experiment with + performance. */ + typedef unsigned long int longword; + + const unsigned char *char_ptr; + const longword *longword_ptr; + longword repeated_one; + longword repeated_c; + unsigned char c; + + c = (unsigned char) c_in; + + /* Handle the first few bytes by reading one byte at a time. + Do this until CHAR_PTR is aligned on a longword boundary. */ + for (char_ptr = (const unsigned char *) s; + (size_t) char_ptr % sizeof (longword) != 0; + ++char_ptr) + if (!*char_ptr || *char_ptr == c) + return (char *) char_ptr; + + longword_ptr = (const longword *) char_ptr; + + /* All these elucidatory comments refer to 4-byte longwords, + but the theory applies equally well to any size longwords. */ + + /* Compute auxiliary longword values: + repeated_one is a value which has a 1 in every byte. + repeated_c has c in every byte. */ + repeated_one = 0x01010101; + repeated_c = c | (c << 8); + repeated_c |= repeated_c << 16; + if (0xffffffffU < (longword) -1) + { + repeated_one |= repeated_one << 31 << 1; + repeated_c |= repeated_c << 31 << 1; + if (8 < sizeof (longword)) + { + size_t i; + + for (i = 64; i < sizeof (longword) * 8; i *= 2) + { + repeated_one |= repeated_one << i; + repeated_c |= repeated_c << i; + } + } + } + + /* Instead of the traditional loop which tests each byte, we will + test a longword at a time. The tricky part is testing if *any of + the four* bytes in the longword in question are equal to NUL or + c. We first use an xor with repeated_c. This reduces the task + to testing whether *any of the four* bytes in longword1 or + longword2 is zero. - return (char *) s; + Let's consider longword1. We compute tmp = + ((longword1 - repeated_one) & ~longword1) & (repeated_one << 7). + That is, we perform the following operations: + 1. Subtract repeated_one. + 2. & ~longword1. + 3. & a mask consisting of 0x80 in every byte. + Consider what happens in each byte: + - If a byte of longword1 is zero, step 1 and 2 transform it into 0xff, + and step 3 transforms it into 0x80. A carry can also be propagated + to more significant bytes. + - If a byte of longword1 is nonzero, let its lowest 1 bit be at + position k (0 <= k <= 7); so the lowest k bits are 0. After step 1, + the byte ends in a single bit of value 0 and k bits of value 1. + After step 2, the result is just k bits of value 1: 2^k - 1. After + step 3, the result is 0. And no carry is produced. + So, if longword1 has only non-zero bytes, tmp is zero. + Whereas if longword1 has a zero byte, call j the position of the least + significant zero byte. Then the result has a zero at positions 0, ..., + j-1 and a 0x80 at position j. We cannot predict the result at the more + significant bytes (positions j+1..3), but it does not matter since we + already have a non-zero bit at position 8*j+7. + + The test whether any byte in longword1 or longword2 is zero is equivalent + to testing whether tmp1 is nonzero or tmp2 is nonzero. We can combine + this into a single test, whether (tmp1 | tmp2) is nonzero. + + This test can read more than one byte beyond the end of a string, + depending on where the terminating NUL is encountered. However, + this is considered safe since the initialization phase ensured + that the read will be aligned, therefore, the read will not cross + page boundaries and will not cause a fault. */ + + while (1) + { + longword longword1 = *longword_ptr ^ repeated_c; + longword longword2 = *longword_ptr; + + if (((((longword1 - repeated_one) & ~longword1) + | ((longword2 - repeated_one) & ~longword2)) + & (repeated_one << 7)) != 0) + break; + longword_ptr++; + } + + char_ptr = (const unsigned char *) longword_ptr; + + /* At this point, we know that one of the sizeof (longword) bytes + starting at char_ptr is == 0 or == c. On little-endian machines, + we could determine the first such byte without any further memory + accesses, just by looking at the tmp result from the last loop + iteration. But this does not work on big-endian machines. + Choose code that works in both cases. */ + + char_ptr = (unsigned char *) longword_ptr; + while (*char_ptr && (*char_ptr != c)) + char_ptr++; + return (char *) char_ptr; } diff --git a/modules/strchrnul-tests b/modules/strchrnul-tests new file mode 100644 --- /dev/null +++ b/modules/strchrnul-tests @@ -0,0 +1,10 @@ +Files: +tests/test-strchrnul.c + +Depends-on: + +configure.ac: + +Makefile.am: +TESTS += test-strchrnul +check_PROGRAMS += test-strchrnul diff --git a/tests/test-strchrnul.c b/tests/test-strchrnul.c new file mode 100644 --- /dev/null +++ b/tests/test-strchrnul.c @@ -0,0 +1,94 @@ +/* + * Copyright (C) 2008 Free Software Foundation + * Written by Eric Blake and Bruno Haible + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . */ + +#include + +#include + +#include +#include + +#define ASSERT(expr) \ + do \ + { \ + if (!(expr)) \ + { \ + fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \ + fflush (stderr); \ + abort (); \ + } \ + } \ + while (0) + +int +main () +{ + size_t n = 0x100000; + char *input = malloc (n + 1); + ASSERT (input); + + input[0] = 'a'; + input[1] = 'b'; + memset (input + 2, 'c', 1024); + memset (input + 1026, 'd', n - 1028); + input[n - 2] = 'e'; + input[n - 1] = 'a'; + input[n] = '\0'; + + /* Basic behavior tests. */ + ASSERT (strchrnul (input, 'a') == input); + ASSERT (strchrnul (input, 'b') == input + 1); + ASSERT (strchrnul (input, 'c') == input + 2); + ASSERT (strchrnul (input, 'd') == input + 1026); + + ASSERT (strchrnul (input + 1, 'a') == input + n - 1); + ASSERT (strchrnul (input + 1, 'e') == input + n - 2); + + ASSERT (strchrnul (input, 'f') == input + n); + ASSERT (strchrnul (input, '\0') == input + n); + + /* Check that a very long haystack is handled quickly if the byte is + found near the beginning. */ + { + size_t repeat = 10000; + for (; repeat > 0; repeat--) + { + ASSERT (strchrnul (input, 'c') == input + 2); + } + } + + /* Alignment tests. */ + { + int i, j; + for (i = 0; i < 32; i++) + { + for (j = 0; j < 256; j++) + input[i + j] = (j + 1) & 0xff; + for (j = 1; j < 256; j++) + { + ASSERT (strchrnul (input + i, j) == input + i + j - 1); + input[i + j - 1] = (j == 1 ? 2 : 1); + ASSERT (strchrnul (input + i, j) == input + i + 255); + input[i + j - 1] = j; + } + } + } + + free (input); + + return 0; +}