changeset 9584:f490fd6e4955

Rewrite memmem to guarantee linear complexity without malloc. * lib/memmem.c (memmem): Use Two-Way rather than Knuth-Morris-Pratt, to allow O(1) space usage. (critical_factorization, two_way_short_needle) (two_way_long_needle): New functions. (knuth_morris_pratt): Delete. * modules/memmem (Depends-on): No longer need malloca or stdbool. Add stdint. * tests/test-memmem.c (main): Add tests for periodic needle and sublinear performance. * doc/functions/memmem.texi (memmem): Document other deficiencies in cygwin and older glibc. Signed-off-by: Eric Blake <ebb9@byu.net>
author Eric Blake <ebb9@byu.net>
date Sat, 05 Jan 2008 14:09:11 -0700
parents 0446ff006bf3
children 1487c59e896d
files ChangeLog doc/functions/memmem.texi lib/memmem.c modules/memmem tests/test-memmem.c
diffstat 5 files changed, 416 insertions(+), 196 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,18 @@
+2008-01-08  Eric Blake  <ebb9@byu.net>
+
+	Rewrite memmem to guarantee linear complexity without malloc.
+	* lib/memmem.c (memmem): Use Two-Way rather than
+	Knuth-Morris-Pratt, to allow O(1) space usage.
+	(critical_factorization, two_way_short_needle)
+	(two_way_long_needle): New functions.
+	(knuth_morris_pratt): Delete.
+	* modules/memmem (Depends-on): No longer need malloca or stdbool.
+	Add stdint.
+	* tests/test-memmem.c (main): Add tests for periodic needle and
+	sublinear performance.
+	* doc/functions/memmem.texi (memmem): Document other deficiencies
+	in cygwin and older glibc.
+
 2008-01-08  Bruno Haible  <bruno@clisp.org>
 
 	* modules/memmem-tests (Makefile.am): Remove TESTS_ENVIRONMENT
--- a/doc/functions/memmem.texi
+++ b/doc/functions/memmem.texi
@@ -9,14 +9,18 @@
 Portability problems fixed by Gnulib:
 @itemize
 @item
-This function fails to return the start of the haystack for an empty
-needle on some platforms:
-Cygwin 1.5.x
+This function has reversed arguments on some older platforms:
+Linux libc 5.0.9
+
+@item
+This function returns incorrect values in some cases, such as when
+given an empty needle:
+glibc <= 2.0, cygwin 1.5.x
 
 @item
 This function has quadratic instead of linear complexity on some
 platforms:
-glibc <= 2.6.1
+glibc <= 2.6.1, cygwin 1.5.x
 
 @item
 This function is missing on some platforms:
--- a/lib/memmem.c
+++ b/lib/memmem.c
@@ -1,4 +1,5 @@
-/* Copyright (C) 1991,92,93,94,96,97,98,2000,2004,2007,2008 Free Software Foundation, Inc.
+/* Copyright (C) 1991,92,93,94,96,97,98,2000,2004,2007,2008 Free Software
+   Foundation, Inc.
    This file is part of the GNU C Library.
 
    This program is free software; you can redistribute it and/or modify
@@ -15,140 +16,369 @@
    with this program; if not, write to the Free Software Foundation,
    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
 
+/* This particular implementation was written by Eric Blake, 2008.  */
+
 #ifndef _LIBC
 # include <config.h>
 #endif
 
-#include <stddef.h>
+/* Specification of memmem.  */
 #include <string.h>
-#include <stdbool.h>
 
-#include "malloca.h"
+#include <limits.h>
+#include <stddef.h>
+#include <stdint.h>
 
 #ifndef _LIBC
 # define __builtin_expect(expr, val)   (expr)
 #endif
 
-/* Knuth-Morris-Pratt algorithm.
-   See http://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm
-   Return a boolean indicating success.  */
+/* We use the Two-Way string matching algorithm, which guarantees
+   linear complexity with constant space.  Additionally, for long
+   needles, we also use a bad character shift table similar to the
+   Boyer-Moore algorithm to achieve sub-linear performance.
+
+   See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260
+   and http://en.wikipedia.org/wiki/Boyer-Moore_string_search_algorithm
+*/
+
+/* Point at which computing a bad-byte shift table is likely to be
+   worthwhile.  Small needles should not compute a table, since it
+   adds (1 << CHAR_BIT) + NEEDLE_LEN computations of preparation for a
+   speedup no greater than a factor of NEEDLE_LEN.  The larger the
+   needle, the better the potential performance gain.  On the other
+   hand, on non-POSIX systems with CHAR_BIT larger than eight, the
+   memory required for the table is prohibitive.  */
+#if CHAR_BIT < 10
+# define LONG_NEEDLE_THRESHOLD 32U
+#else
+# define LONG_NEEDLE_THRESHOLD SIZE_MAX
+#endif
+
+#define MAX(a, b) ((a < b) ? (b) : (a))
 
-static bool
-knuth_morris_pratt (const unsigned char *haystack,
-                    const unsigned char *last_haystack,
-                    const unsigned char *needle, size_t m,
-                    const unsigned char **resultp)
+/* Perform a critical factorization of NEEDLE, of length NEEDLE_LEN.
+   Return the index of the first byte in the right half, and set
+   *PERIOD to the global period of the right half.
+
+   The global period of a string is the smallest index (possibly its
+   length) at which all remaining bytes in the string are repetitions
+   of the prefix (the last repetition may be a subset of the prefix).
+
+   When NEEDLE is factored into two halves, a local period is the
+   length of the smallest word that shares a suffix with the left half
+   and shares a prefix with the right half.  All factorizations of a
+   non-empty NEEDLE have a local period of at least 1 and no greater
+   than NEEDLE_LEN.
+
+   A critical factorization has the property that the local period
+   equals the global period.  All strings have at least one critical
+   factorization with the left half smaller than the global period.
+
+   Given an ordered alphabet, a critical factorization can be computed
+   in linear time, with 2 * NEEDLE_LEN comparisons, by computing the
+   larger of two ordered maximal suffixes.  The ordered maximal
+   suffixes are determined by lexicographic comparison of
+   periodicity.  */
+static size_t
+critical_factorization (const unsigned char *needle, size_t needle_len,
+			size_t *period)
 {
-  /* Allocate the table.  */
-  size_t *table = (size_t *) nmalloca (m, sizeof (size_t));
-  if (table == NULL)
-    return false;
-  /* Fill the table.
-     For 0 < i < m:
-       0 < table[i] <= i is defined such that
-       forall 0 < x < table[i]: needle[x..i-1] != needle[0..i-1-x],
-       and table[i] is as large as possible with this property.
-     This implies:
-     1) For 0 < i < m:
-          If table[i] < i,
-          needle[table[i]..i-1] = needle[0..i-1-table[i]].
-     2) For 0 < i < m:
-          rhaystack[0..i-1] == needle[0..i-1]
-          and exists h, i <= h < m: rhaystack[h] != needle[h]
-          implies
-          forall 0 <= x < table[i]: rhaystack[x..x+m-1] != needle[0..m-1].
-     table[0] remains uninitialized.  */
-  {
-    size_t i, j;
-
-    /* i = 1: Nothing to verify for x = 0.  */
-    table[1] = 1;
-    j = 0;
-
-    for (i = 2; i < m; i++)
-      {
-	/* Here: j = i-1 - table[i-1].
-	   The inequality needle[x..i-1] != needle[0..i-1-x] is known to hold
-	   for x < table[i-1], by induction.
-	   Furthermore, if j>0: needle[i-1-j..i-2] = needle[0..j-1].  */
-	unsigned char b = needle[i - 1];
+  /* Index of last byte of left half, or SIZE_MAX.  */
+  size_t max_suffix, max_suffix_rev;
+  size_t j; /* Index into NEEDLE for current candidate suffix.  */
+  size_t k; /* Offset into current period.  */
+  size_t p; /* Intermediate period.  */
+  unsigned char a, b; /* Current comparison bytes.  */
 
-	for (;;)
-	  {
-	    /* Invariants: The inequality needle[x..i-1] != needle[0..i-1-x]
-	       is known to hold for x < i-1-j.
-	       Furthermore, if j>0: needle[i-1-j..i-2] = needle[0..j-1].  */
-	    if (b == needle[j])
-	      {
-		/* Set table[i] := i-1-j.  */
-		table[i] = i - ++j;
-		break;
-	      }
-	    /* The inequality needle[x..i-1] != needle[0..i-1-x] also holds
-	       for x = i-1-j, because
-	         needle[i-1] != needle[j] = needle[i-1-x].  */
-	    if (j == 0)
-	      {
-		/* The inequality holds for all possible x.  */
-		table[i] = i;
-		break;
-	      }
-	    /* The inequality needle[x..i-1] != needle[0..i-1-x] also holds
-	       for i-1-j < x < i-1-j+table[j], because for these x:
-		 needle[x..i-2]
-		 = needle[x-(i-1-j)..j-1]
-		 != needle[0..j-1-(x-(i-1-j))]  (by definition of table[j])
-		    = needle[0..i-2-x],
-	       hence needle[x..i-1] != needle[0..i-1-x].
-	       Furthermore
-		 needle[i-1-j+table[j]..i-2]
-		 = needle[table[j]..j-1]
-		 = needle[0..j-1-table[j]]  (by definition of table[j]).  */
-	    j = j - table[j];
-	  }
-	/* Here: j = i - table[i].  */
-      }
-  }
+  /* Invariants:
+     0 <= j < NEEDLE_LEN - 1
+     -1 <= max_suffix{,_rev} < j (treating SIZE_MAX as if it were signed)
+     min(max_suffix, max_suffix_rev) < global period of NEEDLE
+     1 <= p <= global period of NEEDLE
+     p == global period of the substring NEEDLE[max_suffix{,_rev}+1...j]
+     1 <= k <= p
+  */
 
-  /* Search, using the table to accelerate the processing.  */
-  {
-    size_t j;
-    const unsigned char *rhaystack;
-    const unsigned char *phaystack;
+  /* Perform lexicographic search.  */
+  max_suffix = SIZE_MAX;
+  j = 0;
+  k = p = 1;
+  while (j + k < needle_len)
+    {
+      a = needle[j + k];
+      b = needle[max_suffix + k];
+      if (a < b)
+	{
+	  /* Suffix is smaller, period is entire prefix so far.  */
+	  j += k;
+	  k = 1;
+	  p = j - max_suffix;
+	}
+      else if (a == b)
+	{
+	  /* Advance through repetition of the current period.  */
+	  if (k != p)
+	    ++k;
+	  else
+	    {
+	      j += p;
+	      k = 1;
+	    }
+	}
+      else /* b < a */
+	{
+	  /* Suffix is larger, start over from current location.  */
+	  max_suffix = j++;
+	  k = p = 1;
+	}
+    }
+  *period = p;
 
-    *resultp = NULL;
-    j = 0;
-    rhaystack = haystack;
-    phaystack = haystack;
-    /* Invariant: phaystack = rhaystack + j.  */
-    while (phaystack != last_haystack)
-      if (needle[j] == *phaystack)
+  /* Perform reverse lexicographic search.  */
+  max_suffix_rev = SIZE_MAX;
+  j = 0;
+  k = p = 1;
+  while (j + k < needle_len)
+    {
+      a = needle[j + k];
+      b = needle[max_suffix_rev + k];
+      if (b < a)
 	{
-	  j++;
-	  phaystack++;
-	  if (j == m)
+	  /* Suffix is smaller, period is entire prefix so far.  */
+	  j += k;
+	  k = 1;
+	  p = j - max_suffix_rev;
+	}
+      else if (a == b)
+	{
+	  /* Advance through repetition of the current period.  */
+	  if (k != p)
+	    ++k;
+	  else
 	    {
-	      /* The entire needle has been found.  */
-	      *resultp = rhaystack;
-	      break;
+	      j += p;
+	      k = 1;
 	    }
 	}
-      else if (j > 0)
+      else /* a < b */
 	{
-	  /* Found a match of needle[0..j-1], mismatch at needle[j].  */
-	  rhaystack += table[j];
-	  j -= table[j];
+	  /* Suffix is larger, start over from current location.  */
+	  max_suffix_rev = j++;
+	  k = p = 1;
 	}
-      else
+    }
+
+  /* Choose the longer suffix.  Return the first byte of the right
+     half, rather than the last byte of the left half.  */
+  if (max_suffix_rev + 1 < max_suffix + 1)
+    return max_suffix + 1;
+  *period = p;
+  return max_suffix_rev + 1;
+}
+
+/* Return the first location of NEEDLE within HAYSTACK, or NULL.  This
+   method requires 0 < NEEDLE_LEN <= HAYSTACK_LEN, and is optimized
+   for NEEDLE_LEN < LONG_NEEDLE_THRESHOLD.  Performance is linear,
+   with 2 * NEEDLE_LEN comparisons in preparation, and at most 2 *
+   HAYSTACK_LEN - NEEDLE_LEN comparisons in searching.  */
+static void *
+two_way_short_needle (const unsigned char *haystack, size_t haystack_len,
+		      const unsigned char *needle, size_t needle_len)
+{
+  size_t i; /* Index into current byte of NEEDLE.  */
+  size_t j; /* Index into current window of HAYSTACK.  */
+  size_t period; /* The period of the right half of needle.  */
+  size_t suffix; /* The index of the right half of needle.  */
+
+  /* Factor the needle into two halves, such that the left half is
+     smaller than the global period, and the right half is
+     periodic (with a period as large as NEEDLE_LEN - suffix).  */
+  suffix = critical_factorization (needle, needle_len, &period);
+
+  /* Perform the search.  Each iteration compares the right half
+     first.  */
+  if (memcmp (needle, needle + period, suffix) == 0)
+    {
+      /* Entire needle is periodic; a mismatch can only advance by the
+	 period, so use memory to avoid rescanning known occurrences
+	 of the period.  */
+      size_t memory = 0;
+      j = 0;
+      while (j <= haystack_len - needle_len)
+	{
+	  /* Scan for matches in right half.  */
+	  i = MAX (suffix, memory);
+	  while (i < needle_len && needle[i] == haystack[i + j])
+	    ++i;
+	  if (needle_len <= i)
+	    {
+	      /* Scan for matches in left half.  */
+	      i = suffix - 1;
+	      while (memory < i + 1 && needle[i] == haystack[i + j])
+		--i;
+	      if (i + 1 < memory + 1)
+		return (void *) (haystack + j);
+	      /* No match, so remember how many repetitions of period
+		 on the right half were scanned.  */
+	      j += period;
+	      memory = needle_len - period;
+	    }
+	  else
+	    {
+	      j += i - suffix + 1;
+	      memory = 0;
+	    }
+	}
+    }
+  else
+    {
+      /* The two halves of needle are distinct; no extra memory is
+	 required, and any mismatch results in a maximal shift.  */
+      period = MAX (suffix, needle_len - suffix) + 1;
+      j = 0;
+      while (j <= haystack_len - needle_len)
 	{
-	  /* Found a mismatch at needle[0] already.  */
-	  rhaystack++;
-	  phaystack++;
+	  /* Scan for matches in right half.  */
+	  i = suffix;
+	  while (i < needle_len && needle[i] == haystack[i + j])
+	    ++i;
+	  if (needle_len <= i)
+	    {
+	      /* Scan for matches in left half.  */
+	      i = suffix - 1;
+	      while (i != SIZE_MAX && needle[i] == haystack[i + j])
+		--i;
+	      if (i == SIZE_MAX)
+		return (void *) (haystack + j);
+	      j += period;
+	    }
+	  else
+	    j += i - suffix + 1;
 	}
-  }
+    }
+  return NULL;
+}
+
+/* Return the first location of NEEDLE within HAYSTACK, or NULL.  This
+   method requires 0 < NEEDLE_LEN <= HAYSTACK_LEN, and is optimized
+   for LONG_NEEDLE_THRESHOLD <= NEEDLE_LEN.  Performance is linear,
+   with 3 * NEEDLE_LEN + (1U << CHAR_BIT) operations in preparation,
+   and at most 2 * HAYSTACK_LEN - NEEDLE_LEN comparisons in searching.
+   The extra initialization cost allows for potential sublinear
+   performance O(HAYSTACK_LEN / NEEDLE_LEN).  */
+static void *
+two_way_long_needle (const unsigned char *haystack, size_t haystack_len,
+		     const unsigned char *needle, size_t needle_len)
+{
+  size_t i; /* Index into current byte of NEEDLE.  */
+  size_t j; /* Index into current window of HAYSTACK.  */
+  size_t period; /* The period of the right half of needle.  */
+  size_t suffix; /* The index of the right half of needle.  */
+  size_t shift_table[1U << CHAR_BIT]; /* See below.  */
+
+  /* Factor the needle into two halves, such that the left half is
+     smaller than the global period, and the right half is
+     periodic (with a period as large as NEEDLE_LEN - suffix).  */
+  suffix = critical_factorization (needle, needle_len, &period);
+
+  /* Populate shift_table.  For each possible byte value c,
+     shift_table[c] is the distance from the last occurrence of c to
+     the end of NEEDLE, or NEEDLE_LEN if c is absent from the NEEDLE.
+     shift_table[NEEDLE[NEEDLE_LEN - 1]] contains the only 0.  */
+  for (i = 0; i < 1U << CHAR_BIT; i++)
+    shift_table[i] = needle_len;
+  for (i = 0; i < needle_len; i++)
+    shift_table[needle[i]] = needle_len - i - 1;
 
-  freea (table);
-  return true;
+  /* Perform the search.  Each iteration compares the right half
+     first.  */
+  if (memcmp (needle, needle + period, suffix) == 0)
+    {
+      /* Entire needle is periodic; a mismatch can only advance by the
+	 period, so use memory to avoid rescanning known occurrences
+	 of the period.  */
+      size_t memory = 0;
+      j = 0;
+      while (j <= haystack_len - needle_len)
+	{
+	  /* Check the last byte first; if it does not match, then
+	     shift to the next possible match location.  */
+	  size_t shift = shift_table[haystack[j + needle_len - 1]];
+	  if (0 < shift)
+	    {
+	      if (memory && shift < period)
+		{
+		  /* Since needle is periodic, but the last period has
+		     a byte out of place, there can be no match until
+		     after the mismatch.  */
+		  shift = needle_len - period;
+		  memory = 0;
+		}
+	      j += shift;
+	      continue;
+	    }
+	  /* Scan for matches in right half.  The last byte has
+	     already been matched, by virtue of the shift table.  */
+	  i = MAX (suffix, memory);
+	  while (i < needle_len - 1 && needle[i] == haystack[i + j])
+	    ++i;
+	  if (needle_len - 1 <= i)
+	    {
+	      /* Scan for matches in left half.  */
+	      i = suffix - 1;
+	      while (memory < i + 1 && needle[i] == haystack[i + j])
+		--i;
+	      if (i + 1 < memory + 1)
+		return (void *) (haystack + j);
+	      /* No match, so remember how many repetitions of period
+		 on the right half were scanned.  */
+	      j += period;
+	      memory = needle_len - period;
+	    }
+	  else
+	    {
+	      j += i - suffix + 1;
+	      memory = 0;
+	    }
+	}
+    }
+  else
+    {
+      /* The two halves of needle are distinct; no extra memory is
+	 required, and any mismatch results in a maximal shift.  */
+      period = MAX (suffix, needle_len - suffix) + 1;
+      j = 0;
+      while (j <= haystack_len - needle_len)
+	{
+	  /* Check the last byte first; if it does not match, then
+	     shift to the next possible match location.  */
+	  size_t shift = shift_table[haystack[j + needle_len - 1]];
+	  if (0 < shift)
+	    {
+	      j += shift;
+	      continue;
+	    }
+	  /* Scan for matches in right half.  The last byte has
+	     already been matched, by virtue of the shift table.  */
+	  i = suffix;
+	  while (i < needle_len - 1 && needle[i] == haystack[i + j])
+	    ++i;
+	  if (needle_len - 1 <= i)
+	    {
+	      /* Scan for matches in left half.  */
+	      i = suffix - 1;
+	      while (i != SIZE_MAX && needle[i] == haystack[i + j])
+		--i;
+	      if (i == SIZE_MAX)
+		return (void *) (haystack + j);
+	      j += period;
+	    }
+	  else
+	    j += i - suffix + 1;
+	}
+    }
+  return NULL;
 }
 
 /* Return the first occurrence of NEEDLE in HAYSTACK.  Return HAYSTACK
@@ -162,8 +392,6 @@
      not an array of 'char' values.  See ISO C 99 section 6.2.6.1.  */
   const unsigned char *haystack = (const unsigned char *) haystack_start;
   const unsigned char *needle = (const unsigned char *) needle_start;
-  const unsigned char *last_haystack = haystack + haystack_len;
-  const unsigned char *last_needle = needle + needle_len;
 
   if (needle_len == 0)
     /* The first occurrence of the empty string is deemed to occur at
@@ -175,82 +403,23 @@
   if (__builtin_expect (haystack_len < needle_len, 0))
     return NULL;
 
-  /* Use optimizations in memchr when possible.  */
-  if (__builtin_expect (needle_len == 1, 0))
-    return memchr (haystack, *needle, haystack_len);
-
-  /* Minimizing the worst-case complexity:
-     Let n = haystack_len, m = needle_len.
-     The naïve algorithm is O(n*m) worst-case.
-     The Knuth-Morris-Pratt algorithm is O(n) worst-case but it needs a
-     memory allocation.
-     To achieve linear complexity and yet amortize the cost of the
-     memory allocation, we activate the Knuth-Morris-Pratt algorithm
-     only once the naïve algorithm has already run for some time; more
-     precisely, when
-       - the outer loop count is >= 10,
-       - the average number of comparisons per outer loop is >= 5,
-       - the total number of comparisons is >= m.
-     But we try it only once.  If the memory allocation attempt failed,
-     we don't retry it.  */
-  {
-    bool try_kmp = true;
-    size_t outer_loop_count = 0;
-    size_t comparison_count = 0;
-
-    /* Speed up the following searches of needle by caching its first
-       byte.  */
-    unsigned char b = *needle++;
-
-    for (;; haystack++)
-      {
-	if (haystack == last_haystack)
-	  /* No match.  */
-	  return NULL;
+  /* Use optimizations in memchr when possible, to reduce the search
+     size of haystack using a linear algorithm with a smaller
+     coefficient.  However, avoid memchr for long needles, since we
+     can often achieve sublinear performance.  */
+  if (needle_len < LONG_NEEDLE_THRESHOLD)
+    {
+      haystack = memchr (haystack, *needle, haystack_len);
+      if (!haystack || __builtin_expect (needle_len == 1, 0))
+	return (void *) haystack;
+      haystack_len -= haystack - (const unsigned char *) haystack_start;
+      if (haystack_len < needle_len)
+	return NULL;
+      return two_way_short_needle (haystack, haystack_len, needle, needle_len);
+    }
+  else
+    return two_way_long_needle (haystack, haystack_len, needle, needle_len);
+}
 
-	/* See whether it's advisable to use an asymptotically faster
-	   algorithm.  */
-	if (try_kmp
-	    && outer_loop_count >= 10
-	    && comparison_count >= 5 * outer_loop_count)
-	  {
-	    /* See if needle + comparison_count now reaches the end of
-	       needle.  */
-	    if (comparison_count >= needle_len)
-	      {
-		/* Try the Knuth-Morris-Pratt algorithm.  */
-		const unsigned char *result;
-		if (knuth_morris_pratt (haystack, last_haystack,
-					needle - 1, needle_len, &result))
-		  return (void *) result;
-		try_kmp = false;
-	      }
-	  }
-
-	outer_loop_count++;
-	comparison_count++;
-	if (*haystack == b)
-	  /* The first byte matches.  */
-	  {
-	    const unsigned char *rhaystack = haystack + 1;
-	    const unsigned char *rneedle = needle;
-
-	    for (;; rhaystack++, rneedle++)
-	      {
-		if (rneedle == last_needle)
-		  /* Found a match.  */
-		  return (void *) haystack;
-		if (rhaystack == last_haystack)
-		  /* No match.  */
-		  return NULL;
-		comparison_count++;
-		if (*rhaystack != *rneedle)
-		  /* Nothing in this round.  */
-		  break;
-	      }
-	  }
-      }
-  }
-
-  return NULL;
-}
+#undef LONG_NEEDLE_THRESHOLD
+#undef MAX
--- a/modules/memmem
+++ b/modules/memmem
@@ -8,8 +8,7 @@
 Depends-on:
 extensions
 string
-stdbool
-malloca
+stdint
 memchr
 memcmp
 
--- a/tests/test-memmem.c
+++ b/tests/test-memmem.c
@@ -69,6 +69,12 @@
     ASSERT (result == NULL);
   }
 
+  {
+    const char input[] = "ABC ABCDAB ABCDABCDABDE";
+    const char *result = memmem (input, strlen (input), "ABCDABCD", 8);
+    ASSERT (result == input + 11);
+  }
+
   /* Check that length 0 does not dereference NULL.  */
   {
     const char *result = memmem (NULL, 0, "foo", 3);
@@ -152,5 +158,32 @@
       free (haystack);
   }
 
+  /* Check that long needles not present in a haystack can be handled
+     with sublinear speed.  */
+  {
+    size_t repeat = 10000;
+    size_t m = 1000000;
+    size_t n = 1000;
+    char *haystack = (char *) malloc (m);
+    char *needle = (char *) malloc (n);
+    if (haystack != NULL && needle != NULL)
+      {
+	const char *result;
+
+	memset (haystack, 'A', m);
+	memset (needle, 'B', n);
+
+	for (; repeat > 0; repeat--)
+	  {
+	    result = memmem (haystack, m, needle, n);
+	    ASSERT (result == NULL);
+	  }
+      }
+    if (haystack != NULL)
+      free (haystack);
+    if (needle != NULL)
+      free (needle);
+  }
+
   return 0;
 }