changeset 8132:95ebb0f8b658

Tests for module 'mbsstr'.
author Bruno Haible <bruno@clisp.org>
date Sun, 11 Feb 2007 19:16:21 +0000
parents 2c4153302289
children 678421bdc3ae
files m4/locale-fr.m4 modules/mbsstr-tests tests/test-mbsstr1.c tests/test-mbsstr2.c tests/test-mbsstr2.sh tests/test-mbsstr3.c tests/test-mbsstr3.sh
diffstat 7 files changed, 585 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
new file mode 100644
--- /dev/null
+++ b/m4/locale-fr.m4
@@ -0,0 +1,169 @@
+# locale-fr.m4 serial 3 (gettext-0.15)
+dnl Copyright (C) 2003, 2005-2006 Free Software Foundation, Inc.
+dnl This file is free software; the Free Software Foundation
+dnl gives unlimited permission to copy and/or distribute it,
+dnl with or without modifications, as long as this notice is preserved.
+
+dnl From Bruno Haible.
+
+dnl Determine the name of a French locale with traditional encoding.
+AC_DEFUN([gt_LOCALE_FR],
+[
+  AC_REQUIRE([AC_CANONICAL_HOST])
+  AC_REQUIRE([AM_LANGINFO_CODESET])
+  AC_CACHE_CHECK([for a traditional french locale], gt_cv_locale_fr, [
+    macosx=
+    case "$host_os" in
+      darwin[56]*) ;;
+      darwin*) macosx=yes;;
+    esac
+    if test -n "$macosx"; then
+      # On Darwin 7 (MacOS X), the libc supports some locales in non-UTF-8
+      # encodings, but the kernel does not support them. The documentation
+      # says:
+      #   "... all code that calls BSD system routines should ensure
+      #    that the const *char parameters of these routines are in UTF-8
+      #    encoding. All BSD system functions expect their string
+      #    parameters to be in UTF-8 encoding and nothing else."
+      # See the comments in config.charset. Therefore we bypass the test.
+      gt_cv_locale_fr=none
+    else
+      AC_LANG_CONFTEST([AC_LANG_SOURCE([
+changequote(,)dnl
+#include <locale.h>
+#include <time.h>
+#if HAVE_LANGINFO_CODESET
+# include <langinfo.h>
+#endif
+struct tm t;
+char buf[16];
+int main () {
+  /* Check whether the given locale name is recognized by the system.  */
+  if (setlocale (LC_ALL, "") == NULL) return 1;
+  /* Check whether nl_langinfo(CODESET) is nonempty.
+     On MacOS X 10.3.5 (Darwin 7.5) in the fr_FR locale, nl_langinfo(CODESET)
+     is empty, and the behaviour of Tcl 8.4 in this locale is not useful.  */
+#if HAVE_LANGINFO_CODESET
+  if (nl_langinfo (CODESET) [0] == '\0') return 1;
+#endif
+  /* Check whether in the abbreviation of the second month, the second
+     character (should be U+00E9: LATIN SMALL LETTER E WITH ACUTE) is only
+     one byte long. This excludes the UTF-8 encoding.  */
+  t.tm_year = 1975 - 1900; t.tm_mon = 2 - 1; t.tm_mday = 4;
+  if (strftime (buf, sizeof (buf), "%b", &t) < 3 || buf[2] != 'v') return 1;
+  return 0;
+}
+changequote([,])dnl
+        ])])
+      if AC_TRY_EVAL([ac_link]) && test -s conftest$ac_exeext; then
+        # Setting LC_ALL is not enough. Need to set LC_TIME to empty, because
+        # otherwise on MacOS X 10.3.5 the LC_TIME=C from the beginning of the
+        # configure script would override the LC_ALL setting. Likewise for
+        # LC_CTYPE, which is also set at the beginning of the configure script.
+        # Test for the usual locale name.
+        if (LC_ALL=fr_FR LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then
+          gt_cv_locale_fr=fr_FR
+        else
+          # Test for the locale name with explicit encoding suffix.
+          if (LC_ALL=fr_FR.ISO-8859-1 LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then
+            gt_cv_locale_fr=fr_FR.ISO-8859-1
+          else
+            # Test for the AIX, OSF/1, FreeBSD, NetBSD, OpenBSD locale name.
+            if (LC_ALL=fr_FR.ISO8859-1 LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then
+              gt_cv_locale_fr=fr_FR.ISO8859-1
+            else
+              # Test for the HP-UX locale name.
+              if (LC_ALL=fr_FR.iso88591 LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then
+                gt_cv_locale_fr=fr_FR.iso88591
+              else
+                # Test for the Solaris 7 locale name.
+                if (LC_ALL=fr LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then
+                  gt_cv_locale_fr=fr
+                else
+                  # Special test for NetBSD 1.6.
+                  if test -f /usr/share/locale/fr_FR.ISO8859-1/LC_CTYPE; then
+                    gt_cv_locale_fr=fr_FR.ISO8859-1
+                  else
+                    # None found.
+                    gt_cv_locale_fr=none
+                  fi
+                fi
+              fi
+            fi
+          fi
+        fi
+      fi
+      rm -fr conftest*
+    fi
+  ])
+  LOCALE_FR=$gt_cv_locale_fr
+  AC_SUBST([LOCALE_FR])
+])
+
+dnl Determine the name of a French locale with UTF-8 encoding.
+AC_DEFUN([gt_LOCALE_FR_UTF8],
+[
+  AC_REQUIRE([AM_LANGINFO_CODESET])
+  AC_CACHE_CHECK([for a french Unicode locale], gt_cv_locale_fr_utf8, [
+    AC_LANG_CONFTEST([AC_LANG_SOURCE([
+changequote(,)dnl
+#include <locale.h>
+#include <time.h>
+#if HAVE_LANGINFO_CODESET
+# include <langinfo.h>
+#endif
+struct tm t;
+char buf[16];
+int main () {
+  /* On BeOS, locales are not implemented in libc.  Rather, libintl
+     imitates locale dependent behaviour by looking at the environment
+     variables, and all locales use the UTF-8 encoding.  */
+#if !defined(__BEOS__)
+  /* Check whether the given locale name is recognized by the system.  */
+  if (setlocale (LC_ALL, "") == NULL) return 1;
+  /* Check whether nl_langinfo(CODESET) is nonempty.
+     On MacOS X 10.3.5 (Darwin 7.5) in the fr_FR locale, nl_langinfo(CODESET)
+     is empty, and the behaviour of Tcl 8.4 in this locale is not useful.  */
+# if HAVE_LANGINFO_CODESET
+  if (nl_langinfo (CODESET) [0] == '\0') return 1;
+# endif
+  /* Check whether in the abbreviation of the second month, the second
+     character (should be U+00E9: LATIN SMALL LETTER E WITH ACUTE) is
+     two bytes long, with UTF-8 encoding.  */
+  t.tm_year = 1975 - 1900; t.tm_mon = 2 - 1; t.tm_mday = 4;
+  if (strftime (buf, sizeof (buf), "%b", &t) < 4
+      || buf[1] != (char) 0xc3 || buf[2] != (char) 0xa9 || buf[3] != 'v')
+    return 1;
+#endif
+  return 0;
+}
+changequote([,])dnl
+      ])])
+    if AC_TRY_EVAL([ac_link]) && test -s conftest$ac_exeext; then
+      # Setting LC_ALL is not enough. Need to set LC_TIME to empty, because
+      # otherwise on MacOS X 10.3.5 the LC_TIME=C from the beginning of the
+      # configure script would override the LC_ALL setting. Likewise for
+      # LC_CTYPE, which is also set at the beginning of the configure script.
+      # Test for the usual locale name.
+      if (LC_ALL=fr_FR LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then
+        gt_cv_locale_fr_utf8=fr_FR
+      else
+        # Test for the locale name with explicit encoding suffix.
+        if (LC_ALL=fr_FR.UTF-8 LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then
+          gt_cv_locale_fr_utf8=fr_FR.UTF-8
+        else
+          # Test for the Solaris 7 locale name.
+          if (LC_ALL=fr.UTF-8 LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then
+            gt_cv_locale_fr_utf8=fr.UTF-8
+          else
+            # None found.
+            gt_cv_locale_fr_utf8=none
+          fi
+        fi
+      fi
+    fi
+    rm -fr conftest*
+  ])
+  LOCALE_FR_UTF8=$gt_cv_locale_fr_utf8
+  AC_SUBST([LOCALE_FR_UTF8])
+])
new file mode 100644
--- /dev/null
+++ b/modules/mbsstr-tests
@@ -0,0 +1,22 @@
+Files:
+tests/test-mbsstr1.c
+tests/test-mbsstr2.sh
+tests/test-mbsstr2.c
+tests/test-mbsstr3.sh
+tests/test-mbsstr3.c
+m4/locale-fr.m4
+m4/locale-zh.m4
+m4/codeset.m4
+
+Depends-on:
+
+configure.ac:
+gt_LOCALE_FR_UTF8
+gt_LOCALE_ZH_CN
+
+Makefile.am:
+TESTS += test-mbsstr1 test-mbsstr2.sh test-mbsstr3.sh
+TESTS_ENVIRONMENT += EXEEXT='@EXEEXT@' LOCALE_FR_UTF8='@LOCALE_FR_UTF8@' LOCALE_ZH_CN='@LOCALE_ZH_CN@'
+EXTRA_DIST += test-mbsstr2.sh test-mbsstr3.sh
+check_PROGRAMS += test-mbsstr1 test-mbsstr2 test-mbsstr3
+
new file mode 100644
--- /dev/null
+++ b/tests/test-mbsstr1.c
@@ -0,0 +1,133 @@
+/* Test of searching in a string.
+   Copyright (C) 2007 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software Foundation,
+   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2007.  */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <string.h>
+
+#include <stdlib.h>
+
+#define ASSERT(expr) do { if (!(expr)) abort (); } while (0)
+
+int
+main ()
+{
+  /* This test is executed in the C locale.  */
+
+  {
+    const char input[] = "foo";
+    const char *result = mbsstr (input, "");
+    ASSERT (result == input);
+  }
+
+  {
+    const char input[] = "foo";
+    const char *result = mbsstr (input, "o");
+    ASSERT (result == input + 1);
+  }
+
+  {
+    const char input[] = "ABC ABCDAB ABCDABCDABDE";
+    const char *result = mbsstr (input, "ABCDABD");
+    ASSERT (result == input + 15);
+  }
+
+  {
+    const char input[] = "ABC ABCDAB ABCDABCDABDE";
+    const char *result = mbsstr (input, "ABCDABE");
+    ASSERT (result == NULL);
+  }
+
+  /* Check that a very long haystack is handled quickly if the needle is
+     short and occurs near the beginning.  */
+  {
+    size_t repeat = 10000;
+    size_t m = 1000000;
+    char *needle =
+      "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+      "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
+    char *haystack = (char *) malloc (m + 1);
+    if (haystack != NULL)
+      {
+	memset (haystack, 'A', m);
+	haystack[0] = 'B';
+	haystack[m] = '\0';
+
+	for (; repeat > 0; repeat--)
+	  {
+	    ASSERT (mbsstr (haystack, needle) == haystack + 1);
+	  }
+
+	free (haystack);
+      }
+  }
+
+  /* Check that a very long needle is discarded quickly if the haystack is
+     short.  */
+  {
+    size_t repeat = 10000;
+    size_t m = 1000000;
+    char *haystack =
+      "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+      "ABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABABAB";
+    char *needle = (char *) malloc (m + 1);
+    if (needle != NULL)
+      {
+	memset (needle, 'A', m);
+	needle[m] = '\0';
+
+	for (; repeat > 0; repeat--)
+	  {
+	    ASSERT (mbsstr (haystack, needle) == NULL);
+	  }
+
+	free (needle);
+      }
+  }
+
+  /* Check that the asymptotic worst-case complexity is not quadratic.  */
+  {
+    size_t m = 1000000;
+    char *haystack = (char *) malloc (2 * m + 2);
+    char *needle = (char *) malloc (m + 2);
+    if (haystack != NULL && needle != NULL)
+      {
+	const char *result;
+
+	memset (haystack, 'A', 2 * m);
+	haystack[2 * m] = 'B';
+	haystack[2 * m + 1] = '\0';
+
+	memset (needle, 'A', m);
+	needle[m] = 'B';
+	needle[m + 1] = '\0';
+
+	result = mbsstr (haystack, needle);
+	ASSERT (result == haystack + m);
+      }
+    if (needle != NULL)
+      free (needle);
+    if (haystack != NULL)
+      free (haystack);
+  }
+
+  return 0;
+}
new file mode 100644
--- /dev/null
+++ b/tests/test-mbsstr2.c
@@ -0,0 +1,146 @@
+/* Test of searching in a string.
+   Copyright (C) 2007 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software Foundation,
+   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2007.  */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <string.h>
+
+#include <locale.h>
+#include <stdlib.h>
+
+#define ASSERT(expr) do { if (!(expr)) abort (); } while (0)
+
+int
+main ()
+{
+  /* configure should already have checked that the locale is supported.  */
+  if (setlocale (LC_ALL, "") == NULL)
+    return 1;
+
+  {
+    const char input[] = "f\303\266\303\266";
+    const char *result = mbsstr (input, "");
+    ASSERT (result == input);
+  }
+
+  {
+    const char input[] = "f\303\266\303\266";
+    const char *result = mbsstr (input, "\303\266");
+    ASSERT (result == input + 1);
+  }
+
+  {
+    const char input[] = "f\303\266\303\266";
+    const char *result = mbsstr (input, "\266\303");
+    ASSERT (result == NULL);
+  }
+
+  {
+    const char input[] = "\303\204BC \303\204BCD\303\204B \303\204BCD\303\204BCD\303\204BDE"; /* "ÄBC ÄBCDÄB ÄBCDÄBCDÄBDE" */
+    const char *result = mbsstr (input, "\303\204BCD\303\204BD"); /* "ÄBCDÄBD" */
+    ASSERT (result == input + 19);
+  }
+
+  {
+    const char input[] = "\303\204BC \303\204BCD\303\204B \303\204BCD\303\204BCD\303\204BDE"; /* "ÄBC ÄBCDÄB ÄBCDÄBCDÄBDE" */
+    const char *result = mbsstr (input, "\303\204BCD\303\204BE"); /* "ÄBCDÄBE" */
+    ASSERT (result == NULL);
+  }
+
+  /* Check that a very long haystack is handled quickly if the needle is
+     short and occurs near the beginning.  */
+  {
+    size_t repeat = 10000;
+    size_t m = 1000000;
+    char *needle =
+      "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+      "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
+    char *haystack = (char *) malloc (m + 1);
+    if (haystack != NULL)
+      {
+	memset (haystack, 'A', m);
+	haystack[0] = '\303'; haystack[1] = '\204';
+	haystack[m] = '\0';
+
+	for (; repeat > 0; repeat--)
+	  {
+	    ASSERT (mbsstr (haystack, needle) == haystack + 2);
+	  }
+
+	free (haystack);
+      }
+  }
+
+  /* Check that a very long needle is discarded quickly if the haystack is
+     short.  */
+  {
+    size_t repeat = 10000;
+    size_t m = 1000000;
+    char *haystack =
+      "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+      "A\303\207A\303\207A\303\207A\303\207A\303\207A\303\207A\303\207"
+      "A\303\207A\303\207A\303\207A\303\207A\303\207A\303\207A\303\207"
+      "A\303\207A\303\207A\303\207A\303\207A\303\207A\303\207A\303\207"
+      "A\303\207A\303\207A\303\207A\303\207A\303\207A\303\207A\303\207"
+      "A\303\207A\303\207A\303\207A\303\207A\303\207A\303\207";
+    char *needle = (char *) malloc (m + 1);
+    if (needle != NULL)
+      {
+	memset (needle, 'A', m);
+	needle[m] = '\0';
+
+	for (; repeat > 0; repeat--)
+	  {
+	    ASSERT (mbsstr (haystack, needle) == NULL);
+	  }
+
+	free (needle);
+      }
+  }
+
+  /* Check that the asymptotic worst-case complexity is not quadratic.  */
+  {
+    size_t m = 1000000;
+    char *haystack = (char *) malloc (2 * m + 3);
+    char *needle = (char *) malloc (m + 3);
+    if (haystack != NULL && needle != NULL)
+      {
+	const char *result;
+
+	memset (haystack, 'A', 2 * m);
+	haystack[2 * m] = '\303'; haystack[2 * m + 1] = '\207';
+	haystack[2 * m + 2] = '\0';
+
+	memset (needle, 'A', m);
+	needle[m] = '\303'; needle[m + 1] = '\207';
+	needle[m + 2] = '\0';
+
+	result = mbsstr (haystack, needle);
+	ASSERT (result == haystack + m);
+      }
+    if (needle != NULL)
+      free (needle);
+    if (haystack != NULL)
+      free (haystack);
+  }
+
+  return 0;
+}
new file mode 100755
--- /dev/null
+++ b/tests/test-mbsstr2.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+# Run test-mbsstr2 in a French UTF-8 locale, or skip (exit 77) if there is none.
+: ${LOCALE_FR_UTF8=fr_FR.UTF-8}
+if test "$LOCALE_FR_UTF8" = none; then
+  if test -f /usr/bin/localedef; then
+    echo "Skipping test: no french Unicode locale is installed"
+  else
+    echo "Skipping test: no french Unicode locale is supported"
+  fi
+  exit 77
+fi
+
+LC_ALL=$LOCALE_FR_UTF8 \
+"./test-mbsstr2${EXEEXT}"
new file mode 100644
--- /dev/null
+++ b/tests/test-mbsstr3.c
@@ -0,0 +1,85 @@
+/* Test of searching in a string.
+   Copyright (C) 2007 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software Foundation,
+   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2007.  */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <string.h>
+
+#include <locale.h>
+#include <stdlib.h>
+
+#define ASSERT(expr) do { if (!(expr)) abort (); } while (0)
+
+int
+main ()
+{
+  /* configure should already have checked that the locale is supported.  */
+  if (setlocale (LC_ALL, "") == NULL)
+    return 1;
+
+  /* Tests with a character < 0x30.  */
+  {
+    const char input[] = "\312\276\300\375 \312\276\300\375 \312\276\300\375"; /* "示例 示例 示例" */
+    const char *result = mbsstr (input, " ");
+    ASSERT (result == input + 4);
+  }
+
+  {
+    const char input[] = "\312\276\300\375"; /* "示例" */
+    const char *result = mbsstr (input, " ");
+    ASSERT (result == NULL);
+  }
+
+  /* Tests with a character >= 0x30.  */
+  {
+    const char input[] = "\272\305123\324\313\320\320\241\243"; /* "号123运行。" */
+    const char *result = mbsstr (input, "2");
+    ASSERT (result == input + 3);
+  }
+
+  /* The following tests show how mbsstr() is different from strstr().  */
+
+  {
+    const char input[] = "\313\320\320\320"; /* "诵行" */
+    const char *result = mbsstr (input, "\320\320"); /* "行" */
+    ASSERT (result == input + 2);
+  }
+
+  {
+    const char input[] = "\203\062\332\066123\324\313\320\320\241\243"; /* "씋123运行。" */
+    const char *result = mbsstr (input, "2");
+    ASSERT (result == input + 5);
+  }
+
+  {
+    const char input[] = "\312\276\300\375 \312\276\300\375 \312\276\300\375"; /* "示例 示例 示例" */
+    const char *result = mbsstr (input, "\276\300"); /* "纠" */
+    ASSERT (result == NULL);
+  }
+
+  {
+    const char input[] = "\312\276\300\375 \312\276\300\375 \312\276\300\375"; /* "示例 示例 示例" */
+    const char *result = mbsstr (input, "\375 "); /* invalid multibyte sequence */
+    ASSERT (result == NULL);
+  }
+
+  return 0;
+}
new file mode 100755
--- /dev/null
+++ b/tests/test-mbsstr3.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+# Run test-mbsstr3 in a Chinese GB18030 locale, or skip (exit 77) if there is none.
+: ${LOCALE_ZH_CN=zh_CN.GB18030}
+if test "$LOCALE_ZH_CN" = none; then
+  if test -f /usr/bin/localedef; then
+    echo "Skipping test: no chinese GB18030 locale is installed"
+  else
+    echo "Skipping test: no chinese GB18030 locale is supported"
+  fi
+  exit 77
+fi
+
+LC_ALL=$LOCALE_ZH_CN \
+"./test-mbsstr3${EXEEXT}"