changeset 8105:43f02832d528

New module 'mbsspn'.
author Bruno Haible <bruno@clisp.org>
date Mon, 05 Feb 2007 03:23:34 +0000
parents 9d0175f6442d
children 2ff19298dd56
files ChangeLog MODULES.html.sh lib/mbsspn.c lib/string_.h m4/mbsspn.m4 m4/string_h.m4 modules/mbsspn modules/string
diffstat 8 files changed, 176 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,16 @@
+2007-02-04  Bruno Haible  <bruno@clisp.org>
+
+	New module mbsspn.
+	* modules/mbsspn: New file.
+	* lib/mbsspn.c: New file.
+	* lib/string_.h (strspn): Add a conditional link warning.
+	(mbsspn): New declaration.
+	* m4/mbsspn.m4: New file.
+	* m4/string_h.m4 (gl_STRING_MODULE_INDICATOR_DEFAULTS): Initialize
+	GNULIB_MBSSPN.
+	* modules/string (string.h): Also substitute GNULIB_MBSSPN.
+	* MODULES.html.sh (Internationalization functions): Add mbsspn.
+
 2007-02-04  Bruno Haible  <bruno@clisp.org>
 
 	New module mbspbrk.
--- a/MODULES.html.sh
+++ b/MODULES.html.sh
@@ -2167,6 +2167,7 @@
   func_module mbscasestr
   func_module mbscspn
   func_module mbspbrk
+  func_module mbsspn
   func_module mbswidth
   func_module memcasecmp
   func_module memcoll
new file mode 100644
--- /dev/null
+++ b/lib/mbsspn.c
@@ -0,0 +1,97 @@
+/* Searching a string for a character outside a given set of characters.
+   Copyright (C) 1999, 2002, 2006-2007 Free Software Foundation, Inc.
+   Written by Bruno Haible <bruno@clisp.org>, 2007.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software Foundation,
+   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
+
+#include <config.h>
+
+/* Specification.  */
+#include <string.h>
+
+#if HAVE_MBRTOWC
+# include "mbuiter.h"
+#endif
+
+/* Return the length, in bytes, of the longest initial segment of STRING
+   made up only of characters that occur in REJECT (which, despite its name,
+   is the accepted set, as with strspn): the offset of the first character
+   of STRING not in REJECT, or of the terminating NUL if there is none.  */
+size_t
+mbsspn (const char *string, const char *reject)
+{
+  /* Optimize two cases: an empty REJECT and a one-byte REJECT.  */
+  if (reject[0] == '\0')
+    return 0;
+  if (reject[1] == '\0')
+    {
+      unsigned char uc = (unsigned char) reject[0];
+
+#if HAVE_MBRTOWC
+      if (MB_CUR_MAX > 1)
+	{
+	  mbui_iterator_t iter;
+
+	  for (mbui_init (iter, string); mbui_avail (iter); mbui_advance (iter))
+	    if (!(mb_len (mbui_cur (iter)) == 1
+		  && (unsigned char) * mbui_cur_ptr (iter) == uc))
+	      return mbui_cur_ptr (iter) - string;
+	  return strlen (string);
+	}
+      else
+#endif
+	{
+	  const char *ptr;
+
+	  for (ptr = string; *ptr != '\0'; ptr++)
+	    if ((unsigned char) *ptr != uc)
+	      break;
+	  return ptr - string;
+	}
+    }
+  /* General case: REJECT is at least two bytes long.  */
+#if HAVE_MBRTOWC
+  if (MB_CUR_MAX > 1)
+    {
+      mbui_iterator_t iter;
+
+      for (mbui_init (iter, string); mbui_avail (iter); mbui_advance (iter))
+	{
+	  if (mb_len (mbui_cur (iter)) == 1)
+	    {
+	      if (mbschr (reject, (unsigned char) * mbui_cur_ptr (iter)) == NULL)
+		return mbui_cur_ptr (iter) - string;
+	    }
+	  else
+	    {
+	      mbui_iterator_t aiter;
+
+	      /* No loop condition here: running off the end of REJECT must
+		 reach the check below, so that the mismatch is reported.  */
+	      for (mbui_init (aiter, reject);; mbui_advance (aiter))
+		{
+		  if (!mbui_avail (aiter))
+		    return mbui_cur_ptr (iter) - string;
+		  if (mb_equal (mbui_cur (aiter), mbui_cur (iter)))
+		    break;
+		}
+	    }
+	}
+      return strlen (string);
+    }
+  else
+#endif
+    return strspn (string, reject);
+}
--- a/lib/string_.h
+++ b/lib/string_.h
@@ -233,6 +233,15 @@
 #endif
 
 #if defined GNULIB_POSIXCHECK
+/* strspn() assumes the second argument is a list of single-byte characters.
+   Even in this simple case, it cannot work with multibyte strings.  */
+# undef strspn
+# define strspn(s,a) \
+    (GL_LINK_WARNING ("strspn cannot work correctly on character strings in multibyte locales - use mbsspn if you care about internationalization"), \
+     strspn (s, a))
+#endif
+
+#if defined GNULIB_POSIXCHECK
 /* strrchr() does not work with multibyte strings if the locale encoding is
    GB18030 and the character to be searched is a digit.  */
 # undef strrchr
@@ -391,6 +400,15 @@
 extern char * mbspbrk (const char *string, const char *accept);
 #endif
 
+#if @GNULIB_MBSSPN@
+/* Find the first occurrence in the character string STRING of any character
+   not in the character string REJECT (so REJECT is really the accepted set).
+   Return the number of bytes from the beginning of the string to this
+   occurrence, or to the end of the string if none exists.
+   Unlike strspn(), this function works correctly in multibyte locales.  */
+extern size_t mbsspn (const char *string, const char *reject);
+#endif
+
 
 #ifdef __cplusplus
 }
new file mode 100644
--- /dev/null
+++ b/m4/mbsspn.m4
@@ -0,0 +1,16 @@
+# mbsspn.m4 serial 1
+dnl Copyright (C) 2007 Free Software Foundation, Inc.
+dnl This file is free software; the Free Software Foundation
+dnl gives unlimited permission to copy and/or distribute it,
+dnl with or without modifications, as long as this notice is preserved.
+
+AC_DEFUN([gl_FUNC_MBSSPN],
+[
+  gl_PREREQ_MBSSPN
+])
+
+# Prerequisites of lib/mbsspn.c.
+AC_DEFUN([gl_PREREQ_MBSSPN], [
+  AC_REQUIRE([gl_FUNC_MBRTOWC])
+  :
+])
--- a/m4/string_h.m4
+++ b/m4/string_h.m4
@@ -74,4 +74,5 @@
   GNULIB_MBSCASESTR=0;  AC_SUBST([GNULIB_MBSCASESTR])
   GNULIB_MBSCSPN=0;     AC_SUBST([GNULIB_MBSCSPN])
   GNULIB_MBSPBRK=0;     AC_SUBST([GNULIB_MBSPBRK])
+  GNULIB_MBSSPN=0;      AC_SUBST([GNULIB_MBSSPN])
 ])
new file mode 100644
--- /dev/null
+++ b/modules/mbsspn
@@ -0,0 +1,29 @@
+Description:
+mbsspn() function: search a string for any character outside a set of characters.
+
+Files:
+lib/mbsspn.c
+m4/mbsspn.m4
+m4/mbrtowc.m4
+
+Depends-on:
+mbuiter
+string
+mbschr
+
+configure.ac:
+gl_FUNC_MBSSPN
+gl_STRING_MODULE_INDICATOR([mbsspn])
+
+Makefile.am:
+lib_SOURCES += mbsspn.c
+
+Include:
+<string.h>
+
+License:
+LGPL
+
+Maintainer:
+Bruno Haible
+
--- a/modules/string
+++ b/modules/string
@@ -28,6 +28,7 @@
 	      -e 's|@''GNULIB_MBSCASESTR''@|$(GNULIB_MBSCASESTR)|g' \
 	      -e 's|@''GNULIB_MBSCSPN''@|$(GNULIB_MBSCSPN)|g' \
 	      -e 's|@''GNULIB_MBSPBRK''@|$(GNULIB_MBSPBRK)|g' \
+	      -e 's|@''GNULIB_MBSSPN''@|$(GNULIB_MBSSPN)|g' \
 	      -e 's|@''GNULIB_MEMMEM''@|$(GNULIB_MEMMEM)|g' \
 	      -e 's|@''GNULIB_MEMPCPY''@|$(GNULIB_MEMPCPY)|g' \
 	      -e 's|@''GNULIB_MEMRCHR''@|$(GNULIB_MEMRCHR)|g' \