changeset 11393:fed5074dc3ea

New module 'mbmemcasecoll'.
author Bruno Haible <bruno@clisp.org>
date Wed, 11 Mar 2009 00:25:15 +0100
parents 6193d36bf14c
children 0c0887e0337a
files ChangeLog lib/mbmemcasecoll.c lib/mbmemcasecoll.h modules/mbmemcasecoll
diffstat 4 files changed, 279 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,10 @@
 2009-03-10  Bruno Haible  <bruno@clisp.org>
 
+	New module 'mbmemcasecoll'.
+	* lib/mbmemcasecoll.h: New file.
+	* lib/mbmemcasecoll.c: New file.
+	* modules/mbmemcasecoll: New file.
+
 	* tests/test-mbmemcasecmp.h: New file, extracted from
 	tests/test-mbmemcasecmp.c.
 	* tests/test-mbmemcasecmp.c: Include test-mbmemcasecmp.h.
new file mode 100644
--- /dev/null
+++ b/lib/mbmemcasecoll.c
@@ -0,0 +1,186 @@
+/* Locale-specific case-ignoring memory comparison.
+   Copyright (C) 2001, 2009 Free Software Foundation, Inc.
+   Written by Bruno Haible <bruno@clisp.org>, 2001.
+
+   This program is free software: you can redistribute it and/or modify it
+   under the terms of the GNU Lesser General Public License as published
+   by the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+/* Specification.  */
+#include "mbmemcasecoll.h"
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Get tolower().  */
+#include <ctype.h>
+
+/* Get mbstate_t, mbrtowc(), wcrtomb().  */
+#include <wchar.h>
+
+/* Get towlower().  */
+#include <wctype.h>
+
+#include "malloca.h"
+#include "memcmp2.h"
+#include "memcoll.h"
+
+/* Yield tolower (Ch) if isupper (Ch) is true, and Ch itself otherwise.
+   Ch is evaluated more than once, so it must not have side effects.  */
+#define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
+
+/* Apply towlower() to the multibyte character sequence in INBUF, storing the
+   result as a multibyte character sequence in OUTBUF.
+
+   INBUF holds INBUFSIZE bytes and may contain NUL bytes.  OUTBUF has room
+   for OUTBUFSIZE bytes.  Every character is decoded, and re-encoded, starting
+   from a zero-initialized conversion state.  Bytes that do not form a valid
+   multibyte character, and a trailing incomplete multibyte character, are
+   copied to OUTBUF unmodified.
+
+   Return the number of bytes stored in OUTBUF.  Abort if that number turns
+   out to be larger than OUTBUFSIZE.  */
+static size_t
+apply_towlower (const char *inbuf, size_t inbufsize,
+		char *outbuf, size_t outbufsize)
+{
+  char *outbuf_orig = outbuf;
+  size_t remaining;
+
+  remaining = inbufsize;
+  while (remaining > 0)
+    {
+      wchar_t wc1;
+      size_t n1;
+      mbstate_t state;
+
+      memset (&state, '\0', sizeof (mbstate_t));
+      n1 = mbrtowc (&wc1, inbuf, remaining, &state);
+      if (n1 == (size_t)(-2))
+	break;
+      if (n1 != (size_t)(-1))
+	{
+	  wint_t wc2;
+
+	  /* mbrtowc() returns 0, not a byte count, when it decodes the null
+	     wide character.  Count that as one consumed byte; otherwise
+	     neither INBUF nor REMAINING would advance and the loop would
+	     never terminate.  */
+	  if (n1 == 0)
+	    n1 = 1;
+
+	  wc2 = towlower (wc1);
+	  if (wc2 != wc1)
+	    {
+	      size_t n2;
+
+	      memset (&state, '\0', sizeof (mbstate_t));
+	      n2 = wcrtomb (outbuf, wc2, &state);
+	      if (n2 != (size_t)(-1))
+		{
+		  /* Store the translated multibyte character.  */
+		  inbuf += n1;
+		  remaining -= n1;
+		  outbuf += n2;
+		  continue;
+		}
+	      /* The lower-case character cannot be encoded: fall through
+		 and copy the original bytes instead.  */
+	    }
+
+	  /* Nothing to translate. */
+	  memcpy (outbuf, inbuf, n1);
+	  inbuf += n1;
+	  remaining -= n1;
+	  outbuf += n1;
+	  continue;
+	}
+
+      /* Invalid multibyte character on input.
+	 Copy one byte without modification.  */
+      *outbuf++ = *inbuf++;
+      remaining -= 1;
+    }
+  /* Incomplete multibyte sequence on input.
+     Pass it through unmodified.  */
+  while (remaining > 0)
+    {
+      *outbuf++ = *inbuf++;
+      remaining -= 1;
+    }
+
+  /* Verify the output buffer was large enough.  This is a sanity check
+     after the fact; OUTBUF only ever moves forward, so the difference is
+     nonnegative and can be compared as a size_t.  */
+  if ((size_t) (outbuf - outbuf_orig) > outbufsize)
+    abort ();
+
+  /* Return the number of written output bytes.  */
+  return outbuf - outbuf_orig;
+}
+
+/* Lower-case the BUFSIZE bytes at INBUF, each taken as one unibyte
+   character, and store the BUFSIZE resulting bytes at OUTBUF.  */
+static void
+apply_tolower (const char *inbuf, char *outbuf, size_t bufsize)
+{
+  size_t i;
+
+  for (i = 0; i < bufsize; i++)
+    {
+      /* <ctype.h> functions need the byte as an unsigned char value.  */
+      unsigned char c = (unsigned char) inbuf[i];
+
+      outbuf[i] = TOLOWER (c);
+    }
+}
+
+/* Compare the memory regions [S1..S1+S1LEN-1] and [S2..S2+S2LEN-1],
+   ignoring case.
+
+   Both regions are first copied into temporary buffers in lower case: with
+   towlower() applied to each multibyte character if MB_CUR_MAX > 1, with
+   tolower() applied to each byte otherwise.  The two copies are then
+   compared with memcoll() if HARD_LC_COLLATE is true, and with memcmp2()
+   if it is false.
+
+   Return the result of that comparison.  After memcmp2() errno is set to 0;
+   after memcoll() the value errno has at that point is preserved across the
+   release of the temporary buffers.  If the buffers cannot be allocated, or
+   their total size does not fit in a size_t, return 0 with errno set to
+   ENOMEM.  */
+int
+mbmemcasecoll (const char *s1, size_t s1len, const char *s2, size_t s2len,
+	       bool hard_LC_COLLATE)
+{
+  const size_t size_max = (size_t)(-1);
+  char *t1;
+  size_t t1len;
+  char *t2;
+  size_t t2len;
+  char *memory;
+  int cmp;
+
+  if (MB_CUR_MAX > 1)
+    {
+      /* Application of towlower grows each character by a factor 2
+	 at most.  Doubling must not wrap around, or the buffers would
+	 be too small for what apply_towlower() writes.  */
+      if (s1len > size_max / 2 || s2len > size_max / 2)
+	{
+	  errno = ENOMEM;
+	  return 0;
+	}
+      t1len = 2 * s1len;
+      t2len = 2 * s2len;
+    }
+  else
+    {
+      /* Application of tolower doesn't change the size.  */
+      t1len = s1len;
+      t2len = s2len;
+    }
+  /* Allocate memory for t1 and t2, with one extra byte after each.
+     NOTE(review): the extra bytes are kept from the original allocation;
+     presumably memcoll() needs them -- confirm against memcoll.h.
+     Refuse a total size that would wrap around.  */
+  if (t1len > size_max - 2 || t2len > size_max - 2 - t1len)
+    {
+      errno = ENOMEM;
+      return 0;
+    }
+  memory = (char *) malloca (t1len + 1 + t2len + 1);
+  if (memory == NULL)
+    {
+      errno = ENOMEM;
+      return 0;
+    }
+  t1 = memory;
+  t2 = memory + t1len + 1;
+
+  /* Case-fold the two argument strings.  */
+  if (MB_CUR_MAX > 1)
+    {
+      /* The multibyte conversion may change the lengths.  */
+      t1len = apply_towlower (s1, s1len, t1, t1len);
+      t2len = apply_towlower (s2, s2len, t2, t2len);
+    }
+  else
+    {
+      apply_tolower (s1, t1, s1len);
+      apply_tolower (s2, t2, s2len);
+    }
+
+  /* Compare the two case-folded strings.  */
+  if (hard_LC_COLLATE)
+    cmp = memcoll (t1, t1len, t2, t2len);
+  else
+    {
+      cmp = memcmp2 (t1, t1len, t2, t2len);
+      errno = 0;
+    }
+
+  /* Release the buffers without letting freea() disturb errno.  */
+  {
+    int saved_errno = errno;
+    freea (memory);
+    errno = saved_errno;
+  }
+
+  return cmp;
+}
new file mode 100644
--- /dev/null
+++ b/lib/mbmemcasecoll.h
@@ -0,0 +1,58 @@
+/* Locale-specific case-ignoring memory comparison.
+   Copyright (C) 2001, 2009 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify it
+   under the terms of the GNU Lesser General Public License as published
+   by the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2001.  */
+
+#ifndef MBMEMCASECOLL_H
+#define MBMEMCASECOLL_H
+
+#include <stdbool.h>
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* Compare the memory regions S1 = [s1..s1+s1len-1], S2 = [s2..s2+s2len-1],
+   that contain character sequences, using the rules of the current locale,
+   ignoring case.
+   HARD_LC_COLLATE is false if the LC_COLLATE category of the current locale
+   is equivalent to the "C" locale.
+
+   This function's result is locale dependent.  Unlike memcasecmp(), it works
+   correctly in multibyte locales and also handles Turkish i / dotless i.
+   Unlike ulc_casecmp(), it does not handle the German sharp s and the Greek
+   final sigma.  Like memcoll() and ulc_casecoll(), it uses collation order.
+
+   Return a negative number if S1 < S2, a positive number if S1 > S2, 0 if
+   S1 and S2 have the same contents, or an unspecified value if there is an
+   error.
+   Set errno to an error number if there is an error, and to zero otherwise.
+   In particular, if the temporary memory for the case-folded copies of S1
+   and S2 cannot be allocated, errno is set to ENOMEM and 0 is returned, so
+   callers must check errno to tell this apart from equality.
+
+   Note: This function may, in multibyte locales, return 0 for strings of
+   different lengths!  */
+
+extern int mbmemcasecoll (const char *s1, size_t s1len,
+			  const char *s2, size_t s2len,
+			  bool hard_LC_COLLATE);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* MBMEMCASECOLL_H */
new file mode 100644
--- /dev/null
+++ b/modules/mbmemcasecoll
@@ -0,0 +1,30 @@
+Description:
+mbmemcasecoll() function: locale dependent case-insensitive memory area
+comparison.
+
+Files:
+lib/mbmemcasecoll.h
+lib/mbmemcasecoll.c
+
+Depends-on:
+stdbool
+malloca
+mbrtowc
+wcrtomb
+memcmp2
+memcoll
+
+configure.ac:
+
+Makefile.am:
+lib_SOURCES += mbmemcasecoll.c
+
+Include:
+"mbmemcasecoll.h"
+
+License:
+GPL
+
+Maintainer:
+Bruno Haible
+