changeset 11341:58e3331ddad3

New module 'unicase/u8-casexfrm'.
author Bruno Haible <bruno@clisp.org>
date Sun, 08 Mar 2009 16:40:04 +0100
parents 7cdbdf8ffd81
children 7026795f7657
files ChangeLog lib/unicase/u-casexfrm.h lib/unicase/u8-casexfrm.c modules/unicase/u8-casexfrm
diffstat 4 files changed, 159 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,10 @@
 2009-03-08  Bruno Haible  <bruno@clisp.org>
 
+	New module 'unicase/u8-casexfrm'.
+	* lib/unicase/u8-casexfrm.c: New file.
+	* lib/unicase/u-casexfrm.h: New file.
+	* modules/unicase/u8-casexfrm: New file.
+
 	Tests for module 'unicase/u32-casecmp'.
 	* modules/unicase/u32-casecmp-tests: New file.
 	* tests/unicase/test-u32-casecmp.c: New file.
new file mode 100644
--- /dev/null
+++ b/lib/unicase/u-casexfrm.h
@@ -0,0 +1,90 @@
+/* Locale dependent transformation for case insensitive comparison of Unicode
+   strings.
+   Copyright (C) 2009 Free Software Foundation, Inc.
+   Written by Bruno Haible <bruno@clisp.org>, 2009.
+
+   This program is free software: you can redistribute it and/or modify it
+   under the terms of the GNU Lesser General Public License as published
+   by the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/* Casefold and normalize S (N units), then convert it to locale encoding.  */
+/* Return what memxfrm yields for that, or NULL if any step fails.  */
+char *
+FUNC (const UNIT *s, size_t n, const char *iso639_language, uninorm_t nf,
+      char *resultbuf, size_t *lengthp)
+{
+  UNIT foldedsbuf[2048 / sizeof (UNIT)]; /* local scratch for U_CASEFOLD */
+  UNIT *foldeds; /* U_CASEFOLD result; freed below if != foldedsbuf */
+  size_t foldeds_length; /* passed by address to U_CASEFOLD */
+  char convsbuf[2048]; /* local scratch for U_CONV_TO_ENCODING */
+  char *convs; /* converted string; freed below if != convsbuf */
+  size_t convs_length;
+  int ret; /* U_CONV_TO_ENCODING status; < 0 is treated as failure */
+  char *result; /* memxfrm return value, handed back to the caller */
+
+  /* Casefold and normalize the Unicode string.  */
+  foldeds_length = sizeof (foldedsbuf) / sizeof (UNIT); /* count of UNITs */
+  foldeds = U_CASEFOLD (s, n, iso639_language, nf, foldedsbuf, &foldeds_length);
+  if (foldeds == NULL)
+    /* errno is set here.  */
+    return NULL;
+
+  /* Convert it to locale encoding.  */
+  convs = convsbuf;
+  convs_length = sizeof (convsbuf) - 1; /* hold back one byte */
+  ret = U_CONV_TO_ENCODING (locale_charset (),
+			    iconveh_error,
+			    foldeds, foldeds_length,
+			    NULL,
+			    &convs, &convs_length);
+  if (ret < 0)
+    {
+      if (foldeds != foldedsbuf)
+	{
+	  int saved_errno = errno; /* keep errno intact across free */
+	  free (foldeds);
+	  errno = saved_errno;
+	}
+      return NULL;
+    }
+  if (foldeds != foldedsbuf) /* folded copy is no longer needed */
+    free (foldeds);
+
+  /* Ensure one more byte is available (presumably for memxfrm; confirm).  */
+  if (convs != convsbuf)
+    {
+      char *memory = (char *) realloc (convs, convs_length + 1);
+      if (memory == NULL)
+	{
+	  free (convs); /* a failed realloc leaves the old block allocated */
+	  errno = ENOMEM;
+	  return NULL;
+	}
+      convs = memory;
+    }
+
+  /* Apply locale dependent transformations for comparison.  */
+  result = memxfrm (convs, convs_length, resultbuf, lengthp);
+  if (result == NULL)
+    {
+      if (convs != convsbuf)
+	{
+	  int saved_errno = errno; /* keep errno intact across free */
+	  free (convs);
+	  errno = saved_errno;
+	}
+      return NULL;
+    }
+  if (convs != convsbuf) /* release the converted copy if it was allocated */
+    free (convs);
+  return result;
+}
new file mode 100644
--- /dev/null
+++ b/lib/unicase/u8-casexfrm.c
@@ -0,0 +1,35 @@
+/* Locale dependent transformation for case insensitive comparison of UTF-8
+   strings.
+   Copyright (C) 2009 Free Software Foundation, Inc.
+   Written by Bruno Haible <bruno@clisp.org>, 2009.
+
+   This program is free software: you can redistribute it and/or modify it
+   under the terms of the GNU Lesser General Public License as published
+   by the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+/* Specification.  */
+#include "unicase.h"
+
+#include <errno.h>
+#include <stdlib.h>
+
+#include "localcharset.h"
+#include "uniconv.h"
+#include "memxfrm.h"
+
+#define FUNC u8_casexfrm /* name given to the function in u-casexfrm.h */
+#define UNIT uint8_t /* string unit type used by that function */
+#define U_CASEFOLD u8_casefold /* casefolding routine it calls */
+#define U_CONV_TO_ENCODING u8_conv_to_encoding /* converter it calls */
+#include "u-casexfrm.h" /* function body, parameterized by the macros above */
new file mode 100644
--- /dev/null
+++ b/modules/unicase/u8-casexfrm
@@ -0,0 +1,29 @@
+Description:
+Locale dependent transformation for case insensitive comparison of UTF-8
+strings.
+
+Files:
+lib/unicase/u8-casexfrm.c
+lib/unicase/u-casexfrm.h
+
+Depends-on:
+unicase/base
+unicase/u8-casefold
+uniconv/u8-conv-to-enc
+localcharset
+memxfrm
+
+configure.ac:
+
+Makefile.am:
+lib_SOURCES += unicase/u8-casexfrm.c
+
+Include:
+"unicase.h"
+
+License:
+LGPL
+
+Maintainer:
+Bruno Haible
+