changeset 7808:430eccb7818f

New module 'unistr/u8-to-u32'.
author Bruno Haible <bruno@clisp.org>
date Tue, 09 Jan 2007 14:02:33 +0000
parents 5ec477d24232
children aba72e7f7432
files lib/unistr/u8-to-u32.c modules/unistr/u8-to-u32
diffstat 2 files changed, 150 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
new file mode 100644
--- /dev/null
+++ b/lib/unistr/u8-to-u32.c
@@ -0,0 +1,127 @@
+/* Convert UTF-8 string to UTF-32 string.
+   Copyright (C) 2002, 2006 Free Software Foundation, Inc.
+   Written by Bruno Haible <bruno@clisp.org>, 2002.
+
+   This program is free software; you can redistribute it and/or modify it
+   under the terms of the GNU Library General Public License as published
+   by the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+   USA.  */
+
+#include <config.h>
+
+/* Specification.  */
+#include "unistr.h"
+
+#define FUNC u8_to_u32
+#define SRC_UNIT uint8_t
+#define DST_UNIT uint32_t
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Convert the UTF-8 string S of N units to a UTF-32 string.
+
+   RESULTBUF and LENGTHP describe an optional caller-supplied output buffer:
+   if RESULTBUF is non-NULL, *LENGTHP must hold its capacity, counted in
+   DST_UNITs, on entry.  If RESULTBUF is NULL, *LENGTHP is not read.
+
+   On success, store the number of units produced in *LENGTHP and return the
+   converted string.  The return value is RESULTBUF itself if the output fit
+   into it; otherwise it is a block obtained from malloc/realloc that this
+   function no longer references, so the caller has to free it.  The return
+   value is never NULL on success, not even for empty input.  No terminating
+   zero unit is appended.
+
+   On failure, return NULL with errno set to
+     EILSEQ  if u8_mbtouc_safe reported a negative count for the input,
+     ENOMEM  if memory allocation failed or the required size does not fit
+	     in a size_t.
+   In that case *LENGTHP is left unmodified and RESULTBUF is not freed (its
+   contents may have been partially overwritten).  */
+DST_UNIT *
+FUNC (const SRC_UNIT *s, size_t n, DST_UNIT *resultbuf, size_t *lengthp)
+{
+  const SRC_UNIT *s_end = s + n;
+  /* Output string accumulator.  */
+  DST_UNIT *result;
+  size_t allocated;
+  size_t length;
+  /* errno value to report from the common failure exit.  */
+  int err;
+
+  if (resultbuf != NULL)
+    {
+      result = resultbuf;
+      allocated = *lengthp;
+    }
+  else
+    {
+      result = NULL;
+      allocated = 0;
+    }
+  length = 0;
+  /* Invariants:
+     result is either == resultbuf or == NULL or malloc-allocated.
+     If length > 0, then result != NULL.
+     length <= allocated.  */
+
+  while (s < s_end)
+    {
+      ucs4_t uc;
+      int count;
+
+      /* Fetch a Unicode character from the input string.  A negative count
+	 is treated as an undecodable input sequence.  */
+      count = u8_mbtouc_safe (&uc, s, s_end - s);
+      if (count < 0)
+	{
+	  err = EILSEQ;
+	  goto fail;
+	}
+      s += count;
+
+      /* Store it in the output string.  */
+      if (length + 1 > allocated)
+	{
+	  /* Largest unit count whose size in bytes still fits in a size_t.  */
+	  const size_t max_units = (size_t) -1 / sizeof (DST_UNIT);
+	  size_t new_allocated;
+	  DST_UNIT *memory;
+
+	  /* Grow geometrically, starting at 12 units.  Clamp at max_units so
+	     that neither the doubling nor the multiplication by
+	     sizeof (DST_UNIT) below can wrap around and make us allocate a
+	     block that is smaller than what we are about to write.  */
+	  if (allocated == 0)
+	    new_allocated = 12;
+	  else if (allocated <= max_units / 2)
+	    new_allocated = 2 * allocated;
+	  else
+	    new_allocated = max_units;
+	  if (length + 1 > new_allocated)
+	    {
+	      /* The needed size is not representable; report it like any
+		 other out-of-memory condition.  */
+	      err = ENOMEM;
+	      goto fail;
+	    }
+
+	  /* realloc may only be applied to memory we allocated ourselves,
+	     never to the caller's buffer.  */
+	  if (result == resultbuf || result == NULL)
+	    memory = (DST_UNIT *) malloc (new_allocated * sizeof (DST_UNIT));
+	  else
+	    memory =
+	      (DST_UNIT *) realloc (result, new_allocated * sizeof (DST_UNIT));
+
+	  if (memory == NULL)
+	    {
+	      /* After a failed realloc the old block is still valid; the
+		 failure exit releases it.  */
+	      err = ENOMEM;
+	      goto fail;
+	    }
+	  /* When switching from the caller's buffer to the heap, carry over
+	     the units converted so far.  */
+	  if (result == resultbuf && length > 0)
+	    memcpy ((char *) memory, (char *) result,
+		    length * sizeof (DST_UNIT));
+	  result = memory;
+	  allocated = new_allocated;
+	}
+      result[length++] = uc;
+    }
+
+  if (length == 0)
+    {
+      if (result == NULL)
+	{
+	  /* Return a non-NULL value.  NULL means error.  */
+	  result = (DST_UNIT *) malloc (1);
+	  if (result == NULL)
+	    {
+	      errno = ENOMEM;
+	      return NULL;
+	    }
+	}
+    }
+  else if (result != resultbuf && length < allocated)
+    {
+      /* Shrink the allocated memory if possible.  If shrinking fails, the
+	 larger block is still valid and is returned as is.  */
+      DST_UNIT *memory;
+
+      memory = (DST_UNIT *) realloc (result, length * sizeof (DST_UNIT));
+      if (memory != NULL)
+	result = memory;
+    }
+
+  *lengthp = length;
+  return result;
+
+ fail:
+  /* Release the accumulator only if this function allocated it.  errno is
+     assigned after free so that free cannot clobber it.  */
+  if (!(result == resultbuf || result == NULL))
+    free (result);
+  errno = err;
+  return NULL;
+}
new file mode 100644
--- /dev/null
+++ b/modules/unistr/u8-to-u32
@@ -0,0 +1,23 @@
+Description:
+Convert UTF-8 string to UTF-32 string.
+
+Files:
+lib/unistr/u8-to-u32.c
+
+Depends-on:
+unistr/base
+
+configure.ac:
+
+Makefile.am:
+lib_SOURCES += unistr/u8-to-u32.c
+
+Include:
+"unistr.h"
+
+License:
+LGPL
+
+Maintainer:
+Bruno Haible
+