changeset 9946:209912c7e7bd

Fix test failure on platforms with non-GNU iconv.
author Bruno Haible <bruno@clisp.org>
date Sun, 20 Apr 2008 20:02:43 +0200
parents bc56ebca4477
children 570db937d3b0
files ChangeLog lib/uniconv/u-conv-to-enc.h lib/uniconv/u16-conv-to-enc.c modules/uniconv/u16-conv-to-enc
diffstat 4 files changed, 154 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+2008-04-20  Bruno Haible  <bruno@clisp.org>
+
+	Fix test failure on platforms with non-GNU iconv.
+	* lib/uniconv/u16-conv-to-enc.c (u16_to_u8_lenient): New function.
+	(U_TO_U8): Use it, rather than u16_to_u8.
+	* lib/uniconv/u-conv-to-enc.h (FUNC): Allow an incomplete sequence of
+	units at the end of the input string.
+	* modules/uniconv/u16-conv-to-enc (Depends-on): Update.
+
 2008-04-20  Bruno Haible  <bruno@clisp.org>
 
 	* tests/uniconv/test-u8-conv-to-enc.c (main): Accept result == NULL
--- a/lib/uniconv/u-conv-to-enc.h
+++ b/lib/uniconv/u-conv-to-enc.h
@@ -106,7 +106,7 @@
 
       iunit = 0;
       i8 = 0;
-      while (iunit < srclen)
+      while (iunit < srclen && i8 < utf8_srclen)
 	{
 	  int countunit;
 	  int count8;
@@ -120,6 +120,17 @@
 	  iunit += countunit;
 	  i8 += count8;
 	}
+      /* Check that utf8_src has been traversed entirely.  */
+      if (i8 < utf8_srclen)
+	abort ();
+      /* Check that src has been traversed entirely, except possibly for an
+	 incomplete sequence of units at the end.  */
+      if (iunit < srclen)
+	{
+	  offsets[iunit] = *lengthp;
+	  if (!(U_MBLEN (src + iunit, srclen - iunit) < 0))
+	    abort ();
+	}
       free (scaled_offsets);
     }
   if (utf8_src != tmpbuf)
--- a/lib/uniconv/u16-conv-to-enc.c
+++ b/lib/uniconv/u16-conv-to-enc.c
@@ -1,5 +1,5 @@
 /* Conversion from UTF-16 to legacy encodings.
-   Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc.
+   Copyright (C) 2002, 2006-2008 Free Software Foundation, Inc.
 
    This program is free software: you can redistribute it and/or modify it
    under the terms of the GNU Lesser General Public License as published
@@ -39,9 +39,136 @@
 # endif
 #endif
 
+
+#if !defined UTF16_NAME
+
+/* A variant of u16_to_u8 that treats an incomplete sequence of units at the
+   end as a harmless no-op, rather than reporting it as an EILSEQ error.  */
+
+#define FUNC u16_to_u8_lenient
+#define SRC_UNIT uint16_t
+#define DST_UNIT uint8_t
+
+static DST_UNIT *
+FUNC (const SRC_UNIT *s, size_t n, DST_UNIT *resultbuf, size_t *lengthp)
+{
+  const SRC_UNIT *s_end = s + n;
+  /* Output string accumulator.  */
+  DST_UNIT *result;
+  size_t allocated; /* Capacity of result, in DST_UNITs.  */
+  size_t length; /* Number of DST_UNITs stored in result so far.  */
+
+  if (resultbuf != NULL)
+    {
+      result = resultbuf;
+      allocated = *lengthp; /* *lengthp is read as resultbuf's capacity.  */
+    }
+  else
+    {
+      result = NULL;
+      allocated = 0;
+    }
+  length = 0;
+  /* Invariants:
+     result is either == resultbuf or == NULL or malloc-allocated.
+     If length > 0, then result != NULL.  */
+
+  while (s < s_end)
+    {
+      ucs4_t uc;
+      int count;
+
+      /* Fetch a Unicode character from the input string.  */
+      count = u16_mbtoucr (&uc, s, s_end - s);
+      if (count < 0)
+	{
+	  if (count == -2)
+	    /* Incomplete sequence of units: stop, keeping the output.  */
+	    break;
+	  if (!(result == resultbuf || result == NULL))
+	    free (result);
+	  errno = EILSEQ; /* Any other negative count is an error.  */
+	  return NULL;
+	}
+      s += count;
+
+      /* Store it in the output string.  */
+      count = u8_uctomb (result + length, uc, allocated - length);
+      if (count == -1)
+	{
+	  if (!(result == resultbuf || result == NULL))
+	    free (result);
+	  errno = EILSEQ;
+	  return NULL;
+	}
+      if (count == -2) /* Handled by enlarging the output and retrying.  */
+	{
+	  DST_UNIT *memory;
+
+	  allocated = (allocated > 0 ? 2 * allocated : 12); /* Double, or 12.  */
+	  if (length + 6 > allocated)
+	    allocated = length + 6; /* At least 6 free units.  */
+	  if (result == resultbuf || result == NULL)
+	    memory = (DST_UNIT *) malloc (allocated * sizeof (DST_UNIT));
+	  else
+	    memory =
+	      (DST_UNIT *) realloc (result, allocated * sizeof (DST_UNIT));
+
+	  if (memory == NULL)
+	    {
+	      if (!(result == resultbuf || result == NULL))
+		free (result);
+	      errno = ENOMEM;
+	      return NULL;
+	    }
+	  if (result == resultbuf && length > 0) /* Leaving resultbuf: copy.  */
+	    memcpy ((char *) memory, (char *) result,
+		    length * sizeof (DST_UNIT));
+	  result = memory;
+	  count = u8_uctomb (result + length, uc, allocated - length); /* Retry.  */
+	  if (count < 0)
+	    abort (); /* The retry is expected to succeed.  */
+	}
+      length += count;
+    }
+
+  if (length == 0)
+    {
+      if (result == NULL)
+	{
+	  /* Return a non-NULL value.  NULL means error.  */
+	  result = (DST_UNIT *) malloc (1);
+	  if (result == NULL)
+	    {
+	      errno = ENOMEM;
+	      return NULL;
+	    }
+	}
+    }
+  else if (result != resultbuf && length < allocated)
+    {
+      /* Shrink the allocated memory if possible.  */
+      DST_UNIT *memory;
+
+      memory = (DST_UNIT *) realloc (result, length * sizeof (DST_UNIT));
+      if (memory != NULL)
+	result = memory; /* Otherwise keep the larger block.  */
+    }
+
+  *lengthp = length; /* Number of DST_UNITs produced.  */
+  return result;
+}
+
+#undef DST_UNIT
+#undef SRC_UNIT
+#undef FUNC
+
+#endif
+
+
 #define FUNC u16_conv_to_encoding
 #define UNIT uint16_t
-#define U_TO_U8 u16_to_u8
+#define U_TO_U8 u16_to_u8_lenient
 #define U_MBLEN u16_mblen
 #if defined UTF16_NAME
 # define UTF_NAME UTF16_NAME
--- a/modules/uniconv/u16-conv-to-enc
+++ b/modules/uniconv/u16-conv-to-enc
@@ -9,7 +9,10 @@
 uniconv/base
 striconveha
 uniconv/u8-conv-to-enc
-unistr/u16-to-u8
+unistr/u16-mbtoucr
+unistr/u8-uctomb
+unistr/u16-mblen 
+unistr/u8-mblen 
 
 configure.ac:
 AC_REQUIRE([AC_C_BIGENDIAN])