changeset 9341:212d6f041290

New module 'iconv_open-utf': Enhance iconv_open to support UTF-{16,32}{BE,LE}.
author Bruno Haible <bruno@clisp.org>
date Sun, 14 Oct 2007 12:27:28 +0200
parents 0a3ccd66183f
children 014260dbb02a
files ChangeLog doc/functions/iconv_open.texi lib/iconv.c lib/iconv.in.h lib/iconv_close.c lib/iconv_open.c m4/iconv_h.m4 m4/iconv_open.m4 modules/iconv_open modules/iconv_open-utf
diffstat 10 files changed, 851 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,26 @@
+2007-10-14  Bruno Haible  <bruno@clisp.org>
+
+	Enhance iconv_open to support UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE.
+	* modules/iconv_open-utf: New file.
+	* lib/iconv.in.h (_ICONV_UTF8_UTF*, _ICONV_UTF*_UTF8): New macros.
+	(iconv, iconv_close): New declarations.
+	* lib/iconv_open.c: Include c-strcase.h. Don't require ICONV_FLAVOR to
+	be defined.
+	(iconv_open): Add special handling of conversion between UTF-8 and
+	UTF-{16,32}{BE,LE}.
+	* lib/iconv.c: New file, incorporating code from GNU libiconv 1.11.
+	* lib/iconv_close.c: New file.
+	* m4/iconv_open.m4 (gl_REPLACE_ICONV_OPEN): New macro, extracted from
+	gl_FUNC_ICONV_OPEN.
+	(gl_FUNC_ICONV_OPEN): Use it.
+	(gl_FUNC_ICONV_OPEN_UTF): New macro.
+	* m4/iconv_h.m4 (gl_ICONV_H_DEFAULTS): Initialize also REPLACE_ICONV
+	and REPLACE_ICONV_UTF.
+	* modules/iconv_open (Depends-on): Add c-strcase.
+	(Makefile.am): Substitute also REPLACE_ICONV, REPLACE_ICONV_UTF,
+	ICONV_CONST.
+	* doc/functions/iconv_open.texi: Mention the iconv_open-utf module.
+
 2007-10-13  Albert Chin  <china@thewrittenword.com>
             Bruno Haible  <bruno@clisp.org>
 
--- a/doc/functions/iconv_open.texi
+++ b/doc/functions/iconv_open.texi
@@ -4,7 +4,7 @@
 
 POSIX specification: @url{http://www.opengroup.org/susv3xsh/iconv_open.html}
 
-Gnulib module: iconv and iconv_open
+Gnulib module: iconv, iconv_open, iconv_open-utf
 
 Portability problems fixed by either Gnulib module @code{iconv} or @code{iconv_open}:
 @itemize
@@ -23,6 +23,14 @@
 AIX 5.1, HP-UX 11, IRIX 6.5, OSF/1 5.1.
 @end itemize
 
+Portability problems fixed by Gnulib module @code{iconv_open-utf}:
+@itemize
+@item
+This function does not support the encodings UTF-16BE, UTF-16LE, UTF-32BE,
+UTF-32LE on many platforms:
+AIX 5.1, HP-UX 11, IRIX 6.5, OSF/1 5.1, Solaris 8.
+@end itemize
+
 Portability problems not fixed by Gnulib:
 @itemize
 @item
new file mode 100644
--- /dev/null
+++ b/lib/iconv.c
@@ -0,0 +1,450 @@
+/* Character set conversion.
+   Copyright (C) 1999-2001, 2007 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License along
+   with this program; if not, write to the Free Software Foundation,
+   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
+
+#include <config.h>
+
+/* Specification.  */
+#include <iconv.h>
+
+#include <stddef.h>
+
+#if REPLACE_ICONV_UTF
+# include <errno.h>
+# include <stdint.h>
+# include <stdlib.h>
+# include "unistr.h"
+# ifndef uintptr_t
+#  define uintptr_t unsigned long
+# endif
+#endif
+
+#if REPLACE_ICONV_UTF
+
+/* UTF-{16,32}{BE,LE} converters taken from GNU libiconv 1.11.  */
+
+/* Return code if invalid. (xxx_mbtowc) */
+# define RET_ILSEQ      -1
+/* Return code if no bytes were read. (xxx_mbtowc) */
+# define RET_TOOFEW     -2
+
+/* Return code if invalid. (xxx_wctomb) */
+# define RET_ILUNI      -1
+/* Return code if output buffer is too small. (xxx_wctomb, xxx_reset) */
+# define RET_TOOSMALL   -2
+
+/*
+ * UTF-16BE
+ */
+
+/* Specification: RFC 2781 */
+/* Decode one UTF-16BE character from the N bytes at S into *PWC.  */
+static int
+utf16be_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n)
+{
+  if (n >= 2)
+    {
+      ucs4_t wc = (s[0] << 8) + s[1];
+      if (wc >= 0xd800 && wc < 0xdc00)
+	{ /* High surrogate: a low surrogate must follow.  */
+	  if (n >= 4)
+	    {
+	      ucs4_t wc2 = (s[2] << 8) + s[3];
+	      if (!(wc2 >= 0xdc00 && wc2 < 0xe000))
+		return RET_ILSEQ;
+	      *pwc = 0x10000 + ((wc - 0xd800) << 10) + (wc2 - 0xdc00);
+	      return 4; /* consumed a surrogate pair */
+	    }
+	}
+      else if (wc >= 0xdc00 && wc < 0xe000)
+	{ /* Low surrogate without a preceding high surrogate.  */
+	  return RET_ILSEQ;
+	}
+      else
+	{
+	  *pwc = wc;
+	  return 2; /* consumed a single 16-bit unit */
+	}
+    }
+  return RET_TOOFEW; /* also reached for a high surrogate when n < 4 */
+}
+/* Encode WC as UTF-16BE into the N-byte buffer at R.  */
+static int
+utf16be_wctomb (unsigned char *r, ucs4_t wc, size_t n)
+{
+  if (!(wc >= 0xd800 && wc < 0xe000)) /* surrogate code points are rejected */
+    {
+      if (wc < 0x10000)
+	{
+	  if (n >= 2)
+	    {
+	      r[0] = (unsigned char) (wc >> 8);
+	      r[1] = (unsigned char) wc;
+	      return 2; /* bytes written */
+	    }
+	  else
+	    return RET_TOOSMALL;
+	}
+      else if (wc < 0x110000)
+	{ /* Needs a surrogate pair: high unit first, then low unit.  */
+	  if (n >= 4)
+	    {
+	      ucs4_t wc1 = 0xd800 + ((wc - 0x10000) >> 10);
+	      ucs4_t wc2 = 0xdc00 + ((wc - 0x10000) & 0x3ff);
+	      r[0] = (unsigned char) (wc1 >> 8);
+	      r[1] = (unsigned char) wc1;
+	      r[2] = (unsigned char) (wc2 >> 8);
+	      r[3] = (unsigned char) wc2;
+	      return 4; /* bytes written */
+	    }
+	  else
+	    return RET_TOOSMALL;
+	}
+    }
+  return RET_ILUNI; /* surrogate, or wc >= 0x110000 */
+}
+
+/*
+ * UTF-16LE
+ */
+
+/* Specification: RFC 2781 */
+/* Decode one UTF-16LE character from the N bytes at S into *PWC.  */
+static int
+utf16le_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n)
+{
+  if (n >= 2)
+    {
+      ucs4_t wc = s[0] + (s[1] << 8);
+      if (wc >= 0xd800 && wc < 0xdc00)
+	{ /* High surrogate: a low surrogate must follow.  */
+	  if (n >= 4)
+	    {
+	      ucs4_t wc2 = s[2] + (s[3] << 8);
+	      if (!(wc2 >= 0xdc00 && wc2 < 0xe000))
+		return RET_ILSEQ;
+	      *pwc = 0x10000 + ((wc - 0xd800) << 10) + (wc2 - 0xdc00);
+	      return 4; /* consumed a surrogate pair */
+	    }
+	}
+      else if (wc >= 0xdc00 && wc < 0xe000)
+	{ /* Low surrogate without a preceding high surrogate.  */
+	  return RET_ILSEQ;
+	}
+      else
+	{
+	  *pwc = wc;
+	  return 2; /* consumed a single 16-bit unit */
+	}
+    }
+  return RET_TOOFEW; /* also reached for a high surrogate when n < 4 */
+}
+/* Encode WC as UTF-16LE into the N-byte buffer at R.  */
+static int
+utf16le_wctomb (unsigned char *r, ucs4_t wc, size_t n)
+{
+  if (!(wc >= 0xd800 && wc < 0xe000)) /* surrogate code points are rejected */
+    {
+      if (wc < 0x10000)
+	{
+	  if (n >= 2)
+	    {
+	      r[0] = (unsigned char) wc;
+	      r[1] = (unsigned char) (wc >> 8);
+	      return 2; /* bytes written */
+	    }
+	  else
+	    return RET_TOOSMALL;
+	}
+      else if (wc < 0x110000)
+	{ /* Needs a surrogate pair: high unit first, then low unit.  */
+	  if (n >= 4)
+	    {
+	      ucs4_t wc1 = 0xd800 + ((wc - 0x10000) >> 10);
+	      ucs4_t wc2 = 0xdc00 + ((wc - 0x10000) & 0x3ff);
+	      r[0] = (unsigned char) wc1;
+	      r[1] = (unsigned char) (wc1 >> 8);
+	      r[2] = (unsigned char) wc2;
+	      r[3] = (unsigned char) (wc2 >> 8);
+	      return 4; /* bytes written */
+	    }
+	  else
+	    return RET_TOOSMALL;
+	}
+    }
+  return RET_ILUNI; /* surrogate, or wc >= 0x110000 */
+}
+
+/*
+ * UTF-32BE
+ */
+
+/* Specification: Unicode 3.1 Standard Annex #19 */
+/* Decode one UTF-32BE character from the N bytes at S into *PWC.  */
+static int
+utf32be_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n)
+{
+  if (n >= 4)
+    { /* Cast first: (int) s[0] << 24 overflows for bytes >= 0x80.  */
+      ucs4_t wc = ((ucs4_t) s[0] << 24) + (s[1] << 16) + (s[2] << 8) + s[3];
+      if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000))
+	{
+	  *pwc = wc;
+	  return 4; /* bytes consumed */
+	}
+      else
+	return RET_ILSEQ; /* beyond U+10FFFF, or a surrogate */
+    }
+  return RET_TOOFEW; /* fewer than 4 bytes available */
+}
+/* Encode WC as UTF-32BE into the N-byte buffer at R.  */
+static int
+utf32be_wctomb (unsigned char *r, ucs4_t wc, size_t n)
+{
+  if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000))
+    {
+      if (n >= 4)
+	{
+	  r[0] = 0; /* wc < 0x110000, so the top byte is always zero */
+	  r[1] = (unsigned char) (wc >> 16);
+	  r[2] = (unsigned char) (wc >> 8);
+	  r[3] = (unsigned char) wc;
+	  return 4; /* bytes written */
+	}
+      else
+	return RET_TOOSMALL;
+    }
+  return RET_ILUNI; /* surrogate, or wc >= 0x110000 */
+}
+
+/*
+ * UTF-32LE
+ */
+
+/* Specification: Unicode 3.1 Standard Annex #19 */
+/* Decode one UTF-32LE character from the N bytes at S into *PWC.  */
+static int
+utf32le_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n)
+{
+  if (n >= 4)
+    { /* Cast first: (int) s[3] << 24 overflows for bytes >= 0x80.  */
+      ucs4_t wc = s[0] + (s[1] << 8) + (s[2] << 16) + ((ucs4_t) s[3] << 24);
+      if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000))
+	{
+	  *pwc = wc;
+	  return 4; /* bytes consumed */
+	}
+      else
+	return RET_ILSEQ; /* beyond U+10FFFF, or a surrogate */
+    }
+  return RET_TOOFEW; /* fewer than 4 bytes available */
+}
+/* Encode WC as UTF-32LE into the N-byte buffer at R.  */
+static int
+utf32le_wctomb (unsigned char *r, ucs4_t wc, size_t n)
+{
+  if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000))
+    {
+      if (n >= 4)
+	{
+	  r[0] = (unsigned char) wc;
+	  r[1] = (unsigned char) (wc >> 8);
+	  r[2] = (unsigned char) (wc >> 16);
+	  r[3] = 0; /* wc < 0x110000, so the top byte is always zero */
+	  return 4; /* bytes written */
+        }
+      else
+	return RET_TOOSMALL;
+    }
+  return RET_ILUNI; /* surrogate, or wc >= 0x110000 */
+}
+
+#endif
+/* Convert via CD; the _ICONV_* pseudo-descriptors are handled right here.  */
+size_t
+iconv (iconv_t cd,
+       ICONV_CONST char **inbuf, size_t *inbytesleft,
+       char **outbuf, size_t *outbytesleft)
+#undef iconv /* drop the rpl_iconv alias so the final call reaches the real iconv */
+{
+#if REPLACE_ICONV_UTF
+  switch ((uintptr_t) cd) /* pseudo-descriptors are small negative constants */
+    {
+      {
+	int (*xxx_wctomb) (unsigned char *, ucs4_t, size_t);
+	/* Pick the encoder for the target encoding, then share one loop.  */
+	case (uintptr_t) _ICONV_UTF8_UTF16BE:
+	  xxx_wctomb = utf16be_wctomb;
+	  goto loop_from_utf8;
+	case (uintptr_t) _ICONV_UTF8_UTF16LE:
+	  xxx_wctomb = utf16le_wctomb;
+	  goto loop_from_utf8;
+	case (uintptr_t) _ICONV_UTF8_UTF32BE:
+	  xxx_wctomb = utf32be_wctomb;
+	  goto loop_from_utf8;
+	case (uintptr_t) _ICONV_UTF8_UTF32LE:
+	  xxx_wctomb = utf32le_wctomb;
+	  goto loop_from_utf8;
+	/* UTF-8 -> UTF-{16,32}: decode with u8_mbtoucr, encode with xxx_wctomb.  */
+       loop_from_utf8:
+	if (inbuf == NULL || *inbuf == NULL)
+	  return 0; /* nothing to convert, and no state is kept to reset */
+	{
+	  ICONV_CONST char *inptr = *inbuf;
+	  size_t inleft = *inbytesleft;
+	  char *outptr = *outbuf;
+	  size_t outleft = *outbytesleft;
+	  size_t res = 0;
+	  while (inleft > 0)
+	    {
+	      ucs4_t uc;
+	      int m = u8_mbtoucr (&uc, (const uint8_t *) inptr, inleft);
+	      if (m <= 0)
+		{
+		  if (m == -1)
+		    {
+		      errno = EILSEQ; /* invalid UTF-8 sequence */
+		      res = (size_t)(-1);
+		      break;
+		    }
+		  if (m == -2)
+		    {
+		      errno = EINVAL; /* input ends in mid-character */
+		      res = (size_t)(-1);
+		      break;
+		    }
+		  abort (); /* any other non-positive result is unexpected */
+		}
+	      else
+		{
+		  int n = xxx_wctomb ((uint8_t *) outptr, uc, outleft);
+		  if (n < 0)
+		    {
+		      if (n == RET_ILUNI)
+			{
+			  errno = EILSEQ; /* not encodable in the target */
+			  res = (size_t)(-1);
+			  break;
+			}
+		      if (n == RET_TOOSMALL)
+			{
+			  errno = E2BIG; /* output buffer is full */
+			  res = (size_t)(-1);
+			  break;
+			}
+		      abort (); /* any other negative result is unexpected */
+		    }
+		  else
+		    { /* Commit only after both decode and encode succeeded.  */
+		      inptr += m;
+		      inleft -= m;
+		      outptr += n;
+		      outleft -= n;
+		    }
+		}
+	    }
+	  *inbuf = inptr; /* report progress to the caller, also on error */
+	  *inbytesleft = inleft;
+	  *outbuf = outptr;
+	  *outbytesleft = outleft;
+	  return res;
+	}
+      }
+
+      {
+	int (*xxx_mbtowc) (ucs4_t *, const unsigned char *, size_t);
+	/* Pick the decoder for the source encoding, then share one loop.  */
+	case (uintptr_t) _ICONV_UTF16BE_UTF8:
+	  xxx_mbtowc = utf16be_mbtowc;
+	  goto loop_to_utf8;
+	case (uintptr_t) _ICONV_UTF16LE_UTF8:
+	  xxx_mbtowc = utf16le_mbtowc;
+	  goto loop_to_utf8;
+	case (uintptr_t) _ICONV_UTF32BE_UTF8:
+	  xxx_mbtowc = utf32be_mbtowc;
+	  goto loop_to_utf8;
+	case (uintptr_t) _ICONV_UTF32LE_UTF8:
+	  xxx_mbtowc = utf32le_mbtowc;
+	  goto loop_to_utf8;
+	/* UTF-{16,32} -> UTF-8: decode with xxx_mbtowc, encode with u8_uctomb.  */
+       loop_to_utf8:
+	if (inbuf == NULL || *inbuf == NULL)
+	  return 0; /* nothing to convert, and no state is kept to reset */
+	{
+	  ICONV_CONST char *inptr = *inbuf;
+	  size_t inleft = *inbytesleft;
+	  char *outptr = *outbuf;
+	  size_t outleft = *outbytesleft;
+	  size_t res = 0;
+	  while (inleft > 0)
+	    {
+	      ucs4_t uc;
+	      int m = xxx_mbtowc (&uc, (const uint8_t *) inptr, inleft);
+	      if (m <= 0)
+		{
+		  if (m == RET_ILSEQ)
+		    {
+		      errno = EILSEQ; /* invalid input sequence */
+		      res = (size_t)(-1);
+		      break;
+		    }
+		  if (m == RET_TOOFEW)
+		    {
+		      errno = EINVAL; /* input ends in mid-character */
+		      res = (size_t)(-1);
+		      break;
+		    }
+		  abort (); /* any other non-positive result is unexpected */
+		}
+	      else
+		{
+		  int n = u8_uctomb ((uint8_t *) outptr, uc, outleft);
+		  if (n < 0)
+		    {
+		      if (n == -1)
+			{
+			  errno = EILSEQ; /* u8_uctomb rejected the character */
+			  res = (size_t)(-1);
+			  break;
+			}
+		      if (n == -2)
+			{
+			  errno = E2BIG; /* output buffer is full */
+			  res = (size_t)(-1);
+			  break;
+			}
+		      abort (); /* any other negative result is unexpected */
+		    }
+		  else
+		    { /* Commit only after both decode and encode succeeded.  */
+		      inptr += m;
+		      inleft -= m;
+		      outptr += n;
+		      outleft -= n;
+		    }
+		}
+	    }
+	  *inbuf = inptr; /* report progress to the caller, also on error */
+	  *inbytesleft = inleft;
+	  *outbuf = outptr;
+	  *outbytesleft = outleft;
+	  return res;
+	}
+      }
+    }
+#endif
+  return iconv (cd, inbuf, inbytesleft, outbuf, outbytesleft); /* system iconv */
+}
--- a/lib/iconv.in.h
+++ b/lib/iconv.in.h
@@ -36,6 +36,28 @@
 extern iconv_t iconv_open (const char *tocode, const char *fromcode);
 #endif
 
+#if @REPLACE_ICONV_UTF@
+/* Special constants for supporting UTF-{16,32}{BE,LE} encodings.
+   Not public.  */
+# define _ICONV_UTF8_UTF16BE (iconv_t)(-161)
+# define _ICONV_UTF8_UTF16LE (iconv_t)(-162)
+# define _ICONV_UTF8_UTF32BE (iconv_t)(-163)
+# define _ICONV_UTF8_UTF32LE (iconv_t)(-164)
+# define _ICONV_UTF16BE_UTF8 (iconv_t)(-165)
+# define _ICONV_UTF16LE_UTF8 (iconv_t)(-166)
+# define _ICONV_UTF32BE_UTF8 (iconv_t)(-167)
+# define _ICONV_UTF32LE_UTF8 (iconv_t)(-168)
+#endif
+
+#if @REPLACE_ICONV@
+# define iconv rpl_iconv
+extern size_t iconv (iconv_t cd,
+		     @ICONV_CONST@ char **inbuf, size_t *inbytesleft,
+		     char **outbuf, size_t *outbytesleft);
+# define iconv_close rpl_iconv_close
+extern int iconv_close (iconv_t cd);
+#endif
+
 
 #ifdef __cplusplus
 }
new file mode 100644
--- /dev/null
+++ b/lib/iconv_close.c
@@ -0,0 +1,47 @@
+/* Character set conversion.
+   Copyright (C) 2007 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License along
+   with this program; if not, write to the Free Software Foundation,
+   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
+
+#include <config.h>
+
+/* Specification.  */
+#include <iconv.h>
+
+#include <stdint.h>
+#ifndef uintptr_t
+# define uintptr_t unsigned long
+#endif
+/* Close CD; the _ICONV_* pseudo-descriptors own no resources.  */
+int
+iconv_close (iconv_t cd)
+#undef iconv_close /* drop the rpl_iconv_close alias for the call below */
+{
+#if REPLACE_ICONV_UTF
+  switch ((uintptr_t) cd)
+    {
+    case (uintptr_t) _ICONV_UTF8_UTF16BE:
+    case (uintptr_t) _ICONV_UTF8_UTF16LE:
+    case (uintptr_t) _ICONV_UTF8_UTF32BE:
+    case (uintptr_t) _ICONV_UTF8_UTF32LE:
+    case (uintptr_t) _ICONV_UTF16BE_UTF8:
+    case (uintptr_t) _ICONV_UTF16LE_UTF8:
+    case (uintptr_t) _ICONV_UTF32BE_UTF8:
+    case (uintptr_t) _ICONV_UTF32LE_UTF8:
+      return 0; /* constants, not handles from the system: nothing to release */
+    }
+#endif
+  return iconv_close (cd); /* any other descriptor goes to the system function */
+}
--- a/lib/iconv_open.c
+++ b/lib/iconv_open.c
@@ -23,20 +23,23 @@
 #include <errno.h>
 #include <string.h>
 #include "c-ctype.h"
+#include "c-strcase.h"
 
 #define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
 
 /* Namespace cleanliness.  */
 #define mapping_lookup rpl_iconv_open_mapping_lookup
 
-/* The macro ICONV_FLAVOR is defined to one of these.  */
+/* The macro ICONV_FLAVOR is defined to one of these or undefined.  */
 
 #define ICONV_FLAVOR_AIX "iconv_open-aix.h"
 #define ICONV_FLAVOR_HPUX "iconv_open-hpux.h"
 #define ICONV_FLAVOR_IRIX "iconv_open-irix.h"
 #define ICONV_FLAVOR_OSF "iconv_open-osf.h"
 
-#include ICONV_FLAVOR
+#ifdef ICONV_FLAVOR
+# include ICONV_FLAVOR
+#endif
 
 iconv_t
 rpl_iconv_open (const char *tocode, const char *fromcode)
@@ -47,6 +50,59 @@
   char *fromcode_upper_end;
   char *tocode_upper_end;
 
+#if REPLACE_ICONV_UTF
+  /* Special handling of conversion between UTF-8 and UTF-{16,32}{BE,LE}.
+     Do this here, before calling the real iconv_open(), because OSF/1 5.1
+     iconv() to these encodings inserts a BOM, which is wrong.
+     We do not need to handle conversion between arbitrary encodings and
+     UTF-{16,32}{BE,LE}, because the 'striconveh' module implements two-step
+     conversion through UTF-8.
+     The _ICONV_* constants are chosen to be disjoint from any iconv_t
+     returned by the system's iconv_open() functions.  Recall that iconv_t
+     is a scalar type.  */
+  if (c_toupper (fromcode[0]) == 'U'
+      && c_toupper (fromcode[1]) == 'T'
+      && c_toupper (fromcode[2]) == 'F'
+      && fromcode[3] == '-')
+    {
+      if (c_toupper (tocode[0]) == 'U'
+	  && c_toupper (tocode[1]) == 'T'
+	  && c_toupper (tocode[2]) == 'F'
+	  && tocode[3] == '-')
+	{
+	  if (strcmp (fromcode + 4, "8") == 0)
+	    {
+	      if (c_strcasecmp (tocode + 4, "16BE") == 0)
+		return _ICONV_UTF8_UTF16BE;
+	      if (c_strcasecmp (tocode + 4, "16LE") == 0)
+		return _ICONV_UTF8_UTF16LE;
+	      if (c_strcasecmp (tocode + 4, "32BE") == 0)
+		return _ICONV_UTF8_UTF32BE;
+	      if (c_strcasecmp (tocode + 4, "32LE") == 0)
+		return _ICONV_UTF8_UTF32LE;
+	    }
+	  else if (strcmp (tocode + 4, "8") == 0)
+	    {
+	      if (c_strcasecmp (fromcode + 4, "16BE") == 0)
+		return _ICONV_UTF16BE_UTF8;
+	      if (c_strcasecmp (fromcode + 4, "16LE") == 0)
+		return _ICONV_UTF16LE_UTF8;
+	      if (c_strcasecmp (fromcode + 4, "32BE") == 0)
+		return _ICONV_UTF32BE_UTF8;
+	      if (c_strcasecmp (fromcode + 4, "32LE") == 0)
+		return _ICONV_UTF32LE_UTF8;
+	    }
+	}
+    }
+#endif
+
+  /* Do *not* add special support for 8-bit encodings like ASCII or ISO-8859-1
+     here.  This would lead to programs that work in some locales (such as the
+     "C" or "en_US" locales) but do not work in East Asian locales.  It is
+     better if programmers make their programs depend on GNU libiconv (except
+     on glibc systems), e.g. by using the AM_ICONV macro and documenting the
+     dependency in an INSTALL or DEPENDENCIES file.  */
+
   /* Try with the original names first.
      This covers the case when fromcode or tocode is a lowercase encoding name
      that is understood by the system's iconv_open but not listed in our
@@ -93,6 +149,7 @@
     tocode_upper_end = q;
   }
 
+#ifdef ICONV_FLAVOR
   /* Apply the mappings.  */
   {
     const struct mapping *m =
@@ -106,6 +163,10 @@
 
     tocode = (m != NULL ? m->vendor_name : tocode_upper);
   }
+#else
+  fromcode = fromcode_upper;
+  tocode = tocode_upper;
+#endif
 
   return iconv_open (tocode, fromcode);
 }
--- a/m4/iconv_h.m4
+++ b/m4/iconv_h.m4
@@ -1,4 +1,4 @@
-# iconv_h.m4 serial 2
+# iconv_h.m4 serial 3
 dnl Copyright (C) 2007 Free Software Foundation, Inc.
 dnl This file is free software; the Free Software Foundation
 dnl gives unlimited permission to copy and/or distribute it,
@@ -22,5 +22,7 @@
 AC_DEFUN([gl_ICONV_H_DEFAULTS],
 [
   dnl Assume proper GNU behavior unless another module says otherwise.
+  REPLACE_ICONV=0;      AC_SUBST([REPLACE_ICONV]) dnl 1 = declare rpl_iconv, rpl_iconv_close
   REPLACE_ICONV_OPEN=0; AC_SUBST([REPLACE_ICONV_OPEN])
+  REPLACE_ICONV_UTF=0;  AC_SUBST([REPLACE_ICONV_UTF]) dnl 1 = define the _ICONV_* constants
 ])
--- a/m4/iconv_open.m4
+++ b/m4/iconv_open.m4
@@ -1,4 +1,4 @@
-# iconv_open.m4 serial 1
+# iconv_open.m4 serial 2
 dnl Copyright (C) 2007 Free Software Foundation, Inc.
 dnl This file is free software; the Free Software Foundation
 dnl gives unlimited permission to copy and/or distribute it,
@@ -30,11 +30,208 @@
         AC_DEFINE_UNQUOTED([ICONV_FLAVOR], [$iconv_flavor],
           [Define to a symbolic name denoting the flavor of iconv_open()
            implementation.])
-        REPLACE_ICONV_OPEN=1
-        AC_LIBOBJ([iconv_open])
-        ICONV_H='iconv.h'
+        gl_REPLACE_ICONV_OPEN
       fi
     fi
   fi
 ])
 
+AC_DEFUN([gl_REPLACE_ICONV_OPEN],
+[dnl Common steps for switching on the iconv_open replacement.
+  REPLACE_ICONV_OPEN=1
+  AC_LIBOBJ([iconv_open])
+  ICONV_H='iconv.h'
+])
+dnl gl_FUNC_ICONV_OPEN_UTF: test iconv for UTF-8 <-> UTF-{16,32}{BE,LE}; if it fails, enable the replacements.
+AC_DEFUN([gl_FUNC_ICONV_OPEN_UTF],
+[
+  AC_REQUIRE([gl_FUNC_ICONV_OPEN])
+  AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles
+  AC_REQUIRE([gl_ICONV_H_DEFAULTS])
+  if test "$am_cv_func_iconv" = yes; then
+    if test -n "$am_cv_proto_iconv_arg1"; then
+      ICONV_CONST="const"
+    else
+      ICONV_CONST=
+    fi
+    AC_SUBST([ICONV_CONST])
+    AC_CACHE_CHECK([whether iconv supports conversion between UTF-8 and UTF-{16,32}{BE,LE}],
+      [gl_func_iconv_supports_utf],
+      [
+        save_LIBS="$LIBS"
+        LIBS="$LIBS $LIBICONV"
+        AC_TRY_RUN([
+#include <iconv.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#define ASSERT(expr) if (!(expr)) return 1; /* exit status 1 means unsupported */
+int main ()
+{
+  /* Test conversion from UTF-8 to UTF-16BE with no errors.  */
+  {
+    static const char input[] =
+      "Japanese (\346\227\245\346\234\254\350\252\236) [\360\235\224\215\360\235\224\236\360\235\224\255]";
+    static const char expected[] =
+      "\000J\000a\000p\000a\000n\000e\000s\000e\000 \000(\145\345\147\054\212\236\000)\000 \000[\330\065\335\015\330\065\335\036\330\065\335\055\000]";
+    iconv_t cd;
+    char buf[100];
+    const char *inptr;
+    size_t inbytesleft;
+    char *outptr;
+    size_t outbytesleft;
+    size_t res;
+    cd = iconv_open ("UTF-16BE", "UTF-8");
+    ASSERT (cd != (iconv_t)(-1));
+    inptr = input;
+    inbytesleft = sizeof (input) - 1;
+    outptr = buf;
+    outbytesleft = sizeof (buf);
+    res = iconv (cd,
+		 (ICONV_CONST char **) &inptr, &inbytesleft,
+		 &outptr, &outbytesleft);
+    ASSERT (res == 0 && inbytesleft == 0);
+    ASSERT (outptr == buf + (sizeof (expected) - 1));
+    ASSERT (memcmp (buf, expected, sizeof (expected) - 1) == 0);
+    ASSERT (iconv_close (cd) == 0);
+  }
+  /* Test conversion from UTF-8 to UTF-16LE with no errors.  */
+  {
+    static const char input[] =
+      "Japanese (\346\227\245\346\234\254\350\252\236) [\360\235\224\215\360\235\224\236\360\235\224\255]";
+    static const char expected[] =
+      "J\000a\000p\000a\000n\000e\000s\000e\000 \000(\000\345\145\054\147\236\212)\000 \000[\000\065\330\015\335\065\330\036\335\065\330\055\335]\000";
+    iconv_t cd;
+    char buf[100];
+    const char *inptr;
+    size_t inbytesleft;
+    char *outptr;
+    size_t outbytesleft;
+    size_t res;
+    cd = iconv_open ("UTF-16LE", "UTF-8");
+    ASSERT (cd != (iconv_t)(-1));
+    inptr = input;
+    inbytesleft = sizeof (input) - 1;
+    outptr = buf;
+    outbytesleft = sizeof (buf);
+    res = iconv (cd,
+		 (ICONV_CONST char **) &inptr, &inbytesleft,
+		 &outptr, &outbytesleft);
+    ASSERT (res == 0 && inbytesleft == 0);
+    ASSERT (outptr == buf + (sizeof (expected) - 1));
+    ASSERT (memcmp (buf, expected, sizeof (expected) - 1) == 0);
+    ASSERT (iconv_close (cd) == 0);
+  }
+  /* Test conversion from UTF-8 to UTF-32BE with no errors.  */
+  {
+    static const char input[] =
+      "Japanese (\346\227\245\346\234\254\350\252\236) [\360\235\224\215\360\235\224\236\360\235\224\255]";
+    static const char expected[] =
+      "\000\000\000J\000\000\000a\000\000\000p\000\000\000a\000\000\000n\000\000\000e\000\000\000s\000\000\000e\000\000\000 \000\000\000(\000\000\145\345\000\000\147\054\000\000\212\236\000\000\000)\000\000\000 \000\000\000[\000\001\325\015\000\001\325\036\000\001\325\055\000\000\000]";
+    iconv_t cd;
+    char buf[100];
+    const char *inptr;
+    size_t inbytesleft;
+    char *outptr;
+    size_t outbytesleft;
+    size_t res;
+    cd = iconv_open ("UTF-32BE", "UTF-8");
+    ASSERT (cd != (iconv_t)(-1));
+    inptr = input;
+    inbytesleft = sizeof (input) - 1;
+    outptr = buf;
+    outbytesleft = sizeof (buf);
+    res = iconv (cd,
+		 (ICONV_CONST char **) &inptr, &inbytesleft,
+		 &outptr, &outbytesleft);
+    ASSERT (res == 0 && inbytesleft == 0);
+    ASSERT (outptr == buf + (sizeof (expected) - 1));
+    ASSERT (memcmp (buf, expected, sizeof (expected) - 1) == 0);
+    ASSERT (iconv_close (cd) == 0);
+  }
+  /* Test conversion from UTF-8 to UTF-32LE with no errors.  */
+  {
+    static const char input[] =
+      "Japanese (\346\227\245\346\234\254\350\252\236) [\360\235\224\215\360\235\224\236\360\235\224\255]";
+    static const char expected[] =
+      "J\000\000\000a\000\000\000p\000\000\000a\000\000\000n\000\000\000e\000\000\000s\000\000\000e\000\000\000 \000\000\000(\000\000\000\345\145\000\000\054\147\000\000\236\212\000\000)\000\000\000 \000\000\000[\000\000\000\015\325\001\000\036\325\001\000\055\325\001\000]\000\000\000";
+    iconv_t cd;
+    char buf[100];
+    const char *inptr;
+    size_t inbytesleft;
+    char *outptr;
+    size_t outbytesleft;
+    size_t res;
+    cd = iconv_open ("UTF-32LE", "UTF-8");
+    ASSERT (cd != (iconv_t)(-1));
+    inptr = input;
+    inbytesleft = sizeof (input) - 1;
+    outptr = buf;
+    outbytesleft = sizeof (buf);
+    res = iconv (cd,
+		 (ICONV_CONST char **) &inptr, &inbytesleft,
+		 &outptr, &outbytesleft);
+    ASSERT (res == 0 && inbytesleft == 0);
+    ASSERT (outptr == buf + (sizeof (expected) - 1));
+    ASSERT (memcmp (buf, expected, sizeof (expected) - 1) == 0);
+    ASSERT (iconv_close (cd) == 0);
+  }
+  /* Test conversion from UTF-16BE to UTF-8 with no errors.
+     This test fails on NetBSD 3.0.  */
+  {
+    static const char input[] =
+      "\000J\000a\000p\000a\000n\000e\000s\000e\000 \000(\145\345\147\054\212\236\000)\000 \000[\330\065\335\015\330\065\335\036\330\065\335\055\000]";
+    static const char expected[] =
+      "Japanese (\346\227\245\346\234\254\350\252\236) [\360\235\224\215\360\235\224\236\360\235\224\255]";
+    iconv_t cd;
+    char buf[100];
+    const char *inptr;
+    size_t inbytesleft;
+    char *outptr;
+    size_t outbytesleft;
+    size_t res;
+    cd = iconv_open ("UTF-8", "UTF-16BE");
+    ASSERT (cd != (iconv_t)(-1));
+    inptr = input;
+    inbytesleft = sizeof (input) - 1;
+    outptr = buf;
+    outbytesleft = sizeof (buf);
+    res = iconv (cd,
+		 (ICONV_CONST char **) &inptr, &inbytesleft,
+		 &outptr, &outbytesleft);
+    ASSERT (res == 0 && inbytesleft == 0);
+    ASSERT (outptr == buf + (sizeof (expected) - 1));
+    ASSERT (memcmp (buf, expected, sizeof (expected) - 1) == 0);
+    ASSERT (iconv_close (cd) == 0);
+  }
+  return 0; /* all five conversions produced the expected bytes */
+}], [gl_func_iconv_supports_utf=yes], [gl_func_iconv_supports_utf=no],
+          [
+           dnl We know that GNU libiconv, GNU libc, and Solaris >= 9 do.
+           dnl OSF/1 5.1 has these encodings, but inserts a BOM in the "to"
+           dnl direction.
+           gl_func_iconv_supports_utf=no
+           if test $gl_func_iconv_gnu = yes; then
+             gl_func_iconv_supports_utf=yes
+           else
+changequote(,)dnl
+             case "$host_os" in
+               solaris2.9 | solaris2.1[0-9]) gl_func_iconv_supports_utf=yes ;;
+             esac
+changequote([,])dnl
+           fi
+          ])
+        LIBS="$save_LIBS"
+      ])
+    if test $gl_func_iconv_supports_utf = no; then
+      REPLACE_ICONV_UTF=1
+      AC_DEFINE([REPLACE_ICONV_UTF], 1,
+        [Define if the iconv() functions are enhanced to handle the UTF-{16,32}{BE,LE} encodings.])
+      REPLACE_ICONV=1 # iconv and iconv_close are wrapped as well
+      gl_REPLACE_ICONV_OPEN
+      AC_LIBOBJ([iconv])
+      AC_LIBOBJ([iconv_close])
+    fi
+  fi
+])
--- a/modules/iconv_open
+++ b/modules/iconv_open
@@ -15,6 +15,7 @@
 include_next
 iconv
 c-ctype
+c-strcase
 
 configure.ac:
 gl_ICONV_H
@@ -30,7 +31,10 @@
 	{ echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */' && \
 	  sed -e 's/@''INCLUDE_NEXT''@/$(INCLUDE_NEXT)/g' \
 	      -e 's|@''NEXT_ICONV_H''@|$(NEXT_ICONV_H)|g' \
+	      -e 's|@''ICONV_CONST''@|$(ICONV_CONST)|g' \
+	      -e 's|@''REPLACE_ICONV''@|$(REPLACE_ICONV)|g' \
 	      -e 's|@''REPLACE_ICONV_OPEN''@|$(REPLACE_ICONV_OPEN)|g' \
+	      -e 's|@''REPLACE_ICONV_UTF''@|$(REPLACE_ICONV_UTF)|g' \
 	      < $(srcdir)/iconv.in.h; \
 	} > $@-t
 	mv $@-t $@
new file mode 100644
--- /dev/null
+++ b/modules/iconv_open-utf
@@ -0,0 +1,29 @@
+Description:
+Character set conversion support for UTF-{16,32}{BE,LE} encodings.
+
+Files:
+lib/iconv.c
+lib/iconv_close.c
+m4/iconv_open.m4
+
+Depends-on:
+iconv_open
+stdint
+unistr/u8-mbtoucr
+unistr/u8-uctomb
+
+configure.ac:
+gl_FUNC_ICONV_OPEN_UTF
+
+Makefile.am:
+
+Include:
+
+Link:
+
+License:
+LGPL
+
+Maintainer:
+Bruno Haible
+