changeset 8605:414a0a3d5fac

Do an indirect conversion if iconv_open does not support a direct conversion.
author Bruno Haible <bruno@clisp.org>
date Sat, 31 Mar 2007 21:01:29 +0000
parents c92d7e507345
children a83c37b17639
files lib/striconveh.c lib/striconveh.h
diffstat 2 files changed, 94 insertions(+), 38 deletions(-) [+]
line wrap: on
line diff
--- a/lib/striconveh.c
+++ b/lib/striconveh.c
@@ -283,6 +283,10 @@
     }
   result = initial_result;
 
+  /* Test whether a direct conversion is possible at all.  */
+  if (cd == (iconv_t)(-1))
+    goto indirectly;
+
   if (offsets != NULL)
     {
       size_t i;
@@ -481,8 +485,7 @@
   goto done;
 
  indirectly:
-  /* The direct conversion failed, handler != iconveh_error,
-     and cd2 != (iconv_t)(-1).
+  /* The direct conversion failed or is not available (cd == (iconv_t)(-1)).
      Use a conversion through UTF-8.  */
   if (offsets != NULL)
     {
@@ -495,6 +498,7 @@
     }
   length = 0;
   {
+    const bool slowly = (offsets != NULL || handler == iconveh_error);
 # define utf8bufsize 4096 /* may also be smaller or larger than tmpbufsize */
     char utf8buf[utf8bufsize + 1];
     size_t utf8len = 0;
@@ -509,7 +513,8 @@
     /* Set to the initial state.  */
     if (cd1 != (iconv_t)(-1))
       iconv (cd1, NULL, NULL, NULL, NULL);
-    iconv (cd2, NULL, NULL, NULL, NULL);
+    if (cd2 != (iconv_t)(-1))
+      iconv (cd2, NULL, NULL, NULL, NULL);
 # endif
 
     while (in1size > 0 || do_final_flush1 || utf8len > 0 || do_final_flush2)
@@ -531,7 +536,7 @@
 	      }
 	    if (cd1 != (iconv_t)(-1))
 	      {
-		if (offsets != NULL)
+		if (slowly)
 		  res1 = iconv_carefully_1 (cd1,
 					    &in1ptr, &in1size,
 					    &out1ptr, &out1size,
@@ -545,7 +550,7 @@
 	    else
 	      {
 		/* FROM_CODESET is UTF-8.  */
-		res1 = utf8conv_carefully (offsets != NULL,
+		res1 = utf8conv_carefully (slowly,
 					   &in1ptr, &in1size,
 					   &out1ptr, &out1size,
 					   &incremented1);
@@ -618,10 +623,19 @@
 		bool grow;
 
 		if (in2size > 0)
-		  res2 = iconv_carefully (cd2,
-					  &in2ptr, &in2size,
-					  &out2ptr, &out2size,
-					  &incremented2);
+		  {
+		    if (cd2 != (iconv_t)(-1))
+		      res2 = iconv_carefully (cd2,
+					      &in2ptr, &in2size,
+					      &out2ptr, &out2size,
+					      &incremented2);
+		    else
+		      /* TO_CODESET is UTF-8.  */
+		      res2 = utf8conv_carefully (false,
+						 &in2ptr, &in2size,
+						 &out2ptr, &out2size,
+						 &incremented2);
+		  }
 		else /* in1size == 0 && !do_final_flush1
 			&& in2size == 0 && do_final_flush2 */
 		  {
@@ -629,10 +643,11 @@
 		       state.  But avoid glibc-2.1 bug and Solaris 2.7 bug.  */
 # if defined _LIBICONV_VERSION \
      || !((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) || defined __sun)
-		    res2 = iconv (cd2, NULL, NULL, &out2ptr, &out2size);
-# else
-		    res2 = 0;
+		    if (cd2 != (iconv_t)(-1))
+		      res2 = iconv (cd2, NULL, NULL, &out2ptr, &out2size);
+		    else
 # endif
+		      res2 = 0;
 		    do_final_flush2 = false;
 		    incremented2 = true;
 		  }
@@ -703,9 +718,28 @@
 
 			inptr = scratchbuf;
 			insize = scratchlen;
-			res = iconv (cd2,
-				     (ICONV_CONST char **) &inptr, &insize,
-				     &out2ptr, &out2size);
+			if (cd2 != (iconv_t)(-1))
+			  res = iconv (cd2,
+				       (ICONV_CONST char **) &inptr, &insize,
+				       &out2ptr, &out2size);
+			else
+			  {
+			    /* TO_CODESET is UTF-8.  */
+			    if (out2size >= insize)
+			      {
+				memcpy (out2ptr, inptr, insize);
+				out2ptr += insize;
+				out2size -= insize;
+				inptr += insize;
+				insize = 0;
+				res = 0;
+			      }
+			    else
+			      {
+				errno = E2BIG;
+				res = (size_t)(-1);
+			      }
+			  }
 			length = out2ptr - result;
 			if (res == (size_t)(-1) && errno == E2BIG)
 			  {
@@ -732,9 +766,23 @@
 
 			    out2ptr = result + length;
 			    out2size = allocated - extra_alloc - length;
-			    res = iconv (cd2,
-					 (ICONV_CONST char **) &inptr, &insize,
-					 &out2ptr, &out2size);
+			    if (cd2 != (iconv_t)(-1))
+			      res = iconv (cd2,
+					   (ICONV_CONST char **) &inptr,
+					   &insize,
+					   &out2ptr, &out2size);
+			    else
+			      {
+				/* TO_CODESET is UTF-8.  */
+				if (!(out2size >= insize))
+				  abort ();
+				memcpy (out2ptr, inptr, insize);
+				out2ptr += insize;
+				out2size -= insize;
+				inptr += insize;
+				insize = 0;
+				res = 0;
+			      }
 			    length = out2ptr - result;
 			  }
 # if !defined _LIBICONV_VERSION && !defined __GLIBC__
@@ -952,8 +1000,6 @@
 # endif
 
       cd = iconv_open (to_codeset, from_codeset);
-      if (cd == (iconv_t)(-1))
-	return -1;
 
       if (STRCASEEQ (from_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0))
 	cd1 = (iconv_t)(-1);
@@ -963,7 +1009,8 @@
 	  if (cd1 == (iconv_t)(-1))
 	    {
 	      int saved_errno = errno;
-	      iconv_close (cd);
+	      if (cd != (iconv_t)(-1))
+		iconv_close (cd);
 	      errno = saved_errno;
 	      return -1;
 	    }
@@ -979,7 +1026,8 @@
 	      int saved_errno = errno;
 	      if (cd1 != (iconv_t)(-1))
 		iconv_close (cd1);
-	      iconv_close (cd);
+	      if (cd != (iconv_t)(-1))
+		iconv_close (cd);
 	      errno = saved_errno;
 	      return -1;
 	    }
@@ -998,7 +1046,8 @@
 	    iconv_close (cd2);
 	  if (cd1 != (iconv_t)(-1))
 	    iconv_close (cd1);
-	  iconv_close (cd);
+	  if (cd != (iconv_t)(-1))
+	    iconv_close (cd);
 	  errno = saved_errno;
 	}
       else
@@ -1010,7 +1059,8 @@
 	      int saved_errno = errno;
 	      if (cd1 != (iconv_t)(-1))
 		iconv_close (cd1);
-	      iconv_close (cd);
+	      if (cd != (iconv_t)(-1))
+		iconv_close (cd);
 	      if (result != *resultp && result != NULL)
 		free (result);
 	      errno = saved_errno;
@@ -1021,13 +1071,14 @@
 	      /* Return -1, but free the allocated memory, and while doing
 		 that, preserve the errno from iconv_close.  */
 	      int saved_errno = errno;
-	      iconv_close (cd);
+	      if (cd != (iconv_t)(-1))
+		iconv_close (cd);
 	      if (result != *resultp && result != NULL)
 		free (result);
 	      errno = saved_errno;
 	      return -1;
 	    }
-	  if (iconv_close (cd) < 0)
+	  if (cd != (iconv_t)(-1) && iconv_close (cd) < 0)
 	    {
 	      /* Return -1, but free the allocated memory, and while doing
 		 that, preserve the errno from iconv_close.  */
@@ -1085,8 +1136,6 @@
 # endif
 
       cd = iconv_open (to_codeset, from_codeset);
-      if (cd == (iconv_t)(-1))
-	return NULL;
 
       if (STRCASEEQ (from_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0))
 	cd1 = (iconv_t)(-1);
@@ -1096,7 +1145,8 @@
 	  if (cd1 == (iconv_t)(-1))
 	    {
 	      int saved_errno = errno;
-	      iconv_close (cd);
+	      if (cd != (iconv_t)(-1))
+		iconv_close (cd);
 	      errno = saved_errno;
 	      return NULL;
 	    }
@@ -1112,7 +1162,8 @@
 	      int saved_errno = errno;
 	      if (cd1 != (iconv_t)(-1))
 		iconv_close (cd1);
-	      iconv_close (cd);
+	      if (cd != (iconv_t)(-1))
+		iconv_close (cd);
 	      errno = saved_errno;
 	      return NULL;
 	    }
@@ -1128,7 +1179,8 @@
 	    iconv_close (cd2);
 	  if (cd1 != (iconv_t)(-1))
 	    iconv_close (cd1);
-	  iconv_close (cd);
+	  if (cd != (iconv_t)(-1))
+	    iconv_close (cd);
 	  errno = saved_errno;
 	}
       else
@@ -1140,7 +1192,8 @@
 	      int saved_errno = errno;
 	      if (cd1 != (iconv_t)(-1))
 		iconv_close (cd1);
-	      iconv_close (cd);
+	      if (cd != (iconv_t)(-1))
+		iconv_close (cd);
 	      free (result);
 	      errno = saved_errno;
 	      return NULL;
@@ -1150,12 +1203,13 @@
 	      /* Return NULL, but free the allocated memory, and while doing
 		 that, preserve the errno from iconv_close.  */
 	      int saved_errno = errno;
-	      iconv_close (cd);
+	      if (cd != (iconv_t)(-1))
+		iconv_close (cd);
 	      free (result);
 	      errno = saved_errno;
 	      return NULL;
 	    }
-	  if (iconv_close (cd) < 0)
+	  if (cd != (iconv_t)(-1) && iconv_close (cd) < 0)
 	    {
 	      /* Return NULL, but free the allocated memory, and while doing
 		 that, preserve the errno from iconv_close.  */
--- a/lib/striconveh.h
+++ b/lib/striconveh.h
@@ -42,7 +42,8 @@
 
 /* Convert an entire string from one encoding to another, using iconv.
    The original string is at [SRC,...,SRC+SRCLEN-1].
-   The conversion descriptor from FROMCODE to TOCODE is passed as CD.
+   CD is the conversion descriptor from FROM_CODESET to TO_CODESET, or
+   (iconv_t)(-1) if the system does not support this direct conversion.
    CD1 is the conversion descriptor from FROM_CODESET to UTF-8 (or
    (iconv_t)(-1) if FROM_CODESET is UTF-8).
    CD2 is the conversion descriptor from UTF-8 to TO_CODESET (or (iconv_t)(-1)
@@ -67,9 +68,10 @@
 
 /* Convert an entire string from one encoding to another, using iconv.
    The original string is the NUL-terminated string starting at SRC.
-   The conversion descriptor is passed as CD.  Both the "from" and the "to"
-   encoding must use a single NUL byte at the end of the string (i.e. not
-   UCS-2, UCS-4, UTF-16, UTF-32).
+   CD is the conversion descriptor from FROM_CODESET to TO_CODESET, or
+   (iconv_t)(-1) if the system does not support this direct conversion.
+   Both the "from" and the "to" encoding must use a single NUL byte at the end
+   of the string (i.e. not UCS-2, UCS-4, UTF-16, UTF-32).
    CD1 is the conversion descriptor from FROM_CODESET to UTF-8 (or
    (iconv_t)(-1) if FROM_CODESET is UTF-8).
    CD2 is the conversion descriptor from UTF-8 to TO_CODESET (or (iconv_t)(-1)