# HG changeset patch # User Bruno Haible # Date 1175374889 0 # Node ID 414a0a3d5faccd162ed2f7e871ee2ac130c982af # Parent c92d7e507345e2c33cbb5a563444345dac4def6b Do an indirect conversion if iconv_open does not support a direct conversion. diff --git a/lib/striconveh.c b/lib/striconveh.c --- a/lib/striconveh.c +++ b/lib/striconveh.c @@ -283,6 +283,10 @@ } result = initial_result; + /* Test whether a direct conversion is possible at all. */ + if (cd == (iconv_t)(-1)) + goto indirectly; + if (offsets != NULL) { size_t i; @@ -481,8 +485,7 @@ goto done; indirectly: - /* The direct conversion failed, handler != iconveh_error, - and cd2 != (iconv_t)(-1). + /* The direct conversion failed. Use a conversion through UTF-8. */ if (offsets != NULL) { @@ -495,6 +498,7 @@ } length = 0; { + const bool slowly = (offsets != NULL || handler == iconveh_error); # define utf8bufsize 4096 /* may also be smaller or larger than tmpbufsize */ char utf8buf[utf8bufsize + 1]; size_t utf8len = 0; @@ -509,7 +513,8 @@ /* Set to the initial state. */ if (cd1 != (iconv_t)(-1)) iconv (cd1, NULL, NULL, NULL, NULL); - iconv (cd2, NULL, NULL, NULL, NULL); + if (cd2 != (iconv_t)(-1)) + iconv (cd2, NULL, NULL, NULL, NULL); # endif while (in1size > 0 || do_final_flush1 || utf8len > 0 || do_final_flush2) @@ -531,7 +536,7 @@ } if (cd1 != (iconv_t)(-1)) { - if (offsets != NULL) + if (slowly) res1 = iconv_carefully_1 (cd1, &in1ptr, &in1size, &out1ptr, &out1size, @@ -545,7 +550,7 @@ else { /* FROM_CODESET is UTF-8. */ - res1 = utf8conv_carefully (offsets != NULL, + res1 = utf8conv_carefully (slowly, &in1ptr, &in1size, &out1ptr, &out1size, &incremented1); @@ -618,10 +623,19 @@ bool grow; if (in2size > 0) - res2 = iconv_carefully (cd2, - &in2ptr, &in2size, - &out2ptr, &out2size, - &incremented2); + { + if (cd2 != (iconv_t)(-1)) + res2 = iconv_carefully (cd2, + &in2ptr, &in2size, + &out2ptr, &out2size, + &incremented2); + else + /* TO_CODESET is UTF-8. */ + res2 = utf8conv_carefully (false, + &in2ptr, &in2size, + &out2ptr, &out2size, + &incremented2); + } else /* in1size == 0 && !do_final_flush1 && in2size == 0 && do_final_flush2 */ { @@ -629,10 +643,11 @@ state. But avoid glibc-2.1 bug and Solaris 2.7 bug. */ # if defined _LIBICONV_VERSION \ || !((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) || defined __sun) - res2 = iconv (cd2, NULL, NULL, &out2ptr, &out2size); -# else - res2 = 0; + if (cd2 != (iconv_t)(-1)) + res2 = iconv (cd2, NULL, NULL, &out2ptr, &out2size); + else # endif + res2 = 0; do_final_flush2 = false; incremented2 = true; } @@ -703,9 +718,28 @@ inptr = scratchbuf; insize = scratchlen; - res = iconv (cd2, - (ICONV_CONST char **) &inptr, &insize, - &out2ptr, &out2size); + if (cd2 != (iconv_t)(-1)) + res = iconv (cd2, + (ICONV_CONST char **) &inptr, &insize, + &out2ptr, &out2size); + else + { + /* TO_CODESET is UTF-8. */ + if (out2size >= insize) + { + memcpy (out2ptr, inptr, insize); + out2ptr += insize; + out2size -= insize; + inptr += insize; + insize = 0; + res = 0; + } + else + { + errno = E2BIG; + res = (size_t)(-1); + } + } length = out2ptr - result; if (res == (size_t)(-1) && errno == E2BIG) { @@ -732,9 +766,23 @@ out2ptr = result + length; out2size = allocated - extra_alloc - length; - res = iconv (cd2, - (ICONV_CONST char **) &inptr, &insize, - &out2ptr, &out2size); + if (cd2 != (iconv_t)(-1)) + res = iconv (cd2, + (ICONV_CONST char **) &inptr, + &insize, + &out2ptr, &out2size); + else + { + /* TO_CODESET is UTF-8. */ + if (!(out2size >= insize)) + abort (); + memcpy (out2ptr, inptr, insize); + out2ptr += insize; + out2size -= insize; + inptr += insize; + insize = 0; + res = 0; + } length = out2ptr - result; } # if !defined _LIBICONV_VERSION && !defined __GLIBC__ @@ -952,8 +1000,6 @@ # endif cd = iconv_open (to_codeset, from_codeset); - if (cd == (iconv_t)(-1)) - return -1; if (STRCASEEQ (from_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0)) cd1 = (iconv_t)(-1); @@ -963,7 +1009,8 @@ if (cd1 == (iconv_t)(-1)) { int saved_errno = errno; - iconv_close (cd); + if (cd != (iconv_t)(-1)) + iconv_close (cd); errno = saved_errno; return -1; } @@ -979,7 +1026,8 @@ int saved_errno = errno; if (cd1 != (iconv_t)(-1)) iconv_close (cd1); - iconv_close (cd); + if (cd != (iconv_t)(-1)) + iconv_close (cd); errno = saved_errno; return -1; } @@ -998,7 +1046,8 @@ iconv_close (cd2); if (cd1 != (iconv_t)(-1)) iconv_close (cd1); - iconv_close (cd); + if (cd != (iconv_t)(-1)) + iconv_close (cd); errno = saved_errno; } else @@ -1010,7 +1059,8 @@ int saved_errno = errno; if (cd1 != (iconv_t)(-1)) iconv_close (cd1); - iconv_close (cd); + if (cd != (iconv_t)(-1)) + iconv_close (cd); if (result != *resultp && result != NULL) free (result); errno = saved_errno; @@ -1021,13 +1071,14 @@ /* Return -1, but free the allocated memory, and while doing that, preserve the errno from iconv_close. */ int saved_errno = errno; - iconv_close (cd); + if (cd != (iconv_t)(-1)) + iconv_close (cd); if (result != *resultp && result != NULL) free (result); errno = saved_errno; return -1; } - if (iconv_close (cd) < 0) + if (cd != (iconv_t)(-1) && iconv_close (cd) < 0) { /* Return -1, but free the allocated memory, and while doing that, preserve the errno from iconv_close. */ @@ -1085,8 +1136,6 @@ # endif cd = iconv_open (to_codeset, from_codeset); - if (cd == (iconv_t)(-1)) - return NULL; if (STRCASEEQ (from_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0)) cd1 = (iconv_t)(-1); @@ -1096,7 +1145,8 @@ if (cd1 == (iconv_t)(-1)) { int saved_errno = errno; - iconv_close (cd); + if (cd != (iconv_t)(-1)) + iconv_close (cd); errno = saved_errno; return NULL; } @@ -1112,7 +1162,8 @@ int saved_errno = errno; if (cd1 != (iconv_t)(-1)) iconv_close (cd1); - iconv_close (cd); + if (cd != (iconv_t)(-1)) + iconv_close (cd); errno = saved_errno; return NULL; } @@ -1128,7 +1179,8 @@ iconv_close (cd2); if (cd1 != (iconv_t)(-1)) iconv_close (cd1); - iconv_close (cd); + if (cd != (iconv_t)(-1)) + iconv_close (cd); errno = saved_errno; } else @@ -1140,7 +1192,8 @@ int saved_errno = errno; if (cd1 != (iconv_t)(-1)) iconv_close (cd1); - iconv_close (cd); + if (cd != (iconv_t)(-1)) + iconv_close (cd); free (result); errno = saved_errno; return NULL; @@ -1150,12 +1203,13 @@ /* Return NULL, but free the allocated memory, and while doing that, preserve the errno from iconv_close. */ int saved_errno = errno; - iconv_close (cd); + if (cd != (iconv_t)(-1)) + iconv_close (cd); free (result); errno = saved_errno; return NULL; } - if (iconv_close (cd) < 0) + if (cd != (iconv_t)(-1) && iconv_close (cd) < 0) { /* Return NULL, but free the allocated memory, and while doing that, preserve the errno from iconv_close. */ diff --git a/lib/striconveh.h b/lib/striconveh.h --- a/lib/striconveh.h +++ b/lib/striconveh.h @@ -42,7 +42,8 @@ /* Convert an entire string from one encoding to another, using iconv. The original string is at [SRC,...,SRC+SRCLEN-1]. - The conversion descriptor from FROMCODE to TOCODE is passed as CD. + CD is the conversion descriptor from FROMCODE to TOCODE, or (iconv_t)(-1) if + the system does not support a direct conversion from FROMCODE to TOCODE. CD1 is the conversion descriptor from FROM_CODESET to UTF-8 (or (iconv_t)(-1) if FROM_CODESET is UTF-8). CD2 is the conversion descriptor from UTF-8 to TO_CODESET (or (iconv_t)(-1) @@ -67,9 +68,10 @@ /* Convert an entire string from one encoding to another, using iconv. The original string is the NUL-terminated string starting at SRC. - The conversion descriptor is passed as CD. Both the "from" and the "to" - encoding must use a single NUL byte at the end of the string (i.e. not - UCS-2, UCS-4, UTF-16, UTF-32). + CD is the conversion descriptor from FROMCODE to TOCODE, or (iconv_t)(-1) if + the system does not support a direct conversion from FROMCODE to TOCODE. + Both the "from" and the "to" encoding must use a single NUL byte at the end + of the string (i.e. not UCS-2, UCS-4, UTF-16, UTF-32). CD1 is the conversion descriptor from FROM_CODESET to UTF-8 (or (iconv_t)(-1) if FROM_CODESET is UTF-8). CD2 is the conversion descriptor from UTF-8 to TO_CODESET (or (iconv_t)(-1)