# HG changeset patch
# User Bruno Haible
# Date 1229988790 -3600
# Node ID daa3e6323dc20469ed877282f7f879e9418c0d8b
# Parent  e87e827ebc0f586eb551db5547e24b39b629c80f
Work around mbrlen() bugs on various platforms.

diff --git a/ChangeLog b/ChangeLog
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2008-12-22  Bruno Haible
+
+	Work around mbrlen() bugs on AIX, HP-UX, OSF/1, Solaris.
+	* m4/mbrlen.m4 (gl_FUNC_MBRLEN): Set REPLACE_MBRLEN if mbrtowc is
+	being overridden.
+	(gl_MBRLEN_INCOMPLETE_STATE, gl_MBRLEN_RETVAL, gl_MBRLEN_NUL_RETVAL):
+	New macros.
+	* lib/wchar.in.h (mbrlen): Override if REPLACE_MBRLEN is set.
+	* m4/wchar.m4 (gl_WCHAR_H_DEFAULTS): Initialize REPLACE_MBRLEN.
+	* modules/wchar (Makefile.am): Substitute REPLACE_MBRLEN.
+	* doc/posix-functions/mbrlen.texi: Mention the various platform bugs.
+
 2008-12-22  Bruno Haible
 
 	* m4/mbrtowc.m4 (gl_MBRTOWC_INCOMPLETE_STATE): Remove unused variable
diff --git a/doc/posix-functions/mbrlen.texi b/doc/posix-functions/mbrlen.texi
--- a/doc/posix-functions/mbrlen.texi
+++ b/doc/posix-functions/mbrlen.texi
@@ -11,6 +11,18 @@
 @item
 This function is missing on some platforms:
 HP-UX 11.00, IRIX 6.5, Solaris 2.6, mingw, Interix 3.5.
+@item
+This function does not put the state into non-initial state when parsing an
+incomplete multibyte character on some platforms:
+AIX 5.1, OSF/1 5.1.
+@item
+This function returns the total number of bytes that make up the multibyte
+character, not the number of bytes that were needed to complete the multibyte
+character, on some platforms:
+HP-UX 11.11, Solaris 10.
+@item
+This function may not return 0 when parsing the NUL character on some platforms:
+Solaris 9.
 @end itemize
 
 Portability problems not fixed by Gnulib:
diff --git a/lib/wchar.in.h b/lib/wchar.in.h
--- a/lib/wchar.in.h
+++ b/lib/wchar.in.h
@@ -157,7 +157,11 @@
 
 /* Recognize a multibyte character.  */
 #if @GNULIB_MBRLEN@
-# if !@HAVE_MBRLEN@
+# if @REPLACE_MBRLEN@
+#  undef mbrlen
+#  define mbrlen rpl_mbrlen
+# endif
+# if !@HAVE_MBRLEN@ || @REPLACE_MBRLEN@
 extern size_t mbrlen (const char *s, size_t n, mbstate_t *ps);
 # endif
 #elif defined GNULIB_POSIXCHECK
diff --git a/m4/mbrlen.m4 b/m4/mbrlen.m4
--- a/m4/mbrlen.m4
+++ b/m4/mbrlen.m4
@@ -1,4 +1,4 @@
-# mbrlen.m4 serial 1
+# mbrlen.m4 serial 2
 dnl Copyright (C) 2008 Free Software Foundation, Inc.
 dnl This file is free software; the Free Software Foundation
 dnl gives unlimited permission to copy and/or distribute it,
@@ -9,15 +9,188 @@
   AC_REQUIRE([gl_WCHAR_H_DEFAULTS])
 
   AC_REQUIRE([AC_TYPE_MBSTATE_T])
+  AC_REQUIRE([gl_FUNC_MBRTOWC])
   AC_CHECK_FUNCS_ONCE([mbrlen])
   if test $ac_cv_func_mbrlen = no; then
     HAVE_MBRLEN=0
+  else
+    dnl Most bugs affecting the system's mbrtowc function also affect the
+    dnl mbrlen function. So override mbrlen whenever mbrtowc is overridden.
+    dnl We could also run the individual tests below; the results would be
+    dnl the same.
+    if test $REPLACE_MBRTOWC = 1; then
+      REPLACE_MBRLEN=1
+    fi
+  fi
+  if test $HAVE_MBRLEN = 0 || test $REPLACE_MBRLEN = 1; then
     gl_REPLACE_WCHAR_H
     AC_LIBOBJ([mbrlen])
     gl_PREREQ_MBRLEN
   fi
 ])
 
+dnl Test whether mbrlen puts the state into non-initial state when parsing an
+dnl incomplete multibyte character.
+dnl Result is gl_cv_func_mbrlen_incomplete_state.
+
+AC_DEFUN([gl_MBRLEN_INCOMPLETE_STATE],
+[
+  AC_REQUIRE([AC_PROG_CC])
+  AC_REQUIRE([gt_LOCALE_JA])
+  AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles
+  AC_CACHE_CHECK([whether mbrlen handles incomplete characters],
+    [gl_cv_func_mbrlen_incomplete_state],
+    [
+      dnl Initial guess, used when cross-compiling or when no suitable locale
+      dnl is present.
+changequote(,)dnl
+      case "$host_os" in
+                     # Guess no on AIX and OSF/1.
+        aix* | osf*) gl_cv_func_mbrlen_incomplete_state="guessing no" ;;
+                     # Guess yes otherwise.
+        *)           gl_cv_func_mbrlen_incomplete_state="guessing yes" ;;
+      esac
+changequote([,])dnl
+      if test $LOCALE_JA != none; then
+        AC_TRY_RUN([
+#include <locale.h>
+#include <string.h>
+#include <wchar.h>
+int main ()
+{
+  if (setlocale (LC_ALL, "$LOCALE_JA") != NULL)
+    {
+      const char input[] = "B\217\253\344\217\251\316er"; /* "Büßer" */
+      mbstate_t state;
+
+      memset (&state, '\0', sizeof (mbstate_t));
+      if (mbrlen (input + 1, 1, &state) == (size_t)(-2))
+        if (mbsinit (&state))
+          return 1;
+    }
+  return 0;
+}],
+          [gl_cv_func_mbrlen_incomplete_state=yes],
+          [gl_cv_func_mbrlen_incomplete_state=no],
+          [])
+      fi
+    ])
+])
+
+dnl Test whether mbrlen, when parsing the end of a multibyte character,
+dnl correctly returns the number of bytes that were needed to complete the
+dnl character (not the total number of bytes of the multibyte character).
+dnl Result is gl_cv_func_mbrlen_retval.
+
+AC_DEFUN([gl_MBRLEN_RETVAL],
+[
+  AC_REQUIRE([AC_PROG_CC])
+  AC_REQUIRE([gt_LOCALE_FR_UTF8])
+  AC_REQUIRE([gt_LOCALE_JA])
+  AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles
+  AC_CACHE_CHECK([whether mbrlen has a correct return value],
+    [gl_cv_func_mbrlen_retval],
+    [
+      dnl Initial guess, used when cross-compiling or when no suitable locale
+      dnl is present.
+changequote(,)dnl
+      case "$host_os" in
+                          # Guess no on HP-UX and Solaris.
+        hpux* | solaris*) gl_cv_func_mbrlen_retval="guessing no" ;;
+                          # Guess yes otherwise.
+        *)                gl_cv_func_mbrlen_retval="guessing yes" ;;
+      esac
+changequote([,])dnl
+      if test $LOCALE_FR_UTF8 != none || test $LOCALE_JA != none; then
+        AC_TRY_RUN([
+#include <locale.h>
+#include <string.h>
+#include <wchar.h>
+int main ()
+{
+  /* This fails on Solaris.  */
+  if (setlocale (LC_ALL, "$LOCALE_FR_UTF8") != NULL)
+    {
+      char input[] = "B\303\274\303\237er"; /* "Büßer" */
+      mbstate_t state;
+
+      memset (&state, '\0', sizeof (mbstate_t));
+      if (mbrlen (input + 1, 1, &state) == (size_t)(-2))
+        {
+          input[1] = '\0';
+          if (mbrlen (input + 2, 5, &state) != 1)
+            return 1;
+        }
+    }
+  /* This fails on HP-UX 11.11.  */
+  if (setlocale (LC_ALL, "$LOCALE_JA") != NULL)
+    {
+      char input[] = "B\217\253\344\217\251\316er"; /* "Büßer" */
+      mbstate_t state;
+
+      memset (&state, '\0', sizeof (mbstate_t));
+      if (mbrlen (input + 1, 1, &state) == (size_t)(-2))
+        {
+          input[1] = '\0';
+          if (mbrlen (input + 2, 5, &state) != 2)
+            return 1;
+        }
+    }
+  return 0;
+}],
+          [gl_cv_func_mbrlen_retval=yes],
+          [gl_cv_func_mbrlen_retval=no],
+          [])
+      fi
+    ])
+])
+
+dnl Test whether mbrlen, when parsing a NUL character, correctly returns 0.
+dnl Result is gl_cv_func_mbrlen_nul_retval.
+
+AC_DEFUN([gl_MBRLEN_NUL_RETVAL],
+[
+  AC_REQUIRE([AC_PROG_CC])
+  AC_REQUIRE([gt_LOCALE_ZH_CN])
+  AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles
+  AC_CACHE_CHECK([whether mbrlen returns 0 when parsing a NUL character],
+    [gl_cv_func_mbrlen_nul_retval],
+    [
+      dnl Initial guess, used when cross-compiling or when no suitable locale
+      dnl is present.
+changequote(,)dnl
+      case "$host_os" in
+                    # Guess no on Solaris 9.
+        solaris2.9) gl_cv_func_mbrlen_nul_retval="guessing no" ;;
+                    # Guess yes otherwise.
+        *)          gl_cv_func_mbrlen_nul_retval="guessing yes" ;;
+      esac
+changequote([,])dnl
+      if test $LOCALE_ZH_CN != none; then
+        AC_TRY_RUN([
+#include <locale.h>
+#include <string.h>
+#include <wchar.h>
+int main ()
+{
+  /* This crashes on Solaris 9 inside __mbrtowc_dense_gb18030.  */
+  if (setlocale (LC_ALL, "$LOCALE_ZH_CN") != NULL)
+    {
+      mbstate_t state;
+
+      memset (&state, '\0', sizeof (mbstate_t));
+      if (mbrlen ("", 1, &state) != 0)
+        return 1;
+    }
+  return 0;
+}],
+          [gl_cv_func_mbrlen_nul_retval=yes],
+          [gl_cv_func_mbrlen_nul_retval=no],
+          [])
+      fi
+    ])
+])
+
 # Prerequisites of lib/mbrlen.c.
 AC_DEFUN([gl_PREREQ_MBRLEN], [
   :
diff --git a/m4/wchar.m4 b/m4/wchar.m4
--- a/m4/wchar.m4
+++ b/m4/wchar.m4
@@ -7,7 +7,7 @@
 
 dnl Written by Eric Blake.
 
-# wchar.m4 serial 21
+# wchar.m4 serial 22
 
 AC_DEFUN([gl_WCHAR_H],
 [
@@ -89,6 +89,7 @@
   REPLACE_WCTOB=0;     AC_SUBST([REPLACE_WCTOB])
   REPLACE_MBSINIT=0;   AC_SUBST([REPLACE_MBSINIT])
   REPLACE_MBRTOWC=0;   AC_SUBST([REPLACE_MBRTOWC])
+  REPLACE_MBRLEN=0;    AC_SUBST([REPLACE_MBRLEN])
   REPLACE_MBSRTOWCS=0; AC_SUBST([REPLACE_MBSRTOWCS])
   REPLACE_MBSNRTOWCS=0;AC_SUBST([REPLACE_MBSNRTOWCS])
   REPLACE_WCRTOMB=0;   AC_SUBST([REPLACE_WCRTOMB])
diff --git a/modules/wchar b/modules/wchar
--- a/modules/wchar
+++ b/modules/wchar
@@ -53,6 +53,7 @@
 	      -e 's|@''REPLACE_WCTOB''@|$(REPLACE_WCTOB)|g' \
 	      -e 's|@''REPLACE_MBSINIT''@|$(REPLACE_MBSINIT)|g' \
 	      -e 's|@''REPLACE_MBRTOWC''@|$(REPLACE_MBRTOWC)|g' \
+	      -e 's|@''REPLACE_MBRLEN''@|$(REPLACE_MBRLEN)|g' \
 	      -e 's|@''REPLACE_MBSRTOWCS''@|$(REPLACE_MBSRTOWCS)|g' \
 	      -e 's|@''REPLACE_MBSNRTOWCS''@|$(REPLACE_MBSNRTOWCS)|g' \
 	      -e 's|@''REPLACE_WCRTOMB''@|$(REPLACE_WCRTOMB)|g' \