changeset 9075:4d262fc1845c

Work around MacOS X wcwidth(0x0301) bug.
author Bruno Haible <bruno@clisp.org>
date Sat, 07 Jul 2007 21:38:16 +0000
parents 3948e119dd4f
children 93f79b5d3cc2
files ChangeLog doc/functions/wcwidth.texi lib/wcwidth.c m4/wcwidth.m4 modules/wcwidth
diffstat 5 files changed, 80 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+2007-07-07  Bruno Haible  <bruno@clisp.org>
+
+	Work around MacOS X wcwidth() bug.
+	* m4/wcwidth.m4 (gl_FUNC_WCWIDTH): Test against MacOS X 10.3 bug.
+	* lib/wcwidth.c: Include localcharset.h, streq.h, uniwidth.h.
+	(rpl_wcwidth): Special-case the UTF-8 locales. Fall back to the
+	original wcwidth in non-UTF-8 locales.
+	* modules/wcwidth (Depends-on): Add localcharset, streq,
+	uniwidth/width.
+	* doc/functions/wcwidth.texi: Update.
+
 2007-07-07  Bruno Haible  <bruno@clisp.org>
 
 	* lib/wchar_.h: Include the GL_LINK_WARNING macro.
--- a/doc/functions/wcwidth.texi
+++ b/doc/functions/wcwidth.texi
@@ -11,15 +11,15 @@
 @item
 This function is missing on some platforms:
 Solaris 2.5.1, mingw, BeOS.
+@item
+This function handles combining characters in UTF-8 locales incorrectly on some
+platforms:
+MacOS X 10.3.
 @end itemize
 
 Portability problems not fixed by Gnulib:
 @itemize
 @item
-This function handles combining characters in UTF-8 locales incorrectly on some
-platforms:
-MacOS X 10.3.
-@item
 On Windows platforms, @code{wchar_t} is a 16-bit type and therefore cannot
 accommodate all Unicode characters.
 @end itemize
--- a/lib/wcwidth.c
+++ b/lib/wcwidth.c
@@ -23,8 +23,30 @@
 /* Get iswprint.  */
 #include <wctype.h>
 
+#include "localcharset.h"
+#include "streq.h"
+#include "uniwidth.h"
+
+#undef wcwidth
+
 int
 rpl_wcwidth (wchar_t wc)
 {
-  return wc == 0 ? 0 : iswprint (wc) ? 1 : -1;
+  /* In UTF-8 locales, use a Unicode-aware width function.  */
+  const char *encoding = locale_charset ();
+  if (STREQ (encoding, "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0 ,0))
+    {
+      /* We assume that in a UTF-8 locale, a wide character is the same as a
+	 Unicode character.  */
+      return uc_width (wc, encoding);
+    }
+  else
+    {
+      /* Otherwise, fall back to the system's wcwidth function, if any.  */
+#if HAVE_WCWIDTH
+      return wcwidth (wc);
+#else
+      return wc == 0 ? 0 : iswprint (wc) ? 1 : -1;
+#endif
+    }
 }
--- a/m4/wcwidth.m4
+++ b/m4/wcwidth.m4
@@ -1,4 +1,4 @@
-# wcwidth.m4 serial 10
+# wcwidth.m4 serial 11
 dnl Copyright (C) 2006, 2007 Free Software Foundation, Inc.
 dnl This file is free software; the Free Software Foundation
 dnl gives unlimited permission to copy and/or distribute it,
@@ -35,6 +35,44 @@
 
   if test $ac_cv_func_wcwidth = no; then
     REPLACE_WCWIDTH=1
+  else
+    dnl On MacOS X 10.3, wcwidth(0x0301) (COMBINING ACUTE ACCENT) returns 1
+    dnl instead of 0. This leads to bugs in 'ls' (coreutils).
+    AC_CACHE_CHECK([whether wcwidth works reasonably in UTF-8 locales],
+      [gl_cv_func_wcwidth_works],
+      [
+        AC_TRY_RUN([
+#include <locale.h>
+/* AIX 3.2.5 declares wcwidth in <string.h>. */
+#include <string.h>
+/* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
+   <wchar.h>.
+   BSD/OS 4.0.1 has a bug: <stddef.h>, <stdio.h> and <time.h> must be included
+   before <wchar.h>.  */
+#include <stddef.h>
+#include <stdio.h>
+#include <time.h>
+#include <wchar.h>
+#if !HAVE_DECL_WCWIDTH
+extern
+# ifdef __cplusplus
+"C"
+# endif
+int wcwidth (int);
+#endif
+int main ()
+{
+  if (setlocale (LC_ALL, "fr_FR.UTF-8") != NULL)
+    if (wcwidth (0x0301) > 0)
+      return 1;
+  return 0;
+}], [gl_cv_func_wcwidth_works=yes], [gl_cv_func_wcwidth_works=no],
+          [gl_cv_func_wcwidth_works="guessing no"])
+      ])
+    case "$gl_cv_func_wcwidth_works" in
+      *yes) ;;
+      *no) REPLACE_WCWIDTH=1 ;;
+    esac
   fi
   if test $REPLACE_WCWIDTH = 1; then
     AC_LIBOBJ([wcwidth])
--- a/modules/wcwidth
+++ b/modules/wcwidth
@@ -10,6 +10,9 @@
 Depends-on:
 wchar
 wctype
+localcharset
+streq
+uniwidth/width
 
 configure.ac:
 gl_FUNC_WCWIDTH