changeset 16245:530509562116

quotearg: fall back to Unicode single quotes in UTF-8 and GB-18030 locales Most programs do not have translation catalogs for English and much less separate catalogs for British and American English. Drop the suggestion to translators about these two, and provide it automatically for Unicode locales. Like most programs, even those using American English, we use single quotation marks. This conflicts with the American typographic convention, but works better when you cite the entire error message within double quotes. It also tries not to clash with established practice and with what non-gnulib programs will usually do. * lib/quotearg.c (gettext_quote): Hard-code U+2018 and U+2019 when using an UTF-8 or GB-18030 locale. The list of other locales with quotes was provided by Bruno Haible. (quotearg_buffer_restyled): Adjust instructions to translators. * lib/quotearg.h (locale_quoting_style): Do not put an example in the text, since this would be wrong when using Unicode. * modules/quotearg: Depend on c-strcaseeq.
author Paolo Bonzini <bonzini@gnu.org>
date Sun, 18 Dec 2011 15:33:53 +0100
parents d3bb78595bd0
children 6c6649e7d5ea
files lib/quotearg.c lib/quotearg.h modules/quotearg
diffstat 3 files changed, 55 insertions(+), 15 deletions(-) [+]
line wrap: on
line diff
--- a/lib/quotearg.c
+++ b/lib/quotearg.c
@@ -29,6 +29,8 @@
 #include "quotearg.h"
 
 #include "xalloc.h"
+#include "c-strcaseeq.h"
+#include "localcharset.h"
 
 #include <ctype.h>
 #include <errno.h>
@@ -183,13 +185,45 @@
 }
 
 /* MSGID approximates a quotation mark.  Return its translation if it
-   has one; otherwise, return either it or "\"", depending on S.  */
+   has one; otherwise, return either it or "\"", depending on S.
+
+   S is either clocale_quoting_style or locale_quoting_style.  */
 static char const *
 gettext_quote (char const *msgid, enum quoting_style s)
 {
   char const *translation = _(msgid);
-  if (translation == msgid && s == clocale_quoting_style)
-    translation = "\"";
+  char const *locale_code;
+
+  if (translation != msgid)
+    return translation;
+
+  /* For UTF-8 and GB-18030, use single quotes U+2018 and U+2019.
+     Here is a list of other locales that include U+2018 and U+2019:
+
+        ISO-8859-7   0xA1                 KOI8-T       0x91
+        CP869        0x8B                 CP874        0x91
+        CP932        0x81 0x65            CP936        0xA1 0xAE
+        CP949        0xA1 0xAE            CP950        0xA1 0xA5
+        CP1250       0x91                 CP1251       0x91
+        CP1252       0x91                 CP1253       0x91
+        CP1254       0x91                 CP1255       0x91
+        CP1256       0x91                 CP1257       0x91
+        EUC-JP       0xA1 0xC6            EUC-KR       0xA1 0xAE
+        EUC-TW       0xA1 0xE4            BIG5         0xA1 0xA5
+        BIG5-HKSCS   0xA1 0xA5            EUC-CN       0xA1 0xAE
+        GBK          0xA1 0xAE            Georgian-PS  0x91
+        PT154        0x91
+
+     None of these is still in wide use; using iconv is overkill.  */
+  locale_code = locale_charset ();
+  if (STRCASEEQ (locale_code, "UTF-8", 'U','T','F','-','8',0,0,0,0))
+    return msgid[0] == '`' ? "\xe2\x80\x98": "\xe2\x80\x99";
+  if (STRCASEEQ (locale_code, "GB18030", 'G','B','1','8','0','3','0',0,0))
+    return msgid[0] == '`' ? "\xa1\xae": "\xa1\xaf";
+
+  if (s == clocale_quoting_style)
+    return "\"";
+
   return translation;
 }
 
@@ -258,19 +292,21 @@
           {
             /* TRANSLATORS:
                Get translations for open and closing quotation marks.
-
                The message catalog should translate "`" to a left
                quotation mark suitable for the locale, and similarly for
-               "'".  If the catalog has no translation,
-               locale_quoting_style quotes `like this', and
-               clocale_quoting_style quotes "like this".
+               "'".  For example, a French Unicode locale should translate
+               these to U+00AB (LEFT-POINTING DOUBLE ANGLE
+               QUOTATION MARK), and U+00BB (RIGHT-POINTING DOUBLE ANGLE
+               QUOTATION MARK), respectively.
 
-               For example, an American English Unicode locale should
-               translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
-               should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
-               MARK).  A British English Unicode locale should instead
-               translate these to U+2018 (LEFT SINGLE QUOTATION MARK)
-               and U+2019 (RIGHT SINGLE QUOTATION MARK), respectively.
+               If the catalog has no translation, we will try to
+               use Unicode U+2018 (LEFT SINGLE QUOTATION MARK) and
+               Unicode U+2019 (RIGHT SINGLE QUOTATION MARK).  If the
+               current locale is not Unicode, locale_quoting_style
+               will quote `like this', and clocale_quoting_style will
+               quote "like this".  You should always include translations
+               for "`" and "'" even if U+2018 and U+2019 are appropriate
+               for your locale.
 
                If you don't know what to put here, please see
                <http://en.wikipedia.org/wiki/Quotation_marks_in_other_languages>
--- a/lib/quotearg.h
+++ b/lib/quotearg.h
@@ -112,8 +112,10 @@
     */
     escape_quoting_style,
 
-    /* Like clocale_quoting_style, but quote `like this' instead of
-       "like this" in the default C locale (ls --quoting-style=locale).
+    /* Like clocale_quoting_style, but use single quotes in the
+       default C locale or if the program does not use gettext
+       (ls --quoting-style=locale).  For UTF-8 locales, quote
+       characters will use Unicode.
 
        LC_MESSAGES=C
        quotearg_buffer:
--- a/modules/quotearg
+++ b/modules/quotearg
@@ -9,12 +9,14 @@
 m4/quotearg.m4
 
 Depends-on:
+c-strcaseeq
 extensions
 gettext-h
 mbrtowc
 mbsinit
 memcmp
 quotearg-simple
+localcharset
 stdbool
 wchar
 wctype-h