changeset 11081:bc5393a47361

Add support for non-UTF-8 locales on MacOS X.
author Bruno Haible <bruno@clisp.org>
date Sun, 25 Jan 2009 00:51:34 +0100
parents d202ce7525f8
children 96a37da9a3e2
files ChangeLog lib/config.charset
diffstat 2 files changed, 49 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+2009-01-24  Bruno Haible  <bruno@clisp.org>
+
+	Add support for non-UTF-8 locales on MacOS X.
+	* lib/config.charset: Add CP1131, ARMSCII-8, PT154 to the list of
+	canonical encodings. For Darwin 7 and newer, don't map traditional
+	encodings to UTF-8.
+	Reported by Vincent Lefevre <vincent@vinc17.org>
+	at <http://savannah.gnu.org/bugs/?25235>.
+
 2009-01-24  Bruno Haible  <bruno@clisp.org>
 
 	* doc/gnulib.texi (Obsolete modules): New section.
--- a/lib/config.charset
+++ b/lib/config.charset
@@ -1,7 +1,7 @@
 #! /bin/sh
 # Output a system dependent table of character encoding aliases.
 #
-#   Copyright (C) 2000-2004, 2006-2008 Free Software Foundation, Inc.
+#   Copyright (C) 2000-2004, 2006-2009 Free Software Foundation, Inc.
 #
 #   This program is free software; you can redistribute it and/or modify
 #   it under the terms of the GNU General Public License as published by
@@ -63,12 +63,13 @@
 #   CP922                       aix
 #   CP932                       aix woe32 dos
 #   CP943                       aix
-#   CP949                       osf woe32 dos
+#   CP949                       osf darwin woe32 dos
 #   CP950                       woe32 dos
 #   CP1046                      aix
 #   CP1124                      aix
 #   CP1125                      dos
 #   CP1129                      aix
+#   CP1131                      darwin
 #   CP1250                      woe32
 #   CP1251                      glibc solaris netbsd openbsd darwin woe32
 #   CP1252                      aix woe32
@@ -82,15 +83,17 @@
 #   EUC-KR                  Y   glibc aix hpux irix osf solaris freebsd netbsd darwin
 #   EUC-TW                      glibc aix hpux irix osf solaris netbsd
 #   BIG5                    Y   glibc aix hpux osf solaris freebsd netbsd darwin
-#   BIG5-HKSCS                  glibc solaris
-#   GBK                         glibc aix osf solaris woe32 dos
-#   GB18030                     glibc solaris netbsd
+#   BIG5-HKSCS                  glibc solaris darwin
+#   GBK                         glibc aix osf solaris darwin woe32 dos
+#   GB18030                     glibc solaris netbsd darwin
 #   SHIFT_JIS               Y   hpux osf solaris freebsd netbsd darwin
 #   JOHAB                       glibc solaris woe32
 #   TIS-620                     glibc aix hpux osf solaris
 #   VISCII                  Y   glibc
 #   TCVN5712-1                  glibc
+#   ARMSCII-8                   glibc darwin
 #   GEORGIAN-PS                 glibc
+#   PT154                       glibc darwin
 #   HP-ROMAN8                   hpux
 #   HP-ARABIC8                  hpux
 #   HP-GREEK8                   hpux
@@ -449,7 +452,8 @@
 	echo "ko_KR.EUC EUC-KR"
 	;;
     darwin*)
-	# Darwin 7.5 has nl_langinfo(CODESET), but it is useless:
+	# Darwin 7.5 has nl_langinfo(CODESET), but sometimes its value is
+	# useless:
 	# - It returns the empty string when LANG is set to a locale of the
 	#   form ll_CC, although ll_CC/LC_CTYPE is a symlink to an UTF-8
 	#   LC_CTYPE file.
@@ -476,6 +480,36 @@
 	# minimize the use of decomposed Unicode. Unfortunately, through the
 	# Darwin file system, decomposed UTF-8 strings are leaked into user
 	# space nevertheless.
+	# Then there are also the locales with encodings other than US-ASCII
+	# and UTF-8. These locales can occasionally be useful to users (e.g.
+	# when grepping through ISO-8859-1 encoded text files), provided that
+	# all their file names are in US-ASCII.
+	echo "ISO8859-1 ISO-8859-1"
+	echo "ISO8859-2 ISO-8859-2"
+	echo "ISO8859-4 ISO-8859-4"
+	echo "ISO8859-5 ISO-8859-5"
+	echo "ISO8859-7 ISO-8859-7"
+	echo "ISO8859-9 ISO-8859-9"
+	echo "ISO8859-13 ISO-8859-13"
+	echo "ISO8859-15 ISO-8859-15"
+	echo "KOI8-R KOI8-R"
+	echo "KOI8-U KOI8-U"
+	echo "CP866 CP866"
+	echo "CP949 CP949"
+	echo "CP1131 CP1131"
+	echo "CP1251 CP1251"
+	echo "eucCN GB2312"
+	echo "GB2312 GB2312"
+	echo "eucJP EUC-JP"
+	echo "eucKR EUC-KR"
+	echo "Big5 BIG5"
+	echo "Big5HKSCS BIG5-HKSCS"
+	echo "GBK GBK"
+	echo "GB18030 GB18030"
+	echo "SJIS SHIFT_JIS"
+	echo "ARMSCII-8 ARMSCII-8"
+	echo "PT154 PT154"
+	#echo "ISCII-DEV ?"
 	echo "* UTF-8"
 	;;
     beos* | haiku*)