changeset 8153:0f48fdcc7a64

Fix regex code so it doesn't rely on strcasecmp. * lib/regex_internal.h: Include <langinfo.h> only if _LIBC is defined. Otherwise, include gnulib's localcharset.h. * lib/regcomp.c (init_dfa): Don't use strcasecmp, as it can have undesirable behavior in non-C locales. Instead, rely on locale_charset. * m4/regex.m4 (gl_PREREQ_REGEX): Don't require AM_LANGINFO_CODESET. * modules/regex (FILES): Remove m4/codeset.m4. (Depends-on): Add localcharset. Remove strcase.
author Paul Eggert <eggert@cs.ucla.edu>
date Thu, 15 Feb 2007 00:16:55 +0000
parents 51a661790093
children 669615f8fd83
files ChangeLog lib/regcomp.c lib/regex_internal.h m4/regex.m4 modules/regex
diffstat 5 files changed, 17 insertions(+), 24 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+2007-02-14  Paul Eggert  <eggert@cs.ucla.edu>
+
+	Fix regex code so it doesn't rely on strcasecmp.
+	* lib/regex_internal.h: Include <langinfo.h> only if _LIBC is defined.
+	Otherwise, include gnulib's localcharset.h.
+	* lib/regcomp.c (init_dfa): Don't use strcasecmp, as it can have
+	undesirable behavior in non-C locales.  Instead, rely on locale_charset.
+	* m4/regex.m4 (gl_PREREQ_REGEX): Don't require AM_LANGINFO_CODESET.
+	* modules/regex (FILES): Remove m4/codeset.m4.
+	(Depends-on): Add localcharset.  Remove strcase.
+
 2007-02-13  Ralf Wildenhues  <Ralf.Wildenhues@gmx.de>
 
 	* m4/unlinkdir.m4 (gl_UNLINKDIR): Fix m4 quoting bug.
--- a/lib/regcomp.c
+++ b/lib/regcomp.c
@@ -829,9 +829,6 @@
 init_dfa (re_dfa_t *dfa, size_t pat_len)
 {
   __re_size_t table_size;
-#ifndef _LIBC
-  char *codeset_name;
-#endif
 #ifdef RE_ENABLE_I18N
   size_t max_i18n_object_size = MAX (sizeof (wchar_t), sizeof (wctype_t));
 #else
@@ -875,22 +872,7 @@
   dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII)
 		       != 0);
 #else
-# ifdef HAVE_LANGINFO_CODESET
-  codeset_name = nl_langinfo (CODESET);
-# else
-  codeset_name = getenv ("LC_ALL");
-  if (codeset_name == NULL || codeset_name[0] == '\0')
-    codeset_name = getenv ("LC_CTYPE");
-  if (codeset_name == NULL || codeset_name[0] == '\0')
-    codeset_name = getenv ("LANG");
-  if (codeset_name == NULL)
-    codeset_name = "";
-  else if (strchr (codeset_name, '.') !=  NULL)
-    codeset_name = strchr (codeset_name, '.') + 1;
-# endif
-
-  if (strcasecmp (codeset_name, "UTF-8") == 0
-      || strcasecmp (codeset_name, "UTF8") == 0)
+  if (strcmp (locale_charset (), "UTF-8") == 0)
     dfa->is_utf8 = 1;
 
   /* We check exhaustively in the loop below if this charset is a
--- a/lib/regex_internal.h
+++ b/lib/regex_internal.h
@@ -27,8 +27,10 @@
 #include <stdlib.h>
 #include <string.h>
 
-#if defined HAVE_LANGINFO_H || defined HAVE_LANGINFO_CODESET || defined _LIBC
+#ifdef _LIBC
 # include <langinfo.h>
+#else
+# include "localcharset.h"
 #endif
 #if defined HAVE_LOCALE_H || defined _LIBC
 # include <locale.h>
--- a/m4/regex.m4
+++ b/m4/regex.m4
@@ -1,4 +1,4 @@
-#serial 44
+#serial 45
 
 # Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005,
 # 2006, 2007 Free Software Foundation, Inc.
@@ -203,7 +203,6 @@
 [
   AC_REQUIRE([AC_GNU_SOURCE])
   AC_REQUIRE([AC_C_RESTRICT])
-  AC_REQUIRE([AM_LANGINFO_CODESET])
   AC_CHECK_FUNCS_ONCE([iswctype mbrtowc wcrtomb wcscoll])
   AC_CHECK_DECLS([isblank], [], [], [#include <ctype.h>])
 ])
--- a/modules/regex
+++ b/modules/regex
@@ -8,17 +8,16 @@
 lib/regex_internal.h
 lib/regexec.c
 lib/regcomp.c
-m4/codeset.m4
 m4/regex.m4
 
 Depends-on:
 alloca
 extensions
 gettext-h
+localcharset
 malloc
 stdbool
 stdint
-strcase
 ssize_t
 wchar
 wctype