Mercurial > hg > octave-lojdl > gnulib-hg
changeset 8091:ebf2b19b5684
New module 'mbscasecmp'. strcasecmp is not expected to work with multibyte
strings.
author | Bruno Haible <bruno@clisp.org> |
---|---|
date | Mon, 05 Feb 2007 01:57:07 +0000 |
parents | a60a47069e17 |
children | bdbd1a6a7a67 |
files | ChangeLog MODULES.html.sh lib/mbscasecmp.c lib/strcasecmp.c lib/string_.h m4/string_h.m4 modules/string |
diffstat | 7 files changed, 65 insertions(+), 78 deletions(-) [+] |
line wrap: on
line diff
--- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,20 @@ +2007-02-04 Bruno Haible <bruno@clisp.org> + + New module mbscasecmp, reduced goal of strcasecmp. + * modules/mbscasecmp: New file. + * lib/mbscasecmp.c: New file, copied from lib/strcasecmp.c. + (mbscasecmp): Renamed from strcasecmp. + * lib/strcasecmp.c: Don't include mbuiter.h. + (strcasecmp): Remove support for multibyte locales. + * lib/string_.h (strcasecmp): Don't rename. Declare only if missing. + Change the conditional link warning. + (mbscasecmp): New declaration. + * m4/mbscasecmp.m4: New file. + * m4/string_h.m4 (gl_STRING_MODULE_INDICATOR_DEFAULTS): Initialize + GNULIB_MBSCASECMP. + * modules/string (string.h): Also substitute GNULIB_MBSCASECMP. + * MODULES.html.sh (Internationalization functions): Add mbscasecmp. + 2007-02-04 Bruno Haible <bruno@clisp.org> New module mbsstr. Remove module strstr.
--- a/MODULES.html.sh +++ b/MODULES.html.sh @@ -2163,6 +2163,7 @@ func_module mbschr func_module mbsrchr func_module mbsstr + func_module mbscasecmp func_module mbswidth func_module memcasecmp func_module memcoll
--- a/lib/mbscasecmp.c +++ b/lib/mbscasecmp.c @@ -31,13 +31,13 @@ #define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch)) -/* Compare strings S1 and S2, ignoring case, returning less than, equal to or - greater than zero if S1 is lexicographically less than, equal to or greater - than S2. +/* Compare the character strings S1 and S2, ignoring case, returning less than, + equal to or greater than zero if S1 is lexicographically less than, equal to + or greater than S2. Note: This function may, in multibyte locales, return 0 for strings of different lengths! */ int -strcasecmp (const char *s1, const char *s2) +mbscasecmp (const char *s1, const char *s2) { if (s1 == s2) return 0;
--- a/lib/strcasecmp.c +++ b/lib/strcasecmp.c @@ -1,7 +1,5 @@ /* Case-insensitive string comparison function. Copyright (C) 1998-1999, 2005-2007 Free Software Foundation, Inc. - Written by Bruno Haible <bruno@clisp.org>, 2005, - based on earlier glibc code. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -25,79 +23,41 @@ #include <ctype.h> #include <limits.h> -#if HAVE_MBRTOWC -# include "mbuiter.h" -#endif - #define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch)) /* Compare strings S1 and S2, ignoring case, returning less than, equal to or greater than zero if S1 is lexicographically less than, equal to or greater than S2. - Note: This function may, in multibyte locales, return 0 for strings of - different lengths! */ + Note: This function does not work with multibyte strings! */ + int strcasecmp (const char *s1, const char *s2) { - if (s1 == s2) + const unsigned char *p1 = (const unsigned char *) s1; + const unsigned char *p2 = (const unsigned char *) s2; + unsigned char c1, c2; + + if (p1 == p2) return 0; - /* Be careful not to look at the entire extent of s1 or s2 until needed. - This is useful because when two strings differ, the difference is - most often already in the very few first characters. */ -#if HAVE_MBRTOWC - if (MB_CUR_MAX > 1) + do { - mbui_iterator_t iter1; - mbui_iterator_t iter2; - - mbui_init (iter1, s1); - mbui_init (iter2, s2); + c1 = TOLOWER (*p1); + c2 = TOLOWER (*p2); - while (mbui_avail (iter1) && mbui_avail (iter2)) - { - int cmp = mb_casecmp (mbui_cur (iter1), mbui_cur (iter2)); - - if (cmp != 0) - return cmp; + if (c1 == '\0') + break; - mbui_advance (iter1); - mbui_advance (iter2); - } - if (mbui_avail (iter1)) - /* s2 terminated before s1. */ - return 1; - if (mbui_avail (iter2)) - /* s1 terminated before s2. 
*/ - return -1; - return 0; + ++p1; + ++p2; } - else -#endif - { - const unsigned char *p1 = (const unsigned char *) s1; - const unsigned char *p2 = (const unsigned char *) s2; - unsigned char c1, c2; - - do - { - c1 = TOLOWER (*p1); - c2 = TOLOWER (*p2); - - if (c1 == '\0') - break; + while (c1 == c2); - ++p1; - ++p2; - } - while (c1 == c2); - - if (UCHAR_MAX <= INT_MAX) - return c1 - c2; - else - /* On machines where 'char' and 'int' are types of the same size, the - difference of two 'unsigned char' values - including the sign bit - - doesn't fit in an 'int'. */ - return (c1 > c2 ? 1 : c1 < c2 ? -1 : 0); - } + if (UCHAR_MAX <= INT_MAX) + return c1 - c2; + else + /* On machines where 'char' and 'int' are types of the same size, the + difference of two 'unsigned char' values - including the sign bit - + doesn't fit in an 'int'. */ + return (c1 > c2 ? 1 : c1 < c2 ? -1 : 0); }
--- a/lib/string_.h +++ b/lib/string_.h @@ -115,20 +115,17 @@ /* Compare strings S1 and S2, ignoring case, returning less than, equal to or greater than zero if S1 is lexicographically less than, equal to or greater than S2. - Note: This function may, in multibyte locales, return 0 for strings of - different lengths! - No known system has a strcasecmp() function that works correctly in - multibyte locales. Therefore use our version always, if the - strcase module is available. */ -#if @GNULIB_STRCASE@ -# if @REPLACE_STRCASECMP@ -# define strcasecmp rpl_strcasecmp -extern int strcasecmp (char const *__s1, char const *__s2); -# endif -#elif defined GNULIB_POSIXCHECK + Note: This function does not work in multibyte locales. */ +#if ! @HAVE_STRCASECMP@ +extern int strcasecmp (char const *s1, char const *s2); +#endif +#if defined GNULIB_POSIXCHECK +/* strcasecmp() does not work with multibyte strings: + POSIX says that it operates on "strings", and "string" in POSIX is defined + as a sequence of bytes, not of characters. */ # undef strcasecmp # define strcasecmp(a,b) \ - (GL_LINK_WARNING ("strcasecmp is often incorrectly implemented for multibyte locales - use gnulib module 'strcase' for correct and portable internationalization"), \ + (GL_LINK_WARNING ("strcasecmp cannot work correctly on character strings in multibyte locales - use mbscasecmp if you care about internationalization, or use c_strcasecmp (from gnulib module c-strcase) if you want a locale independent function"), \ strcasecmp (a, b)) #endif @@ -337,6 +334,16 @@ extern char * mbsstr (const char *haystack, const char *needle); #endif +#if @GNULIB_MBSCASECMP@ +/* Compare the character strings S1 and S2, ignoring case, returning less than, + equal to or greater than zero if S1 is lexicographically less than, equal to + or greater than S2. + Note: This function may, in multibyte locales, return 0 for strings of + different lengths! + Unlike strcasecmp(), this function works correctly in multibyte locales. 
*/ +extern int mbscasecmp (const char *s1, const char *s2); +#endif + #ifdef __cplusplus }
--- a/m4/string_h.m4 +++ b/m4/string_h.m4 @@ -71,4 +71,5 @@ GNULIB_MBSCHR=0; AC_SUBST([GNULIB_MBSCHR]) GNULIB_MBSRCHR=0; AC_SUBST([GNULIB_MBSRCHR]) GNULIB_MBSSTR=0; AC_SUBST([GNULIB_MBSSTR]) + GNULIB_MBSCASECMP=0; AC_SUBST([GNULIB_MBSCASECMP]) ])
--- a/modules/string +++ b/modules/string @@ -24,6 +24,7 @@ -e 's|@''GNULIB_MBSCHR''@|$(GNULIB_MBSCHR)|g' \ -e 's|@''GNULIB_MBSRCHR''@|$(GNULIB_MBSRCHR)|g' \ -e 's|@''GNULIB_MBSSTR''@|$(GNULIB_MBSSTR)|g' \ + -e 's|@''GNULIB_MBSCASECMP''@|$(GNULIB_MBSCASECMP)|g' \ -e 's|@''GNULIB_MEMMEM''@|$(GNULIB_MEMMEM)|g' \ -e 's|@''GNULIB_MEMPCPY''@|$(GNULIB_MEMPCPY)|g' \ -e 's|@''GNULIB_MEMRCHR''@|$(GNULIB_MEMRCHR)|g' \