changeset 7931:0a4679777f66

Add an optional argument specifying transliteration.
author Bruno Haible <bruno@clisp.org>
date Wed, 24 Jan 2007 00:56:40 +0000
parents 8929067c3772
children 1749aa1eb511
files ChangeLog lib/striconveha.c lib/striconveha.h modules/striconveha
diffstat 4 files changed, 134 insertions(+), 23 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2007-01-23  Bruno Haible  <bruno@clisp.org>
+
+	* lib/striconveha.h: Include <stdbool.h>.
+	(mem_iconveha, str_iconveha): Add 'transliterate' argument.
+	* lib/striconveha.c: Include allocsa.h, strdup.h, c-strcase.h.
+	(mem_iconveha_notranslit): Renamed from mem_iconveha.
+	(mem_iconveha): New function.
+	(str_iconveha_notranslit): Renamed from str_iconveha.
+	(str_iconveha): New function.
+	* modules/striconveha (Depends-on): Add stdbool, allocsa, strdup,
+	c-strcase.
+
 2007-01-23  Bruno Haible  <bruno@clisp.org>
 
 	* lib/striconveha.c (mem_iconveha): Fix endless recursion. Try all
--- a/lib/striconveha.c
+++ b/lib/striconveha.c
@@ -25,6 +25,10 @@
 #include <stdlib.h>
 #include <string.h>
 
+#include "allocsa.h"
+#include "strdup.h"
+#include "c-strcase.h"
+
 #define SIZEOF(a) (sizeof(a)/sizeof(a[0]))
 
 
@@ -143,12 +147,13 @@
     }
 }
 
-int
-mem_iconveha (const char *src, size_t srclen,
-	      const char *from_codeset, const char *to_codeset,
-	      enum iconv_ilseq_handler handler,
-	      size_t *offsets,
-	      char **resultp, size_t *lengthp)
+/* Like mem_iconveha, except no handling of transliteration.  */
+static int
+mem_iconveha_notranslit (const char *src, size_t srclen,
+			 const char *from_codeset, const char *to_codeset,
+			 enum iconv_ilseq_handler handler,
+			 size_t *offsets,
+			 char **resultp, size_t *lengthp)
 {
   int retval = mem_iconveh (src, srclen, from_codeset, to_codeset, handler,
 			    offsets, resultp, lengthp);
@@ -171,10 +176,10 @@
 		encodings = alias->encodings_to_try;
 		do
 		  {
-		    retval = mem_iconveha (src, srclen,
-					   *encodings, to_codeset,
-					   iconveh_error, offsets,
-					   resultp, lengthp);
+		    retval = mem_iconveha_notranslit (src, srclen,
+						      *encodings, to_codeset,
+						      iconveh_error, offsets,
+						      resultp, lengthp);
 		    if (!(retval < 0 && errno == EILSEQ))
 		      return retval;
 		    encodings++;
@@ -185,10 +190,10 @@
 	    encodings = alias->encodings_to_try;
 	    do
 	      {
-		retval = mem_iconveha (src, srclen,
-				       *encodings, to_codeset,
-				       handler, offsets,
-				       resultp, lengthp);
+		retval = mem_iconveha_notranslit (src, srclen,
+						  *encodings, to_codeset,
+						  handler, offsets,
+						  resultp, lengthp);
 		if (!(retval < 0 && errno == EILSEQ))
 		  return retval;
 		encodings++;
@@ -205,10 +210,52 @@
     }
 }
 
-char *
-str_iconveha (const char *src,
+int
+mem_iconveha (const char *src, size_t srclen,
 	      const char *from_codeset, const char *to_codeset,
-	      enum iconv_ilseq_handler handler)
+	      bool transliterate,
+	      enum iconv_ilseq_handler handler,
+	      size_t *offsets,
+	      char **resultp, size_t *lengthp)
+{
+  if (srclen == 0)
+    {
+      /* Nothing to convert.  */
+      *lengthp = 0;
+      return 0;
+    }
+
+  /* When using GNU libc >= 2.2 or GNU libiconv >= 1.5,
+     we want to use transliteration.  */
+#if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 || _LIBICONV_VERSION >= 0x0105
+  if (transliterate)
+    {
+      int retval;
+      size_t len = strlen (to_codeset);
+      char *to_codeset_suffixed = (char *) allocsa (len + 10 + 1);
+      memcpy (to_codeset_suffixed, to_codeset, len);
+      memcpy (to_codeset_suffixed + len, "//TRANSLIT", 10 + 1);
+
+      retval = mem_iconveha_notranslit (src, srclen,
+					from_codeset, to_codeset_suffixed,
+					handler, offsets, resultp, lengthp);
+
+      freesa (to_codeset_suffixed);
+
+      return retval;
+    }
+  else
+#endif
+    return mem_iconveha_notranslit (src, srclen,
+				    from_codeset, to_codeset,
+				    handler, offsets, resultp, lengthp);
+}
+
+/* Like str_iconveha, except no handling of transliteration.  */
+static char *
+str_iconveha_notranslit (const char *src,
+			 const char *from_codeset, const char *to_codeset,
+			 enum iconv_ilseq_handler handler)
 {
   char *result = str_iconveh (src, from_codeset, to_codeset, handler);
 
@@ -231,9 +278,9 @@
 		encodings = alias->encodings_to_try;
 		do
 		  {
-		    result = str_iconveha (src,
-					   *encodings, to_codeset,
-					   iconveh_error);
+		    result = str_iconveha_notranslit (src,
+						      *encodings, to_codeset,
+						      iconveh_error);
 		    if (!(result == NULL && errno == EILSEQ))
 		      return result;
 		    encodings++;
@@ -244,9 +291,9 @@
 	    encodings = alias->encodings_to_try;
 	    do
 	      {
-		result = str_iconveha (src,
-				       *encodings, to_codeset,
-				       handler);
+		result = str_iconveha_notranslit (src,
+						  *encodings, to_codeset,
+						  handler);
 		if (!(result == NULL && errno == EILSEQ))
 		  return result;
 		encodings++;
@@ -262,3 +309,41 @@
       return NULL;
     }
 }
+
+char *
+str_iconveha (const char *src,
+	      const char *from_codeset, const char *to_codeset,
+	      bool transliterate,
+	      enum iconv_ilseq_handler handler)
+{
+  if (*src == '\0' || c_strcasecmp (from_codeset, to_codeset) == 0)
+    {
+      char *result = strdup (src);
+
+      if (result == NULL)
+	errno = ENOMEM;
+      return result;
+    }
+
+  /* When using GNU libc >= 2.2 or GNU libiconv >= 1.5,
+     we want to use transliteration.  */
+#if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 || _LIBICONV_VERSION >= 0x0105
+  if (transliterate)
+    {
+      char *result;
+      size_t len = strlen (to_codeset);
+      char *to_codeset_suffixed = (char *) allocsa (len + 10 + 1);
+      memcpy (to_codeset_suffixed, to_codeset, len);
+      memcpy (to_codeset_suffixed + len, "//TRANSLIT", 10 + 1);
+
+      result = str_iconveha_notranslit (src, from_codeset, to_codeset_suffixed,
+					handler);
+
+      freesa (to_codeset_suffixed);
+
+      return result;
+    }
+  else
+#endif
+    return str_iconveha_notranslit (src, from_codeset, to_codeset, handler);
+}
--- a/lib/striconveha.h
+++ b/lib/striconveha.h
@@ -19,6 +19,8 @@
 #ifndef _STRICONVEHA_H
 #define _STRICONVEHA_H
 
+#include <stdbool.h>
+
 #include "striconveh.h"
 
 
@@ -30,6 +32,9 @@
 /* Convert an entire string from one encoding to another, using iconv.
    The original string is at [SRC,...,SRC+SRCLEN-1].
    The "from" encoding can also be a name defined for autodetection.
+   If TRANSLITERATE is true, transliteration will be attempted to avoid
+   conversion errors, for iconv implementations that support this.  Usually
+   you'll choose TRANSLITERATE = true if HANDLER != iconveh_error.
    If OFFSETS is not NULL, it should point to an array of SRCLEN integers; this
    array is filled with offsets into the result, i.e. the character starting
    at SRC[i] corresponds to the character starting at (*RESULTP)[OFFSETS[i]],
@@ -44,6 +49,7 @@
 extern int
        mem_iconveha (const char *src, size_t srclen,
 		     const char *from_codeset, const char *to_codeset,
+		     bool transliterate,
 		     enum iconv_ilseq_handler handler,
 		     size_t *offsets,
 		     char **resultp, size_t *lengthp);
@@ -53,12 +59,16 @@
    Both the "from" and the "to" encoding must use a single NUL byte at the
    end of the string (i.e. not UCS-2, UCS-4, UTF-16, UTF-32).
    The "from" encoding can also be a name defined for autodetection.
+   If TRANSLITERATE is true, transliteration will be attempted to avoid
+   conversion errors, for iconv implementations that support this.  Usually
+   you'll choose TRANSLITERATE = true if HANDLER != iconveh_error.
    Allocate a malloced memory block for the result.
    Return value: the freshly allocated resulting NUL-terminated string if
    successful, otherwise NULL and errno set.  */
 extern char *
        str_iconveha (const char *src,
 		     const char *from_codeset, const char *to_codeset,
+		     bool transliterate,
 		     enum iconv_ilseq_handler handler);
 
 
--- a/modules/striconveha
+++ b/modules/striconveha
@@ -7,7 +7,11 @@
 lib/striconveha.c
 
 Depends-on:
+stdbool
 striconveh
+allocsa
+strdup
+c-strcase
 
 configure.ac: