changeset 11952:0ae7f1f4ed08

Fix bug occurring when converting invalid input.
author Bruno Haible <bruno@clisp.org>
date Sat, 05 Sep 2009 16:49:46 +0200
parents 5bb7bec3987e
children 5c391da16504
files ChangeLog lib/striconveh.c tests/test-striconveh.c
diffstat 3 files changed, 177 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+2009-09-05  Bruno Haible  <bruno@clisp.org>
+
+	Fix conversion behaviour when the input is invalid.
+	* lib/striconveh.c (mem_cd_iconveh_internal): Fix storing of question
+	mark occurring in first pass of indirect conversion.
+	* tests/test-striconveh.c (main): Test conversion of invalid ASCII
+	input.
+	Found by clang's static analyzer.
+
 2009-09-05  Bruno Haible  <bruno@clisp.org>
 
 	* tests/test-striconveh.c (main): Test indirect conversion on platforms
--- a/lib/striconveh.c
+++ b/lib/striconveh.c
@@ -696,7 +696,8 @@
 		in1ptr++;
 		in1size--;
 	      }
-	    utf8buf[utf8len++] = '?';
+	    *out1ptr++ = '?';
+	    res1 = 0;
 	  }
 	errno1 = errno;
 	utf8len = out1ptr - utf8buf;
--- a/tests/test-striconveh.c
+++ b/tests/test-striconveh.c
@@ -66,24 +66,38 @@
 #if HAVE_ICONV
   /* Assume that iconv() supports at least the encodings ASCII, ISO-8859-1,
      ISO-8859-2, and UTF-8.  */
+  iconv_t cd_ascii_to_88591 = iconv_open ("ISO-8859-1", "ASCII");
   iconv_t cd_88591_to_88592 = iconv_open ("ISO-8859-2", "ISO-8859-1");
   iconv_t cd_88592_to_88591 = iconv_open ("ISO-8859-1", "ISO-8859-2");
+  iconv_t cd_ascii_to_utf8 = iconv_open ("UTF-8", "ASCII");
   iconv_t cd_88591_to_utf8 = iconv_open ("UTF-8", "ISO-8859-1");
   iconv_t cd_utf8_to_88591 = iconv_open ("ISO-8859-1", "UTF-8");
   iconv_t cd_88592_to_utf8 = iconv_open ("UTF-8", "ISO-8859-2");
   iconv_t cd_utf8_to_88592 = iconv_open ("ISO-8859-2", "UTF-8");
   iconv_t cd_utf7_to_utf8 = iconv_open ("UTF-8", "UTF-7");
+  iconveh_t cdeh_ascii_to_88591;
+  iconveh_t cdeh_ascii_to_88591_indirectly;
   iconveh_t cdeh_88592_to_88591;
   iconveh_t cdeh_88592_to_88591_indirectly;
+  iconveh_t cdeh_ascii_to_utf8;
   iconveh_t cdeh_88591_to_utf8;
   iconveh_t cdeh_utf8_to_88591;
   iconveh_t cdeh_utf7_to_utf8;
 
+  ASSERT (cd_ascii_to_utf8 != (iconv_t)(-1));
   ASSERT (cd_88591_to_utf8 != (iconv_t)(-1));
   ASSERT (cd_utf8_to_88591 != (iconv_t)(-1));
   ASSERT (cd_88592_to_utf8 != (iconv_t)(-1));
   ASSERT (cd_utf8_to_88592 != (iconv_t)(-1));
 
+  cdeh_ascii_to_88591.cd = cd_ascii_to_88591;
+  cdeh_ascii_to_88591.cd1 = cd_ascii_to_utf8;
+  cdeh_ascii_to_88591.cd2 = cd_utf8_to_88591;
+
+  cdeh_ascii_to_88591_indirectly.cd = (iconv_t)(-1);
+  cdeh_ascii_to_88591_indirectly.cd1 = cd_ascii_to_utf8;
+  cdeh_ascii_to_88591_indirectly.cd2 = cd_utf8_to_88591;
+
   cdeh_88592_to_88591.cd = cd_88592_to_88591;
   cdeh_88592_to_88591.cd1 = cd_88592_to_utf8;
   cdeh_88592_to_88591.cd2 = cd_utf8_to_88591;
@@ -92,6 +106,10 @@
   cdeh_88592_to_88591_indirectly.cd1 = cd_88592_to_utf8;
   cdeh_88592_to_88591_indirectly.cd2 = cd_utf8_to_88591;
 
+  cdeh_ascii_to_utf8.cd = cd_ascii_to_utf8;
+  cdeh_ascii_to_utf8.cd1 = cd_ascii_to_utf8;
+  cdeh_ascii_to_utf8.cd2 = (iconv_t)(-1);
+
   cdeh_88591_to_utf8.cd = cd_88591_to_utf8;
   cdeh_88591_to_utf8.cd1 = cd_88591_to_utf8;
   cdeh_88591_to_utf8.cd2 = (iconv_t)(-1);
@@ -141,6 +159,55 @@
 	}
     }
 
+  /* Test conversion from ASCII to ISO-8859-1 with invalid input (EILSEQ).  */
+  for (indirect = 0; indirect <= 1; indirect++)
+    {
+      for (h = 0; h < SIZEOF (handlers); h++)
+	{
+	  enum iconv_ilseq_handler handler = handlers[h];
+	  static const char input[] = "Rafa\263 Maszkowski"; /* Rafa? Maszkowski */
+	  for (o = 0; o < 2; o++)
+	    {
+	      size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
+	      char *result = NULL;
+	      size_t length = 0;
+	      int retval = mem_cd_iconveh (input, strlen (input),
+					   (indirect
+					    ? &cdeh_ascii_to_88591_indirectly
+					    : &cdeh_ascii_to_88591),
+					   handler,
+					   offsets,
+					   &result, &length);
+	      switch (handler)
+		{
+		case iconveh_error:
+		  ASSERT (retval == -1 && errno == EILSEQ);
+		  ASSERT (result == NULL);
+		  if (o)
+		    free (offsets);
+		  break;
+		case iconveh_question_mark:
+		case iconveh_escape_sequence:
+		  {
+		    static const char expected[] = "Rafa? Maszkowski";
+		    ASSERT (retval == 0);
+		    ASSERT (length == strlen (expected));
+		    ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+		    if (o)
+		      {
+			for (i = 0; i < 16; i++)
+			  ASSERT (offsets[i] == i);
+			ASSERT (offsets[16] == MAGIC);
+			free (offsets);
+		      }
+		    free (result);
+		  }
+		  break;
+		}
+	    }
+	}
+    }
+
   /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ.  */
   for (indirect = 0; indirect <= 1; indirect++)
     {
@@ -276,6 +343,50 @@
 	}
     }
 
+  /* Test conversion from ASCII to UTF-8 with invalid input (EILSEQ).  */
+  for (h = 0; h < SIZEOF (handlers); h++)
+    {
+      enum iconv_ilseq_handler handler = handlers[h];
+      static const char input[] = "Rafa\263 Maszkowski"; /* Rafa? Maszkowski */
+      for (o = 0; o < 2; o++)
+	{
+	  size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
+	  char *result = NULL;
+	  size_t length = 0;
+	  int retval = mem_cd_iconveh (input, strlen (input),
+				       &cdeh_ascii_to_utf8,
+				       handler,
+				       offsets,
+				       &result, &length);
+	  switch (handler)
+	    {
+	    case iconveh_error:
+	      ASSERT (retval == -1 && errno == EILSEQ);
+	      ASSERT (result == NULL);
+	      if (o)
+		free (offsets);
+	      break;
+	    case iconveh_question_mark:
+	    case iconveh_escape_sequence:
+	      {
+		static const char expected[] = "Rafa? Maszkowski";
+		ASSERT (retval == 0);
+		ASSERT (length == strlen (expected));
+		ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+		if (o)
+		  {
+		    for (i = 0; i < 16; i++)
+		      ASSERT (offsets[i] == i);
+		    ASSERT (offsets[16] == MAGIC);
+		    free (offsets);
+		  }
+		free (result);
+	      }
+	      break;
+	    }
+	}
+    }
+
   /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ.  */
   for (h = 0; h < SIZEOF (handlers); h++)
     {
@@ -468,6 +579,36 @@
 	}
     }
 
+  /* Test conversion from ASCII to ISO-8859-1 with invalid input (EILSEQ).  */
+  for (indirect = 0; indirect <= 1; indirect++)
+    {
+      for (h = 0; h < SIZEOF (handlers); h++)
+	{
+	  enum iconv_ilseq_handler handler = handlers[h];
+	  static const char input[] = "Rafa\263 Maszkowski"; /* Rafa? Maszkowski */
+	  char *result = str_cd_iconveh (input,
+					 (indirect
+					  ? &cdeh_ascii_to_88591_indirectly
+					  : &cdeh_ascii_to_88591),
+					 handler);
+	  switch (handler)
+	    {
+	    case iconveh_error:
+	      ASSERT (result == NULL && errno == EILSEQ);
+	      break;
+	    case iconveh_question_mark:
+	    case iconveh_escape_sequence:
+	      {
+		static const char expected[] = "Rafa? Maszkowski";
+		ASSERT (result != NULL);
+		ASSERT (strcmp (result, expected) == 0);
+		free (result);
+	      }
+	      break;
+	    }
+	}
+    }
+
   /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ.  */
   for (indirect = 0; indirect <= 1; indirect++)
     {
@@ -533,6 +674,31 @@
       free (result);
     }
 
+  /* Test conversion from ASCII to UTF-8 with invalid input (EILSEQ).  */
+  for (h = 0; h < SIZEOF (handlers); h++)
+    {
+      enum iconv_ilseq_handler handler = handlers[h];
+      static const char input[] = "Rafa\263 Maszkowski"; /* Rafa? Maszkowski */
+      char *result = str_cd_iconveh (input,
+				     &cdeh_ascii_to_utf8,
+				     handler);
+      switch (handler)
+	{
+	case iconveh_error:
+	  ASSERT (result == NULL && errno == EILSEQ);
+	  break;
+	case iconveh_question_mark:
+	case iconveh_escape_sequence:
+	  {
+	    static const char expected[] = "Rafa? Maszkowski";
+	    ASSERT (result != NULL);
+	    ASSERT (strcmp (result, expected) == 0);
+	    free (result);
+	  }
+	  break;
+	}
+    }
+
   /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ.  */
   for (h = 0; h < SIZEOF (handlers); h++)
     {