changeset 8576:e2facecc862e

Distinguish invalid and incomplete UTF-8 input, and set errno accordingly (EILSEQ for invalid input, EINVAL for incomplete input).
author Bruno Haible <bruno@clisp.org>
date Wed, 28 Mar 2007 21:50:51 +0000
parents 3c161e212eeb
children 380675734f5e
files ChangeLog lib/striconveh.c modules/striconveh
diffstat 3 files changed, 16 insertions(+), 9 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+2007-03-28  Bruno Haible  <bruno@clisp.org>
+
+	* lib/striconveh.c (mem_cd_iconveh_internal): Use u8_mbtoucr instead
+	of u8_mbtouc in order to distinguish invalid and incomplete UTF-8
+	input.
+	* modules/striconveh (Depends-on): Add unistr/u8-mbtoucr. Replace
+	utf8-ucs4 with unistr/u8-mbtouc. Replace ucs4-utf8 with
+	unistr/u8-uctomb.
+
 2007-03-28  Bruno Haible  <bruno@clisp.org>
 
 	* modules/unistr/u8-mbtoucr: New file.
--- a/lib/striconveh.c
+++ b/lib/striconveh.c
@@ -479,16 +479,13 @@
 		    int n;
 		    int m;
 
-		    n = u8_mbtouc (&uc, (const uint8_t *) in1ptr, in1size);
-		    if (uc == 0xfffd
-			&& !(n >= 3
-			     && (uint8_t)in1ptr[0] == 0xEF
-			     && (uint8_t)in1ptr[1] == 0xBF
-			     && (uint8_t)in1ptr[2] == 0xBD))
+		    n = u8_mbtoucr (&uc, (const uint8_t *) in1ptr, in1size);
+		    if (n < 0)
 		      {
+			errno = (n == -2 ? EINVAL : EILSEQ);
+			n = u8_mbtouc (&uc, (const uint8_t *) in1ptr, in1size);
 			in1ptr += n;
 			in1size -= n;
-			errno = EILSEQ;
 			res1 = (size_t)(-1);
 			incremented1 = true;
 			break;
--- a/modules/striconveh
+++ b/modules/striconveh
@@ -8,10 +8,11 @@
 Depends-on:
 stdbool
 iconv
-utf8-ucs4
-ucs4-utf8
 unistr/u8-prev
 unistr/u8-mbtouc-unsafe
+unistr/u8-mbtouc
+unistr/u8-mbtoucr
+unistr/u8-uctomb
 strdup
 c-strcase
 c-strcaseeq