changeset 7927:7ebab05df4f6

Add optional offsets argument to conversion routines.
author Bruno Haible <bruno@clisp.org>
date Tue, 23 Jan 2007 01:09:41 +0000
parents 396abf8878a6
children e00d2d470e73
files ChangeLog lib/striconveh.c lib/striconveh.h lib/striconveha.c lib/striconveha.h tests/test-striconveh.c
diffstat 6 files changed, 583 insertions(+), 225 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,17 @@
+2007-01-22  Bruno Haible  <bruno@clisp.org>
+
+	* lib/striconveh.h (mem_cd_iconveh, mem_iconveh): Add 'offsets'
+	argument.
+	* lib/striconveh.c (iconv_carefully_1): New function.
+	(mem_cd_iconveh_internal, mem_cd_iconveh, mem_iconveh): Add 'offsets'
+	argument.
+	(str_cd_iconveh): Update.
+	* lib/striconveha.h (mem_iconveha): Add 'offsets' argument.
+	* lib/striconveha.c (mem_iconveha): Add 'offsets' argument.
+	* tests/test-striconveh.c (MAGIC): New macro.
+	(new_offsets): New function.
+	(main): Test call with and without offsets.
+
 2007-01-22  Bruno Haible  <bruno@clisp.org>
 
 	* modules/sys_stat (Makefile.am): Use @MKDIR_P@ instead of $(MKDIR_P).
--- a/lib/striconveh.c
+++ b/lib/striconveh.c
@@ -119,11 +119,68 @@
       iconv (cd, (ICONV_CONST char **) (inbuf), inbytesleft, outbuf, outbytesleft))
 # endif
 
+/* iconv_carefully_1 is like iconv_carefully, except that it stops after
+   converting one character.  It offers iconv() 1, 2, ... input bytes until the result is no longer EINVAL.  */
+static size_t
+iconv_carefully_1 (iconv_t cd,
+		   const char **inbuf, size_t *inbytesleft,
+		   char **outbuf, size_t *outbytesleft,
+		   bool *incremented)
+{
+  const char *inptr = *inbuf;
+  const char *inptr_end = inptr + *inbytesleft;
+  char *outptr = *outbuf;
+  size_t outsize = *outbytesleft;
+  const char *inptr_before = inptr;
+  size_t res = (size_t)(-1); /* stays (size_t)(-1) if the loop below never calls iconv() */
+  size_t insize;
+
+  for (insize = 1; inptr + insize <= inptr_end; insize++) /* widen the input window one byte at a time */
+    {
+      res = iconv (cd,
+		   (ICONV_CONST char **) &inptr, &insize,
+		   &outptr, &outsize);
+      if (!(res == (size_t)(-1) && errno == EINVAL)) /* anything other than an EINVAL failure ends the search */
+	break;
+      /* We expect that no input bytes have been consumed so far.  */
+      if (inptr != inptr_before)
+	abort ();
+    }
+
+  *inbuf = inptr; /* the input position is reported back unconditionally, even on failure */
+  *inbytesleft = inptr_end - inptr;
+# if !defined _LIBICONV_VERSION && !defined __GLIBC__
+  /* Irix iconv() inserts a NUL byte if it cannot convert.
+     NetBSD iconv() inserts a question mark if it cannot convert.
+     Only GNU libiconv and GNU libc are known to prefer to fail rather
+     than doing a lossy conversion.  */
+  if (res != (size_t)(-1) && res > 0) /* a positive result is turned into an EILSEQ failure here */
+    {
+      /* iconv() has already incremented INPTR.  We cannot go back to a
+	 previous INPTR, otherwise the state inside CD would become invalid,
+	 if FROM_CODESET is a stateful encoding.  So, tell the caller that
+	 *INBUF has already been incremented.  */
+      *incremented = (inptr > inptr_before);
+      errno = EILSEQ;
+      return (size_t)(-1); /* *OUTBUF and *OUTBYTESLEFT are left untouched on this path */
+    }
+# endif
+
+  if (res != (size_t)(-1)) /* commit the output position only when iconv() succeeded */
+    {
+      *outbuf = outptr;
+      *outbytesleft = outsize;
+    }
+  *incremented = false;
+  return res;
+}
+
 static int
 mem_cd_iconveh_internal (const char *src, size_t srclen,
 			 iconv_t cd, iconv_t cd1, iconv_t cd2,
 			 enum iconv_ilseq_handler handler,
 			 size_t extra_alloc,
+			 size_t *offsets,
 			 char **resultp, size_t *lengthp)
 {
   /* When a conversion error occurs, we cannot start using CD1 and CD2 at
@@ -141,6 +198,7 @@
   char *result;
   size_t allocated;
   size_t length;
+  size_t last_length = (size_t)(-1); /* only needed if offsets != NULL */
 
   if (*lengthp >= sizeof (tmpbuf))
     {
@@ -153,6 +211,16 @@
       allocated = sizeof (tmpbuf);
     }
   result = initial_result;
+
+  if (offsets != NULL)
+    {
+      size_t i;
+
+      for (i = 0; i < srclen; i++)
+	offsets[i] = (size_t)(-1);
+
+      last_length = (size_t)(-1);
+    }
   length = 0;
 
   /* First, try a direct conversion, and see whether a conversion error
@@ -176,16 +244,29 @@
 	size_t res;
 	bool grow;
 
-	/* Use iconv_carefully instead of iconv here, because:
-	   - If TO_CODESET is UTF-8, we can do the error handling in this loop,
-	     no need for a second loop,
-	   - With iconv() implementations other than GNU libiconv and GNU libc,
-	     if we use iconv() in a big swoop, checking for an E2BIG return,
-	     we lose the number of irreversible conversions.  */
-	res = iconv_carefully (cd,
-			       &inptr, &insize,
-			       &outptr, &outsize,
-			       &incremented);
+	if (offsets != NULL)
+	  {
+	    if (length != last_length) /* ensure that offset[] be increasing */
+	      {
+		offsets[inptr - src] = length;
+		last_length = length;
+	      }
+	    res = iconv_carefully_1 (cd,
+				     &inptr, &insize,
+				     &outptr, &outsize,
+				     &incremented);
+	  }
+	else
+	  /* Use iconv_carefully instead of iconv here, because:
+	     - If TO_CODESET is UTF-8, we can do the error handling in this
+	       loop, no need for a second loop,
+	     - With iconv() implementations other than GNU libiconv and GNU
+	       libc, if we use iconv() in a big swoop, checking for an E2BIG
+	       return, we lose the number of irreversible conversions.  */
+	  res = iconv_carefully (cd,
+				 &inptr, &insize,
+				 &outptr, &outsize,
+				 &incremented);
 
 	length = outptr - result;
 	grow = (length + extra_alloc > allocated / 2);
@@ -332,6 +413,15 @@
   /* The direct conversion failed, handler != iconveh_error,
      and cd2 != (iconv_t)(-1).
      Use a conversion through UTF-8.  */
+  if (offsets != NULL)
+    {
+      size_t i;
+
+      for (i = 0; i < srclen; i++)
+	offsets[i] = (size_t)(-1);
+
+      last_length = (size_t)(-1);
+    }
   length = 0;
   {
 # define utf8bufsize 4096 /* may also be smaller or larger than tmpbufsize */
@@ -362,11 +452,25 @@
 	/* Conversion step 1: from FROM_CODESET to UTF-8.  */
 	if (in1size > 0)
 	  {
+	    if (offsets != NULL
+		&& length != last_length) /* ensure that offset[] be increasing */
+	      {
+		offsets[in1ptr - src] = length;
+		last_length = length;
+	      }
 	    if (cd1 != (iconv_t)(-1))
-	      res1 = iconv_carefully (cd1,
-				      (ICONV_CONST char **) &in1ptr, &in1size,
-				      &out1ptr, &out1size,
-				      &incremented1);
+	      {
+		if (offsets != NULL)
+		  res1 = iconv_carefully_1 (cd1,
+					    &in1ptr, &in1size,
+					    &out1ptr, &out1size,
+					    &incremented1);
+		else
+		  res1 = iconv_carefully (cd1,
+					  &in1ptr, &in1size,
+					  &out1ptr, &out1size,
+					  &incremented1);
+	      }
 	    else
 	      {
 		/* FROM_CODESET is UTF-8.  */
@@ -418,7 +522,7 @@
 		    out1ptr += m;
 		    out1size -= m;
 		  }
-		while (in1size > 0);
+		while (offsets == NULL && in1size > 0);
 	      }
 	  }
 	else if (do_final_flush1)
@@ -469,7 +573,8 @@
 	errno1 = errno;
 	utf8len = out1ptr - utf8buf;
 
-	if (in1size == 0
+	if (offsets != NULL
+	    || in1size == 0
 	    || utf8len > utf8bufsize / 2
 	    || (res1 == (size_t)(-1) && errno1 == E2BIG))
 	  {
@@ -726,10 +831,11 @@
 mem_cd_iconveh (const char *src, size_t srclen,
 		iconv_t cd, iconv_t cd1, iconv_t cd2,
 		enum iconv_ilseq_handler handler,
+		size_t *offsets,
 		char **resultp, size_t *lengthp)
 {
   return mem_cd_iconveh_internal (src, srclen, cd, cd1, cd2, handler, 0,
-				  resultp, lengthp);
+				  offsets, resultp, lengthp);
 }
 
 char *
@@ -744,7 +850,7 @@
   char *result = NULL;
   size_t length = 0;
   int retval = mem_cd_iconveh_internal (src, strlen (src),
-					cd, cd1, cd2, handler, 1,
+					cd, cd1, cd2, handler, 1, NULL,
 					&result, &length);
 
   if (retval < 0)
@@ -770,6 +876,7 @@
 mem_iconveh (const char *src, size_t srclen,
 	     const char *from_codeset, const char *to_codeset,
 	     enum iconv_ilseq_handler handler,
+	     size_t *offsets,
 	     char **resultp, size_t *lengthp)
 {
   if (srclen == 0)
@@ -778,7 +885,7 @@
       *lengthp = 0;
       return 0;
     }
-  else if (c_strcasecmp (from_codeset, to_codeset) == 0)
+  else if (offsets == NULL && c_strcasecmp (from_codeset, to_codeset) == 0)
     {
       char *result;
 
@@ -854,8 +961,8 @@
 
       result = *resultp;
       length = *lengthp;
-      retval =
-	mem_cd_iconveh (src, srclen, cd, cd1, cd2, handler, &result, &length);
+      retval = mem_cd_iconveh (src, srclen, cd, cd1, cd2, handler, offsets,
+			       &result, &length);
 
       if (retval < 0)
 	{
--- a/lib/striconveh.h
+++ b/lib/striconveh.h
@@ -47,6 +47,10 @@
    (iconv_t)(-1) if FROM_CODESET is UTF-8).
    CD2 is the conversion descriptor from UTF-8 to TO_CODESET (or (iconv_t)(-1)
    if TO_CODESET is UTF-8).
+   If OFFSETS is not NULL, it should point to an array of SRCLEN integers; this
+   array is filled with offsets into the result, i.e. the character starting
+   at SRC[i] corresponds to the character starting at (*RESULTP)[OFFSETS[i]],
+   and other offsets are set to (size_t)(-1).
    *RESULTP and *LENGTH should initially be a scratch buffer and its size,
    or *RESULTP can initially be NULL.
    May erase the contents of the memory at *RESULTP.
@@ -58,6 +62,7 @@
        mem_cd_iconveh (const char *src, size_t srclen,
 		       iconv_t cd, iconv_t cd1, iconv_t cd2,
 		       enum iconv_ilseq_handler handler,
+		       size_t *offsets,
 		       char **resultp, size_t *lengthp);
 
 /* Convert an entire string from one encoding to another, using iconv.
@@ -81,6 +86,10 @@
 
 /* Convert an entire string from one encoding to another, using iconv.
    The original string is at [SRC,...,SRC+SRCLEN-1].
+   If OFFSETS is not NULL, it should point to an array of SRCLEN integers; this
+   array is filled with offsets into the result, i.e. the character starting
+   at SRC[i] corresponds to the character starting at (*RESULTP)[OFFSETS[i]],
+   and other offsets are set to (size_t)(-1).
    *RESULTP and *LENGTH should initially be a scratch buffer and its size,
    or *RESULTP can initially be NULL.
    May erase the contents of the memory at *RESULTP.
@@ -92,6 +101,7 @@
        mem_iconveh (const char *src, size_t srclen,
 		    const char *from_codeset, const char *to_codeset,
 		    enum iconv_ilseq_handler handler,
+		    size_t *offsets,
 		    char **resultp, size_t *lengthp);
 
 /* Convert an entire string from one encoding to another, using iconv.
--- a/lib/striconveha.c
+++ b/lib/striconveha.c
@@ -147,10 +147,11 @@
 mem_iconveha (const char *src, size_t srclen,
 	      const char *from_codeset, const char *to_codeset,
 	      enum iconv_ilseq_handler handler,
+	      size_t *offsets,
 	      char **resultp, size_t *lengthp)
 {
   int retval = mem_iconveh (src, srclen, from_codeset, to_codeset, handler,
-			    resultp, lengthp);
+			    offsets, resultp, lengthp);
   if (retval >= 0 || errno != EINVAL)
     return retval;
   else
@@ -168,7 +169,7 @@
 	      {
 		retval = mem_iconveha (src, srclen,
 				       from_codeset, to_codeset, handler,
-				       resultp, lengthp);
+				       offsets, resultp, lengthp);
 		if (!(retval < 0 && errno == EILSEQ))
 		  return retval;
 		encodings++;
--- a/lib/striconveha.h
+++ b/lib/striconveha.h
@@ -30,6 +30,10 @@
 /* Convert an entire string from one encoding to another, using iconv.
    The original string is at [SRC,...,SRC+SRCLEN-1].
    The "from" encoding can also be a name defined for autodetection.
+   If OFFSETS is not NULL, it should point to an array of SRCLEN integers; this
+   array is filled with offsets into the result, i.e. the character starting
+   at SRC[i] corresponds to the character starting at (*RESULTP)[OFFSETS[i]],
+   and other offsets are set to (size_t)(-1).
    *RESULTP and *LENGTH should initially be a scratch buffer and its size,
    or *RESULTP can initially be NULL.
    May erase the contents of the memory at *RESULTP.
@@ -41,6 +45,7 @@
        mem_iconveha (const char *src, size_t srclen,
 		     const char *from_codeset, const char *to_codeset,
 		     enum iconv_ilseq_handler handler,
+		     size_t *offsets,
 		     char **resultp, size_t *lengthp);
 
 /* Convert an entire string from one encoding to another, using iconv.
--- a/tests/test-striconveh.c
+++ b/tests/test-striconveh.c
@@ -34,12 +34,25 @@
 #define SIZEOF(array) (sizeof (array) / sizeof (array[0]))
 #define ASSERT(expr) if (!(expr)) abort ();
 
+/* Magic number for detecting bounds violations.  */
+#define MAGIC 0x1983EFF1
+
+static size_t *
+new_offsets (size_t n) /* returns a malloc()ed array of N slots plus one trailing sentinel slot */
+{
+  size_t *offsets = (size_t *) malloc ((n + 1) * sizeof (size_t)); /* NOTE(review): result is not checked for NULL */
+  offsets[n] = MAGIC; /* sentinel just past the N usable slots; slots 0..N-1 are left uninitialized */
+  return offsets;
+}
+
 int
 main ()
 {
   static enum iconv_ilseq_handler handlers[] =
     { iconveh_error, iconveh_question_mark, iconveh_escape_sequence };
   size_t h;
+  size_t o;
+  size_t i;
 
 #if HAVE_ICONV
   /* Assume that iconv() supports at least the encodings ASCII, ISO-8859-1,
@@ -66,17 +79,29 @@
       enum iconv_ilseq_handler handler = handlers[h];
       static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
       static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
-      char *result = NULL;
-      size_t length = 0;
-      int retval = mem_cd_iconveh (input, strlen (input),
-				   cd_88592_to_88591,
-				   cd_88592_to_utf8, cd_utf8_to_88591,
-				   handler,
-				   &result, &length);
-      ASSERT (retval == 0);
-      ASSERT (length == strlen (expected));
-      ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
-      free (result);
+      for (o = 0; o < 2; o++)
+	{
+	  size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
+	  char *result = NULL;
+	  size_t length = 0;
+	  int retval = mem_cd_iconveh (input, strlen (input),
+				       cd_88592_to_88591,
+				       cd_88592_to_utf8, cd_utf8_to_88591,
+				       handler,
+				       offsets,
+				       &result, &length);
+	  ASSERT (retval == 0);
+	  ASSERT (length == strlen (expected));
+	  ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+	  if (o)
+	    {
+	      for (i = 0; i < 37; i++)
+		ASSERT (offsets[i] == i);
+	      ASSERT (offsets[37] == MAGIC);
+	      free (offsets);
+	    }
+	  free (result);
+	}
     }
 
   /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ.  */
@@ -84,37 +109,59 @@
     {
       enum iconv_ilseq_handler handler = handlers[h];
       static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
-      char *result = NULL;
-      size_t length = 0;
-      int retval = mem_cd_iconveh (input, strlen (input),
-				   cd_88592_to_88591,
-				   cd_88592_to_utf8, cd_utf8_to_88591,
-				   handler,
-				   &result, &length);
-      switch (handler)
+      for (o = 0; o < 2; o++)
 	{
-	case iconveh_error:
-	  ASSERT (retval == -1 && errno == EILSEQ);
-	  ASSERT (result == NULL);
-	  break;
-	case iconveh_question_mark:
-	  {
-	    static const char expected[] = "Rafa? Maszkowski";
-	    ASSERT (retval == 0);
-	    ASSERT (length == strlen (expected));
-	    ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
-	    free (result);
-	  }
-	  break;
-	case iconveh_escape_sequence:
-	  {
-	    static const char expected[] = "Rafa\\u0142 Maszkowski";
-	    ASSERT (retval == 0);
-	    ASSERT (length == strlen (expected));
-	    ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
-	    free (result);
-	  }
-	  break;
+	  size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
+	  char *result = NULL;
+	  size_t length = 0;
+	  int retval = mem_cd_iconveh (input, strlen (input),
+				       cd_88592_to_88591,
+				       cd_88592_to_utf8, cd_utf8_to_88591,
+				       handler,
+				       offsets,
+				       &result, &length);
+	  switch (handler)
+	    {
+	    case iconveh_error:
+	      ASSERT (retval == -1 && errno == EILSEQ);
+	      ASSERT (result == NULL);
+	      if (o)
+		free (offsets);
+	      break;
+	    case iconveh_question_mark:
+	      {
+		static const char expected[] = "Rafa? Maszkowski";
+		ASSERT (retval == 0);
+		ASSERT (length == strlen (expected));
+		ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+		if (o)
+		  {
+		    for (i = 0; i < 16; i++)
+		      ASSERT (offsets[i] == i);
+		    ASSERT (offsets[16] == MAGIC);
+		    free (offsets);
+		  }
+		free (result);
+	      }
+	      break;
+	    case iconveh_escape_sequence:
+	      {
+		static const char expected[] = "Rafa\\u0142 Maszkowski";
+		ASSERT (retval == 0);
+		ASSERT (length == strlen (expected));
+		ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+		if (o)
+		  {
+		    for (i = 0; i < 16; i++)
+		      ASSERT (offsets[i] == (i < 5 ? i :
+					     i + 5));
+		    ASSERT (offsets[16] == MAGIC);
+		    free (offsets);
+		  }
+		free (result);
+	      }
+	      break;
+	    }
 	}
     }
 
@@ -124,17 +171,32 @@
       enum iconv_ilseq_handler handler = handlers[h];
       static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
       static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
-      char *result = NULL;
-      size_t length = 0;
-      int retval = mem_cd_iconveh (input, strlen (input),
-				   cd_88591_to_utf8,
-				   cd_88591_to_utf8, (iconv_t)(-1),
-				   handler,
-				   &result, &length);
-      ASSERT (retval == 0);
-      ASSERT (length == strlen (expected));
-      ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
-      free (result);
+      for (o = 0; o < 2; o++)
+	{
+	  size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
+	  char *result = NULL;
+	  size_t length = 0;
+	  int retval = mem_cd_iconveh (input, strlen (input),
+				       cd_88591_to_utf8,
+				       cd_88591_to_utf8, (iconv_t)(-1),
+				       handler,
+				       offsets,
+				       &result, &length);
+	  ASSERT (retval == 0);
+	  ASSERT (length == strlen (expected));
+	  ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+	  if (o)
+	    {
+	      for (i = 0; i < 37; i++)
+		ASSERT (offsets[i] == (i < 1 ? i :
+				       i < 12 ? i + 1 :
+				       i < 18 ? i + 2 :
+				       i + 3));
+	      ASSERT (offsets[37] == MAGIC);
+	      free (offsets);
+	    }
+	  free (result);
+	}
     }
 
   /* Test conversion from UTF-8 to ISO-8859-1 with no errors.  */
@@ -143,17 +205,36 @@
       enum iconv_ilseq_handler handler = handlers[h];
       static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
       static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
-      char *result = NULL;
-      size_t length = 0;
-      int retval = mem_cd_iconveh (input, strlen (input),
-				   cd_utf8_to_88591,
-				   (iconv_t)(-1), cd_utf8_to_88591,
-				   handler,
-				   &result, &length);
-      ASSERT (retval == 0);
-      ASSERT (length == strlen (expected));
-      ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
-      free (result);
+      for (o = 0; o < 2; o++)
+	{
+	  size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
+	  char *result = NULL;
+	  size_t length = 0;
+	  int retval = mem_cd_iconveh (input, strlen (input),
+				       cd_utf8_to_88591,
+				       (iconv_t)(-1), cd_utf8_to_88591,
+				       handler,
+				       offsets,
+				       &result, &length);
+	  ASSERT (retval == 0);
+	  ASSERT (length == strlen (expected));
+	  ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+	  if (o)
+	    {
+	      for (i = 0; i < 41; i++)
+		ASSERT (offsets[i] == (i < 1 ? i :
+				       i == 1 ? (size_t)(-1) :
+				       i < 13 ? i - 1 :
+				       i == 13 ? (size_t)(-1) :
+				       i < 20 ? i - 2 :
+				       i == 20 ? (size_t)(-1) :
+				       i < 40 ? i - 3 :
+				       (size_t)(-1)));
+	      ASSERT (offsets[41] == MAGIC);
+	      free (offsets);
+	    }
+	  free (result);
+	}
     }
 
   /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ.  */
@@ -161,37 +242,62 @@
     {
       enum iconv_ilseq_handler handler = handlers[h];
       static const char input[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */
-      char *result = NULL;
-      size_t length = 0;
-      int retval = mem_cd_iconveh (input, strlen (input),
-				   cd_utf8_to_88591,
-				   (iconv_t)(-1), cd_utf8_to_88591,
-				   handler,
-				   &result, &length);
-      switch (handler)
+      for (o = 0; o < 2; o++)
 	{
-	case iconveh_error:
-	  ASSERT (retval == -1 && errno == EILSEQ);
-	  ASSERT (result == NULL);
-	  break;
-	case iconveh_question_mark:
-	  {
-	    static const char expected[] = "Rafa? Maszkowski";
-	    ASSERT (retval == 0);
-	    ASSERT (length == strlen (expected));
-	    ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
-	    free (result);
-	  }
-	  break;
-	case iconveh_escape_sequence:
-	  {
-	    static const char expected[] = "Rafa\\u0142 Maszkowski";
-	    ASSERT (retval == 0);
-	    ASSERT (length == strlen (expected));
-	    ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
-	    free (result);
-	  }
-	  break;
+	  size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
+	  char *result = NULL;
+	  size_t length = 0;
+	  int retval = mem_cd_iconveh (input, strlen (input),
+				       cd_utf8_to_88591,
+				       (iconv_t)(-1), cd_utf8_to_88591,
+				       handler,
+				       offsets,
+				       &result, &length);
+	  switch (handler)
+	    {
+	    case iconveh_error:
+	      ASSERT (retval == -1 && errno == EILSEQ);
+	      ASSERT (result == NULL);
+	      if (o)
+		free (offsets);
+	      break;
+	    case iconveh_question_mark:
+	      {
+		static const char expected[] = "Rafa? Maszkowski";
+		ASSERT (retval == 0);
+		ASSERT (length == strlen (expected));
+		ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+		if (o)
+		  {
+		    for (i = 0; i < 17; i++)
+		      ASSERT (offsets[i] == (i < 5 ? i :
+					     i == 5 ? (size_t)(-1) :
+					     i - 1));
+		    ASSERT (offsets[17] == MAGIC);
+		    free (offsets);
+		  }
+		free (result);
+	      }
+	      break;
+	    case iconveh_escape_sequence:
+	      {
+		static const char expected[] = "Rafa\\u0142 Maszkowski";
+		ASSERT (retval == 0);
+		ASSERT (length == strlen (expected));
+		ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+		if (o)
+		  {
+		    for (i = 0; i < 17; i++)
+		      ASSERT (offsets[i] == (i < 5 ? i :
+					     i == 5 ? (size_t)(-1) :
+					     i + 4));
+		    ASSERT (offsets[17] == MAGIC);
+		    free (offsets);
+		  }
+		free (result);
+	      }
+	      break;
+	    }
 	}
     }
 
@@ -200,17 +306,28 @@
     {
       enum iconv_ilseq_handler handler = handlers[h];
       static const char input[] = "\342";
-      char *result = NULL;
-      size_t length = 0;
-      int retval = mem_cd_iconveh (input, strlen (input),
-				   cd_utf8_to_88591,
-				   (iconv_t)(-1), cd_utf8_to_88591,
-				   handler,
-				   &result, &length);
-      ASSERT (retval == 0);
-      ASSERT (length == 0);
-      if (result != NULL)
-	free (result);
+      for (o = 0; o < 2; o++)
+	{
+	  size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
+	  char *result = NULL;
+	  size_t length = 0;
+	  int retval = mem_cd_iconveh (input, strlen (input),
+				       cd_utf8_to_88591,
+				       (iconv_t)(-1), cd_utf8_to_88591,
+				       handler,
+				       offsets,
+				       &result, &length);
+	  ASSERT (retval == 0);
+	  ASSERT (length == 0);
+	  if (o)
+	    {
+	      ASSERT (offsets[0] == 0);
+	      ASSERT (offsets[1] == MAGIC);
+	      free (offsets);
+	    }
+	  if (result != NULL)
+	    free (result);
+	}
     }
 
   /* ------------------------ Test str_cd_iconveh() ------------------------ */
@@ -355,16 +472,28 @@
       enum iconv_ilseq_handler handler = handlers[h];
       static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
       static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
-      char *result = NULL;
-      size_t length = 0;
-      int retval = mem_iconveh (input, strlen (input),
-				"ISO-8859-2", "ISO-8859-1",
-				handler,
-				&result, &length);
-      ASSERT (retval == 0);
-      ASSERT (length == strlen (expected));
-      ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
-      free (result);
+      for (o = 0; o < 2; o++)
+	{
+	  size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
+	  char *result = NULL;
+	  size_t length = 0;
+	  int retval = mem_iconveh (input, strlen (input),
+				    "ISO-8859-2", "ISO-8859-1",
+				    handler,
+				    offsets,
+				    &result, &length);
+	  ASSERT (retval == 0);
+	  ASSERT (length == strlen (expected));
+	  ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+	  if (o)
+	    {
+	      for (i = 0; i < 37; i++)
+		ASSERT (offsets[i] == i);
+	      ASSERT (offsets[37] == MAGIC);
+	      free (offsets);
+	    }
+	  free (result);
+	}
     }
 
   /* Test conversion from ISO-8859-2 to ISO-8859-1 with EILSEQ.  */
@@ -372,36 +501,58 @@
     {
       enum iconv_ilseq_handler handler = handlers[h];
       static const char input[] = "Rafa\263 Maszkowski"; /* Rafał Maszkowski */
-      char *result = NULL;
-      size_t length = 0;
-      int retval = mem_iconveh (input, strlen (input),
-				"ISO-8859-2", "ISO-8859-1",
-				handler,
-				&result, &length);
-      switch (handler)
+      for (o = 0; o < 2; o++)
 	{
-	case iconveh_error:
-	  ASSERT (retval == -1 && errno == EILSEQ);
-	  ASSERT (result == NULL);
-	  break;
-	case iconveh_question_mark:
-	  {
-	    static const char expected[] = "Rafa? Maszkowski";
-	    ASSERT (retval == 0);
-	    ASSERT (length == strlen (expected));
-	    ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
-	    free (result);
-	  }
-	  break;
-	case iconveh_escape_sequence:
-	  {
-	    static const char expected[] = "Rafa\\u0142 Maszkowski";
-	    ASSERT (retval == 0);
-	    ASSERT (length == strlen (expected));
-	    ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
-	    free (result);
-	  }
-	  break;
+	  size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
+	  char *result = NULL;
+	  size_t length = 0;
+	  int retval = mem_iconveh (input, strlen (input),
+				    "ISO-8859-2", "ISO-8859-1",
+				    handler,
+				    offsets,
+				    &result, &length);
+	  switch (handler)
+	    {
+	    case iconveh_error:
+	      ASSERT (retval == -1 && errno == EILSEQ);
+	      ASSERT (result == NULL);
+	      if (o)
+		free (offsets);
+	      break;
+	    case iconveh_question_mark:
+	      {
+		static const char expected[] = "Rafa? Maszkowski";
+		ASSERT (retval == 0);
+		ASSERT (length == strlen (expected));
+		ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+		if (o)
+		  {
+		    for (i = 0; i < 16; i++)
+		      ASSERT (offsets[i] == i);
+		    ASSERT (offsets[16] == MAGIC);
+		    free (offsets);
+		  }
+		free (result);
+	      }
+	      break;
+	    case iconveh_escape_sequence:
+	      {
+		static const char expected[] = "Rafa\\u0142 Maszkowski";
+		ASSERT (retval == 0);
+		ASSERT (length == strlen (expected));
+		ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+		if (o)
+		  {
+		    for (i = 0; i < 16; i++)
+		      ASSERT (offsets[i] == (i < 5 ? i :
+					     i + 5));
+		    ASSERT (offsets[16] == MAGIC);
+		    free (offsets);
+		  }
+		free (result);
+	      }
+	      break;
+	    }
 	}
     }
 
@@ -411,16 +562,31 @@
       enum iconv_ilseq_handler handler = handlers[h];
       static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
       static const char expected[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
-      char *result = NULL;
-      size_t length = 0;
-      int retval = mem_iconveh (input, strlen (input),
-				"ISO-8859-1", "UTF-8",
-				handler,
-				&result, &length);
-      ASSERT (retval == 0);
-      ASSERT (length == strlen (expected));
-      ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
-      free (result);
+      for (o = 0; o < 2; o++)
+	{
+	  size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
+	  char *result = NULL;
+	  size_t length = 0;
+	  int retval = mem_iconveh (input, strlen (input),
+				    "ISO-8859-1", "UTF-8",
+				    handler,
+				    offsets,
+				    &result, &length);
+	  ASSERT (retval == 0);
+	  ASSERT (length == strlen (expected));
+	  ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+	  if (o)
+	    {
+	      for (i = 0; i < 37; i++)
+		ASSERT (offsets[i] == (i < 1 ? i :
+				       i < 12 ? i + 1 :
+				       i < 18 ? i + 2 :
+				       i + 3));
+	      ASSERT (offsets[37] == MAGIC);
+	      free (offsets);
+	    }
+	  free (result);
+	}
     }
 
   /* Test conversion from UTF-8 to ISO-8859-1 with no errors.  */
@@ -429,16 +595,35 @@
       enum iconv_ilseq_handler handler = handlers[h];
       static const char input[] = "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
       static const char expected[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337";
-      char *result = NULL;
-      size_t length = 0;
-      int retval = mem_iconveh (input, strlen (input),
-				"UTF-8", "ISO-8859-1",
-				handler,
-				&result, &length);
-      ASSERT (retval == 0);
-      ASSERT (length == strlen (expected));
-      ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
-      free (result);
+      for (o = 0; o < 2; o++)
+	{
+	  size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
+	  char *result = NULL;
+	  size_t length = 0;
+	  int retval = mem_iconveh (input, strlen (input),
+				    "UTF-8", "ISO-8859-1",
+				    handler,
+				    offsets,
+				    &result, &length);
+	  ASSERT (retval == 0);
+	  ASSERT (length == strlen (expected));
+	  ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+	  if (o)
+	    {
+	      for (i = 0; i < 41; i++)
+		ASSERT (offsets[i] == (i < 1 ? i :
+				       i == 1 ? (size_t)(-1) :
+				       i < 13 ? i - 1 :
+				       i == 13 ? (size_t)(-1) :
+				       i < 20 ? i - 2 :
+				       i == 20 ? (size_t)(-1) :
+				       i < 40 ? i - 3 :
+				       (size_t)(-1)));
+	      ASSERT (offsets[41] == MAGIC);
+	      free (offsets);
+	    }
+	  free (result);
+	}
     }
 
   /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ.  */
@@ -446,36 +631,61 @@
     {
       enum iconv_ilseq_handler handler = handlers[h];
       static const char input[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */
-      char *result = NULL;
-      size_t length = 0;
-      int retval = mem_iconveh (input, strlen (input),
-				"UTF-8", "ISO-8859-1",
-				handler,
-				&result, &length);
-      switch (handler)
+      for (o = 0; o < 2; o++)
 	{
-	case iconveh_error:
-	  ASSERT (retval == -1 && errno == EILSEQ);
-	  ASSERT (result == NULL);
-	  break;
-	case iconveh_question_mark:
-	  {
-	    static const char expected[] = "Rafa? Maszkowski";
-	    ASSERT (retval == 0);
-	    ASSERT (length == strlen (expected));
-	    ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
-	    free (result);
-	  }
-	  break;
-	case iconveh_escape_sequence:
-	  {
-	    static const char expected[] = "Rafa\\u0142 Maszkowski";
-	    ASSERT (retval == 0);
-	    ASSERT (length == strlen (expected));
-	    ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
-	    free (result);
-	  }
-	  break;
+	  size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
+	  char *result = NULL;
+	  size_t length = 0;
+	  int retval = mem_iconveh (input, strlen (input),
+				    "UTF-8", "ISO-8859-1",
+				    handler,
+				    offsets,
+				    &result, &length);
+	  switch (handler)
+	    {
+	    case iconveh_error:
+	      ASSERT (retval == -1 && errno == EILSEQ);
+	      ASSERT (result == NULL);
+	      if (o)
+		free (offsets);
+	      break;
+	    case iconveh_question_mark:
+	      {
+		static const char expected[] = "Rafa? Maszkowski";
+		ASSERT (retval == 0);
+		ASSERT (length == strlen (expected));
+		ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+		if (o)
+		  {
+		    for (i = 0; i < 17; i++)
+		      ASSERT (offsets[i] == (i < 5 ? i :
+					     i == 5 ? (size_t)(-1) :
+					     i - 1));
+		    ASSERT (offsets[17] == MAGIC);
+		    free (offsets);
+		  }
+		free (result);
+	      }
+	      break;
+	    case iconveh_escape_sequence:
+	      {
+		static const char expected[] = "Rafa\\u0142 Maszkowski";
+		ASSERT (retval == 0);
+		ASSERT (length == strlen (expected));
+		ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
+		if (o)
+		  {
+		    for (i = 0; i < 17; i++)
+		      ASSERT (offsets[i] == (i < 5 ? i :
+					     i == 5 ? (size_t)(-1) :
+					     i + 4));
+		    ASSERT (offsets[17] == MAGIC);
+		    free (offsets);
+		  }
+		free (result);
+	      }
+	      break;
+	    }
 	}
     }
 
@@ -484,16 +694,27 @@
     {
       enum iconv_ilseq_handler handler = handlers[h];
       static const char input[] = "\342";
-      char *result = NULL;
-      size_t length = 0;
-      int retval = mem_iconveh (input, strlen (input),
-				"UTF-8", "ISO-8859-1",
-				handler,
-				&result, &length);
-      ASSERT (retval == 0);
-      ASSERT (length == 0);
-      if (result != NULL)
-	free (result);
+      for (o = 0; o < 2; o++)
+	{
+	  size_t *offsets = (o ? new_offsets (strlen (input)) : NULL);
+	  char *result = NULL;
+	  size_t length = 0;
+	  int retval = mem_iconveh (input, strlen (input),
+				    "UTF-8", "ISO-8859-1",
+				    handler,
+				    offsets,
+				    &result, &length);
+	  ASSERT (retval == 0);
+	  ASSERT (length == 0);
+	  if (o)
+	    {
+	      ASSERT (offsets[0] == 0);
+	      ASSERT (offsets[1] == MAGIC);
+	      free (offsets);
+	    }
+	  if (result != NULL)
+	    free (result);
+	}
     }
 
   /* ------------------------- Test str_iconveh() ------------------------- */