changeset 18042:6ff81241d5b7

unistr/uN-strtok: handle multibyte delimiters Previously, uN_strtok moved PTR to the next unit to the token end. When DELIM contained a multibyte character, the new position could be a middle of a multibyte character. * lib/unistr/u-strtok.h (FUNC): Place PTR at the next character after the token. * lib/unistr/u8-strtok.c (U_STRMBLEN): New macro. * lib/unistr/u16-strtok.c (U_STRMBLEN): New macro. * lib/unistr/u32-strtok.c (U_STRMBLEN): New macro. * modules/unistr/u8-strtok (Depends-on): Depend on unistr/u8-strmblen. * modules/unistr/u16-strtok (Depends-on): Depend on unistr/u16-strmblen. * modules/unistr/u32-strtok (Depends-on): Depend on unistr/u32-strmblen. * tests/unistr/test-u-strtok.h: New file. * tests/unistr/test-u8-strtok.c: New file. * tests/unistr/test-u16-strtok.c: New file. * tests/unistr/test-u32-strtok.c: New file. * modules/unistr/u8-strtok-tests: New file. * modules/unistr/u32-strtok-tests: New file. * modules/unistr/u16-strtok-tests: New file. Copyright-paperwork-exempt: yes Co-authored-by: Daiki Ueno <ueno@gnu.org>
author Seiya Kawashima <skawashima@uchicago.edu>
date Fri, 03 Jul 2015 11:42:43 +0900
parents 07ede17141c2
children 7e744e6257e6
files ChangeLog lib/unistr/u-strtok.h lib/unistr/u16-strtok.c lib/unistr/u32-strtok.c lib/unistr/u8-strtok.c modules/unistr/u16-strtok modules/unistr/u16-strtok-tests modules/unistr/u32-strtok modules/unistr/u32-strtok-tests modules/unistr/u8-strtok modules/unistr/u8-strtok-tests tests/unistr/test-u-strtok.h tests/unistr/test-u16-strtok.c tests/unistr/test-u32-strtok.c tests/unistr/test-u8-strtok.c
diffstat 15 files changed, 273 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,29 @@
+2015-07-03  Seiya Kawashima <skawashima@uchicago.edu>  (tiny-change)
+	and Daiki Ueno  <ueno@gnu.org>
+
+	unistr/uN-strtok: handle multibyte delimiters
+	Previously, uN_strtok moved PTR to the next unit to the token end.
+	When DELIM contained a multibyte character, the new position could
+	be a middle of a multibyte character.
+	* lib/unistr/u-strtok.h (FUNC): Place PTR at the next character
+	after the token.
+	* lib/unistr/u8-strtok.c (U_STRMBLEN): New macro.
+	* lib/unistr/u16-strtok.c (U_STRMBLEN): New macro.
+	* lib/unistr/u32-strtok.c (U_STRMBLEN): New macro.
+	* modules/unistr/u8-strtok (Depends-on): Depend on
+	unistr/u8-strmblen.
+	* modules/unistr/u16-strtok (Depends-on): Depend on
+	unistr/u16-strmblen.
+	* modules/unistr/u32-strtok (Depends-on): Depend on
+	unistr/u32-strmblen.
+	* tests/unistr/test-u-strtok.h: New file.
+	* tests/unistr/test-u8-strtok.c: New file.
+	* tests/unistr/test-u16-strtok.c: New file.
+	* tests/unistr/test-u32-strtok.c: New file.
+	* modules/unistr/u8-strtok-tests: New file.
+	* modules/unistr/u32-strtok-tests: New file.
+	* modules/unistr/u16-strtok-tests: New file.
+
 2015-07-02  Friedrich Haubensak  <hsk@fli-leibniz.de>
 
 	update-copyright: fix test failure with perl >= 5.22 (trivial)
--- a/lib/unistr/u-strtok.h
+++ b/lib/unistr/u-strtok.h
@@ -40,9 +40,9 @@
     UNIT *token_end = U_STRPBRK (str, delim);
     if (token_end)
       {
+        *ptr = token_end + U_STRMBLEN (token_end);
         /* NUL-terminate the token.  */
         *token_end = 0;
-        *ptr = token_end + 1;
       }
     else
       *ptr = NULL;
--- a/lib/unistr/u16-strtok.c
+++ b/lib/unistr/u16-strtok.c
@@ -24,4 +24,5 @@
 #define UNIT uint16_t
 #define U_STRSPN u16_strspn
 #define U_STRPBRK u16_strpbrk
+#define U_STRMBLEN u16_strmblen
 #include "u-strtok.h"
--- a/lib/unistr/u32-strtok.c
+++ b/lib/unistr/u32-strtok.c
@@ -24,4 +24,5 @@
 #define UNIT uint32_t
 #define U_STRSPN u32_strspn
 #define U_STRPBRK u32_strpbrk
+#define U_STRMBLEN u32_strmblen
 #include "u-strtok.h"
--- a/lib/unistr/u8-strtok.c
+++ b/lib/unistr/u8-strtok.c
@@ -24,4 +24,5 @@
 #define UNIT uint8_t
 #define U_STRSPN u8_strspn
 #define U_STRPBRK u8_strpbrk
+#define U_STRMBLEN u8_strmblen
 #include "u-strtok.h"
--- a/modules/unistr/u16-strtok
+++ b/modules/unistr/u16-strtok
@@ -9,6 +9,7 @@
 unistr/base
 unistr/u16-strspn
 unistr/u16-strpbrk
+unistr/u16-strmblen
 
 configure.ac:
 gl_LIBUNISTRING_MODULE([0.9], [unistr/u16-strtok])
new file mode 100644
--- /dev/null
+++ b/modules/unistr/u16-strtok-tests
@@ -0,0 +1,14 @@
+Files:
+tests/unistr/test-u16-strtok.c
+tests/unistr/test-u-strtok.h
+tests/macros.h
+
+Depends-on:
+
+configure.ac:
+
+Makefile.am:
+TESTS += test-u16-strtok
+check_PROGRAMS += test-u16-strtok
+test_u16_strtok_SOURCES = unistr/test-u16-strtok.c
+test_u16_strtok_LDADD = $(LDADD) $(LIBUNISTRING)
--- a/modules/unistr/u32-strtok
+++ b/modules/unistr/u32-strtok
@@ -9,6 +9,7 @@
 unistr/base
 unistr/u32-strspn
 unistr/u32-strpbrk
+unistr/u32-strmblen
 
 configure.ac:
 gl_LIBUNISTRING_MODULE([0.9], [unistr/u32-strtok])
new file mode 100644
--- /dev/null
+++ b/modules/unistr/u32-strtok-tests
@@ -0,0 +1,15 @@
+Files:
+tests/unistr/test-u32-strtok.c
+tests/unistr/test-u-strtok.h
+tests/macros.h
+
+Depends-on:
+unistr/u32-uctomb
+
+configure.ac:
+
+Makefile.am:
+TESTS += test-u32-strtok
+check_PROGRAMS += test-u32-strtok
+test_u32_strtok_SOURCES = unistr/test-u32-strtok.c
+test_u32_strtok_LDADD = $(LDADD) $(LIBUNISTRING)
--- a/modules/unistr/u8-strtok
+++ b/modules/unistr/u8-strtok
@@ -9,6 +9,7 @@
 unistr/base
 unistr/u8-strspn
 unistr/u8-strpbrk
+unistr/u8-strmblen
 
 configure.ac:
 gl_LIBUNISTRING_MODULE([0.9], [unistr/u8-strtok])
new file mode 100644
--- /dev/null
+++ b/modules/unistr/u8-strtok-tests
@@ -0,0 +1,14 @@
+Files:
+tests/unistr/test-u8-strtok.c
+tests/unistr/test-u-strtok.h
+tests/macros.h
+
+Depends-on:
+
+configure.ac:
+
+Makefile.am:
+TESTS += test-u8-strtok
+check_PROGRAMS += test-u8-strtok
+test_u8_strtok_SOURCES = unistr/test-u8-strtok.c
+test_u8_strtok_LDADD = $(LDADD) $(LIBUNISTRING)
new file mode 100644
--- /dev/null
+++ b/tests/unistr/test-u-strtok.h
@@ -0,0 +1,86 @@
+/* Test of uN_strtok() functions.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+static void
+test_u_strtok (void)
+{
+  {
+    UNIT input[] = { 'f', 'o', 'o', 0 };
+    const UNIT delim[] = { 0 };
+    UNIT *state;
+    const UNIT *result = U_STRTOK (input, delim, &state);
+    ASSERT (result == input);
+  }
+
+  {
+    UNIT input[] =
+      { 'A', 'B', 'C', ' ', 'A', 'B', 'C', 'D', 'A', 'B', ' ', '\t',
+	'A', 'B', 'C', 'D', 'A', 'B', 'C', 'D', 'A', 'B', 'D', 'E', 0
+      };
+    const UNIT delim[] = { ' ', '\t', 0 };
+    UNIT *state;
+    const UNIT *result;
+    result = U_STRTOK (input, delim, &state);
+    ASSERT (result == input);
+    result = U_STRTOK (NULL, delim, &state);
+    ASSERT (result == input + 4);
+    result = U_STRTOK (NULL, delim, &state);
+    ASSERT (result == input + 12);
+    result = U_STRTOK (NULL, delim, &state);
+    ASSERT (result == NULL);
+  }
+
+  /* Check for multibyte delimiters.  */
+  {
+    ucs4_t u_input[] =
+      { 'A', 'B', 'C', 0x3000, 'A', 'B', 'C', 'D', 'A', 'B', 0x3000, 0x3001,
+	'A', 'B', 'C', 'D', 'A', 'B', 'C', 'D', 'A', 'B', 'D', 'E', 0
+      };
+    ucs4_t u_delim[] = { 0x3000, 0x3001, 0 };
+    size_t input_len = 6 * SIZEOF (u_input);
+    UNIT *input = (UNIT *) malloc (input_len);
+    size_t delim_len = 6 * SIZEOF (u_delim);
+    UNIT *delim = (UNIT *) malloc (delim_len);
+    UNIT *state;
+    const UNIT *result;
+    UNIT *ptr, *first_ptr, *second_ptr;
+    size_t i;
+    for (i = 0, ptr = input; i < SIZEOF (u_input) && u_input[i] != 0; i++)
+      {
+	int ret = U_UCTOMB (ptr, u_input[i], input_len - (ptr - input));
+	if (i == 4)
+	  first_ptr = ptr;
+	if (i == 12)
+	  second_ptr = ptr;
+	ptr += ret;
+      }
+    *ptr = 0;
+    for (i = 0, ptr = delim; i < SIZEOF (u_delim) && u_delim[i] != 0; i++)
+      {
+	int ret = U_UCTOMB (ptr, u_delim[i], delim_len - (ptr - delim));
+	ptr += ret;
+      }
+    *ptr = 0;
+    result = U_STRTOK (input, delim, &state);
+    ASSERT (result == input);
+    result = U_STRTOK (NULL, delim, &state);
+    ASSERT (result == first_ptr);
+    result = U_STRTOK (NULL, delim, &state);
+    ASSERT (result == second_ptr);
+    result = U_STRTOK (NULL, delim, &state);
+    ASSERT (result == NULL);
+  }
+}
new file mode 100644
--- /dev/null
+++ b/tests/unistr/test-u16-strtok.c
@@ -0,0 +1,37 @@
+/* Test of u16_strtok() function.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+#include "unistr.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "macros.h"
+
+#define UNIT uint16_t
+#define U_STRTOK u16_strtok
+#define U_UCTOMB u16_uctomb
+#include "test-u-strtok.h"
+
+int
+main (void)
+{
+  test_u_strtok ();
+
+  return 0;
+}
new file mode 100644
--- /dev/null
+++ b/tests/unistr/test-u32-strtok.c
@@ -0,0 +1,37 @@
+/* Test of u32_strtok() function.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+#include "unistr.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "macros.h"
+
+#define UNIT uint32_t
+#define U_STRTOK u32_strtok
+#define U_UCTOMB u32_uctomb
+#include "test-u-strtok.h"
+
+int
+main (void)
+{
+  test_u_strtok ();
+
+  return 0;
+}
new file mode 100644
--- /dev/null
+++ b/tests/unistr/test-u8-strtok.c
@@ -0,0 +1,37 @@
+/* Test of u8_strtok() function.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+#include "unistr.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "macros.h"
+
+#define UNIT uint8_t
+#define U_STRTOK u8_strtok
+#define U_UCTOMB u8_uctomb
+#include "test-u-strtok.h"
+
+int
+main (void)
+{
+  test_u_strtok ();
+
+  return 0;
+}