changeset 14496:e3e6889d281c

unictype/joiningtype-byname: Recognize long names as well. * lib/unictype.in.h (uc_joining_type_byname): Allow argument to be a long name. * lib/unictype/joiningtype_byname.c: Include <string.h>, unictype/joiningtype_byname.h. (uc_joining_type_byname): Use uc_joining_type_lookup. * lib/unictype/joiningtype_byname.gperf: New file. * modules/unictype/joiningtype-byname (Files): Add lib/unictype/joiningtype_byname.gperf. (Depends-on): Add gperf. (Makefile.am): Add rule for generating unictype/joiningtype_byname.h. * tests/unictype/test-joiningtype_byname.c (main): Test the recognition of long names.
author Bruno Haible <bruno@clisp.org>
date Sat, 26 Mar 2011 13:38:00 +0100
parents 93cdd449315f
children 1fe160458a5c
files ChangeLog lib/unictype.in.h lib/unictype/joiningtype_byname.c lib/unictype/joiningtype_byname.gperf modules/unictype/joiningtype-byname tests/unictype/test-joiningtype_byname.c
diffstat 6 files changed, 113 insertions(+), 15 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,19 @@
 2011-03-26  Bruno Haible  <bruno@clisp.org>
 
+	unictype/joiningtype-byname: Recognize long names as well.
+	* lib/unictype.in.h (uc_joining_type_byname): Allow argument to be
+	a long name.
+	* lib/unictype/joiningtype_byname.c: Include <string.h>,
+	unictype/joiningtype_byname.h.
+	(uc_joining_type_byname): Use uc_joining_type_lookup.
+	* lib/unictype/joiningtype_byname.gperf: New file.
+	* modules/unictype/joiningtype-byname (Files): Add
+	lib/unictype/joiningtype_byname.gperf.
+	(Depends-on): Add gperf.
+	(Makefile.am): Add rule for generating unictype/joiningtype_byname.h.
+	* tests/unictype/test-joiningtype_byname.c (main): Test the recognition of
+	long names.
+
 	Tests for module 'unictype/joiningtype-longname'.
 	* modules/unictype/joiningtype-longname-tests: New file.
 	* tests/unictype/test-joiningtype_longname.c: New file.
--- a/lib/unictype.in.h
+++ b/lib/unictype.in.h
@@ -419,7 +419,8 @@
 extern const char *
        uc_joining_type_long_name (int joining_type);
 
-/* Return the joining type given by name, e.g. "D".  */
+/* Return the joining type given by name, e.g. "D", or by long name, e.g.
+   "Dual Joining".  */
 extern int
        uc_joining_type_byname (const char *joining_type_name);
 
--- a/lib/unictype/joiningtype_byname.c
+++ b/lib/unictype/joiningtype_byname.c
@@ -20,25 +20,44 @@
 /* Specification.  */
 #include "unictype.h"
 
+#include <string.h>
+
+#include "unictype/joiningtype_byname.h"
+
 int
 uc_joining_type_byname (const char *joining_type_name)
 {
-  if (joining_type_name[0] != '\0' && joining_type_name[1] == '\0')
-    switch (joining_type_name[0])
+  size_t len;
+
+  len = strlen (joining_type_name);
+  if (len <= MAX_WORD_LENGTH)
+    {
+      char buf[MAX_WORD_LENGTH + 1];
+      const struct named_joining_type *found;
+
+      /* Copy joining_type_name into buf, converting '_' and '-' to ' '.  */
       {
-      case 'C':
-        return UC_JOINING_TYPE_C;
-      case 'D':
-        return UC_JOINING_TYPE_D;
-      case 'L':
-        return UC_JOINING_TYPE_L;
-      case 'R':
-        return UC_JOINING_TYPE_R;
-      case 'T':
-        return UC_JOINING_TYPE_T;
-      case 'U':
-        return UC_JOINING_TYPE_U;
+        const char *p = joining_type_name;
+        char *q = buf;
+
+        for (;; p++, q++)
+          {
+            char c = *p;
+
+            if (c == '_' || c == '-')
+              c = ' ';
+            *q = c;
+            if (c == '\0')
+              break;
+          }
       }
+      /* Here q == buf + len.  */
+
+      /* Do a hash table lookup, with case-insensitive comparison.  */
+      found = uc_joining_type_lookup (buf, len);
+      if (found != NULL)
+        return found->joining_type;
+    }
   /* Invalid joining type name.  */
   return -1;
 }
new file mode 100644
--- /dev/null
+++ b/lib/unictype/joiningtype_byname.gperf
@@ -0,0 +1,30 @@
+/* Arabic joining type of Unicode characters.  */
+struct named_joining_type { int name; int joining_type; };
+%struct-type
+%ignore-case
+%language=ANSI-C
+%define hash-function-name joining_type_hash
+%define lookup-function-name uc_joining_type_lookup
+%readonly-tables
+%global-table
+%define word-array-name joining_type_names
+%pic
+%define string-pool-name joining_type_stringpool
+%%
+C, UC_JOINING_TYPE_C
+D, UC_JOINING_TYPE_D
+L, UC_JOINING_TYPE_L
+R, UC_JOINING_TYPE_R
+T, UC_JOINING_TYPE_T
+U, UC_JOINING_TYPE_U
+Join Causing, UC_JOINING_TYPE_C
+JoinCausing, UC_JOINING_TYPE_C
+Dual Joining, UC_JOINING_TYPE_D
+DualJoining, UC_JOINING_TYPE_D
+Left Joining, UC_JOINING_TYPE_L
+LeftJoining, UC_JOINING_TYPE_L
+Right Joining, UC_JOINING_TYPE_R
+RightJoining, UC_JOINING_TYPE_R
+Transparent, UC_JOINING_TYPE_T
+Non Joining, UC_JOINING_TYPE_U
+NonJoining, UC_JOINING_TYPE_U
--- a/modules/unictype/joiningtype-byname
+++ b/modules/unictype/joiningtype-byname
@@ -3,9 +3,11 @@
 
 Files:
 lib/unictype/joiningtype_byname.c
+lib/unictype/joiningtype_byname.gperf
 
 Depends-on:
 unictype/base
+gperf
 
 configure.ac:
 gl_LIBUNISTRING_MODULE([0.9.4], [unictype/joiningtype-byname])
@@ -15,6 +17,14 @@
 lib_SOURCES += unictype/joiningtype_byname.c
 endif
 
+unictype/joiningtype_byname.h: unictype/joiningtype_byname.gperf
+	$(GPERF) -m 10 $(srcdir)/unictype/joiningtype_byname.gperf > $(srcdir)/unictype/joiningtype_byname.h-t
+	mv $(srcdir)/unictype/joiningtype_byname.h-t $(srcdir)/unictype/joiningtype_byname.h
+BUILT_SOURCES        += unictype/joiningtype_byname.h
+MOSTLYCLEANFILES     += unictype/joiningtype_byname.h-t
+MAINTAINERCLEANFILES += unictype/joiningtype_byname.h
+EXTRA_DIST           += unictype/joiningtype_byname.h
+
 Include:
 "unictype.h"
 
--- a/tests/unictype/test-joiningtype_byname.c
+++ b/tests/unictype/test-joiningtype_byname.c
@@ -33,6 +33,30 @@
   ASSERT (uc_joining_type_byname ("L") == UC_JOINING_TYPE_L);
   ASSERT (uc_joining_type_byname ("R") == UC_JOINING_TYPE_R);
   ASSERT (uc_joining_type_byname ("D") == UC_JOINING_TYPE_D);
+
+  ASSERT (uc_joining_type_byname ("JOIN CAUSING") == UC_JOINING_TYPE_C);
+  ASSERT (uc_joining_type_byname ("Join Causing") == UC_JOINING_TYPE_C);
+  ASSERT (uc_joining_type_byname ("Join_Causing") == UC_JOINING_TYPE_C);
+  ASSERT (uc_joining_type_byname ("JoinCausing") == UC_JOINING_TYPE_C);
+  ASSERT (uc_joining_type_byname ("DUAL JOINING") == UC_JOINING_TYPE_D);
+  ASSERT (uc_joining_type_byname ("Dual Joining") == UC_JOINING_TYPE_D);
+  ASSERT (uc_joining_type_byname ("Dual_Joining") == UC_JOINING_TYPE_D);
+  ASSERT (uc_joining_type_byname ("DualJoining") == UC_JOINING_TYPE_D);
+  ASSERT (uc_joining_type_byname ("LEFT JOINING") == UC_JOINING_TYPE_L);
+  ASSERT (uc_joining_type_byname ("Left Joining") == UC_JOINING_TYPE_L);
+  ASSERT (uc_joining_type_byname ("Left_Joining") == UC_JOINING_TYPE_L);
+  ASSERT (uc_joining_type_byname ("LeftJoining") == UC_JOINING_TYPE_L);
+  ASSERT (uc_joining_type_byname ("RIGHT JOINING") == UC_JOINING_TYPE_R);
+  ASSERT (uc_joining_type_byname ("Right Joining") == UC_JOINING_TYPE_R);
+  ASSERT (uc_joining_type_byname ("Right_Joining") == UC_JOINING_TYPE_R);
+  ASSERT (uc_joining_type_byname ("RightJoining") == UC_JOINING_TYPE_R);
+  ASSERT (uc_joining_type_byname ("TRANSPARENT") == UC_JOINING_TYPE_T);
+  ASSERT (uc_joining_type_byname ("Transparent") == UC_JOINING_TYPE_T);
+  ASSERT (uc_joining_type_byname ("NON JOINING") == UC_JOINING_TYPE_U);
+  ASSERT (uc_joining_type_byname ("Non Joining") == UC_JOINING_TYPE_U);
+  ASSERT (uc_joining_type_byname ("Non_Joining") == UC_JOINING_TYPE_U);
+  ASSERT (uc_joining_type_byname ("NonJoining") == UC_JOINING_TYPE_U);
+
   ASSERT (uc_joining_type_byname ("X") < 0);
   ASSERT (uc_joining_type_byname ("") < 0);