changeset 14476:7e83803507fe

New module 'unictype/category-longname'. * lib/unictype.in.h (uc_general_category_long_name): New declaration. * lib/unictype/categ_longname.c: New file. * modules/unictype/category-longname: New file. * modules/unictype/category-all (Depends-on): Add it.
author Bruno Haible <bruno@clisp.org>
date Fri, 25 Mar 2011 23:14:10 +0100
parents 4f08f1481a55
children 398f5637786e
files ChangeLog lib/unictype.in.h lib/unictype/categ_longname.c modules/unictype/category-all modules/unictype/category-longname
diffstat 5 files changed, 144 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2011-03-25  Bruno Haible  <bruno@clisp.org>
+
+	New module 'unictype/category-longname'.
+	* lib/unictype.in.h (uc_general_category_long_name): New declaration.
+	* lib/unictype/categ_longname.c: New file.
+	* modules/unictype/category-longname: New file.
+	* modules/unictype/category-all (Depends-on): Add it.
+
 2011-03-25  Bruno Haible  <bruno@clisp.org>
 
 	Tests for module 'unictype/category-LC'.
--- a/lib/unictype.in.h
+++ b/lib/unictype.in.h
@@ -207,6 +207,10 @@
 extern const char *
        uc_general_category_name (uc_general_category_t category);
 
+/* Return the long name of a general category, e.g. "Uppercase Letter", or NULL if it has none.  */
+extern const char *
+       uc_general_category_long_name (uc_general_category_t category);
+
 /* Return the general category given by name, e.g. "Lu".  */
 extern uc_general_category_t
        uc_general_category_byname (const char *category_name);
new file mode 100644
--- /dev/null
+++ b/lib/unictype/categ_longname.c
@@ -0,0 +1,106 @@
+/* Categories of Unicode characters.
+   Copyright (C) 2002, 2006-2007, 2011 Free Software Foundation, Inc.
+   Written by Bruno Haible <bruno@clisp.org>, 2011.
+
+   This program is free software: you can redistribute it and/or modify it
+   under the terms of the GNU Lesser General Public License as published
+   by the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+/* Specification.  */
+#include "unictype.h"
+
+static const char u_category_long_name[30][22] = /* Indexed by the bit number of category.bitmask.  */
+{
+  "Uppercase Letter",
+  "Lowercase Letter",
+  "Titlecase Letter",
+  "Modifier Letter",
+  "Other Letter",
+  "Nonspacing Mark",
+  "Spacing Mark",
+  "Enclosing Mark",
+  "Decimal Number",
+  "Letter Number",
+  "Other Number",
+  "Connector Punctuation", /* Longest name: 21 characters + NUL = 22.  */
+  "Dash Punctuation",
+  "Open Punctuation",
+  "Close Punctuation",
+  "Initial Punctuation",
+  "Final Punctuation",
+  "Other Punctuation",
+  "Math Symbol",
+  "Currency Symbol",
+  "Modifier Symbol",
+  "Other Symbol",
+  "Space Separator",
+  "Line Separator",
+  "Paragraph Separator",
+  "Control",
+  "Format",
+  "Surrogate",
+  "Private Use",
+  "Unassigned"
+};
+
+const char *
+uc_general_category_long_name (uc_general_category_t category)
+{
+  uint32_t bitmask = category.bitmask;
+  /* Return the long name for a single-bit bitmask or for one of the UC_CATEGORY_MASK_* values tested below; otherwise NULL.  */
+  if (bitmask != 0)
+    {
+      if ((bitmask & (bitmask - 1)) == 0) /* Exactly one bit is set.  */
+        {
+          int bit;
+          /* Take log2 using a variant of Robert Harley's method (found by Bruno Haible 1996):
+             multiply by 17 * 65 * 65535 modulo 2^32 and look up the top 6 bits in ord2_tab.  */
+          uint32_t n = bitmask;
+          static const char ord2_tab[64] = /* -1 presumably fills slots no single-bit input reaches - TODO confirm.  */
+            {
+              -1,  0,  1, 12,  2,  6, -1, 13,  3, -1,  7, -1, -1, -1, -1, 14,
+              10,  4, -1, -1,  8, -1, -1, 25, -1, -1, -1, -1, -1, 21, 27, 15,
+              31, 11,  5, -1, -1, -1, -1, -1,  9, -1, -1, 24, -1, -1, 20, 26,
+              30, -1, -1, -1, -1, 23, -1, 19, 29, -1, 22, 18, 28, 17, 16, -1
+            };
+          n += n << 4; /* n *= 17 */
+          n += n << 6; /* n *= 65 */
+          n = (n << 16) - n; /* n *= 65535 */
+          bit = ord2_tab[n >> 26]; /* Top 6 bits select the table slot.  */
+
+          if (bit < sizeof (u_category_long_name) / sizeof (u_category_long_name[0])) /* Bits 30 and 31 have no entry.  */
+            return u_category_long_name[bit];
+        }
+      else /* Several bits set: match against the UC_CATEGORY_MASK_* constants.  */
+        {
+          if (bitmask == UC_CATEGORY_MASK_L)
+            return "Letter";
+          if (bitmask == UC_CATEGORY_MASK_LC)
+            return "Cased Letter";
+          if (bitmask == UC_CATEGORY_MASK_M)
+            return "Mark";
+          if (bitmask == UC_CATEGORY_MASK_N)
+            return "Number";
+          if (bitmask == UC_CATEGORY_MASK_P)
+            return "Punctuation";
+          if (bitmask == UC_CATEGORY_MASK_S)
+            return "Symbol";
+          if (bitmask == UC_CATEGORY_MASK_Z)
+            return "Separator";
+          if (bitmask == UC_CATEGORY_MASK_C)
+            return "Other";
+        }
+    }
+  return NULL; /* No long name for this bitmask.  */
+}
--- a/modules/unictype/category-all
+++ b/modules/unictype/category-all
@@ -45,6 +45,7 @@
 unictype/category-and
 unictype/category-and-not
 unictype/category-byname
+unictype/category-longname
 unictype/category-name
 unictype/category-of
 unictype/category-or
new file mode 100644
--- /dev/null
+++ b/modules/unictype/category-longname
@@ -0,0 +1,25 @@
+Description:
+Long name of Unicode character category.
+
+Files:
+lib/unictype/categ_longname.c
+
+Depends-on:
+unictype/base
+
+configure.ac:
+gl_LIBUNISTRING_MODULE([0.9.4], [unictype/category-longname])
+
+Makefile.am:
+if LIBUNISTRING_COMPILE_UNICTYPE_CATEGORY_LONGNAME
+lib_SOURCES += unictype/categ_longname.c
+endif
+
+Include:
+"unictype.h"
+
+License:
+LGPL
+
+Maintainer:
+Bruno Haible