changeset 14150:2e27d5cf0137

uniwidth/width: Fix width of U+1D173..U+1D17A. * lib/gen-uni-tables.c (is_nonspacing, output_nonspacing_property, symbolic_width, output_width_property_test): New functions. (main): Invoke output_nonspacing_property, output_width_property_test. * lib/uniwidth/width.c (nonspacing_table_data): Set bits for U+1D173..U+1D17A. * tests/uniwidth/test-uc_width2.sh: For U+1D173..U+1D17A, expect 0, not 1. * modules/uniwidth/*: Bump version number of expected libunistring version. * modules/unilbrk/*: Likewise.
author Bruno Haible <bruno@clisp.org>
date Sun, 09 Jan 2011 10:11:05 +0100
parents d9736adb0cbc
children 69b92624775e
files ChangeLog lib/gen-uni-tables.c lib/uniwidth/width.c modules/unilbrk/u16-width-linebreaks modules/unilbrk/u32-width-linebreaks modules/unilbrk/u8-width-linebreaks modules/unilbrk/ulc-width-linebreaks modules/uniwidth/u16-strwidth modules/uniwidth/u16-width modules/uniwidth/u32-strwidth modules/uniwidth/u32-width modules/uniwidth/u8-strwidth modules/uniwidth/u8-width modules/uniwidth/width tests/uniwidth/test-uc_width2.sh
diffstat 15 files changed, 251 insertions(+), 14 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,17 @@
+2011-01-09  Bruno Haible  <bruno@clisp.org>
+
+	uniwidth/width: Fix width of U+1D173..U+1D17A.
+	* lib/gen-uni-tables.c (is_nonspacing, output_nonspacing_property,
+	symbolic_width, output_width_property_test): New functions.
+	(main): Invoke output_nonspacing_property, output_width_property_test.
+	* lib/uniwidth/width.c (nonspacing_table_data): Set bits for
+	U+1D173..U+1D17A.
+	* tests/uniwidth/test-uc_width2.sh: For U+1D173..U+1D17A, expect 0, not
+	1.
+	* modules/uniwidth/*: Bump version number of expected libunistring
+	version.
+	* modules/unilbrk/*: Likewise.
+
 2011-01-08  Bruno Haible  <bruno@clisp.org>
 
 	uninorm tests: Preserve copyright of Unicode data file.
--- a/lib/gen-uni-tables.c
+++ b/lib/gen-uni-tables.c
@@ -5144,6 +5144,227 @@
 
 /* ========================================================================= */
 
+/* Non-spacing attribute and width.  */
+
+/* The non-spacing attribute table consists of:
+   - Non-spacing characters; generated from PropList.txt or
+     "grep '^[^;]*;[^;]*;[^;]*;[^;]*;NSM;' UnicodeData.txt"
+   - Format control characters; generated from
+     "grep '^[^;]*;[^;]*;Cf;' UnicodeData.txt"
+   - Zero width characters; generated from
+     "grep '^[^;]*;ZERO WIDTH ' UnicodeData.txt"
+ */
+
+static bool
+is_nonspacing (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+          && (get_bidi_category (ch) == UC_BIDI_NSM
+              || is_category_Cc (ch) || is_category_Cf (ch)
+              || strncmp (unicode_attributes[ch].name, "ZERO WIDTH ", 11) == 0));
+}
+
+static void
+output_nonspacing_property (const char *filename)
+{
+  FILE *stream;
+  int ind[0x110000 / 0x200];
+  unsigned int i;
+  unsigned int i_max;
+  int next_ind;
+
+  stream = fopen (filename, "w");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "cannot open '%s' for writing\n", filename);
+      exit (1);
+    }
+
+  next_ind = 0;
+  for (i = 0; i < 0x110000 / 0x200; i++)
+    {
+      bool nontrivial = false;
+      unsigned int ch;
+
+      if (i != 0xe0000 / 0x200) /* The 0xe0000 block is handled by code.  */
+        for (ch = i * 0x200; ch < (i + 1) * 0x200; ch++)
+          if (is_nonspacing (ch))
+            {
+              nontrivial = true;
+              break;
+            }
+      if (nontrivial)
+        ind[i] = next_ind++;
+      else
+        ind[i] = -1;
+    }
+
+  fprintf (stream, "static const unsigned char nonspacing_table_data[%d*64] = {\n",
+           next_ind);
+  i_max = 0;
+  for (i = 0; i < 0x110000 / 0x200; i++)
+    {
+      bool nontrivial = (ind[i] >= 0);
+
+      if (nontrivial)
+        {
+          unsigned int j;
+
+          fprintf (stream, "  /* 0x%04x-0x%04x */\n", i * 0x200, (i + 1) * 0x200 - 1);
+          for (j = 0; j < 8; j++)
+            {
+              unsigned int k;
+
+              fprintf (stream, " ");
+              for (k = 0; k < 8; k++)
+                {
+                  unsigned int l;
+                  unsigned char bits = 0;
+
+                  for (l = 0; l < 8; l++)
+                    {
+                      unsigned int ch = i * 0x200 + j * 0x40 + k * 8 + l;
+
+                      if (is_nonspacing (ch))
+                        bits |= 1 << l;
+                    }
+                  fprintf (stream, " 0x%02x%c", bits,
+                           ind[i] + 1 == next_ind && j == 8 - 1 && k == 8 - 1 ? ' ' : ',');
+                }
+              fprintf (stream, " /* 0x%04x-0x%04x */\n",
+                       i * 0x200 + j * 0x40, i * 0x200 + (j + 1) * 0x40 - 1);
+            }
+          i_max = i;
+        }
+    }
+  fprintf (stream, "};\n");
+
+  i_max = ((i_max + 8 - 1) / 8) * 8;
+  fprintf (stream, "static const signed char nonspacing_table_ind[%u] = {\n",
+           i_max);
+  {
+    unsigned int j;
+
+    for (j = 0; j < i_max / 8; j++)
+      {
+        unsigned int k;
+
+        fprintf (stream, " ");
+        for (k = 0; k < 8; k++)
+          {
+            i = j * 8 + k;
+            fprintf (stream, " %2d%c", ind[i],
+                     j == i_max / 8 - 1 && k == 8 - 1 ? ' ' : ',');
+          }
+        fprintf (stream, " /* 0x%04x-0x%04x */\n",
+                 j * 8 * 0x200, (j + 1) * 8 * 0x200 - 1);
+      }
+  }
+  fprintf (stream, "};\n");
+
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error writing to '%s'\n", filename);
+      exit (1);
+    }
+}
+
+/* Returns the width of ch as one of 0, '0', '1', '2', 'A'.  */
+static char
+symbolic_width (unsigned int ch)
+{
+  /* Test for unassigned character.  */
+  if (is_property_unassigned_code_value (ch))
+    {
+      /* Unicode TR#11 section "Unassigned and Private-Use Characters".  */
+      if (ch >= 0xE000 && ch <= 0xF8FF)
+        return 'A';
+      if ((ch >= 0x20000 && ch <= 0x2FFFD) || (ch >= 0x30000 && ch <= 0x3FFFD))
+        return '2';
+      return 0;
+    }
+  else
+    {
+      /* Test for non-spacing or control character.  */
+      if (is_category_Cc (ch) && ch < 0x00A0)
+        return 0;
+      if (is_nonspacing (ch))
+        return '0';
+      /* Test for double-width character.  */
+      if (unicode_width[ch] != NULL
+          && (strcmp (unicode_width[ch], "W") == 0
+              || strcmp (unicode_width[ch], "F") == 0))
+        return '2';
+      /* Test for half-width character.  */
+      if (unicode_width[ch] != NULL
+          && strcmp (unicode_width[ch], "H") == 0)
+        return '1';
+    }
+  /* In ancient CJK encodings, Cyrillic and most other characters are
+     double-width as well.  */
+  if (ch >= 0x00A1 && ch < 0x10000)
+    return 'A';
+  return '1';
+}
+
+static void
+output_width_property_test (const char *filename)
+{
+  FILE *stream;
+  unsigned int interval_start, interval_end, ch;
+  char interval_value;
+
+  stream = fopen (filename, "w");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "cannot open '%s' for writing\n", filename);
+      exit (1);
+    }
+
+  interval_value = 0;
+  interval_start = interval_end = 0; /* avoid GCC warning */
+  for (ch = 0; ch < 0x110000; ch++)
+    {
+      char value = symbolic_width (ch);
+      if (value != 0) /* skip Cc control characters and unassigned characters */
+        {
+          if (value == interval_value)
+            /* Extend the interval.  */
+            interval_end = ch;
+          else
+            {
+              /* Terminate the interval.  */
+              if (interval_value != 0)
+                {
+                  if (interval_end == interval_start)
+                    fprintf (stream, "%04X\t\t%c\n", interval_start, interval_value);
+                  else
+                    fprintf (stream, "%04X..%04X\t%c\n", interval_start, interval_end, interval_value);
+                }
+              /* Start a new interval.  */
+              interval_start = interval_end = ch;
+              interval_value = value;
+            }
+        }
+    }
+  /* Terminate the last interval.  */
+  if (interval_value != 0)
+    {
+      if (interval_end == interval_start)
+        fprintf (stream, "%04X\t\t%c\n", interval_start, interval_value);
+      else
+        fprintf (stream, "%04X..%04X\t%c\n", interval_start, interval_end, interval_value);
+    }
+
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error writing to '%s'\n", filename);
+      exit (1);
+    }
+}
+
+/* ========================================================================= */
+
 /* Line breaking classification.  */
 
 enum
@@ -8641,6 +8862,8 @@
   output_scripts_byname (version);
   output_blocks (version);
   output_ident_properties (version);
+  output_nonspacing_property ("uniwidth/width.c.part");
+  output_width_property_test ("../tests/uniwidth/test-uc_width2.sh.part");
   output_old_ctype (version);
 
   debug_output_lbrk_tables ("unilbrk/lbrkprop.txt");
--- a/lib/uniwidth/width.c
+++ b/lib/uniwidth/width.c
@@ -255,7 +255,7 @@
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1d080-0x1d0bf */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1d0c0-0x1d0ff */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1d100-0x1d13f */
-  0x00, 0x00, 0x00, 0x00, 0x80, 0x03, 0x00, 0xf8, /* 0x1d140-0x1d17f */
+  0x00, 0x00, 0x00, 0x00, 0x80, 0x03, 0xf8, 0xff, /* 0x1d140-0x1d17f */
   0xe7, 0x0f, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, /* 0x1d180-0x1d1bf */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1d1c0-0x1d1ff */
   /* 0x1d200-0x1d3ff */
--- a/modules/unilbrk/u16-width-linebreaks
+++ b/modules/unilbrk/u16-width-linebreaks
@@ -11,7 +11,7 @@
 unistr/u16-mbtouc-unsafe
 
 configure.ac:
-gl_LIBUNISTRING_MODULE([0.9], [unilbrk/u16-width-linebreaks])
+gl_LIBUNISTRING_MODULE([0.9.4], [unilbrk/u16-width-linebreaks])
 
 Makefile.am:
 if LIBUNISTRING_COMPILE_UNILBRK_U16_WIDTH_LINEBREAKS
--- a/modules/unilbrk/u32-width-linebreaks
+++ b/modules/unilbrk/u32-width-linebreaks
@@ -10,7 +10,7 @@
 uniwidth/width
 
 configure.ac:
-gl_LIBUNISTRING_MODULE([0.9], [unilbrk/u32-width-linebreaks])
+gl_LIBUNISTRING_MODULE([0.9.4], [unilbrk/u32-width-linebreaks])
 
 Makefile.am:
 if LIBUNISTRING_COMPILE_UNILBRK_U32_WIDTH_LINEBREAKS
--- a/modules/unilbrk/u8-width-linebreaks
+++ b/modules/unilbrk/u8-width-linebreaks
@@ -11,7 +11,7 @@
 unistr/u8-mbtouc-unsafe
 
 configure.ac:
-gl_LIBUNISTRING_MODULE([0.9], [unilbrk/u8-width-linebreaks])
+gl_LIBUNISTRING_MODULE([0.9.4], [unilbrk/u8-width-linebreaks])
 
 Makefile.am:
 if LIBUNISTRING_COMPILE_UNILBRK_U8_WIDTH_LINEBREAKS
--- a/modules/unilbrk/ulc-width-linebreaks
+++ b/modules/unilbrk/ulc-width-linebreaks
@@ -12,7 +12,7 @@
 c-ctype
 
 configure.ac:
-gl_LIBUNISTRING_MODULE([0.9], [unilbrk/ulc-width-linebreaks])
+gl_LIBUNISTRING_MODULE([0.9.4], [unilbrk/ulc-width-linebreaks])
 
 Makefile.am:
 if LIBUNISTRING_COMPILE_UNILBRK_ULC_WIDTH_LINEBREAKS
--- a/modules/uniwidth/u16-strwidth
+++ b/modules/uniwidth/u16-strwidth
@@ -10,7 +10,7 @@
 unistr/u16-strlen
 
 configure.ac:
-gl_LIBUNISTRING_MODULE([0.9], [uniwidth/u16-strwidth])
+gl_LIBUNISTRING_MODULE([0.9.4], [uniwidth/u16-strwidth])
 
 Makefile.am:
 if LIBUNISTRING_COMPILE_UNIWIDTH_U16_STRWIDTH
--- a/modules/uniwidth/u16-width
+++ b/modules/uniwidth/u16-width
@@ -10,7 +10,7 @@
 unistr/u16-mbtouc-unsafe
 
 configure.ac:
-gl_LIBUNISTRING_MODULE([0.9], [uniwidth/u16-width])
+gl_LIBUNISTRING_MODULE([0.9.4], [uniwidth/u16-width])
 
 Makefile.am:
 if LIBUNISTRING_COMPILE_UNIWIDTH_U16_WIDTH
--- a/modules/uniwidth/u32-strwidth
+++ b/modules/uniwidth/u32-strwidth
@@ -10,7 +10,7 @@
 unistr/u32-strlen
 
 configure.ac:
-gl_LIBUNISTRING_MODULE([0.9], [uniwidth/u32-strwidth])
+gl_LIBUNISTRING_MODULE([0.9.4], [uniwidth/u32-strwidth])
 
 Makefile.am:
 if LIBUNISTRING_COMPILE_UNIWIDTH_U32_STRWIDTH
--- a/modules/uniwidth/u32-width
+++ b/modules/uniwidth/u32-width
@@ -9,7 +9,7 @@
 uniwidth/width
 
 configure.ac:
-gl_LIBUNISTRING_MODULE([0.9], [uniwidth/u32-width])
+gl_LIBUNISTRING_MODULE([0.9.4], [uniwidth/u32-width])
 
 Makefile.am:
 if LIBUNISTRING_COMPILE_UNIWIDTH_U32_WIDTH
--- a/modules/uniwidth/u8-strwidth
+++ b/modules/uniwidth/u8-strwidth
@@ -10,7 +10,7 @@
 unistr/u8-strlen
 
 configure.ac:
-gl_LIBUNISTRING_MODULE([0.9], [uniwidth/u8-strwidth])
+gl_LIBUNISTRING_MODULE([0.9.4], [uniwidth/u8-strwidth])
 
 Makefile.am:
 if LIBUNISTRING_COMPILE_UNIWIDTH_U8_STRWIDTH
--- a/modules/uniwidth/u8-width
+++ b/modules/uniwidth/u8-width
@@ -10,7 +10,7 @@
 unistr/u8-mbtouc-unsafe
 
 configure.ac:
-gl_LIBUNISTRING_MODULE([0.9], [uniwidth/u8-width])
+gl_LIBUNISTRING_MODULE([0.9.4], [uniwidth/u8-width])
 
 Makefile.am:
 if LIBUNISTRING_COMPILE_UNIWIDTH_U8_WIDTH
--- a/modules/uniwidth/width
+++ b/modules/uniwidth/width
@@ -10,7 +10,7 @@
 streq
 
 configure.ac:
-gl_LIBUNISTRING_MODULE([0.9], [uniwidth/width])
+gl_LIBUNISTRING_MODULE([0.9.4], [uniwidth/width])
 
 Makefile.am:
 if LIBUNISTRING_COMPILE_UNIWIDTH_WIDTH
--- a/tests/uniwidth/test-uc_width2.sh
+++ b/tests/uniwidth/test-uc_width2.sh
@@ -352,8 +352,8 @@
 10A3F		0
 10A40..1D166	1
 1D167..1D169	0
-1D16A..1D17A	1
-1D17B..1D182	0
+1D16A..1D172	1
+1D173..1D182	0
 1D183..1D184	1
 1D185..1D18B	0
 1D18C..1D1A9	1