# HG changeset patch # User Bruno Haible # Date 1294564265 -3600 # Node ID 2e27d5cf01375007c0e3cd0e2a2925d77451e234 # Parent d9736adb0cbc54c6ddd99992171ad651004ff221 uniwidth/width: Fix width of U+1D173..U+1D17A. * lib/gen-uni-tables.c (is_nonspacing, output_nonspacing_property, symbolic_width, output_width_property_test): New functions. (main): Invoke output_nonspacing_property, output_width_property_test. * lib/uniwidth/width.c (nonspacing_table_data): Set bits for U+1D173..U+1D17A. * tests/uniwidth/test-uc_width2.sh: For U+1D173..U+1D17A, expect 0, not 1. * modules/uniwidth/*: Bump version number of expected libunistring version. * modules/unilbrk/*: Likewise. diff --git a/ChangeLog b/ChangeLog --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,17 @@ +2011-01-09 Bruno Haible + + uniwidth/width: Fix width of U+1D173..U+1D17A. + * lib/gen-uni-tables.c (is_nonspacing, output_nonspacing_property, + symbolic_width, output_width_property_test): New functions. + (main): Invoke output_nonspacing_property, output_width_property_test. + * lib/uniwidth/width.c (nonspacing_table_data): Set bits for + U+1D173..U+1D17A. + * tests/uniwidth/test-uc_width2.sh: For U+1D173..U+1D17A, expect 0, not + 1. + * modules/uniwidth/*: Bump version number of expected libunistring + version. + * modules/unilbrk/*: Likewise. + 2011-01-08 Bruno Haible uninorm tests: Preserve copyright of Unicode data file. diff --git a/lib/gen-uni-tables.c b/lib/gen-uni-tables.c --- a/lib/gen-uni-tables.c +++ b/lib/gen-uni-tables.c @@ -5144,6 +5144,227 @@ /* ========================================================================= */ +/* Non-spacing attribute and width. 
*/
+
+/* The non-spacing attribute table consists of:
+   - Non-spacing characters; generated from PropList.txt or
+     "grep '^[^;]*;[^;]*;[^;]*;[^;]*;NSM;' UnicodeData.txt"
+   - Format control characters; generated from
+     "grep '^[^;]*;[^;]*;Cf;' UnicodeData.txt"
+   - Zero width characters; generated from
+     "grep '^[^;]*;ZERO WIDTH ' UnicodeData.txt"
+   Note that is_nonspacing below additionally accepts characters of
+   general category Cc.
+ */
+
+/* Returns true if CH has a name in unicode_attributes[] and is either of
+   bidi category NSM, of general category Cc or Cf, or has a name that
+   starts with "ZERO WIDTH ".  Characters without a name are never
+   considered non-spacing.  */
+static bool
+is_nonspacing (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+          && (get_bidi_category (ch) == UC_BIDI_NSM
+              || is_category_Cc (ch) || is_category_Cf (ch)
+              || strncmp (unicode_attributes[ch].name, "ZERO WIDTH ", 11) == 0));
+}
+
+/* Writes to FILENAME the C source of two tables:
+     - nonspacing_table_data: for every block of 0x200 code points that
+       contains at least one non-spacing character, a bitmap of 64 bytes.
+       Bit L of byte J*8+K of the bitmap of block I is set if the code point
+       I*0x200 + J*0x40 + K*8 + L satisfies is_nonspacing.
+     - nonspacing_table_ind: for every block, the index of its bitmap in
+       nonspacing_table_data, or -1 if the block has no bitmap.  The number
+       of entries is rounded up to a multiple of 8.
+   The block starting at 0xe0000 never gets a bitmap.
+   Prints a message to stderr and exits with status 1 if the file cannot be
+   opened or written.  */
+static void
+output_nonspacing_property (const char *filename)
+{
+  FILE *stream;
+  int ind[0x110000 / 0x200];
+  unsigned int i;
+  unsigned int i_max;
+  int next_ind;
+
+  stream = fopen (filename, "w");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "cannot open '%s' for writing\n", filename);
+      exit (1);
+    }
+
+  /* First pass: assign consecutive indices to the blocks that need a
+     bitmap.  */
+  next_ind = 0;
+  for (i = 0; i < 0x110000 / 0x200; i++)
+    {
+      bool nontrivial = false;
+      unsigned int ch;
+
+      if (i != 0xe0000 / 0x200) /* The 0xe0000 block is handled by code.  */
+        for (ch = i * 0x200; ch < (i + 1) * 0x200; ch++)
+          if (is_nonspacing (ch))
+            {
+              nontrivial = true;
+              break;
+            }
+      if (nontrivial)
+        ind[i] = next_ind++;
+      else
+        ind[i] = -1;
+    }
+
+  /* Second pass: emit the bitmaps, 8 rows of 8 bytes per block.  */
+  fprintf (stream, "static const unsigned char nonspacing_table_data[%d*64] = {\n",
+           next_ind);
+  i_max = 0;
+  for (i = 0; i < 0x110000 / 0x200; i++)
+    {
+      bool nontrivial = (ind[i] >= 0);
+
+      if (nontrivial)
+        {
+          unsigned int j;
+
+          fprintf (stream, " /* 0x%04x-0x%04x */\n", i * 0x200, (i + 1) * 0x200 - 1);
+          for (j = 0; j < 8; j++)
+            {
+              unsigned int k;
+
+              fprintf (stream, " ");
+              for (k = 0; k < 8; k++)
+                {
+                  unsigned int l;
+                  unsigned char bits = 0;
+
+                  for (l = 0; l < 8; l++)
+                    {
+                      unsigned int ch = i * 0x200 + j * 0x40 + k * 8 + l;
+
+                      if (is_nonspacing (ch))
+                        bits |= 1 << l;
+                    }
+                  /* No comma after the very last byte of the table.  */
+                  fprintf (stream, " 0x%02x%c", bits,
+                           ind[i] + 1 == next_ind && j == 8 - 1 && k == 8 - 1 ? ' ' : ',');
+                }
+              fprintf (stream, " /* 0x%04x-0x%04x */\n",
+                       i * 0x200 + j * 0x40, i * 0x200 + (j + 1) * 0x40 - 1);
+            }
+          /* Remember how many index entries are needed so far, that is, one
+             more than the index of this block.  Storing i itself would drop
+             the last block from nonspacing_table_ind whenever i is a
+             multiple of 8, because the rounding below would not grow it.  */
+          i_max = i + 1;
+        }
+    }
+  fprintf (stream, "};\n");
+
+  /* Round the number of index entries up to a multiple of 8, so that the
+     index table can be emitted in full rows of 8 entries.  */
+  i_max = ((i_max + 8 - 1) / 8) * 8;
+  fprintf (stream, "static const signed char nonspacing_table_ind[%u] = {\n",
+           i_max);
+  {
+    unsigned int j;
+
+    for (j = 0; j < i_max / 8; j++)
+      {
+        unsigned int k;
+
+        fprintf (stream, " ");
+        for (k = 0; k < 8; k++)
+          {
+            i = j * 8 + k;
+            /* No comma after the very last entry of the table.  */
+            fprintf (stream, " %2d%c", ind[i],
+                     j == i_max / 8 - 1 && k == 8 - 1 ? ' ' : ',');
+          }
+        fprintf (stream, " /* 0x%04x-0x%04x */\n",
+                 j * 8 * 0x200, (j + 1) * 8 * 0x200 - 1);
+      }
+  }
+  fprintf (stream, "};\n");
+
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error writing to '%s'\n", filename);
+      exit (1);
+    }
+}
+
+/* Returns the width of ch as one of 0, '0', '1', '2', 'A'.
+   0 is returned for Cc characters below 0x00A0 and for unassigned code
+   points outside the ranges listed below; output_width_property_test skips
+   these.
+   '0' is returned for characters that satisfy is_nonspacing.
+   '2' is returned for characters whose unicode_width[] entry is "W" or "F",
+   and for unassigned code points in 0x20000..0x2FFFD and 0x30000..0x3FFFD.
+   'A' is returned for unassigned code points in 0xE000..0xF8FF and for all
+   remaining characters in 0x00A1..0xFFFF, except those whose unicode_width[]
+   entry is "H".
+   '1' is returned in all other cases.  */
+static char
+symbolic_width (unsigned int ch)
+{
+  /* Test for unassigned character.  */
+  if (is_property_unassigned_code_value (ch))
+    {
+      /* Unicode TR#11 section "Unassigned and Private-Use Characters".  */
+      if (ch >= 0xE000 && ch <= 0xF8FF)
+        return 'A';
+      if ((ch >= 0x20000 && ch <= 0x2FFFD) || (ch >= 0x30000 && ch <= 0x3FFFD))
+        return '2';
+      return 0;
+    }
+  else
+    {
+      /* Test for non-spacing or control character.  The Cc test must come
+         first, because is_nonspacing accepts Cc characters as well.  */
+      if (is_category_Cc (ch) && ch < 0x00A0)
+        return 0;
+      if (is_nonspacing (ch))
+        return '0';
+      /* Test for double-width character.  */
+      if (unicode_width[ch] != NULL
+          && (strcmp (unicode_width[ch], "W") == 0
+              || strcmp (unicode_width[ch], "F") == 0))
+        return '2';
+      /* Test for half-width character.  */
+      if (unicode_width[ch] != NULL
+          && strcmp (unicode_width[ch], "H") == 0)
+        return '1';
+    }
+  /* In ancient CJK encodings, Cyrillic and most other characters are
+     double-width as well.  */
+  if (ch >= 0x00A1 && ch < 0x10000)
+    return 'A';
+  return '1';
+}
+
+/* Writes to FILENAME one line per maximal run of code points that have the
+   same non-zero symbolic_width value: "XXXX\t\tV" for a run of a single code
+   point, "XXXX..YYYY\tV" otherwise, with the code points in upper-case
+   hexadecimal.  Code points for which symbolic_width returns 0 are skipped;
+   they neither start nor terminate a run, so a run may extend across them.
+   Prints a message to stderr and exits with status 1 if the file cannot be
+   opened or written.  */
+static void
+output_width_property_test (const char *filename)
+{
+  FILE *stream;
+  unsigned int interval_start, interval_end, ch;
+  char interval_value;
+
+  stream = fopen (filename, "w");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "cannot open '%s' for writing\n", filename);
+      exit (1);
+    }
+
+  /* interval_value == 0 means that no interval has been started yet.  */
+  interval_value = 0;
+  interval_start = interval_end = 0; /* avoid GCC warning */
+  for (ch = 0; ch < 0x110000; ch++)
+    {
+      char value = symbolic_width (ch);
+      if (value != 0) /* skip Cc control characters and unassigned characters */
+        {
+          if (value == interval_value)
+            /* Extend the interval.  */
+            interval_end = ch;
+          else
+            {
+              /* Terminate the interval.  */
+              if (interval_value != 0)
+                {
+                  if (interval_end == interval_start)
+                    fprintf (stream, "%04X\t\t%c\n", interval_start, interval_value);
+                  else
+                    fprintf (stream, "%04X..%04X\t%c\n", interval_start, interval_end, interval_value);
+                }
+              /* Start a new interval.  */
+              interval_start = interval_end = ch;
+              interval_value = value;
+            }
+        }
+    }
+  /* Terminate the last interval.  */
+  if (interval_value != 0)
+    {
+      if (interval_end == interval_start)
+        fprintf (stream, "%04X\t\t%c\n", interval_start, interval_value);
+      else
+        fprintf (stream, "%04X..%04X\t%c\n", interval_start, interval_end, interval_value);
+    }
+
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error writing to '%s'\n", filename);
+      exit (1);
+    }
+}
+
+/* ========================================================================= */
+
 /* Line breaking classification.
*/ enum @@ -8641,6 +8862,8 @@ output_scripts_byname (version); output_blocks (version); output_ident_properties (version); + output_nonspacing_property ("uniwidth/width.c.part"); + output_width_property_test ("../tests/uniwidth/test-uc_width2.sh.part"); output_old_ctype (version); debug_output_lbrk_tables ("unilbrk/lbrkprop.txt"); diff --git a/lib/uniwidth/width.c b/lib/uniwidth/width.c --- a/lib/uniwidth/width.c +++ b/lib/uniwidth/width.c @@ -255,7 +255,7 @@ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1d080-0x1d0bf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1d0c0-0x1d0ff */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1d100-0x1d13f */ - 0x00, 0x00, 0x00, 0x00, 0x80, 0x03, 0x00, 0xf8, /* 0x1d140-0x1d17f */ + 0x00, 0x00, 0x00, 0x00, 0x80, 0x03, 0xf8, 0xff, /* 0x1d140-0x1d17f */ 0xe7, 0x0f, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, /* 0x1d180-0x1d1bf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1d1c0-0x1d1ff */ /* 0x1d200-0x1d3ff */ diff --git a/modules/unilbrk/u16-width-linebreaks b/modules/unilbrk/u16-width-linebreaks --- a/modules/unilbrk/u16-width-linebreaks +++ b/modules/unilbrk/u16-width-linebreaks @@ -11,7 +11,7 @@ unistr/u16-mbtouc-unsafe configure.ac: -gl_LIBUNISTRING_MODULE([0.9], [unilbrk/u16-width-linebreaks]) +gl_LIBUNISTRING_MODULE([0.9.4], [unilbrk/u16-width-linebreaks]) Makefile.am: if LIBUNISTRING_COMPILE_UNILBRK_U16_WIDTH_LINEBREAKS diff --git a/modules/unilbrk/u32-width-linebreaks b/modules/unilbrk/u32-width-linebreaks --- a/modules/unilbrk/u32-width-linebreaks +++ b/modules/unilbrk/u32-width-linebreaks @@ -10,7 +10,7 @@ uniwidth/width configure.ac: -gl_LIBUNISTRING_MODULE([0.9], [unilbrk/u32-width-linebreaks]) +gl_LIBUNISTRING_MODULE([0.9.4], [unilbrk/u32-width-linebreaks]) Makefile.am: if LIBUNISTRING_COMPILE_UNILBRK_U32_WIDTH_LINEBREAKS diff --git a/modules/unilbrk/u8-width-linebreaks b/modules/unilbrk/u8-width-linebreaks --- a/modules/unilbrk/u8-width-linebreaks +++ b/modules/unilbrk/u8-width-linebreaks @@ 
-11,7 +11,7 @@ unistr/u8-mbtouc-unsafe configure.ac: -gl_LIBUNISTRING_MODULE([0.9], [unilbrk/u8-width-linebreaks]) +gl_LIBUNISTRING_MODULE([0.9.4], [unilbrk/u8-width-linebreaks]) Makefile.am: if LIBUNISTRING_COMPILE_UNILBRK_U8_WIDTH_LINEBREAKS diff --git a/modules/unilbrk/ulc-width-linebreaks b/modules/unilbrk/ulc-width-linebreaks --- a/modules/unilbrk/ulc-width-linebreaks +++ b/modules/unilbrk/ulc-width-linebreaks @@ -12,7 +12,7 @@ c-ctype configure.ac: -gl_LIBUNISTRING_MODULE([0.9], [unilbrk/ulc-width-linebreaks]) +gl_LIBUNISTRING_MODULE([0.9.4], [unilbrk/ulc-width-linebreaks]) Makefile.am: if LIBUNISTRING_COMPILE_UNILBRK_ULC_WIDTH_LINEBREAKS diff --git a/modules/uniwidth/u16-strwidth b/modules/uniwidth/u16-strwidth --- a/modules/uniwidth/u16-strwidth +++ b/modules/uniwidth/u16-strwidth @@ -10,7 +10,7 @@ unistr/u16-strlen configure.ac: -gl_LIBUNISTRING_MODULE([0.9], [uniwidth/u16-strwidth]) +gl_LIBUNISTRING_MODULE([0.9.4], [uniwidth/u16-strwidth]) Makefile.am: if LIBUNISTRING_COMPILE_UNIWIDTH_U16_STRWIDTH diff --git a/modules/uniwidth/u16-width b/modules/uniwidth/u16-width --- a/modules/uniwidth/u16-width +++ b/modules/uniwidth/u16-width @@ -10,7 +10,7 @@ unistr/u16-mbtouc-unsafe configure.ac: -gl_LIBUNISTRING_MODULE([0.9], [uniwidth/u16-width]) +gl_LIBUNISTRING_MODULE([0.9.4], [uniwidth/u16-width]) Makefile.am: if LIBUNISTRING_COMPILE_UNIWIDTH_U16_WIDTH diff --git a/modules/uniwidth/u32-strwidth b/modules/uniwidth/u32-strwidth --- a/modules/uniwidth/u32-strwidth +++ b/modules/uniwidth/u32-strwidth @@ -10,7 +10,7 @@ unistr/u32-strlen configure.ac: -gl_LIBUNISTRING_MODULE([0.9], [uniwidth/u32-strwidth]) +gl_LIBUNISTRING_MODULE([0.9.4], [uniwidth/u32-strwidth]) Makefile.am: if LIBUNISTRING_COMPILE_UNIWIDTH_U32_STRWIDTH diff --git a/modules/uniwidth/u32-width b/modules/uniwidth/u32-width --- a/modules/uniwidth/u32-width +++ b/modules/uniwidth/u32-width @@ -9,7 +9,7 @@ uniwidth/width configure.ac: -gl_LIBUNISTRING_MODULE([0.9], [uniwidth/u32-width]) 
+gl_LIBUNISTRING_MODULE([0.9.4], [uniwidth/u32-width]) Makefile.am: if LIBUNISTRING_COMPILE_UNIWIDTH_U32_WIDTH diff --git a/modules/uniwidth/u8-strwidth b/modules/uniwidth/u8-strwidth --- a/modules/uniwidth/u8-strwidth +++ b/modules/uniwidth/u8-strwidth @@ -10,7 +10,7 @@ unistr/u8-strlen configure.ac: -gl_LIBUNISTRING_MODULE([0.9], [uniwidth/u8-strwidth]) +gl_LIBUNISTRING_MODULE([0.9.4], [uniwidth/u8-strwidth]) Makefile.am: if LIBUNISTRING_COMPILE_UNIWIDTH_U8_STRWIDTH diff --git a/modules/uniwidth/u8-width b/modules/uniwidth/u8-width --- a/modules/uniwidth/u8-width +++ b/modules/uniwidth/u8-width @@ -10,7 +10,7 @@ unistr/u8-mbtouc-unsafe configure.ac: -gl_LIBUNISTRING_MODULE([0.9], [uniwidth/u8-width]) +gl_LIBUNISTRING_MODULE([0.9.4], [uniwidth/u8-width]) Makefile.am: if LIBUNISTRING_COMPILE_UNIWIDTH_U8_WIDTH diff --git a/modules/uniwidth/width b/modules/uniwidth/width --- a/modules/uniwidth/width +++ b/modules/uniwidth/width @@ -10,7 +10,7 @@ streq configure.ac: -gl_LIBUNISTRING_MODULE([0.9], [uniwidth/width]) +gl_LIBUNISTRING_MODULE([0.9.4], [uniwidth/width]) Makefile.am: if LIBUNISTRING_COMPILE_UNIWIDTH_WIDTH diff --git a/tests/uniwidth/test-uc_width2.sh b/tests/uniwidth/test-uc_width2.sh --- a/tests/uniwidth/test-uc_width2.sh +++ b/tests/uniwidth/test-uc_width2.sh @@ -352,8 +352,8 @@ 10A3F 0 10A40..1D166 1 1D167..1D169 0 -1D16A..1D17A 1 -1D17B..1D182 0 +1D16A..1D172 1 +1D173..1D182 0 1D183..1D184 1 1D185..1D18B 0 1D18C..1D1A9 1