changeset 4314:bc3a7b27a426

Upgrade the width function to Unicode 4.0.
author Bruno Haible <bruno@clisp.org>
date Mon, 05 May 2003 09:12:58 +0000
parents c2e2172a99d2
children f1a5dc0b9981
files lib/ChangeLog lib/linebreak.c
diffstat 2 files changed, 32 insertions(+), 17 deletions(-) [+]
line wrap: on
line diff
--- a/lib/ChangeLog
+++ b/lib/ChangeLog
@@ -1,3 +1,15 @@
+2003-05-03  Bruno Haible  <bruno@clisp.org>
+
+	Upgrade to Unicode-4.0.
+	* linebreak.c (nonspacing_table_data): Change width of U+00AD,
+	U+0350..U+0357, U+035D..U+035F, U+0600..U+0603, U+0610..U+0615,
+	U+0656..U+0658, U+0A01, U+0AE2..U+0AE3, U+0CBC, U+17B4..U+17B5,
+	U+17DD, U+1920..U+1922, U+1927..U+192B, U+1932, U+1939..U+193B
+	from 1 to 0. Change width of U+0CBF, U+0CC6, U+180E from 0 to 1.
+	(uc_width): Change width of U+4DC0..U+4DFF from 2 to 1. Change width
+	of U+2A6D7..U+2F7FF, U+2FA1E..U+2FFFD, U+30000..U+3FFFD from 1 to 2.
+	Change width of U+E0100..U+E01EF from 1 to 0.
+
 2003-04-25  Bruno Haible  <bruno@clisp.org>
 
 	* copy-file.c: Include <stddef.h>, for size_t.
--- a/lib/linebreak.c
+++ b/lib/linebreak.c
@@ -233,7 +233,7 @@
   /* 0x0000-0x01ff */
   0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, /* 0x0000-0x003f */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, /* 0x0040-0x007f */
-  0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, /* 0x0080-0x00bf */
+  0xff, 0xff, 0xff, 0xff, 0x00, 0x20, 0x00, 0x00, /* 0x0080-0x00bf */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00c0-0x00ff */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0100-0x013f */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0140-0x017f */
@@ -245,7 +245,7 @@
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0280-0x02bf */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x02c0-0x02ff */
   0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x0300-0x033f */
-  0xff, 0xff, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, /* 0x0340-0x037f */
+  0xff, 0xff, 0xff, 0xe0, 0xff, 0xff, 0x00, 0x00, /* 0x0340-0x037f */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0380-0x03bf */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x03c0-0x03ff */
   /* 0x0400-0x05ff */
@@ -258,8 +258,8 @@
   0x00, 0x00, 0xfe, 0xff, 0xfb, 0xff, 0xff, 0xbb, /* 0x0580-0x05bf */
   0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x05c0-0x05ff */
   /* 0x0600-0x07ff */
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0600-0x063f */
-  0x00, 0xf8, 0x3f, 0x00, 0x00, 0x00, 0x01, 0x00, /* 0x0640-0x067f */
+  0x0f, 0x00, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0600-0x063f */
+  0x00, 0xf8, 0xff, 0x01, 0x00, 0x00, 0x01, 0x00, /* 0x0640-0x067f */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0680-0x06bf */
   0x00, 0x00, 0xc0, 0xff, 0x9f, 0x3d, 0x00, 0x00, /* 0x06c0-0x06ff */
   0x00, 0x80, 0x02, 0x00, 0x00, 0x00, 0xff, 0xff, /* 0x0700-0x073f */
@@ -276,10 +276,10 @@
   0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, /* 0x0980-0x09bf */
   0x1e, 0x20, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, /* 0x09c0-0x09ff */
   /* 0x0a00-0x0bff */
-  0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, /* 0x0a00-0x0a3f */
+  0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, /* 0x0a00-0x0a3f */
   0x86, 0x39, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, /* 0x0a40-0x0a7f */
   0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, /* 0x0a80-0x0abf */
-  0xbe, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0ac0-0x0aff */
+  0xbe, 0x21, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, /* 0x0ac0-0x0aff */
   0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x90, /* 0x0b00-0x0b3f */
   0x0e, 0x20, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0b40-0x0b7f */
   0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0b80-0x0bbf */
@@ -287,8 +287,8 @@
   /* 0x0c00-0x0dff */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, /* 0x0c00-0x0c3f */
   0xc1, 0x3d, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0c40-0x0c7f */
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, /* 0x0c80-0x0cbf */
-  0x40, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0cc0-0x0cff */
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, /* 0x0c80-0x0cbf */
+  0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0cc0-0x0cff */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0d00-0x0d3f */
   0x0e, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0d40-0x0d7f */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0d80-0x0dbf */
@@ -318,14 +318,14 @@
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x16c0-0x16ff */
   0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1c, 0x00, /* 0x1700-0x173f */
   0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0c, 0x00, /* 0x1740-0x177f */
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, /* 0x1780-0x17bf */
-  0x40, 0xfe, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x17c0-0x17ff */
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb0, 0x3f, /* 0x1780-0x17bf */
+  0x40, 0xfe, 0x0f, 0x20, 0x00, 0x00, 0x00, 0x00, /* 0x17c0-0x17ff */
   /* 0x1800-0x19ff */
-  0x00, 0x78, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1800-0x183f */
+  0x00, 0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1800-0x183f */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1840-0x187f */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, /* 0x1880-0x18bf */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18c0-0x18ff */
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1900-0x193f */
+  0x00, 0x00, 0x00, 0x00, 0x87, 0x0f, 0x04, 0x0e, /* 0x1900-0x193f */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1940-0x197f */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x1980-0x19bf */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x19c0-0x19ff */
@@ -419,7 +419,7 @@
       if (ind >= 0)
 	if ((nonspacing_table_data[64*ind + ((uc >> 3) & 63)] >> (uc & 7)) & 1)
 	  {
-	    if (uc > 0 && uc < 0x100)
+	    if (uc > 0 && uc < 0xa0)
 	      return -1;
 	    else
 	      return 0;
@@ -427,7 +427,9 @@
     }
   else if ((uc >> 9) == (0xe0000 >> 9))
     {
-      if (uc >= 0xe0020 ? uc <= 0xe007f : uc == 0xe0001)
+      if (uc < 0xe0100
+	  ? (uc >= 0xe0020 ? uc <= 0xe007f : uc == 0xe0001)
+	  : (uc <= 0xe01ef))
 	return 0;
     }
   /* Test for double-width character.
@@ -436,15 +438,16 @@
    */
   if (uc >= 0x1100
       && ((uc < 0x1160) /* Hangul Jamo */
-	  || (uc >= 0x2e80 && uc < 0xa4d0  /* CJK ... Yi */
+	  || (uc >= 0x2e80 && uc < 0x4dc0  /* CJK */
 	      && !(uc == 0x303f))
+	  || (uc >= 0x4e00 && uc < 0xa4d0) /* CJK ... Yi */
 	  || (uc >= 0xac00 && uc < 0xd7a4) /* Hangul Syllables */
 	  || (uc >= 0xf900 && uc < 0xfb00) /* CJK Compatibility Ideographs */
 	  || (uc >= 0xfe30 && uc < 0xfe70) /* CJK Compatibility Forms */
 	  || (uc >= 0xff00 && uc < 0xff61) /* Fullwidth Forms */
 	  || (uc >= 0xffe0 && uc < 0xffe7)
-	  || (uc >= 0x20000 && uc <= 0x2a6d6) /* CJK */
-	  || (uc >= 0x2f800 && uc <= 0x2fa1d) /* CJK Compatibility Ideographs */
+	  || (uc >= 0x20000 && uc <= 0x2fffd) /* CJK, CJK Compatibility Ideographs */
+	  || (uc >= 0x30000 && uc <= 0x3fffd)
      )   )
     return 2;
   /* In ancient CJK encodings, Cyrillic and most other characters are