changeset 10064:f71eb33cf29b

Tailor the linebreaking algorithm.
author Bruno Haible <bruno@clisp.org>
date Mon, 12 May 2008 19:22:07 +0200
parents 2b9bc74f76e4
children e1449fcc7ecb
files ChangeLog lib/unilbrk/tables.c
diffstat 2 files changed, 11 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,4 +1,9 @@
-2008-05-11  Bruno Haible  <bruno@clisp.org>
+2008-05-12  Bruno Haible  <bruno@clisp.org>
+
+	Tailor the linebreaking algorithm.
+	* lib/unilbrk/tables.c (unilbrk_table): Change (IS,AL) entry.
+
+2008-05-12  Bruno Haible  <bruno@clisp.org>
 
 	Update to Unicode 5.0.0.
 	* lib/unilbrk/tables.h (LBP_*): Add LBP_WJ, LBP_H2, LBP_H3, LBP_JL,
--- a/lib/unilbrk/tables.c
+++ b/lib/unilbrk/tables.c
@@ -39,7 +39,7 @@
 /* NS */ { P, I, D, I, D, I, P, P, D, I, D, I, P, D, D, D, P, D, D, D, D, D, D, D, },
 /* OP */ { P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, },
 /* QU */ { P, I, I, I, I, I, P, P, I, I, P, I, P, I, I, I, P, I, I, I, I, I, I, I, },
-/* IS */ { P, I, D, I, D, I, P, P, D, I, D, I, P, I, D, D, P, I, D, D, D, D, D, D, },
+/* IS */ { P, I, D, I, D, I, P, P, D, I, D, I, P, I, D, D, P, D, D, D, D, D, D, D, },
 /* NU */ { P, I, D, I, D, I, P, P, I, I, D, I, P, I, I, I, P, I, D, D, D, D, D, D, },
 /* PO */ { P, I, D, I, D, I, P, P, D, I, I, I, P, I, D, D, P, I, D, D, D, D, D, D, },
 /* PR */ { P, I, D, I, D, I, P, P, D, I, I, I, P, I, D, D, P, I, I, I, I, I, I, I, },
@@ -54,6 +54,10 @@
 /* "" */
 /* before */
 };
+/* Note: The (IS,AL) entry has been changed from I to D. In other words, the
+   rule "Do not break between numeric punctuation and alphabetics" is not
+   implemented here. We want to allow a break before the closing HTML tag in
+   strings like "<P>Some sentence.</P>".  */
 /* Note: The (B2,B2) entry should probably be D instead of P.  */
 /* Note: The (PR,ID) entry should probably be D instead of I.  */
 /* Note: The (WJ,*) and (GL,*) entries should probably be P instead of I.  */