# HG changeset patch # User Bruno Haible <bruno@clisp.org> # Date 1234263304 -3600 # Node ID 47e2fd3b4cf8d7ed4a971150cd0fe62776c3d11a # Parent 59b0e29f69082eef88e714c1accfd2cb8a0f97bc Rename tables.[hc] to lbrktables.[hc]. diff --git a/lib/unilbrk/lbrktables.c b/lib/unilbrk/lbrktables.c new file mode 100644 --- /dev/null +++ b/lib/unilbrk/lbrktables.c @@ -0,0 +1,63 @@ +/* Line breaking auxiliary tables. + Copyright (C) 2001-2003, 2006-2009 Free Software Foundation, Inc. + Written by Bruno Haible <bruno@clisp.org>, 2001. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> + +/* Specification. */ +#include "unilbrk/lbrktables.h" + +/* Define unilbrkprop, table of line breaking properties. 
*/ +#include "unilbrk/lbrkprop2.h" + +const unsigned char unilbrk_table[24][24] = +{ + /* after */ + /* WJ GL B2 BA BB HY CL EX IN NS OP QU IS NU PO PR SY AL H2 H3 ID JL JV JT */ +/* WJ */ { P, I, I, I, I, I, P, P, I, I, I, I, P, I, I, I, P, I, I, I, I, I, I, I, }, +/* GL */ { P, I, I, I, I, I, P, P, I, I, I, I, P, I, I, I, P, I, I, I, I, I, I, I, }, +/* B2 */ { P, I, P, I, D, I, P, P, D, I, D, I, P, D, D, D, P, D, D, D, D, D, D, D, }, +/* BA */ { P, D, D, I, D, I, P, P, D, I, D, I, P, D, D, D, P, D, D, D, D, D, D, D, }, +/* BB */ { P, I, I, I, I, I, P, P, I, I, I, I, P, I, I, I, P, I, I, I, I, I, I, I, }, +/* HY */ { P, D, D, I, D, I, P, P, D, I, D, I, P, I, D, D, P, D, D, D, D, D, D, D, }, +/* CL */ { P, I, D, I, D, I, P, P, D, P, D, I, P, D, I, I, P, D, D, D, D, D, D, D, }, +/* EX */ { P, I, D, I, D, I, P, P, D, I, D, I, P, D, D, D, P, D, D, D, D, D, D, D, }, +/* IN */ { P, I, D, I, D, I, P, P, I, I, D, I, P, D, D, D, P, D, D, D, D, D, D, D, }, +/* NS */ { P, I, D, I, D, I, P, P, D, I, D, I, P, D, D, D, P, D, D, D, D, D, D, D, }, +/* OP */ { P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, }, +/* QU */ { P, I, I, I, I, I, P, P, I, I, P, I, P, I, I, I, P, I, I, I, I, I, I, I, }, +/* IS */ { P, I, D, I, D, I, P, P, D, I, D, I, P, I, D, D, P, D, D, D, D, D, D, D, }, +/* NU */ { P, I, D, I, D, I, P, P, I, I, D, I, P, I, I, I, P, I, D, D, D, D, D, D, }, +/* PO */ { P, I, D, I, D, I, P, P, D, I, I, I, P, I, D, D, P, I, D, D, D, D, D, D, }, +/* PR */ { P, I, D, I, D, I, P, P, D, I, I, I, P, I, D, D, P, I, I, I, I, I, I, I, }, +/* SY */ { P, I, D, I, D, I, P, P, D, I, D, I, P, I, D, D, P, D, D, D, D, D, D, D, }, +/* AL */ { P, I, D, I, D, I, P, P, I, I, D, I, P, I, D, D, P, I, D, D, D, D, D, D, }, +/* H2 */ { P, I, D, I, D, I, P, P, I, I, D, I, P, D, I, D, P, D, D, D, D, D, I, I, }, +/* H3 */ { P, I, D, I, D, I, P, P, I, I, D, I, P, D, I, D, P, D, D, D, D, D, D, I, }, +/* ID */ { P, I, D, I, D, I, P, P, I, I, D, I, P, D, I, D, P, D, D, D, D, D, D, 
D, }, +/* JL */ { P, I, D, I, D, I, P, P, I, I, D, I, P, D, I, D, P, D, I, I, D, I, I, D, }, +/* JV */ { P, I, D, I, D, I, P, P, I, I, D, I, P, D, I, D, P, D, D, D, D, D, I, I, }, +/* JT */ { P, I, D, I, D, I, P, P, I, I, D, I, P, D, I, D, P, D, D, D, D, D, D, I, }, +/* "" */ +/* before */ +}; +/* Note: The (IS,AL) entry has been changed from I to D. In other words, the + rule "Do not break between numeric punctuation and alphabetics" is not + implemented here. We want to break before the HTML tag in strings like + "

<P>Some sentence.</P>

" */ +/* Note: The (B2,B2) entry should probably be D instead of P. */ +/* Note: The (PR,ID) entry should probably be D instead of I. */ +/* Note: The (WJ,*) and (GL,*) entries should probably be P instead of I. */ diff --git a/lib/unilbrk/lbrktables.h b/lib/unilbrk/lbrktables.h new file mode 100644 --- /dev/null +++ b/lib/unilbrk/lbrktables.h @@ -0,0 +1,94 @@ +/* Line breaking auxiliary tables. + Copyright (C) 2001-2003, 2006-2008 Free Software Foundation, Inc. + Written by Bruno Haible , 2001. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . */ + +#include "unitypes.h" + +/* Line breaking classification. */ + +enum +{ + /* Values >= 24 are resolved at run time. 
*/ + LBP_BK = 24, /* mandatory break */ +/*LBP_CR, carriage return - not used here because it's a DOSism */ +/*LBP_LF, line feed - not used here because it's a DOSism */ + LBP_CM = 25, /* attached characters and combining marks */ +/*LBP_NL, next line - not used here because it's equivalent to LBP_BK */ +/*LBP_SG, surrogates - not used here because they are not characters */ + LBP_WJ = 0, /* word joiner */ + LBP_ZW = 26, /* zero width space */ + LBP_GL = 1, /* non-breaking (glue) */ + LBP_SP = 27, /* space */ + LBP_B2 = 2, /* break opportunity before and after */ + LBP_BA = 3, /* break opportunity after */ + LBP_BB = 4, /* break opportunity before */ + LBP_HY = 5, /* hyphen */ + LBP_CB = 28, /* contingent break opportunity */ + LBP_CL = 6, /* closing punctuation */ + LBP_EX = 7, /* exclamation/interrogation */ + LBP_IN = 8, /* inseparable */ + LBP_NS = 9, /* non starter */ + LBP_OP = 10, /* opening punctuation */ + LBP_QU = 11, /* ambiguous quotation */ + LBP_IS = 12, /* infix separator (numeric) */ + LBP_NU = 13, /* numeric */ + LBP_PO = 14, /* postfix (numeric) */ + LBP_PR = 15, /* prefix (numeric) */ + LBP_SY = 16, /* symbols allowing breaks */ + LBP_AI = 29, /* ambiguous (alphabetic or ideograph) */ + LBP_AL = 17, /* ordinary alphabetic and symbol characters */ + LBP_H2 = 18, /* Hangul LV syllable */ + LBP_H3 = 19, /* Hangul LVT syllable */ + LBP_ID = 20, /* ideographic */ + LBP_JL = 21, /* Hangul L Jamo */ + LBP_JV = 22, /* Hangul V Jamo */ + LBP_JT = 23, /* Hangul T Jamo */ + LBP_SA = 30, /* complex context (South East Asian) */ + LBP_XX = 31 /* unknown */ +}; + +#include "lbrkprop1.h" + +static inline unsigned char +unilbrkprop_lookup (ucs4_t uc) +{ + unsigned int index1 = uc >> lbrkprop_header_0; + if (index1 < lbrkprop_header_1) + { + int lookup1 = unilbrkprop.level1[index1]; + if (lookup1 >= 0) + { + unsigned int index2 = (uc >> lbrkprop_header_2) & lbrkprop_header_3; + int lookup2 = unilbrkprop.level2[lookup1 + index2]; + if (lookup2 >= 0) + { + unsigned 
int index3 = uc & lbrkprop_header_4; + return unilbrkprop.level3[lookup2 + index3]; + } + } + } + return LBP_XX; +} + +/* Table indexed by two line breaking classifications. */ +#define D 1 /* direct break opportunity, empty in table 7.3 of UTR #14 */ +#define I 2 /* indirect break opportunity, '%' in table 7.3 of UTR #14 */ +#define P 3 /* prohibited break, '^' in table 7.3 of UTR #14 */ + +extern const unsigned char unilbrk_table[24][24]; + +/* We don't support line breaking of complex-context dependent characters + (Thai, Lao, Myanmar, Khmer) yet, because it requires dictionary lookup. */ diff --git a/lib/unilbrk/tables.c b/lib/unilbrk/tables.c deleted file mode 100644 --- a/lib/unilbrk/tables.c +++ /dev/null @@ -1,63 +0,0 @@ -/* Line breaking auxiliary tables. - Copyright (C) 2001-2003, 2006-2008 Free Software Foundation, Inc. - Written by Bruno Haible , 2001. - - This program is free software: you can redistribute it and/or modify it - under the terms of the GNU Lesser General Public License as published - by the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with this program. If not, see . */ - -#include - -/* Specification. */ -#include "unilbrk/tables.h" - -/* Define unilbrkprop, table of line breaking properties. 
*/ -#include "unilbrk/lbrkprop2.h" - -const unsigned char unilbrk_table[24][24] = -{ - /* after */ - /* WJ GL B2 BA BB HY CL EX IN NS OP QU IS NU PO PR SY AL H2 H3 ID JL JV JT */ -/* WJ */ { P, I, I, I, I, I, P, P, I, I, I, I, P, I, I, I, P, I, I, I, I, I, I, I, }, -/* GL */ { P, I, I, I, I, I, P, P, I, I, I, I, P, I, I, I, P, I, I, I, I, I, I, I, }, -/* B2 */ { P, I, P, I, D, I, P, P, D, I, D, I, P, D, D, D, P, D, D, D, D, D, D, D, }, -/* BA */ { P, D, D, I, D, I, P, P, D, I, D, I, P, D, D, D, P, D, D, D, D, D, D, D, }, -/* BB */ { P, I, I, I, I, I, P, P, I, I, I, I, P, I, I, I, P, I, I, I, I, I, I, I, }, -/* HY */ { P, D, D, I, D, I, P, P, D, I, D, I, P, I, D, D, P, D, D, D, D, D, D, D, }, -/* CL */ { P, I, D, I, D, I, P, P, D, P, D, I, P, D, I, I, P, D, D, D, D, D, D, D, }, -/* EX */ { P, I, D, I, D, I, P, P, D, I, D, I, P, D, D, D, P, D, D, D, D, D, D, D, }, -/* IN */ { P, I, D, I, D, I, P, P, I, I, D, I, P, D, D, D, P, D, D, D, D, D, D, D, }, -/* NS */ { P, I, D, I, D, I, P, P, D, I, D, I, P, D, D, D, P, D, D, D, D, D, D, D, }, -/* OP */ { P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, }, -/* QU */ { P, I, I, I, I, I, P, P, I, I, P, I, P, I, I, I, P, I, I, I, I, I, I, I, }, -/* IS */ { P, I, D, I, D, I, P, P, D, I, D, I, P, I, D, D, P, D, D, D, D, D, D, D, }, -/* NU */ { P, I, D, I, D, I, P, P, I, I, D, I, P, I, I, I, P, I, D, D, D, D, D, D, }, -/* PO */ { P, I, D, I, D, I, P, P, D, I, I, I, P, I, D, D, P, I, D, D, D, D, D, D, }, -/* PR */ { P, I, D, I, D, I, P, P, D, I, I, I, P, I, D, D, P, I, I, I, I, I, I, I, }, -/* SY */ { P, I, D, I, D, I, P, P, D, I, D, I, P, I, D, D, P, D, D, D, D, D, D, D, }, -/* AL */ { P, I, D, I, D, I, P, P, I, I, D, I, P, I, D, D, P, I, D, D, D, D, D, D, }, -/* H2 */ { P, I, D, I, D, I, P, P, I, I, D, I, P, D, I, D, P, D, D, D, D, D, I, I, }, -/* H3 */ { P, I, D, I, D, I, P, P, I, I, D, I, P, D, I, D, P, D, D, D, D, D, D, I, }, -/* ID */ { P, I, D, I, D, I, P, P, I, I, D, I, P, D, I, D, P, D, D, D, D, D, D, 
D, }, -/* JL */ { P, I, D, I, D, I, P, P, I, I, D, I, P, D, I, D, P, D, I, I, D, I, I, D, }, -/* JV */ { P, I, D, I, D, I, P, P, I, I, D, I, P, D, I, D, P, D, D, D, D, D, I, I, }, -/* JT */ { P, I, D, I, D, I, P, P, I, I, D, I, P, D, I, D, P, D, D, D, D, D, D, I, }, -/* "" */ -/* before */ -}; -/* Note: The (IS,AL) entry has been changed from I to D. In other words, the - rule "Do not break between numeric punctuation and alphabetics" is not - implemented here. We want to break before the HTML tag in strings like - "

<P>Some sentence.</P>

" */ -/* Note: The (B2,B2) entry should probably be D instead of P. */ -/* Note: The (PR,ID) entry should probably be D instead of I. */ -/* Note: The (WJ,*) and (GL,*) entries should probably be P instead of I. */ diff --git a/lib/unilbrk/tables.h b/lib/unilbrk/tables.h deleted file mode 100644 --- a/lib/unilbrk/tables.h +++ /dev/null @@ -1,94 +0,0 @@ -/* Line breaking auxiliary tables. - Copyright (C) 2001-2003, 2006-2008 Free Software Foundation, Inc. - Written by Bruno Haible , 2001. - - This program is free software: you can redistribute it and/or modify it - under the terms of the GNU Lesser General Public License as published - by the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with this program. If not, see . */ - -#include "unitypes.h" - -/* Line breaking classification. */ - -enum -{ - /* Values >= 24 are resolved at run time. 
*/ - LBP_BK = 24, /* mandatory break */ -/*LBP_CR, carriage return - not used here because it's a DOSism */ -/*LBP_LF, line feed - not used here because it's a DOSism */ - LBP_CM = 25, /* attached characters and combining marks */ -/*LBP_NL, next line - not used here because it's equivalent to LBP_BK */ -/*LBP_SG, surrogates - not used here because they are not characters */ - LBP_WJ = 0, /* word joiner */ - LBP_ZW = 26, /* zero width space */ - LBP_GL = 1, /* non-breaking (glue) */ - LBP_SP = 27, /* space */ - LBP_B2 = 2, /* break opportunity before and after */ - LBP_BA = 3, /* break opportunity after */ - LBP_BB = 4, /* break opportunity before */ - LBP_HY = 5, /* hyphen */ - LBP_CB = 28, /* contingent break opportunity */ - LBP_CL = 6, /* closing punctuation */ - LBP_EX = 7, /* exclamation/interrogation */ - LBP_IN = 8, /* inseparable */ - LBP_NS = 9, /* non starter */ - LBP_OP = 10, /* opening punctuation */ - LBP_QU = 11, /* ambiguous quotation */ - LBP_IS = 12, /* infix separator (numeric) */ - LBP_NU = 13, /* numeric */ - LBP_PO = 14, /* postfix (numeric) */ - LBP_PR = 15, /* prefix (numeric) */ - LBP_SY = 16, /* symbols allowing breaks */ - LBP_AI = 29, /* ambiguous (alphabetic or ideograph) */ - LBP_AL = 17, /* ordinary alphabetic and symbol characters */ - LBP_H2 = 18, /* Hangul LV syllable */ - LBP_H3 = 19, /* Hangul LVT syllable */ - LBP_ID = 20, /* ideographic */ - LBP_JL = 21, /* Hangul L Jamo */ - LBP_JV = 22, /* Hangul V Jamo */ - LBP_JT = 23, /* Hangul T Jamo */ - LBP_SA = 30, /* complex context (South East Asian) */ - LBP_XX = 31 /* unknown */ -}; - -#include "lbrkprop1.h" - -static inline unsigned char -unilbrkprop_lookup (ucs4_t uc) -{ - unsigned int index1 = uc >> lbrkprop_header_0; - if (index1 < lbrkprop_header_1) - { - int lookup1 = unilbrkprop.level1[index1]; - if (lookup1 >= 0) - { - unsigned int index2 = (uc >> lbrkprop_header_2) & lbrkprop_header_3; - int lookup2 = unilbrkprop.level2[lookup1 + index2]; - if (lookup2 >= 0) - { - unsigned 
int index3 = uc & lbrkprop_header_4; - return unilbrkprop.level3[lookup2 + index3]; - } - } - } - return LBP_XX; -} - -/* Table indexed by two line breaking classifications. */ -#define D 1 /* direct break opportunity, empty in table 7.3 of UTR #14 */ -#define I 2 /* indirect break opportunity, '%' in table 7.3 of UTR #14 */ -#define P 3 /* prohibited break, '^' in table 7.3 of UTR #14 */ - -extern const unsigned char unilbrk_table[24][24]; - -/* We don't support line breaking of complex-context dependent characters - (Thai, Lao, Myanmar, Khmer) yet, because it requires dictionary lookup. */ diff --git a/lib/unilbrk/u16-possible-linebreaks.c b/lib/unilbrk/u16-possible-linebreaks.c --- a/lib/unilbrk/u16-possible-linebreaks.c +++ b/lib/unilbrk/u16-possible-linebreaks.c @@ -1,5 +1,5 @@ /* Line breaking of UTF-16 strings. - Copyright (C) 2001-2003, 2006-2008 Free Software Foundation, Inc. + Copyright (C) 2001-2003, 2006-2009 Free Software Foundation, Inc. Written by Bruno Haible , 2001. This program is free software: you can redistribute it and/or modify it @@ -23,7 +23,7 @@ #include #include -#include "unilbrk/tables.h" +#include "unilbrk/lbrktables.h" #include "uniwidth/cjk.h" #include "unistr.h" diff --git a/lib/unilbrk/u32-possible-linebreaks.c b/lib/unilbrk/u32-possible-linebreaks.c --- a/lib/unilbrk/u32-possible-linebreaks.c +++ b/lib/unilbrk/u32-possible-linebreaks.c @@ -1,5 +1,5 @@ /* Line breaking of UTF-32 strings. - Copyright (C) 2001-2003, 2006-2008 Free Software Foundation, Inc. + Copyright (C) 2001-2003, 2006-2009 Free Software Foundation, Inc. Written by Bruno Haible , 2001. 
This program is free software: you can redistribute it and/or modify it @@ -22,7 +22,7 @@ #include -#include "unilbrk/tables.h" +#include "unilbrk/lbrktables.h" #include "uniwidth/cjk.h" void diff --git a/lib/unilbrk/u8-possible-linebreaks.c b/lib/unilbrk/u8-possible-linebreaks.c --- a/lib/unilbrk/u8-possible-linebreaks.c +++ b/lib/unilbrk/u8-possible-linebreaks.c @@ -1,5 +1,5 @@ /* Line breaking of UTF-8 strings. - Copyright (C) 2001-2003, 2006-2008 Free Software Foundation, Inc. + Copyright (C) 2001-2003, 2006-2009 Free Software Foundation, Inc. Written by Bruno Haible , 2001. This program is free software: you can redistribute it and/or modify it @@ -23,7 +23,7 @@ #include #include -#include "unilbrk/tables.h" +#include "unilbrk/lbrktables.h" #include "uniwidth/cjk.h" #include "unistr.h" diff --git a/modules/unilbrk/tables b/modules/unilbrk/tables --- a/modules/unilbrk/tables +++ b/modules/unilbrk/tables @@ -2,8 +2,8 @@ Line breaking auxiliary tables. Files: -lib/unilbrk/tables.h -lib/unilbrk/tables.c +lib/unilbrk/lbrktables.h +lib/unilbrk/lbrktables.c lib/unilbrk/lbrkprop1.h lib/unilbrk/lbrkprop2.h @@ -14,10 +14,10 @@ AC_REQUIRE([AC_C_INLINE]) Makefile.am: -lib_SOURCES += unilbrk/tables.c +lib_SOURCES += unilbrk/lbrktables.c Include: -"unilbrk/tables.h" +"unilbrk/lbrktables.h" License: LGPL