Mercurial > hg > octave-kai > gnulib-hg
changeset 11206:c7e84b56dbc3
Implement new clarified decomposition of Hangul syllables.
author | Bruno Haible <bruno@clisp.org> |
---|---|
date | Sun, 22 Feb 2009 15:05:45 +0100 |
parents | 2d19b06ab374 |
children | 4daf474e9033 |
files | ChangeLog lib/uninorm/canonical-decomposition.c lib/uninorm/decomposition.c tests/uninorm/test-canonical-decomposition.c tests/uninorm/test-compat-decomposition.c tests/uninorm/test-decomposition.c |
diffstat | 6 files changed, 97 insertions(+), 19 deletions(-) [+] |
line wrap: on
line diff
--- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,14 @@ +2009-02-22 Bruno Haible <bruno@clisp.org> + + Implement new clarified decomposition of Hangul syllables. + * lib/uninorm/decomposition.c (uc_decomposition): For Hangul syllables + of type LTV, return only a pairwise decomposition. + * lib/uninorm/canonical-decomposition.c (uc_canonical_decomposition): + Likewise. + * tests/uninorm/test-decomposition.c (main): Updated expected result. + * tests/uninorm/test-canonical-decomposition.c (main): Likewise. + * tests/uninorm/test-compat-decomposition.c (main): Likewise. + 2009-02-22 Bruno Haible <bruno@clisp.org> * lib/uninorm/u-normalize-internal.h (FUNC): At the end, handle
--- a/lib/uninorm/canonical-decomposition.c +++ b/lib/uninorm/canonical-decomposition.c @@ -29,24 +29,45 @@ { if (uc >= 0xAC00 && uc < 0xD7A4) { - /* Hangul syllable. See Unicode standard, chapter 3, - section "Hangul Syllable Decomposition". */ - unsigned int t, v, l; + /* Hangul syllable. See Unicode standard, chapter 3, section + "Hangul Syllable Decomposition". See also the clarification at + <http://www.unicode.org/versions/Unicode5.1.0/>, section + "Clarification of Hangul Jamo Handling". */ + unsigned int t; uc -= 0xAC00; t = uc % 28; - uc = uc / 28; - v = uc % 21; - l = uc / 21; + + if (t == 0) + { + unsigned int v, l; - decomposition[0] = 0x1100 + l; - decomposition[1] = 0x1161 + v; - if (t == 0) - return 2; + uc = uc / 28; + v = uc % 21; + l = uc / 21; + + decomposition[0] = 0x1100 + l; + decomposition[1] = 0x1161 + v; + return 2; + } else { +#if 1 /* Return the pairwise decomposition, not the full decomposition. */ + decomposition[0] = 0xAC00 + uc - t; /* = 0xAC00 + (l * 21 + v) * 28; */ + decomposition[1] = 0x11A7 + t; + return 2; +#else + unsigned int v, l; + + uc = uc / 28; + v = uc % 21; + l = uc / 21; + + decomposition[0] = 0x1100 + l; + decomposition[1] = 0x1161 + v; decomposition[2] = 0x11A7 + t; return 3; +#endif } } else if (uc < 0x110000)
--- a/lib/uninorm/decomposition.c +++ b/lib/uninorm/decomposition.c @@ -27,25 +27,46 @@ { if (uc >= 0xAC00 && uc < 0xD7A4) { - /* Hangul syllable. See Unicode standard, chapter 3, - section "Hangul Syllable Decomposition". */ - unsigned int t, v, l; + /* Hangul syllable. See Unicode standard, chapter 3, section + "Hangul Syllable Decomposition". See also the clarification at + <http://www.unicode.org/versions/Unicode5.1.0/>, section + "Clarification of Hangul Jamo Handling". */ + unsigned int t; uc -= 0xAC00; t = uc % 28; - uc = uc / 28; - v = uc % 21; - l = uc / 21; *decomp_tag = UC_DECOMP_CANONICAL; - decomposition[0] = 0x1100 + l; - decomposition[1] = 0x1161 + v; if (t == 0) - return 2; + { + unsigned int v, l; + + uc = uc / 28; + v = uc % 21; + l = uc / 21; + + decomposition[0] = 0x1100 + l; + decomposition[1] = 0x1161 + v; + return 2; + } else { +#if 1 /* Return the pairwise decomposition, not the full decomposition. */ + decomposition[0] = 0xAC00 + uc - t; /* = 0xAC00 + (l * 21 + v) * 28; */ + decomposition[1] = 0x11A7 + t; + return 2; +#else + unsigned int v, l; + + uc = uc / 28; + v = uc % 21; + l = uc / 21; + + decomposition[0] = 0x1100 + l; + decomposition[1] = 0x1161 + v; decomposition[2] = 0x11A7 + t; return 3; +#endif } } else if (uc < 0x110000)
--- a/tests/uninorm/test-canonical-decomposition.c +++ b/tests/uninorm/test-canonical-decomposition.c @@ -133,10 +133,18 @@ /* HANGUL SYLLABLE GEUL */ ret = uc_canonical_decomposition (0xAE00, decomposed); + /* See the clarification at <http://www.unicode.org/versions/Unicode5.1.0/>, + section "Clarification of Hangul Jamo Handling". */ +#if 1 + ASSERT (ret == 2); + ASSERT (decomposed[0] == 0xADF8); + ASSERT (decomposed[1] == 0x11AF); +#else ASSERT (ret == 3); ASSERT (decomposed[0] == 0x1100); ASSERT (decomposed[1] == 0x1173); ASSERT (decomposed[2] == 0x11AF); +#endif /* HANGUL SYLLABLE GEU */ ret = uc_canonical_decomposition (0xADF8, decomposed);
--- a/tests/uninorm/test-compat-decomposition.c +++ b/tests/uninorm/test-compat-decomposition.c @@ -175,10 +175,18 @@ /* HANGUL SYLLABLE GEUL */ ret = uc_compat_decomposition (0xAE00, decomposed); + /* See the clarification at <http://www.unicode.org/versions/Unicode5.1.0/>, + section "Clarification of Hangul Jamo Handling". */ +#if 1 + ASSERT (ret == 2); + ASSERT (decomposed[0] == 0xADF8); + ASSERT (decomposed[1] == 0x11AF); +#else ASSERT (ret == 3); ASSERT (decomposed[0] == 0x1100); ASSERT (decomposed[1] == 0x1173); ASSERT (decomposed[2] == 0x11AF); +#endif /* HANGUL SYLLABLE GEU */ ret = uc_compat_decomposition (0xADF8, decomposed);
--- a/tests/uninorm/test-decomposition.c +++ b/tests/uninorm/test-decomposition.c @@ -194,11 +194,20 @@ /* HANGUL SYLLABLE GEUL */ ret = uc_decomposition (0xAE00, &tag, decomposed); + /* See the clarification at <http://www.unicode.org/versions/Unicode5.1.0/>, + section "Clarification of Hangul Jamo Handling". */ +#if 1 + ASSERT (ret == 2); + ASSERT (tag == UC_DECOMP_CANONICAL); + ASSERT (decomposed[0] == 0xADF8); + ASSERT (decomposed[1] == 0x11AF); +#else ASSERT (ret == 3); ASSERT (tag == UC_DECOMP_CANONICAL); ASSERT (decomposed[0] == 0x1100); ASSERT (decomposed[1] == 0x1173); ASSERT (decomposed[2] == 0x11AF); +#endif /* HANGUL SYLLABLE GEU */ ret = uc_decomposition (0xADF8, &tag, decomposed);