# HG changeset patch # User Bruno Haible # Date 1246405773 -7200 # Node ID daa1eb869953ad4bdd3f0a0012baaa3b8745f44b # Parent 4b50bb02dc1be395aa0b5cac12c1586d83901fc8 Reduce the number of uc_is_cased calls. diff --git a/ChangeLog b/ChangeLog --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,20 @@ +2009-06-30 Bruno Haible + + Reduce the number of uc_is_cased calls. + * lib/unicase.h (casing_suffix_context_t): Add + 'first_char_except_ignorable' field. + * lib/unicase/context.h (SCC_FINAL_SIGMA_MASK): Remove macro. + (SCC_MORE_ABOVE_MASK, SCC_BEFORE_DOT_MASK): Update. + * lib/unicase/empty-suffix-context.c (unicase_empty_suffix_context): + Update initializer. + * lib/unicase/u-casemap.h (FUNC): Don't invoke uc_is_cased on + case-ignorable characters. + * lib/unicase/u-ct-totitle.h (FUNC): Likewise. + * lib/unicase/u-suffix-context.h (FUNC2): Don't call uc_is_cased here. + * modules/unicase/u8-suffix-context (Depends-on): Remove unicase/cased. + * modules/unicase/u16-suffix-context (Depends-on): Likewise. + * modules/unicase/u32-suffix-context (Depends-on): Likewise. + 2009-06-30 Bruno Haible Tests for module 'unicase/ignorable'. diff --git a/lib/unicase.h b/lib/unicase.h --- a/lib/unicase.h +++ b/lib/unicase.h @@ -167,8 +167,8 @@ typedef struct casing_suffix_context { /* These fields are private, undocumented. */ + uint32_t first_char_except_ignorable; uint32_t bits; - uint32_t unused_bits; } casing_suffix_context_t; /* The case-mapping context of the empty suffix string. */ diff --git a/lib/unicase/context.h b/lib/unicase/context.h --- a/lib/unicase/context.h +++ b/lib/unicase/context.h @@ -44,20 +44,22 @@ casing_suffix_context_t contains the following fields: // For evaluating the FINAL_SIGMA condition: - // Bit 0 is set if the suffix starts with a sequence consisting of a - // case-ignorable sequence and then a cased letter. - // + // First character that was not case-ignorable. 
+ ucs4_t first_char_except_ignorable; + // For evaluating the MORE_ABOVE condition: - // Bit 1 is set if the suffix contains a character of combining class + // Bit 0 is set if the suffix contains a character of combining class // 230 (Above) with no character of combining class 0 or 230 (Above) // before it. // // For evaluating the BEFORE_DOT condition: - // Bit 2 is set if the suffix contains a COMBINING DOT ABOVE (U+0307) + // Bit 1 is set if the suffix contains a COMBINING DOT ABOVE (U+0307) // with no character of combining class 0 or 230 (Above) before it. // uint32_t bits; - */ -#define SCC_FINAL_SIGMA_MASK 1 -#define SCC_MORE_ABOVE_MASK 2 -#define SCC_BEFORE_DOT_MASK 4 + + Three bits would be sufficient to carry the context information, but + that would require invoking uc_is_cased ahead of time, more often than + actually needed. */ +#define SCC_MORE_ABOVE_MASK 1 +#define SCC_BEFORE_DOT_MASK 2 diff --git a/lib/unicase/empty-suffix-context.c b/lib/unicase/empty-suffix-context.c --- a/lib/unicase/empty-suffix-context.c +++ b/lib/unicase/empty-suffix-context.c @@ -22,6 +22,6 @@ const casing_suffix_context_t unicase_empty_suffix_context = { - 0 /* bits */, - 0 /* unused_bits */ + 0xFFFD /* first_char_except_ignorable */, + 0 /* bits */ }; diff --git a/lib/unicase/u-casemap.h b/lib/unicase/u-casemap.h --- a/lib/unicase/u-casemap.h +++ b/lib/unicase/u-casemap.h @@ -118,18 +118,20 @@ { ucs4_t uc2; int count2 = U_MBTOUC_UNSAFE (&uc2, s2, s_end - s2); - if (uc_is_cased (uc2)) + /* Our uc_is_case_ignorable function is + known to return false for all cased + characters. So we can call + uc_is_case_ignorable first. */ + if (!uc_is_case_ignorable (uc2)) { - applies = false; + applies = ! 
uc_is_cased (suffix_context.first_char_except_ignorable); break; } } diff --git a/lib/unicase/u-ct-totitle.h b/lib/unicase/u-ct-totitle.h --- a/lib/unicase/u-ct-totitle.h +++ b/lib/unicase/u-ct-totitle.h @@ -194,18 +194,20 @@ { ucs4_t uc2; int count2 = U_MBTOUC_UNSAFE (&uc2, s2, s_end - s2); - if (uc_is_cased (uc2)) + /* Our uc_is_case_ignorable function is + known to return false for all cased + characters. So we can call + uc_is_case_ignorable first. */ + if (!uc_is_case_ignorable (uc2)) { - applies = false; + applies = ! uc_is_cased (uc2); break; } - if (!uc_is_case_ignorable (uc2)) - break; s2 += count2; } else { - applies = ((suffix_context.bits & SCC_FINAL_SIGMA_MASK) == 0); + applies = ! uc_is_cased (suffix_context.first_char_except_ignorable); break; } } diff --git a/lib/unicase/u-suffix-context.h b/lib/unicase/u-suffix-context.h --- a/lib/unicase/u-suffix-context.h +++ b/lib/unicase/u-suffix-context.h @@ -28,7 +28,7 @@ /* Evaluate all three conditions in a single pass through the string S. The three variables are -1 as long as the value of the condition has not been determined. */ - int scc_FINAL_SIGMA = -1; + ucs4_t first_char_except_ignorable = (ucs4_t)(-1); int scc_MORE_ABOVE = -1; int scc_BEFORE_DOT = -1; const UNIT *s_end = s + n; @@ -38,12 +38,10 @@ ucs4_t uc; int count = U_MBTOUC_UNSAFE (&uc, s, s_end - s); - if (scc_FINAL_SIGMA < 0) + if (first_char_except_ignorable == (ucs4_t)(-1)) { - if (uc_is_cased (uc)) - scc_FINAL_SIGMA = SCC_FINAL_SIGMA_MASK; - else if (!uc_is_case_ignorable (uc)) - scc_FINAL_SIGMA = 0; + if (!uc_is_case_ignorable (uc)) + first_char_except_ignorable = uc; } if (scc_MORE_ABOVE < 0) @@ -67,7 +65,8 @@ } } - if ((scc_FINAL_SIGMA | scc_MORE_ABOVE | scc_BEFORE_DOT) >= 0) + if (first_char_except_ignorable != (ucs4_t)(-1) + && (scc_MORE_ABOVE | scc_BEFORE_DOT) >= 0) /* All conditions have been determined. 
*/ break; @@ -76,13 +75,14 @@ /* For those conditions that have not been determined so far, use the value from the argument context. */ + context.first_char_except_ignorable = + (first_char_except_ignorable != (ucs4_t)(-1) + ? first_char_except_ignorable + : a_context.first_char_except_ignorable); context.bits = - (scc_FINAL_SIGMA >= 0 - ? scc_FINAL_SIGMA - : a_context.bits & SCC_FINAL_SIGMA_MASK) - | (scc_MORE_ABOVE >= 0 - ? scc_MORE_ABOVE - : a_context.bits & SCC_MORE_ABOVE_MASK) + (scc_MORE_ABOVE >= 0 + ? scc_MORE_ABOVE + : a_context.bits & SCC_MORE_ABOVE_MASK) | (scc_BEFORE_DOT >= 0 ? scc_BEFORE_DOT : a_context.bits & SCC_BEFORE_DOT_MASK); diff --git a/modules/unicase/u16-suffix-context b/modules/unicase/u16-suffix-context --- a/modules/unicase/u16-suffix-context +++ b/modules/unicase/u16-suffix-context @@ -9,7 +9,6 @@ Depends-on: unicase/base unicase/empty-prefix-context -unicase/cased unicase/ignorable unictype/combining-class unistr/u16-mbtouc-unsafe diff --git a/modules/unicase/u32-suffix-context b/modules/unicase/u32-suffix-context --- a/modules/unicase/u32-suffix-context +++ b/modules/unicase/u32-suffix-context @@ -9,7 +9,6 @@ Depends-on: unicase/base unicase/empty-prefix-context -unicase/cased unicase/ignorable unictype/combining-class unistr/u32-mbtouc-unsafe diff --git a/modules/unicase/u8-suffix-context b/modules/unicase/u8-suffix-context --- a/modules/unicase/u8-suffix-context +++ b/modules/unicase/u8-suffix-context @@ -9,7 +9,6 @@ Depends-on: unicase/base unicase/empty-prefix-context -unicase/cased unicase/ignorable unictype/combining-class unistr/u8-mbtouc-unsafe