# HG changeset patch
# User Bruno Haible
# Date 1246311693 -7200
# Node ID 73abbb4482fb1dba3b37d80a68b4a872872b864c
# Parent 35b3e3eea12c43b37cf85f03659ac5bf9f3f563d
New module 'unicase/u8-suffix-context'.

diff --git a/ChangeLog b/ChangeLog
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,10 @@
 2009-06-29  Bruno Haible
 
+	New module 'unicase/u8-suffix-context'.
+	* lib/unicase/u8-suffix-context.c: New file.
+	* lib/unicase/u-suffix-context.h: New file.
+	* modules/unicase/u8-suffix-context: New file.
+
 	New module 'unicase/empty-suffix-context'.
 	* lib/unicase/empty-suffix-context.c: New file.
 	* modules/unicase/empty-suffix-context: New file.
diff --git a/lib/unicase/u-suffix-context.h b/lib/unicase/u-suffix-context.h
new file mode 100644
--- /dev/null
+++ b/lib/unicase/u-suffix-context.h
@@ -0,0 +1,96 @@
+/* Case-mapping context of suffix UTF-8/UTF-16/UTF-32 string.
+   Copyright (C) 2009 Free Software Foundation, Inc.
+   Written by Bruno Haible, 2009.
+
+   This program is free software: you can redistribute it and/or modify it
+   under the terms of the GNU Lesser General Public License as published
+   by the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/* Return the case-mapping context of the N units starting at S, using
+   unicase_empty_suffix_context for every condition that these units
+   alone do not determine.  */
+casing_suffix_context_t
+FUNC1 (const UNIT *s, size_t n)
+{
+  return FUNC2 (s, n, unicase_empty_suffix_context);
+}
+
+/* Return the case-mapping context of the N units starting at S, using
+   the corresponding bits of A_CONTEXT for every condition that these
+   units alone do not determine.  */
+casing_suffix_context_t
+FUNC2 (const UNIT *s, size_t n, casing_suffix_context_t a_context)
+{
+  casing_suffix_context_t context;
+  /* Evaluate all three conditions in a single pass through the string S.
+     The three variables are -1 as long as the value of the condition has
+     not been determined.  */
+  int scc_FINAL_SIGMA = -1;
+  int scc_MORE_ABOVE = -1;
+  int scc_BEFORE_DOT = -1;
+  const UNIT *s_end = s + n;
+
+  while (s < s_end)
+    {
+      ucs4_t uc;
+      int count = U_MBTOUC_UNSAFE (&uc, s, s_end - s);
+
+      if (scc_FINAL_SIGMA < 0)
+        {
+          if (uc_is_cased (uc))
+            scc_FINAL_SIGMA = SCC_FINAL_SIGMA_MASK;
+          else if (!uc_is_case_ignorable (uc))
+            scc_FINAL_SIGMA = 0;
+        }
+
+      if (scc_MORE_ABOVE < 0)
+        {
+          int ccc = uc_combining_class (uc);
+          if (ccc == UC_CCC_A)
+            scc_MORE_ABOVE = SCC_MORE_ABOVE_MASK;
+          else if (ccc == UC_CCC_NR)
+            scc_MORE_ABOVE = 0;
+        }
+
+      if (scc_BEFORE_DOT < 0)
+        {
+          if (uc == 0x0307) /* COMBINING DOT ABOVE */
+            scc_BEFORE_DOT = SCC_BEFORE_DOT_MASK;
+          else
+            {
+              int ccc = uc_combining_class (uc);
+              if (ccc == UC_CCC_A || ccc == UC_CCC_NR)
+                scc_BEFORE_DOT = 0;
+            }
+        }
+
+      if ((scc_FINAL_SIGMA | scc_MORE_ABOVE | scc_BEFORE_DOT) >= 0)
+        /* All conditions have been determined.  */
+        break;
+
+      s += count;
+    }
+
+  /* For those conditions that have not been determined so far, use the
+     value from the argument context.  */
+  context.bits =
+    (scc_FINAL_SIGMA >= 0
+     ? scc_FINAL_SIGMA
+     : a_context.bits & SCC_FINAL_SIGMA_MASK)
+    | (scc_MORE_ABOVE >= 0
+       ? scc_MORE_ABOVE
+       : a_context.bits & SCC_MORE_ABOVE_MASK)
+    | (scc_BEFORE_DOT >= 0
+       ? scc_BEFORE_DOT
+       : a_context.bits & SCC_BEFORE_DOT_MASK);
+  return context;
+}
diff --git a/lib/unicase/u8-suffix-context.c b/lib/unicase/u8-suffix-context.c
new file mode 100644
--- /dev/null
+++ b/lib/unicase/u8-suffix-context.c
@@ -0,0 +1,32 @@
+/* Case-mapping context of suffix UTF-8 string.
+   Copyright (C) 2009 Free Software Foundation, Inc.
+   Written by Bruno Haible, 2009.
+
+   This program is free software: you can redistribute it and/or modify it
+   under the terms of the GNU Lesser General Public License as published
+   by the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+/* Specification.  */
+#include "unicase.h"
+
+#include "unictype.h"
+#include "unistr.h"
+#include "caseprop.h"
+#include "context.h"
+
+#define FUNC1 u8_casing_suffix_context
+#define FUNC2 u8_casing_suffixes_context
+#define UNIT uint8_t
+#define U_MBTOUC_UNSAFE u8_mbtouc_unsafe
+#include "u-suffix-context.h"
diff --git a/modules/unicase/u8-suffix-context b/modules/unicase/u8-suffix-context
new file mode 100644
--- /dev/null
+++ b/modules/unicase/u8-suffix-context
@@ -0,0 +1,31 @@
+Description:
+Case-mapping context of suffix UTF-8 string.
+
+Files:
+lib/unicase/u8-suffix-context.c
+lib/unicase/u-suffix-context.h
+lib/unicase/context.h
+
+Depends-on:
+unicase/base
+unicase/empty-prefix-context
+unicase/empty-suffix-context
+unicase/cased
+unicase/ignorable
+unictype/combining-class
+unistr/u8-mbtouc-unsafe
+
+configure.ac:
+
+Makefile.am:
+lib_SOURCES += unicase/u8-suffix-context.c
+
+Include:
+"unicase.h"
+
+License:
+LGPL
+
+Maintainer:
+Bruno Haible
+