changeset 11673:d7c2930c7cb2

Add context arguments to u*_casemap functions.
author Bruno Haible <bruno@clisp.org>
date Mon, 29 Jun 2009 23:51:02 +0200
parents acbed3910711
children 480c7b715197
files ChangeLog lib/unicase/u-casemap.h lib/unicase/u16-casemap.c lib/unicase/u32-casemap.c lib/unicase/u8-casemap.c lib/unicase/unicasemap.h modules/unicase/u16-casemap modules/unicase/u32-casemap modules/unicase/u8-casemap
diffstat 9 files changed, 108 insertions(+), 86 deletions(-)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,23 @@
 2009-06-29  Bruno Haible  <bruno@clisp.org>
 
+	Add context arguments to u*_casemap functions.
+	* lib/unicase/unicasemap.h: Include unicase.h.
+	(u8_casemap, u16_casemap, u32_casemap): Add prefix_context and
+	suffix_context arguments.
+	* lib/unicase/u-casemap.h (is_cased, is_case_ignorable): Remove
+	functions.
+	(FUNC): Add prefix_context and suffix_context arguments. Use
+	uc_is_cased and uc_is_case_ignorable.
+	* lib/unicase/u8-casemap.c: Include caseprop.h and context.h.
+	* lib/unicase/u16-casemap.c: Likewise.
+	* lib/unicase/u32-casemap.c: Likewise.
+	* modules/unicase/u8-casemap (Files): Add lib/unicase/context.h.
+	(Depends-on): Add unicase/cased, unicase/ignorable. Clean up.
+	* modules/unicase/u16-casemap (Files): Add lib/unicase/context.h.
+	(Depends-on): Add unicase/cased, unicase/ignorable. Clean up.
+	* modules/unicase/u32-casemap (Files): Add lib/unicase/context.h.
+	(Depends-on): Add unicase/cased, unicase/ignorable. Clean up.
+
 	New module 'unicase/u32-suffix-context'.
 	* lib/unicase/u32-suffix-context.c: New file.
 	* modules/unicase/u32-suffix-context: New file.
--- a/lib/unicase/u-casemap.h
+++ b/lib/unicase/u-casemap.h
@@ -15,40 +15,11 @@
    You should have received a copy of the GNU Lesser General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
 
-/* Quoting the Unicode standard:
-     Definition: A character is defined to be "cased" if it has the Lowercase or
-     Uppercase property or has a General_Category value of Titlecase_Letter.  */
-static inline bool
-is_cased (ucs4_t uc)
-{
-  return (uc_is_property_lowercase (uc)
-	  || uc_is_property_uppercase (uc)
-	  || uc_is_general_category (uc, UC_TITLECASE_LETTER));
-}
-
-/* Quoting the Unicode standard:
-     Definition: A character is defined to be "case-ignorable" if it has the
-     value MidLetter {or the value MidNumLet} for the Word_Break property or
-     its General_Category is one of Nonspacing_Mark (Mn), Enclosing_Mark (Me),
-     Format (Cf), Modifier_Letter (Lm), or Modifier_Symbol (Sk).
-   The text marked in braces was added in Unicode 5.1.0, see
-   <http://www.unicode.org/versions/Unicode5.1.0/> section "Update of
-   Definition of case-ignorable".   */
-static inline bool
-is_case_ignorable (ucs4_t uc)
-{
-  int wbp = uc_wordbreak_property (uc);
-
-  return (wbp == WBP_MIDLETTER || wbp == WBP_MIDNUMLET
-	  || uc_is_general_category_withtable (uc, UC_CATEGORY_MASK_Mn
-						   | UC_CATEGORY_MASK_Me
-						   | UC_CATEGORY_MASK_Cf
-						   | UC_CATEGORY_MASK_Lm
-						   | UC_CATEGORY_MASK_Sk));
-}
-
 UNIT *
-FUNC (const UNIT *s, size_t n, const char *iso639_language,
+FUNC (const UNIT *s, size_t n,
+      casing_prefix_context_t prefix_context,
+      casing_suffix_context_t suffix_context,
+      const char *iso639_language,
       ucs4_t (*single_character_map) (ucs4_t),
       size_t offset_in_rule, /* offset in 'struct special_casing_rule' */
       uninorm_t nf,
@@ -77,11 +48,13 @@
 
     /* Helper for evaluating the FINAL_SIGMA condition:
        Last character that was not case-ignorable.  */
-    ucs4_t last_char_except_ignorable = 0xFFFD;
+    ucs4_t last_char_except_ignorable =
+      prefix_context.last_char_except_ignorable;
 
     /* Helper for evaluating the AFTER_SOFT_DOTTED and AFTER_I conditions:
        Last character that was of combining class 230 ("Above") or 0.  */
-    ucs4_t last_char_normal_or_above = 0xFFFD;
+    ucs4_t last_char_normal_or_above =
+      prefix_context.last_char_normal_or_above;
 
     while (s < s_end)
       {
@@ -134,23 +107,31 @@
 			   consisting of a case-ignorable sequence and then a
 			   cased letter.  */
 			/* Test the "before" condition.  */
-			applies = is_cased (last_char_except_ignorable);
+			applies = uc_is_cased (last_char_except_ignorable);
 			/* Test the "after" condition.  */
 			if (applies)
 			  {
 			    const UNIT *s2 = s + count;
-			    while (s2 < s_end)
+			    for (;;)
 			      {
-				ucs4_t uc2;
-				int count2 = U_MBTOUC_UNSAFE (&uc2, s2, s_end - s2);
-				if (is_cased (uc2))
+				if (s2 < s_end)
 				  {
-				    applies = false;
+				    ucs4_t uc2;
+				    int count2 = U_MBTOUC_UNSAFE (&uc2, s2, s_end - s2);
+				    if (uc_is_cased (uc2))
+				      {
+					applies = false;
+					break;
+				      }
+				    if (!uc_is_case_ignorable (uc2))
+				      break;
+				    s2 += count2;
+				  }
+				else
+				  {
+				    applies = ((suffix_context.bits & SCC_FINAL_SIGMA_MASK) == 0);
 				    break;
 				  }
-				if (!is_case_ignorable (uc2))
-				  break;
-				s2 += count2;
 			      }
 			  }
 			break;
@@ -171,19 +152,27 @@
 			{
 			  const UNIT *s2 = s + count;
 			  applies = false;
-			  while (s2 < s_end)
+			  for (;;)
 			    {
-			      ucs4_t uc2;
-			      int count2 = U_MBTOUC_UNSAFE (&uc2, s2, s_end - s2);
-			      int ccc = uc_combining_class (uc2);
-			      if (ccc == UC_CCC_A)
+			      if (s2 < s_end)
 				{
-				  applies = true;
+				  ucs4_t uc2;
+				  int count2 = U_MBTOUC_UNSAFE (&uc2, s2, s_end - s2);
+				  int ccc = uc_combining_class (uc2);
+				  if (ccc == UC_CCC_A)
+				    {
+				      applies = true;
+				      break;
+				    }
+				  if (ccc == UC_CCC_NR)
+				    break;
+				  s2 += count2;
+				}
+			      else
+				{
+				  applies = ((suffix_context.bits & SCC_MORE_ABOVE_MASK) != 0);
 				  break;
 				}
-			      if (ccc == UC_CCC_NR)
-				break;
-			      s2 += count2;
 			    }
 			}
 			break;
@@ -198,21 +187,29 @@
 			{
 			  const UNIT *s2 = s + count;
 			  applies = false;
-			  while (s2 < s_end)
+			  for (;;)
 			    {
-			      ucs4_t uc2;
-			      int count2 = U_MBTOUC_UNSAFE (&uc2, s2, s_end - s2);
-			      if (uc2 == 0x0307) /* COMBINING DOT ABOVE */
+			      if (s2 < s_end)
 				{
-				  applies = true;
+				  ucs4_t uc2;
+				  int count2 = U_MBTOUC_UNSAFE (&uc2, s2, s_end - s2);
+				  if (uc2 == 0x0307) /* COMBINING DOT ABOVE */
+				    {
+				      applies = true;
+				      break;
+				    }
+				  {
+				    int ccc = uc_combining_class (uc2);
+				    if (ccc == UC_CCC_A || ccc == UC_CCC_NR)
+				      break;
+				  }
+				  s2 += count2;
+				}
+			      else
+				{
+				  applies = ((suffix_context.bits & SCC_BEFORE_DOT_MASK) != 0);
 				  break;
 				}
-			      {
-				int ccc = uc_combining_class (uc2);
-				if (ccc == UC_CCC_A || ccc == UC_CCC_NR)
-				  break;
-			      }
-			      s2 += count2;
 			    }
 			}
 			break;
@@ -354,7 +351,7 @@
 	    }
 	}
 
-	if (!is_case_ignorable (uc))
+	if (!uc_is_case_ignorable (uc))
 	  last_char_except_ignorable = uc;
 
 	{
--- a/lib/unicase/u16-casemap.c
+++ b/lib/unicase/u16-casemap.c
@@ -28,6 +28,8 @@
 #include "unictype.h"
 #include "uniwbrk.h"
 #include "uninorm.h"
+#include "caseprop.h"
+#include "context.h"
 #include "special-casing.h"
 
 #define FUNC u16_casemap
--- a/lib/unicase/u32-casemap.c
+++ b/lib/unicase/u32-casemap.c
@@ -28,6 +28,8 @@
 #include "unictype.h"
 #include "uniwbrk.h"
 #include "uninorm.h"
+#include "caseprop.h"
+#include "context.h"
 #include "special-casing.h"
 
 #define FUNC u32_casemap
--- a/lib/unicase/u8-casemap.c
+++ b/lib/unicase/u8-casemap.c
@@ -28,6 +28,8 @@
 #include "unictype.h"
 #include "uniwbrk.h"
 #include "uninorm.h"
+#include "caseprop.h"
+#include "context.h"
 #include "special-casing.h"
 
 #define FUNC u8_casemap
--- a/lib/unicase/unicasemap.h
+++ b/lib/unicase/unicasemap.h
@@ -18,24 +18,34 @@
 #include <stddef.h>
 
 #include "unitypes.h"
+#include "unicase.h"
 #include "uninorm.h"
 
 extern uint8_t *
-       u8_casemap (const uint8_t *s, size_t n, const char *iso639_language,
+       u8_casemap (const uint8_t *s, size_t n,
+		   casing_prefix_context_t prefix_context,
+		   casing_suffix_context_t suffix_context,
+		   const char *iso639_language,
 		   ucs4_t (*single_character_map) (ucs4_t),
 		   size_t offset_in_rule, /* offset in 'struct special_casing_rule' */
 		   uninorm_t nf,
 		   uint8_t *resultbuf, size_t *lengthp);
 
 extern uint16_t *
-       u16_casemap (const uint16_t *s, size_t n, const char *iso639_language,
+       u16_casemap (const uint16_t *s, size_t n,
+		    casing_prefix_context_t prefix_context,
+		    casing_suffix_context_t suffix_context,
+		    const char *iso639_language,
 		    ucs4_t (*single_character_map) (ucs4_t),
 		    size_t offset_in_rule, /* offset in 'struct special_casing_rule' */
 		    uninorm_t nf,
 		    uint16_t *resultbuf, size_t *lengthp);
 
 extern uint32_t *
-       u32_casemap (const uint32_t *s, size_t n, const char *iso639_language,
+       u32_casemap (const uint32_t *s, size_t n,
+		    casing_prefix_context_t prefix_context,
+		    casing_suffix_context_t suffix_context,
+		    const char *iso639_language,
 		    ucs4_t (*single_character_map) (ucs4_t),
 		    size_t offset_in_rule, /* offset in 'struct special_casing_rule' */
 		    uninorm_t nf,
--- a/modules/unicase/u16-casemap
+++ b/modules/unicase/u16-casemap
@@ -5,17 +5,14 @@
 lib/unicase/unicasemap.h
 lib/unicase/u16-casemap.c
 lib/unicase/u-casemap.h
+lib/unicase/context.h
 
 Depends-on:
 unicase/base
+unicase/cased
+unicase/ignorable
 unicase/special-casing
-uniwbrk/wordbreak-property
-unictype/category-of
-unictype/category-test
-unictype/category-Lt
 unictype/combining-class
-unictype/property-lowercase
-unictype/property-uppercase
 unictype/property-soft-dotted
 unistr/u16-mbtouc-unsafe
 unistr/u16-uctomb
--- a/modules/unicase/u32-casemap
+++ b/modules/unicase/u32-casemap
@@ -5,17 +5,14 @@
 lib/unicase/unicasemap.h
 lib/unicase/u32-casemap.c
 lib/unicase/u-casemap.h
+lib/unicase/context.h
 
 Depends-on:
 unicase/base
+unicase/cased
+unicase/ignorable
 unicase/special-casing
-uniwbrk/wordbreak-property
-unictype/category-of
-unictype/category-test
-unictype/category-Lt
 unictype/combining-class
-unictype/property-lowercase
-unictype/property-uppercase
 unictype/property-soft-dotted
 unistr/u32-mbtouc-unsafe
 unistr/u32-uctomb
--- a/modules/unicase/u8-casemap
+++ b/modules/unicase/u8-casemap
@@ -5,17 +5,14 @@
 lib/unicase/unicasemap.h
 lib/unicase/u8-casemap.c
 lib/unicase/u-casemap.h
+lib/unicase/context.h
 
 Depends-on:
 unicase/base
+unicase/cased
+unicase/ignorable
 unicase/special-casing
-uniwbrk/wordbreak-property
-unictype/category-of
-unictype/category-test
-unictype/category-Lt
 unictype/combining-class
-unictype/property-lowercase
-unictype/property-uppercase
 unictype/property-soft-dotted
 unistr/u8-mbtouc-unsafe
 unistr/u8-uctomb