changeset 17869:74d3193af09c

uniwbrk: keep Extend/Format characters at BOL as well as BOS. * lib/uniwbrk/u-wordbreaks.h (FUNC): Do not ignore Extend and Format characters if the previous character property is one of WBP_NEWLINE, WBP_CR, or WBP_LF.
author Daiki Ueno <ueno@gnu.org>
date Thu, 15 Jan 2015 12:03:09 +0900
parents 33325a4c16a9
children 04ed097a2048
files ChangeLog lib/uniwbrk/u-wordbreaks.h
diffstat 2 files changed, 14 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2015-01-15  Daiki Ueno  <ueno@gnu.org>
+
+	uniwbrk: keep Extend/Format characters at BOL as well as BOS
+	* lib/uniwbrk/u-wordbreaks.h (FUNC): Do not ignore Extend and
+	Format characters if the previous character property is one of
+	WBP_NEWLINE, WBP_CR, or WBP_LF.
+
 2015-01-11  Jim Meyering  <meyering@fb.com>
 
 	test-strstr.c: avoid a trivial leak
--- a/lib/uniwbrk/u-wordbreaks.h
+++ b/lib/uniwbrk/u-wordbreaks.h
@@ -112,8 +112,13 @@
             }
 
           last_char_prop = prop;
-          /* Ignore Format and Extend characters, except at the start of the string.  */
-          if (last_compchar_prop < 0 || !(prop == WBP_EXTEND || prop == WBP_FORMAT))
+          /* Ignore Format and Extend characters, except at the start
+             of the string or right after CR, LF, or Newline.  */
+          if (last_compchar_prop < 0
+              || last_compchar_prop == WBP_CR
+              || last_compchar_prop == WBP_LF
+              || last_compchar_prop == WBP_NEWLINE
+              || !(prop == WBP_EXTEND || prop == WBP_FORMAT))
             {
               secondlast_compchar_prop = last_compchar_prop;
               last_compchar_prop = prop;