# HG changeset patch # User Daiki Ueno # Date 1421290989 -32400 # Node ID 74d3193af09cf7e6a93a0f2ce0b77331db0f8d8e # Parent 33325a4c16a97095178dd23171a754a9d12b3f00 uniwbrk: don't ignore Extend/Format characters at BOL * lib/uniwbrk/u-wordbreaks.h (FUNC): Don't ignore Extend and Format characters if the previous character property is one of WBP_NEWLINE, WBP_CR, and WBP_LF. diff --git a/ChangeLog b/ChangeLog --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +2015-01-15 Daiki Ueno + + uniwbrk: don't ignore Extend/Format characters at BOL, not just BOS + * lib/uniwbrk/u-wordbreaks.h (FUNC): Don't ignore Extend and Format + characters if the previous character property is one of + WBP_NEWLINE, WBP_CR, and WBP_LF. + 2015-01-11 Jim Meyering test-strstr.c: avoid a trivial leak diff --git a/lib/uniwbrk/u-wordbreaks.h b/lib/uniwbrk/u-wordbreaks.h --- a/lib/uniwbrk/u-wordbreaks.h +++ b/lib/uniwbrk/u-wordbreaks.h @@ -112,8 +112,13 @@ } last_char_prop = prop; - /* Ignore Format and Extend characters, except at the start of the string. */ - if (last_compchar_prop < 0 || !(prop == WBP_EXTEND || prop == WBP_FORMAT)) + /* Ignore Format and Extend characters, except at the start + of the line. */ + if (last_compchar_prop < 0 + || last_compchar_prop == WBP_CR + || last_compchar_prop == WBP_LF + || last_compchar_prop == WBP_NEWLINE + || !(prop == WBP_EXTEND || prop == WBP_FORMAT)) { secondlast_compchar_prop = last_compchar_prop; last_compchar_prop = prop;