Index: source/data/brkitr/title.txt |
diff --git a/source/data/brkitr/title.txt b/source/data/brkitr/title.txt |
index e3f6566c90139d53c2175bafa6dfc7f9abf01f4a..30c1c40d45b1d602cfd669702eb6990c5fbed42d 100644 |
--- a/source/data/brkitr/title.txt |
+++ b/source/data/brkitr/title.txt |
@@ -1,32 +1,53 @@ |
-# Copyright (c) 2002-2003, International Business Machines Corporation and |
+# Copyright (c) 2002-2014, International Business Machines Corporation and |
# others. All Rights Reserved. |
# |
# Title Casing Break Rules |
# |
+ |
$CaseIgnorable = [[:Mn:][:Me:][:Cf:][:Lm:][:Sk:] \u0027 \u00AD \u2019]; |
$Cased = [[:Upper_Case:][:Lower_Case:][:Lt:] - $CaseIgnorable]; |
-$NotCased = [^ $Cased]; |
+$NotCased = [[^ $Cased] - $CaseIgnorable]; # excludes $CaseIgnorable as well as $Cased |
-# |
-# If the iterator was not stopped on a cased character, advance it to the first cased char |
-# |
-$NotCased+; |
+!!forward; |
+ |
+# If the iterator begins on CaseIgnorable or NotCased characters, advance |
+# past the whole run of them. This can occur at the start-of-text, or |
+# after application of the safe-reverse rule. |
+ |
+($CaseIgnorable | $NotCased)*; |
+ |
+# Normal exact forward rule: beginning at the start of a word |
+# (at a Cased character), advance through the word and through |
+# the NotCased and CaseIgnorable characters that follow it. |
+ |
+$Cased ($Cased | $CaseIgnorable)* ($NotCased | $CaseIgnorable)*; |
-# |
-# If the iterator starts on a cased item, advance through all adjacent cased items plus |
-# any non-cased stuff, to reach the start of the next word. |
-# |
-$Cased ($Cased | $CaseIgnorable)* $NotCased*; |
-# |
# Reverse Rules |
-# |
+!!reverse; |
-! $NotCased+; |
+# Normal rule: applies whenever there is a start-of-word (a Cased |
+# character) preceding the current iteration position. |
-# |
-# If the iterator starts on a cased item, advance through all adjacent cased items plus |
-# any non-cased stuff, to reach the start of the next word. |
-# |
-! $NotCased* ($Cased | $CaseIgnorable)* $Cased; |
+($NotCased | $CaseIgnorable)* ($Cased | $CaseIgnorable)* $Cased; |
+ |
+# Short rule: takes effect only when moving to the start of text, with |
+# no word (Cased character) preceding the current iteration position. |
+ |
+($NotCased | $CaseIgnorable)*; |
+ |
+!!safe_reverse; |
+ |
+# Safe Reverse: the exact forward rule must not start in the middle |
+# of a word, so the safe reverse skips back over any Cased and CaseIgnorable |
+# characters, leaving the iterator just before the start of a word. |
+ |
+($Cased | $CaseIgnorable)*; |
+ |
+!!safe_forward; |
+ |
+# Safe Forward: nothing needs to be done, because the exact reverse rules |
+# always find valid boundaries from any starting position. Still, some |
+# rule is required, so use '.', a one-character movement. |
+.; |