OLD | NEW |
1 # Copyright (c) 2002-2003, International Business Machines Corporation and | 1 # Copyright (c) 2002-2014, International Business Machines Corporation and |
2 # others. All Rights Reserved. | 2 # others. All Rights Reserved. |
3 # | 3 # |
4 # Title Casing Break Rules | 4 # Title Casing Break Rules |
5 # | 5 # |
6 | 6 |
| 7 |
7 $CaseIgnorable = [[:Mn:][:Me:][:Cf:][:Lm:][:Sk:] \u0027 \u00AD \u2019]; | 8 $CaseIgnorable = [[:Mn:][:Me:][:Cf:][:Lm:][:Sk:] \u0027 \u00AD \u2019]; |
8 $Cased = [[:Upper_Case:][:Lower_Case:][:Lt:] - $CaseIgnorable]; | 9 $Cased = [[:Upper_Case:][:Lower_Case:][:Lt:] - $CaseIgnorable]; |
9 $NotCased = [^ $Cased]; | 10 $NotCased = [[^ $Cased] - $CaseIgnorable]; |
10 | 11 |
11 # | 12 !!forward; |
12 # If the iterator was not stopped on a cased character, advance it to the first cased char | |
13 # | |
14 $NotCased+; | |
15 | 13 |
16 # | 14 # If the iterator begins on a CaseIgnorable, advance it past it/them. |
17 # If the iterator starts on a cased item, advance through all adjacent cased items plus | 15 # This can occur at the start-of-text, or after application of the |
18 # any non-cased stuff, to reach the start of the next word. | 16 # safe-reverse rule. |
19 # | |
20 $Cased ($Cased | $CaseIgnorable)* $NotCased*; | |
21 | 17 |
22 # | 18 ($CaseIgnorable | $NotCased)*; |
| 19 |
| 20 # Normal exact forward rule: beginning at the start of a word |
| 21 # (at a cased character), advance through the word and through |
| 22 # the uncased characters following the word. |
| 23 |
| 24 $Cased ($Cased | $CaseIgnorable)* ($NotCased | $CaseIgnorable)*; |
| 25 |
| 26 |
23 # Reverse Rules | 27 # Reverse Rules |
24 # | 28 !!reverse; |
25 | 29 |
26 ! $NotCased+; | 30 # Normal Rule, will work nearly universally, so long as there is a |
| 31 # start-of-word preceding the current iteration position. |
27 | 32 |
28 # | 33 ($NotCased | $CaseIgnorable)* ($Cased | $CaseIgnorable)* $Cased; |
29 # If the iterator starts on a cased item, advance through all adjacent cased items plus | 34 |
30 # any non-cased stuff, to reach the start of the next word. | 35 # Short rule, will be effective only when moving to the start of text, |
31 # | 36 # with no word (cased character) preceding the current iteration position. |
32 ! $NotCased* ($Cased | $CaseIgnorable)* $Cased; | 37 |
| 38 ($NotCased | $CaseIgnorable)*; |
| 39 |
| 40 !!safe_reverse; |
| 41 |
| 42 # Safe Reverse: the exact forward rule must not start in the middle |
| 43 # of a word, so the safe reverse skips over any Cased characters, |
| 44 # leaving it just before the start of a word. |
| 45 |
| 46 ($Cased | $CaseIgnorable)*; |
| 47 |
| 48 !!safe_forward; |
| 49 |
| 50 # Safe Forward, nothing needs to be done, the exact Reverse rules will |
| 51 # always find valid boundaries from any starting position. |
| 52 # Still, some rule is needed, so '.', a one character movement. |
| 53 .; |
OLD | NEW |