| OLD | NEW |
| 1 # | 1 # |
| 2 # Copyright (C) 2002-2011, International Business Machines Corporation and others. | 2 # Copyright (C) 2002-2015, International Business Machines Corporation and others. |
| 3 # All Rights Reserved. | 3 # All Rights Reserved. |
| 4 # | 4 # |
| 5 # file: sent_el.txt | 5 # file: sent_el.txt |
| 6 # | 6 # |
| 7 # ICU Sentence Break Rules | 7 # ICU Sentence Break Rules |
| 8 # See Unicode Standard Annex #29. | 8 # See Unicode Standard Annex #29. |
| 9 # These rules are based on UAX #29 Revision 19 for Unicode Version 6.1 | 9 # These rules are based on UAX #29 Revision 26 for Unicode Version 8.0 |
| 10 # | 10 # |
| 11 | 11 |
| 12 | 12 |
| 13 # | 13 # |
| 14 # Character categories as defined in TR 29 | 14 # Character categories as defined in TR 29 |
| 15 # | 15 # |
| 16 $CR = [\p{Sentence_Break = CR}]; | 16 $CR = [\p{Sentence_Break = CR}]; |
| 17 $LF = [\p{Sentence_Break = LF}]; | 17 $LF = [\p{Sentence_Break = LF}]; |
| 18 $Extend = [\p{Sentence_Break = Extend}]; | 18 $Extend = [\p{Sentence_Break = Extend}]; |
| 19 $Sep = [\p{Sentence_Break = Sep}]; | 19 $Sep = [\p{Sentence_Break = Sep}]; |
| (...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 57 # Rule 4 - Break after $Sep. | 57 # Rule 4 - Break after $Sep. |
| 58 # Rule 5 - Ignore $Format and $Extend | 58 # Rule 5 - Ignore $Format and $Extend |
| 59 # | 59 # |
| 60 [^$Sep $CR $LF]? ($Extend | $Format)*; | 60 [^$Sep $CR $LF]? ($Extend | $Format)*; |
| 61 | 61 |
| 62 | 62 |
| 63 # Rule 6 | 63 # Rule 6 |
| 64 $ATermEx $NumericEx; | 64 $ATermEx $NumericEx; |
| 65 | 65 |
| 66 # Rule 7 | 66 # Rule 7 |
| 67 $UpperEx $ATermEx $UpperEx; | 67 ($UpperEx | $LowerEx) $ATermEx $UpperEx; |
| 68 | 68 |
| 69 #Rule 8 | 69 #Rule 8 |
| 70 $NotLettersEx = [^$OLetter $Upper $Lower $Sep $CR $LF $ATerm $STerm] ($Extend |
$Format)*; | 70 $NotLettersEx = [^$OLetter $Upper $Lower $Sep $CR $LF $ATerm $STerm] ($Extend |
$Format)*; |
| 71 $ATermEx $CloseEx* $SpEx* $NotLettersEx* $Lower; | 71 $ATermEx $CloseEx* $SpEx* $NotLettersEx* $Lower; |
| 72 | 72 |
| 73 # Rule 8a | 73 # Rule 8a |
| 74 ($STermEx | $ATermEx) $CloseEx* $SpEx* ($SContinueEx | $STermEx | $ATermEx); | 74 ($STermEx | $ATermEx) $CloseEx* $SpEx* ($SContinueEx | $STermEx | $ATermEx); |
| 75 | 75 |
| 76 #Rule 9, 10, 11 | 76 #Rule 9, 10, 11 |
| 77 ($STermEx | $ATermEx) $CloseEx* $SpEx* ($Sep | $CR | $LF)?; | 77 ($STermEx | $ATermEx) $CloseEx* $SpEx* ($Sep | $CR | $LF)?; |
| (...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 110 # Any immediately preceding STerm or ATerm sequences. We need to see these | 110 # Any immediately preceding STerm or ATerm sequences. We need to see these |
| 111 # to get the correct rule status when moving forwards again. | 111 # to get the correct rule status when moving forwards again. |
| 112 # | 112 # |
| 113 # [{bof}] inhibit rule chaining. Without this, rule would loop on itself and match | 113 # [{bof}] inhibit rule chaining. Without this, rule would loop on itself and match |
| 114 # the entire string. | 114 # the entire string. |
| 115 # | 115 # |
| 116 # (.? | $LF $CR) Match one $Sep instance. Use .? rather than $Sep because position might be | 116 # (.? | $LF $CR) Match one $Sep instance. Use .? rather than $Sep because position might be |
| 117 # at the beginning of the string at this point, and we don't want to fail. | 117 # at the beginning of the string at this point, and we don't want to fail. |
| 118 # Can only use {eof} once, and it is used later. | 118 # Can only use {eof} once, and it is used later. |
| 119 # | 119 # |
| OLD | NEW |