OLD | NEW |
(Empty) | |
| 1 # |
| 2 # Copyright (C) 2002-2010, International Business Machines Corporation and others. |
| 3 # All Rights Reserved. |
| 4 # |
| 5 # file: char.txt |
| 6 # |
| 7 # ICU Character Break Rules, also known as Grapheme Cluster Boundaries |
| 8 # See Unicode Standard Annex #29. |
| 9 # These rules are based on TR29 Revision 16, for Unicode Version 6.0 |
| 10 # |
| 11 |
| 12 # |
| 13 # Character Class Definitions. |
| 14 # |
# Each variable below names the set of code points whose Grapheme_Cluster_Break
# property has the given value. The break rules later in the file are written
# in terms of these sets.
| 15 $CR = [\p{Grapheme_Cluster_Break = CR}]; |
| 16 $LF = [\p{Grapheme_Cluster_Break = LF}]; |
| 17 $Control = [\p{Grapheme_Cluster_Break = Control}]; |
| 18 $Prepend = [\p{Grapheme_Cluster_Break = Prepend}]; |
| 19 $Extend = [\p{Grapheme_Cluster_Break = Extend}]; |
| 20 $SpacingMark = [\p{Grapheme_Cluster_Break = SpacingMark}]; |
| 21 |
| 22 # |
| 23 # Korean Syllable Definitions |
| 24 # |
# Sets for the Grapheme_Cluster_Break values L, V, T, LV and LVT. In UAX #29
# these are the Hangul leading consonants, vowels, trailing consonants, and the
# precomposed LV and LVT syllables; the syllable rules below combine them.
| 25 $L = [\p{Grapheme_Cluster_Break = L}]; |
| 26 $V = [\p{Grapheme_Cluster_Break = V}]; |
| 27 $T = [\p{Grapheme_Cluster_Break = T}]; |
| 28 |
| 29 $LV = [\p{Grapheme_Cluster_Break = LV}]; |
| 30 $LVT = [\p{Grapheme_Cluster_Break = LVT}]; |
| 31 |
| 32 |
| 33 ## ------------------------------------------------- |
# Rule option: !!chain turns on rule chaining for this rule set (see the ICU
# break-rule documentation for the exact matching semantics).
| 34 !!chain; |
| 35 |
# Forward rules. Each rule lists a two-element sequence that is matched as a
# unit, so no grapheme cluster boundary is placed between the two elements.
| 36 !!forward; |
| 37 |
# CR followed by LF stays together (UAX #29 rule GB3).
| 38 $CR $LF; |
| 39 |
# Hangul syllable sequences (UAX #29 rules GB6, GB7 and GB8, in that order).
| 40 $L ($L | $V | $LV | $LVT); |
| 41 ($LV | $V) ($V | $T); |
| 42 ($LVT | $T) $T; |
| 43 |
# Any character other than Control, CR or LF, followed by Extend (GB9).
| 44 [^$Control $CR $LF] $Extend; |
| 45 |
# The same left-hand set followed by SpacingMark (GB9a), and Prepend followed
# by any character other than Control, CR or LF (GB9b).
| 46 [^$Control $CR $LF] $SpacingMark; |
| 47 $Prepend [^$Control $CR $LF]; |
| 48 |
| 49 |
| 50 ## ------------------------------------------------- |
| 51 |
# Reverse rules. These are the forward rules with the two elements of each
# rule swapped, i.e. the same sequences as seen when moving backwards through
# the text.
| 52 !!reverse; |
# LF then CR: the CR LF pair read backwards.
| 53 $LF $CR; |
# Hangul syllable sequences read backwards (mirrors of the three forward
# syllable rules).
| 54 ($L | $V | $LV | $LVT) $L; |
| 55 ($V | $T) ($LV | $V); |
| 56 $T ($LVT | $T); |
| 57 |
# Extend or SpacingMark preceded (in text order) by any character other than
# Control, CR or LF; and such a character preceded by Prepend.
| 58 $Extend [^$Control $CR $LF]; |
| 59 $SpacingMark [^$Control $CR $LF]; |
| 60 [^$Control $CR $LF] $Prepend; |
| 61 |
| 62 |
| 63 ## ------------------------------------------------- |
| 64 |
# The safe_reverse section is declared but contains no rules in this file.
| 65 !!safe_reverse; |
| 66 |
| 67 |
| 68 ## ------------------------------------------------- |
| 69 |
# The safe_forward section is likewise declared with no rules listed under it.
| 70 !!safe_forward; |
| 71 |
OLD | NEW |