OLD | NEW |
(Empty) | |
| 1 # |
| 2 # Copyright (C) 2002-2010, International Business Machines Corporation and others. |
| 3 # All Rights Reserved. |
| 4 # |
| 5 # file: char_th.txt |
| 6 # |
| 7 # ICU Character Break Rules, also known as Grapheme Cluster Boundaries |
| 8 # See Unicode Standard Annex #29. |
| 9 # These rules are based on TR29 Revision 16, for Unicode Version 6.0 |
| 10 # |
| 11 |
| 12 # Character Class Definitions: one set per Grapheme_Cluster_Break value. |
| 13 # $Extend additionally lists U+0E33 and U+0EB3 explicitly. |
| 14 # NOTE(review): confirm '+' in $Extend means union, not a literal '+'. |
| 15 $CR = [\p{Grapheme_Cluster_Break = CR}]; |
| 16 $LF = [\p{Grapheme_Cluster_Break = LF}]; |
| 17 $Control = [\p{Grapheme_Cluster_Break = Control}]; |
| 18 $Extend = [\p{Grapheme_Cluster_Break = Extend} + [\u0E33 \u0EB3]]; |
| 19 |
| 20 # |
| 21 # Korean Syllable Definitions |
| 22 # Hangul classes: the L, V, T, LV and LVT values of Grapheme_Cluster_Break. |
| 23 $L = [\p{Grapheme_Cluster_Break = L}]; |
| 24 $V = [\p{Grapheme_Cluster_Break = V}]; |
| 25 $T = [\p{Grapheme_Cluster_Break = T}]; |
| 26 |
| 27 $LV = [\p{Grapheme_Cluster_Break = LV}]; |
| 28 $LVT = [\p{Grapheme_Cluster_Break = LVT}]; |
| 29 |
| 30 |
| 31 ## ---- Forward rules, with !!chain set: CR LF, Hangul sequences, and $Extend after any char except Control/CR/LF ---- |
| 32 !!chain; |
| 33 |
| 34 !!forward; |
| 35 |
| 36 $CR $LF; |
| 37 |
| 38 $L ($L | $V | $LV | $LVT); |
| 39 ($LV | $V) ($V | $T); |
| 40 ($LVT | $T) $T; |
| 41 |
| 42 [^$Control $CR $LF] $Extend; |
| 43 |
| 44 ## ---- Reverse rules: each forward rule with its two operands swapped ---- |
| 45 |
| 46 !!reverse; |
| 47 $LF $CR; |
| 48 ($L | $V | $LV | $LVT) $L; |
| 49 ($V | $T) ($LV | $V); |
| 50 $T ($LVT | $T); |
| 51 |
| 52 $Extend [^$Control $CR $LF]; |
| 53 |
| 54 |
| 55 ## ---- Safe-reverse section: declared here with no rules ---- |
| 56 |
| 57 !!safe_reverse; |
| 58 |
| 59 |
| 60 ## ---- Safe-forward section: no rules follow it in the visible text ---- |
| 61 |
| 62 !!safe_forward; |
| 63 |
OLD | NEW |