OLD | NEW |
1 diff --git a/source/data/brkitr/brklocal.mk b/source/data/brkitr/brklocal.mk | 1 diff --git a/source/data/brkitr/brklocal.mk b/source/data/brkitr/brklocal.mk |
2 index cb5226a..39202f1 100644 | 2 index b5eca75..2a75a9e 100644 |
3 --- a/source/data/brkitr/brklocal.mk | 3 --- a/source/data/brkitr/brklocal.mk |
4 +++ b/source/data/brkitr/brklocal.mk | 4 +++ b/source/data/brkitr/brklocal.mk |
5 @@ -34,14 +34,14 @@ BRK_RES_ALIAS_SOURCE = $(BRK_RES_SYNTHETIC_ALIAS) | 5 @@ -34,7 +34,7 @@ BRK_RES_ALIAS_SOURCE = $(BRK_RES_SYNTHETIC_ALIAS) |
6 | 6 |
7 | 7 |
8 # List of dictionary files (dict). | 8 # List of dictionary files (dict). |
9 -BRK_DICT_SOURCE = burmesedict.txt cjdict.txt khmerdict.txt laodict.txt\ | 9 -BRK_DICT_SOURCE = burmesedict.txt cjdict.txt khmerdict.txt laodict.txt\ |
10 +BRK_DICT_SOURCE = burmesedict.txt khmerdict.txt laodict.txt\ | 10 +BRK_DICT_SOURCE = burmesedict.txt khmerdict.txt laodict.txt\ |
11 thaidict.txt | 11 thaidict.txt |
12 | 12 |
13 | 13 |
14 # List of break iterator files (brk). | 14 @@ -42,7 +42,7 @@ BRK_DICT_SOURCE = burmesedict.txt cjdict.txt khmerdict.txt lao
dict.txt\ |
15 BRK_SOURCE = char.txt line.txt\ | 15 BRK_SOURCE = char.txt line.txt\ |
16 line_normal.txt line_normal_cj.txt line_normal_fi.txt\ | 16 line_normal.txt line_normal_cj.txt line_normal_fi.txt\ |
| 17 line_loose_cj.txt\ |
17 - sent.txt sent_el.txt title.txt word.txt | 18 - sent.txt sent_el.txt title.txt word.txt |
18 + sent.txt sent_el.txt title.txt word.txt word_ja.txt | 19 + sent.txt sent_el.txt title.txt word.txt word_ja.txt |
19 | 20 |
20 | 21 |
21 # Ordinary resources | 22 # Ordinary resources |
22 diff --git a/source/data/brkitr/ja.txt b/source/data/brkitr/ja.txt | 23 diff --git a/source/data/brkitr/ja.txt b/source/data/brkitr/ja.txt |
23 index f9f986e..cd07526 100644 | 24 index 2e9a1c8..cb732a7 100644 |
24 --- a/source/data/brkitr/ja.txt | 25 --- a/source/data/brkitr/ja.txt |
25 +++ b/source/data/brkitr/ja.txt | 26 +++ b/source/data/brkitr/ja.txt |
26 @@ -13,5 +13,6 @@ ja{ | 27 @@ -7,5 +7,6 @@ ja{ |
27 line_loose:process(dependency){"line_normal_cj.brk"} | 28 line_loose:process(dependency){"line_loose_cj.brk"} |
28 line_normal:process(dependency){"line_normal_cj.brk"} | 29 line_normal:process(dependency){"line_normal_cj.brk"} |
29 line_strict:process(dependency){"line.brk"} | 30 line_strict:process(dependency){"line.brk"} |
30 + word:process(dependency){"word_ja.brk"} | 31 + word:process(dependency){"word_ja.brk"} |
31 } | 32 } |
32 } | 33 } |
33 diff --git a/source/data/brkitr/root.txt b/source/data/brkitr/root.txt | 34 diff --git a/source/data/brkitr/root.txt b/source/data/brkitr/root.txt |
34 index 77ddd14..2e275a1 100644 | 35 index 1a1ad8a..c790282 100644 |
35 --- a/source/data/brkitr/root.txt | 36 --- a/source/data/brkitr/root.txt |
36 +++ b/source/data/brkitr/root.txt | 37 +++ b/source/data/brkitr/root.txt |
37 @@ -19,9 +19,6 @@ root{ | 38 @@ -13,9 +13,6 @@ root{ |
38 word:process(dependency){"word.brk"} | 39 word:process(dependency){"word.brk"} |
39 } | 40 } |
40 dictionaries{ | 41 dictionaries{ |
41 - Hani:process(dependency){"cjdict.dict"} | 42 - Hani:process(dependency){"cjdict.dict"} |
42 - Hira:process(dependency){"cjdict.dict"} | 43 - Hira:process(dependency){"cjdict.dict"} |
43 - Kana:process(dependency){"cjdict.dict"} | 44 - Kana:process(dependency){"cjdict.dict"} |
44 Khmr:process(dependency){"khmerdict.dict"} | 45 Khmr:process(dependency){"khmerdict.dict"} |
45 Laoo:process(dependency){"laodict.dict"} | 46 Laoo:process(dependency){"laodict.dict"} |
46 Mymr:process(dependency){"burmesedict.dict"} | 47 Mymr:process(dependency){"burmesedict.dict"} |
47 diff --git a/source/data/brkitr/word.txt b/source/data/brkitr/word.txt | 48 diff --git a/source/data/brkitr/rules/word.txt b/source/data/brkitr/rules/word.t
xt |
48 index f89a2fe..9603957 100644 | 49 index 9c93dd5..eb150ea 100644 |
49 --- a/source/data/brkitr/word.txt | 50 --- a/source/data/brkitr/rules/word.txt |
50 +++ b/source/data/brkitr/word.txt | 51 +++ b/source/data/brkitr/rules/word.txt |
51 @@ -54,11 +54,9 @@ $Control = [\p{Grapheme_Cluster_Break = Control}]; | 52 @@ -71,11 +71,9 @@ $Control = [\p{Grapheme_Cluster_Break = Control}]; |
52 $HangulSyllable = [\uac00-\ud7a3]; | 53 $HangulSyllable = [\uac00-\ud7a3]; |
53 $ComplexContext = [:LineBreak = Complex_Context:]; | 54 $ComplexContext = [:LineBreak = Complex_Context:]; |
54 $KanaKanji = [$Han $Hiragana $Katakana]; | 55 $KanaKanji = [$Han $Hiragana $Katakana]; |
55 -$dictionaryCJK = [$KanaKanji $HangulSyllable]; | 56 -$dictionaryCJK = [$KanaKanji $HangulSyllable]; |
56 -$dictionary = [$ComplexContext $dictionaryCJK]; | 57 -$dictionary = [$ComplexContext $dictionaryCJK]; |
57 +$dictionary = [$ComplexContext]; | 58 +$dictionary = [$ComplexContext]; |
58 | 59 |
59 -# leave CJK scripts out of ALetterPlus | 60 -# leave CJK scripts out of ALetterPlus |
60 -$ALetterPlus = [$ALetter-$dictionaryCJK [$ComplexContext-$Extend-$Control]]; | 61 -$ALetterPlus = [$ALetter-$dictionaryCJK [$ComplexContext-$Extend-$Control]]; |
61 +$ALetterPlus = [$ALetter [$ComplexContext-$Extend-$Control]]; | 62 +$ALetterPlus = [$ALetter [$ComplexContext-$Extend-$Control]]; |
62 | 63 |
63 | 64 |
64 # | 65 # |
65 @@ -160,11 +158,6 @@ $ExtendNumLetEx $KatakanaEx {400}; # (13b) | 66 @@ -194,11 +192,6 @@ $ExtendNumLetEx $KatakanaEx {400}; # (13b) |
66 | 67 # |
67 $Regional_IndicatorEx $Regional_IndicatorEx; | 68 ^$Regional_IndicatorEx $Regional_IndicatorEx; |
68 | 69 |
69 -# special handling for CJK characters: chain for later dictionary segmentation | 70 -# special handling for CJK characters: chain for later dictionary segmentation |
70 -$HangulSyllable $HangulSyllable {200}; | 71 -$HangulSyllable $HangulSyllable {200}; |
71 -$KanaKanji $KanaKanji {400}; # different rule status if both kana and kanji fou
nd | 72 -$KanaKanji $KanaKanji {400}; # different rule status if both kana and kanji fou
nd |
72 - | 73 - |
73 - | 74 - |
74 ## ------------------------------------------------- | 75 ## ------------------------------------------------- |
75 | 76 |
76 !!reverse; | 77 !!reverse; |
77 @@ -231,10 +224,6 @@ $BackExtendNumLetEx ($BackALetterEx | $BackHebrew_LetterEx
| $BackNumericEx | $B | 78 @@ -265,10 +258,6 @@ $BackKatakanaEx $BackKatakanaEx; |
78 | 79 $BackExtendNumLetEx ($BackALetterEx | $BackHebrew_LetterEx | $BackNumericEx | $
BackKatakanaEx | $BackExtendNumLetEx); |
79 $BackRegional_IndicatorEx $BackRegional_IndicatorEx; | 80 ($BackALetterEx | $BackHebrew_LetterEx | $BackNumericEx | $BackKatakanaEx) $Bac
kExtendNumLetEx; |
80 | 81 |
81 -# special handling for CJK characters: chain for later dictionary segmentation | 82 -# special handling for CJK characters: chain for later dictionary segmentation |
82 -$HangulSyllable $HangulSyllable; | 83 -$HangulSyllable $HangulSyllable; |
83 -$KanaKanji $KanaKanji; #different rule status if both kanji and kana found | 84 -$KanaKanji $KanaKanji; #different rule status if both kanji and kana found |
84 - | 85 - |
85 ## ------------------------------------------------- | 86 # rule 14 |
86 | 87 |
87 !!safe_reverse; | 88 $E_Modifier ($Format | $Extend | $ZWJ)* ($E_Base | $EBG); |
OLD | NEW |