OLD | NEW |
| 1 diff --git a/source/data/brkitr/brklocal.mk b/source/data/brkitr/brklocal.mk |
| 2 index cb5226a..39202f1 100644 |
| 3 --- a/source/data/brkitr/brklocal.mk |
| 4 +++ b/source/data/brkitr/brklocal.mk |
| 5 @@ -34,14 +34,14 @@ BRK_RES_ALIAS_SOURCE = $(BRK_RES_SYNTHETIC_ALIAS) |
| 6 |
| 7 |
| 8 # List of dictionary files (dict). |
| 9 -BRK_DICT_SOURCE = burmesedict.txt cjdict.txt khmerdict.txt laodict.txt\ |
| 10 +BRK_DICT_SOURCE = burmesedict.txt khmerdict.txt laodict.txt\ |
| 11 thaidict.txt |
| 12 |
| 13 |
| 14 # List of break iterator files (brk). |
| 15 BRK_SOURCE = char.txt line.txt line_fi.txt\ |
| 16 line_normal.txt line_normal_cj.txt\ |
| 17 - sent.txt sent_el.txt title.txt word.txt |
| 18 + sent.txt sent_el.txt title.txt word.txt word_ja.txt |
| 19 |
| 20 |
| 21 # Ordinary resources |
| 22 diff --git a/source/data/brkitr/ja.txt b/source/data/brkitr/ja.txt |
| 23 index f9f986e..cd07526 100644 |
| 24 --- a/source/data/brkitr/ja.txt |
| 25 +++ b/source/data/brkitr/ja.txt |
| 26 @@ -13,5 +13,6 @@ ja{ |
| 27 line_loose:process(dependency){"line_normal_cj.brk"} |
| 28 line_normal:process(dependency){"line_normal_cj.brk"} |
| 29 line_strict:process(dependency){"line.brk"} |
| 30 + word:process(dependency){"word_ja.brk"} |
| 31 } |
| 32 } |
| 33 diff --git a/source/data/brkitr/root.txt b/source/data/brkitr/root.txt |
| 34 index 77ddd14..2e275a1 100644 |
| 35 --- a/source/data/brkitr/root.txt |
| 36 +++ b/source/data/brkitr/root.txt |
| 37 @@ -19,9 +19,6 @@ root{ |
| 38 word:process(dependency){"word.brk"} |
| 39 } |
| 40 dictionaries{ |
| 41 - Hani:process(dependency){"cjdict.dict"} |
| 42 - Hira:process(dependency){"cjdict.dict"} |
| 43 - Kana:process(dependency){"cjdict.dict"} |
| 44 Khmr:process(dependency){"khmerdict.dict"} |
| 45 Laoo:process(dependency){"laodict.dict"} |
| 46 Mymr:process(dependency){"burmesedict.dict"} |
1 diff --git a/source/data/brkitr/word.txt b/source/data/brkitr/word.txt | 47 diff --git a/source/data/brkitr/word.txt b/source/data/brkitr/word.txt |
2 index c74da4c..c5d6df7 100644 | 48 index f89a2fe..9603957 100644 |
3 --- a/source/data/brkitr/word.txt | 49 --- a/source/data/brkitr/word.txt |
4 +++ b/source/data/brkitr/word.txt | 50 +++ b/source/data/brkitr/word.txt |
5 @@ -60,11 +60,9 @@ $Control = [\p{Grapheme_Cluster_Break = Control}]; | 51 @@ -54,11 +54,9 @@ $Control = [\p{Grapheme_Cluster_Break = Control}]; |
6 $HangulSyllable = [\uac00-\ud7a3]; | 52 $HangulSyllable = [\uac00-\ud7a3]; |
7 $ComplexContext = [:LineBreak = Complex_Context:]; | 53 $ComplexContext = [:LineBreak = Complex_Context:]; |
8 $KanaKanji = [$Han $Hiragana $Katakana]; | 54 $KanaKanji = [$Han $Hiragana $Katakana]; |
9 -$dictionaryCJK = [$KanaKanji $HangulSyllable]; | 55 -$dictionaryCJK = [$KanaKanji $HangulSyllable]; |
10 -$dictionary = [$ComplexContext $dictionaryCJK]; | 56 -$dictionary = [$ComplexContext $dictionaryCJK]; |
11 +$dictionary = [$ComplexContext]; | 57 +$dictionary = [$ComplexContext]; |
12 | 58 |
13 -# leave CJK scripts out of ALetterPlus | 59 -# leave CJK scripts out of ALetterPlus |
14 -$ALetterPlus = [$ALetter-$dictionaryCJK [$ComplexContext-$Extend-$Control]]; | 60 -$ALetterPlus = [$ALetter-$dictionaryCJK [$ComplexContext-$Extend-$Control]]; |
15 +$ALetterPlus = [$ALetter [$ComplexContext-$Extend-$Control]]; | 61 +$ALetterPlus = [$ALetter [$ComplexContext-$Extend-$Control]]; |
16 | 62 |
17 | 63 |
18 # | 64 # |
19 @@ -166,11 +164,6 @@ $ExtendNumLetEx $KatakanaEx {400}; # (13b) | 65 @@ -160,11 +158,6 @@ $ExtendNumLetEx $KatakanaEx {400}; # (13b) |
20 | 66 |
21 $Regional_IndicatorEx $Regional_IndicatorEx; | 67 $Regional_IndicatorEx $Regional_IndicatorEx; |
22 | 68 |
23 -# special handling for CJK characters: chain for later dictionary segmentation | 69 -# special handling for CJK characters: chain for later dictionary segmentation |
24 -$HangulSyllable $HangulSyllable {200}; | 70 -$HangulSyllable $HangulSyllable {200}; |
25 -$KanaKanji $KanaKanji {400}; # different rule status if both kana and kanji found | 71 -$KanaKanji $KanaKanji {400}; # different rule status if both kana and kanji found |
26 - | 72 - |
27 - | 73 - |
28 ## ------------------------------------------------- | 74 ## ------------------------------------------------- |
29 | 75 |
30 !!reverse; | 76 !!reverse; |
31 @@ -237,10 +230,6 @@ $BackExtendNumLetEx ($BackALetterEx | $BackHebrew_LetterEx | $BackNumericEx | $B | 77 @@ -231,10 +224,6 @@ $BackExtendNumLetEx ($BackALetterEx | $BackHebrew_LetterEx | $BackNumericEx | $B |
32 | 78 |
33 $BackRegional_IndicatorEx $BackRegional_IndicatorEx; | 79 $BackRegional_IndicatorEx $BackRegional_IndicatorEx; |
34 | 80 |
35 -# special handling for CJK characters: chain for later dictionary segmentation | 81 -# special handling for CJK characters: chain for later dictionary segmentation |
36 -$HangulSyllable $HangulSyllable; | 82 -$HangulSyllable $HangulSyllable; |
37 -$KanaKanji $KanaKanji; #different rule status if both kanji and kana found | 83 -$KanaKanji $KanaKanji; #different rule status if both kanji and kana found |
38 - | 84 - |
39 ## ------------------------------------------------- | 85 ## ------------------------------------------------- |
40 | 86 |
41 !!safe_reverse; | 87 !!safe_reverse; |
42 diff --git a/source/data/brkitr/brklocal.mk b/source/data/brkitr/brklocal.mk | |
43 index c41c85a..5e0641b 100644 | |
44 --- a/source/data/brkitr/brklocal.mk | |
45 +++ b/source/data/brkitr/brklocal.mk | |
46 @@ -34,17 +34,17 @@ BRK_RES_ALIAS_SOURCE = $(BRK_RES_SYNTHETIC_ALIAS) | |
47 | |
48 | |
49 # List of dictionary files (dict). | |
50 -BRK_DICT_SOURCE = burmesedict.txt cjdict.txt khmerdict.txt laodict.txt\ | |
51 +BRK_DICT_SOURCE = burmesedict.txt khmerdict.txt laodict.txt\ | |
52 thaidict.txt | |
53 | |
54 | |
55 # List of break iterator files (brk). | |
56 BRK_SOURCE = char.txt line.txt line_fi.txt\ | |
57 - sent.txt sent_el.txt title.txt word.txt | |
58 + sent.txt sent_el.txt title.txt word.txt word_ja.txt | |
59 | |
60 | |
61 # Ordinary resources | |
62 BRK_RES_SOURCE = de.txt el.txt en.txt en_US.txt\ | |
63 es.txt fi.txt fr.txt it.txt\ | |
64 - pt.txt ru.txt | |
65 + pt.txt ru.txt ja.txt | |
66 | |
67 diff --git a/source/data/brkitr/root.txt b/source/data/brkitr/root.txt | |
68 index 2dd1fdc..1d066be 100644 | |
69 --- a/source/data/brkitr/root.txt | |
70 +++ b/source/data/brkitr/root.txt | |
71 @@ -16,9 +16,6 @@ root{ | |
72 word:process(dependency){"word.brk"} | |
73 } | |
74 dictionaries{ | |
75 - Hani:process(dependency){"cjdict.dict"} | |
76 - Hira:process(dependency){"cjdict.dict"} | |
77 - Kata:process(dependency){"cjdict.dict"} | |
78 Khmr:process(dependency){"khmerdict.dict"} | |
79 Laoo:process(dependency){"laodict.dict"} | |
80 Mymr:process(dependency){"burmesedict.dict"} | |
81 diff --git a/source/data/brkitr/ja.txt b/source/data/brkitr/ja.txt | |
82 index 3eb81d0..ffdbc6d 100644 | |
83 --- a/source/data/brkitr/ja.txt | |
84 +++ b/source/data/brkitr/ja.txt | |
85 @@ -9,6 +9,6 @@ | |
86 ja{ | |
87 Version{"2.0.82.42"} | |
88 boundaries{ | |
89 - line:process(dependency){"line_ja.brk"} | |
90 + word:process(dependency){"word_ja.brk"} | |
91 } | |
92 } | |
OLD | NEW |