| OLD | NEW |
| 1 diff --git a/source/data/brkitr/brklocal.mk b/source/data/brkitr/brklocal.mk | 1 diff --git a/source/data/brkitr/brklocal.mk b/source/data/brkitr/brklocal.mk |
| 2 index cb5226a..39202f1 100644 | 2 index b5eca75..2a75a9e 100644 |
| 3 --- a/source/data/brkitr/brklocal.mk | 3 --- a/source/data/brkitr/brklocal.mk |
| 4 +++ b/source/data/brkitr/brklocal.mk | 4 +++ b/source/data/brkitr/brklocal.mk |
| 5 @@ -34,14 +34,14 @@ BRK_RES_ALIAS_SOURCE = $(BRK_RES_SYNTHETIC_ALIAS) | 5 @@ -34,7 +34,7 @@ BRK_RES_ALIAS_SOURCE = $(BRK_RES_SYNTHETIC_ALIAS) |
| 6 | 6 |
| 7 | 7 |
| 8 # List of dictionary files (dict). | 8 # List of dictionary files (dict). |
| 9 -BRK_DICT_SOURCE = burmesedict.txt cjdict.txt khmerdict.txt laodict.txt\ | 9 -BRK_DICT_SOURCE = burmesedict.txt cjdict.txt khmerdict.txt laodict.txt\ |
| 10 +BRK_DICT_SOURCE = burmesedict.txt khmerdict.txt laodict.txt\ | 10 +BRK_DICT_SOURCE = burmesedict.txt khmerdict.txt laodict.txt\ |
| 11 thaidict.txt | 11 thaidict.txt |
| 12 | 12 |
| 13 | 13 |
| 14 # List of break iterator files (brk). | 14 @@ -42,7 +42,7 @@ BRK_DICT_SOURCE = burmesedict.txt cjdict.txt khmerdict.txt lao
dict.txt\ |
| 15 BRK_SOURCE = char.txt line.txt\ | 15 BRK_SOURCE = char.txt line.txt\ |
| 16 line_normal.txt line_normal_cj.txt line_normal_fi.txt\ | 16 line_normal.txt line_normal_cj.txt line_normal_fi.txt\ |
| 17 line_loose_cj.txt\ |
| 17 - sent.txt sent_el.txt title.txt word.txt | 18 - sent.txt sent_el.txt title.txt word.txt |
| 18 + sent.txt sent_el.txt title.txt word.txt word_ja.txt | 19 + sent.txt sent_el.txt title.txt word.txt word_ja.txt |
| 19 | 20 |
| 20 | 21 |
| 21 # Ordinary resources | 22 # Ordinary resources |
| 22 diff --git a/source/data/brkitr/ja.txt b/source/data/brkitr/ja.txt | 23 diff --git a/source/data/brkitr/ja.txt b/source/data/brkitr/ja.txt |
| 23 index f9f986e..cd07526 100644 | 24 index 2e9a1c8..cb732a7 100644 |
| 24 --- a/source/data/brkitr/ja.txt | 25 --- a/source/data/brkitr/ja.txt |
| 25 +++ b/source/data/brkitr/ja.txt | 26 +++ b/source/data/brkitr/ja.txt |
| 26 @@ -13,5 +13,6 @@ ja{ | 27 @@ -7,5 +7,6 @@ ja{ |
| 27 line_loose:process(dependency){"line_normal_cj.brk"} | 28 line_loose:process(dependency){"line_loose_cj.brk"} |
| 28 line_normal:process(dependency){"line_normal_cj.brk"} | 29 line_normal:process(dependency){"line_normal_cj.brk"} |
| 29 line_strict:process(dependency){"line.brk"} | 30 line_strict:process(dependency){"line.brk"} |
| 30 + word:process(dependency){"word_ja.brk"} | 31 + word:process(dependency){"word_ja.brk"} |
| 31 } | 32 } |
| 32 } | 33 } |
| 33 diff --git a/source/data/brkitr/root.txt b/source/data/brkitr/root.txt | 34 diff --git a/source/data/brkitr/root.txt b/source/data/brkitr/root.txt |
| 34 index 77ddd14..2e275a1 100644 | 35 index 1a1ad8a..c790282 100644 |
| 35 --- a/source/data/brkitr/root.txt | 36 --- a/source/data/brkitr/root.txt |
| 36 +++ b/source/data/brkitr/root.txt | 37 +++ b/source/data/brkitr/root.txt |
| 37 @@ -19,9 +19,6 @@ root{ | 38 @@ -13,9 +13,6 @@ root{ |
| 38 word:process(dependency){"word.brk"} | 39 word:process(dependency){"word.brk"} |
| 39 } | 40 } |
| 40 dictionaries{ | 41 dictionaries{ |
| 41 - Hani:process(dependency){"cjdict.dict"} | 42 - Hani:process(dependency){"cjdict.dict"} |
| 42 - Hira:process(dependency){"cjdict.dict"} | 43 - Hira:process(dependency){"cjdict.dict"} |
| 43 - Kana:process(dependency){"cjdict.dict"} | 44 - Kana:process(dependency){"cjdict.dict"} |
| 44 Khmr:process(dependency){"khmerdict.dict"} | 45 Khmr:process(dependency){"khmerdict.dict"} |
| 45 Laoo:process(dependency){"laodict.dict"} | 46 Laoo:process(dependency){"laodict.dict"} |
| 46 Mymr:process(dependency){"burmesedict.dict"} | 47 Mymr:process(dependency){"burmesedict.dict"} |
| 47 diff --git a/source/data/brkitr/word.txt b/source/data/brkitr/word.txt | 48 diff --git a/source/data/brkitr/rules/word.txt b/source/data/brkitr/rules/word.t
xt |
| 48 index f89a2fe..9603957 100644 | 49 index 9c93dd5..eb150ea 100644 |
| 49 --- a/source/data/brkitr/word.txt | 50 --- a/source/data/brkitr/rules/word.txt |
| 50 +++ b/source/data/brkitr/word.txt | 51 +++ b/source/data/brkitr/rules/word.txt |
| 51 @@ -54,11 +54,9 @@ $Control = [\p{Grapheme_Cluster_Break = Control}]; | 52 @@ -71,11 +71,9 @@ $Control = [\p{Grapheme_Cluster_Break = Control}]; |
| 52 $HangulSyllable = [\uac00-\ud7a3]; | 53 $HangulSyllable = [\uac00-\ud7a3]; |
| 53 $ComplexContext = [:LineBreak = Complex_Context:]; | 54 $ComplexContext = [:LineBreak = Complex_Context:]; |
| 54 $KanaKanji = [$Han $Hiragana $Katakana]; | 55 $KanaKanji = [$Han $Hiragana $Katakana]; |
| 55 -$dictionaryCJK = [$KanaKanji $HangulSyllable]; | 56 -$dictionaryCJK = [$KanaKanji $HangulSyllable]; |
| 56 -$dictionary = [$ComplexContext $dictionaryCJK]; | 57 -$dictionary = [$ComplexContext $dictionaryCJK]; |
| 57 +$dictionary = [$ComplexContext]; | 58 +$dictionary = [$ComplexContext]; |
| 58 | 59 |
| 59 -# leave CJK scripts out of ALetterPlus | 60 -# leave CJK scripts out of ALetterPlus |
| 60 -$ALetterPlus = [$ALetter-$dictionaryCJK [$ComplexContext-$Extend-$Control]]; | 61 -$ALetterPlus = [$ALetter-$dictionaryCJK [$ComplexContext-$Extend-$Control]]; |
| 61 +$ALetterPlus = [$ALetter [$ComplexContext-$Extend-$Control]]; | 62 +$ALetterPlus = [$ALetter [$ComplexContext-$Extend-$Control]]; |
| 62 | 63 |
| 63 | 64 |
| 64 # | 65 # |
| 65 @@ -160,11 +158,6 @@ $ExtendNumLetEx $KatakanaEx {400}; # (13b) | 66 @@ -194,11 +192,6 @@ $ExtendNumLetEx $KatakanaEx {400}; # (13b) |
| 66 | 67 # |
| 67 $Regional_IndicatorEx $Regional_IndicatorEx; | 68 ^$Regional_IndicatorEx $Regional_IndicatorEx; |
| 68 | 69 |
| 69 -# special handling for CJK characters: chain for later dictionary segmentation | 70 -# special handling for CJK characters: chain for later dictionary segmentation |
| 70 -$HangulSyllable $HangulSyllable {200}; | 71 -$HangulSyllable $HangulSyllable {200}; |
| 71 -$KanaKanji $KanaKanji {400}; # different rule status if both kana and kanji fou
nd | 72 -$KanaKanji $KanaKanji {400}; # different rule status if both kana and kanji fou
nd |
| 72 - | 73 - |
| 73 - | 74 - |
| 74 ## ------------------------------------------------- | 75 ## ------------------------------------------------- |
| 75 | 76 |
| 76 !!reverse; | 77 !!reverse; |
| 77 @@ -231,10 +224,6 @@ $BackExtendNumLetEx ($BackALetterEx | $BackHebrew_LetterEx
| $BackNumericEx | $B | 78 @@ -265,10 +258,6 @@ $BackKatakanaEx $BackKatakanaEx; |
| 78 | 79 $BackExtendNumLetEx ($BackALetterEx | $BackHebrew_LetterEx | $BackNumericEx | $
BackKatakanaEx | $BackExtendNumLetEx); |
| 79 $BackRegional_IndicatorEx $BackRegional_IndicatorEx; | 80 ($BackALetterEx | $BackHebrew_LetterEx | $BackNumericEx | $BackKatakanaEx) $Bac
kExtendNumLetEx; |
| 80 | 81 |
| 81 -# special handling for CJK characters: chain for later dictionary segmentation | 82 -# special handling for CJK characters: chain for later dictionary segmentation |
| 82 -$HangulSyllable $HangulSyllable; | 83 -$HangulSyllable $HangulSyllable; |
| 83 -$KanaKanji $KanaKanji; #different rule status if both kanji and kana found | 84 -$KanaKanji $KanaKanji; #different rule status if both kanji and kana found |
| 84 - | 85 - |
| 85 ## ------------------------------------------------- | 86 # rule 14 |
| 86 | 87 |
| 87 !!safe_reverse; | 88 $E_Modifier ($Format | $Extend | $ZWJ)* ($E_Base | $EBG); |
| OLD | NEW |