| OLD | NEW |
| 1 Index: source/data/brkitr/word.txt | 1 diff --git a/source/data/brkitr/word.txt b/source/data/brkitr/word.txt |
| 2 =================================================================== | 2 index c74da4c..c5d6df7 100644 |
| 3 --- source/data/brkitr/word.txt»(revision 264859) | 3 --- a/source/data/brkitr/word.txt |
| 4 +++ source/data/brkitr/word.txt»(working copy) | 4 +++ b/source/data/brkitr/word.txt |
| 5 @@ -56,15 +56,13 @@ | 5 @@ -60,11 +60,9 @@ $Control = [\p{Grapheme_Cluster_Break = Control}]; |
| 6 # 5.0 or later as the definition of Complex_Context was corrected to include all | |
| 7 # characters requiring dictionary break. | |
| 8 | |
| 9 -$Control = [\p{Grapheme_Cluster_Break = Control}]; | |
| 10 +$Control = [\p{Grapheme_Cluster_Break = Control}]; | |
| 11 $HangulSyllable = [\uac00-\ud7a3]; | 6 $HangulSyllable = [\uac00-\ud7a3]; |
| 12 $ComplexContext = [:LineBreak = Complex_Context:]; | 7 $ComplexContext = [:LineBreak = Complex_Context:]; |
| 13 $KanaKanji = [$Han $Hiragana $Katakana]; | 8 $KanaKanji = [$Han $Hiragana $Katakana]; |
| 14 -$dictionaryCJK = [$KanaKanji $HangulSyllable]; | 9 -$dictionaryCJK = [$KanaKanji $HangulSyllable]; |
| 15 -$dictionary = [$ComplexContext $dictionaryCJK]; | 10 -$dictionary = [$ComplexContext $dictionaryCJK]; |
| 16 +$dictionary = [$ComplexContext]; | 11 +$dictionary = [$ComplexContext]; |
| 17 | 12 |
| 18 -# leave CJK scripts out of ALetterPlus | 13 -# leave CJK scripts out of ALetterPlus |
| 19 -$ALetterPlus = [$ALetter-$dictionaryCJK [$ComplexContext-$Extend-$Control]]; | 14 -$ALetterPlus = [$ALetter-$dictionaryCJK [$ComplexContext-$Extend-$Control]]; |
| 20 +$ALetterPlus = [$ALetter [$ComplexContext-$Extend-$Control]]; | 15 +$ALetterPlus = [$ALetter [$ComplexContext-$Extend-$Control]]; |
| 21 | 16 |
| 22 | 17 |
| 23 # | 18 # |
| 24 @@ -166,11 +164,6 @@ | 19 @@ -166,11 +164,6 @@ $ExtendNumLetEx $KatakanaEx {400}; # (13b) |
| 25 | 20 |
| 26 $Regional_IndicatorEx $Regional_IndicatorEx; | 21 $Regional_IndicatorEx $Regional_IndicatorEx; |
| 27 | 22 |
| 28 -# special handling for CJK characters: chain for later dictionary segmentation | 23 -# special handling for CJK characters: chain for later dictionary segmentation |
| 29 -$HangulSyllable $HangulSyllable {200}; | 24 -$HangulSyllable $HangulSyllable {200}; |
| 30 -$KanaKanji $KanaKanji {400}; # different rule status if both kana and kanji found | 25 -$KanaKanji $KanaKanji {400}; # different rule status if both kana and kanji found |
| 31 - | 26 - |
| 32 - | 27 - |
| 33 ## ------------------------------------------------- | 28 ## ------------------------------------------------- |
| 34 | 29 |
| 35 !!reverse; | 30 !!reverse; |
| 36 @@ -237,10 +230,6 @@ | 31 @@ -237,10 +230,6 @@ $BackExtendNumLetEx ($BackALetterEx | $BackHebrew_LetterEx | $BackNumericEx | $B |
| 37 | 32 |
| 38 $BackRegional_IndicatorEx $BackRegional_IndicatorEx; | 33 $BackRegional_IndicatorEx $BackRegional_IndicatorEx; |
| 39 | 34 |
| 40 -# special handling for CJK characters: chain for later dictionary segmentation | 35 -# special handling for CJK characters: chain for later dictionary segmentation |
| 41 -$HangulSyllable $HangulSyllable; | 36 -$HangulSyllable $HangulSyllable; |
| 42 -$KanaKanji $KanaKanji; #different rule status if both kanji and kana found | 37 -$KanaKanji $KanaKanji; #different rule status if both kanji and kana found |
| 43 - | 38 - |
| 44 ## ------------------------------------------------- | 39 ## ------------------------------------------------- |
| 45 | 40 |
| 46 !!safe_reverse; | 41 !!safe_reverse; |
| 47 Index: source/data/brkitr/brklocal.mk | 42 diff --git a/source/data/brkitr/brklocal.mk b/source/data/brkitr/brklocal.mk |
| 48 =================================================================== | 43 index ad36492..1d69c1e 100644 |
| 49 --- source/data/brkitr/brklocal.mk» (revision 264859) | 44 --- a/source/data/brkitr/brklocal.mk |
| 50 +++ source/data/brkitr/brklocal.mk» (working copy) | 45 +++ b/source/data/brkitr/brklocal.mk |
| 51 @@ -34,13 +34,13 @@ | 46 @@ -34,17 +34,17 @@ BRK_RES_ALIAS_SOURCE = $(BRK_RES_SYNTHETIC_ALIAS) |
| 52 | 47 |
| 53 | 48 |
| 54 # List of dictionary files (dict). | 49 # List of dictionary files (dict). |
| 55 -BRK_DICT_SOURCE = cjdict.txt khmerdict.txt laodict.txt thaidict.txt | 50 -BRK_DICT_SOURCE = burmesedict.txt cjdict.txt khmerdict.txt laodict.txt\ |
| 56 +BRK_DICT_SOURCE = khmerdict.txt laodict.txt thaidict.txt | 51 +BRK_DICT_SOURCE = burmesedict.txt khmerdict.txt laodict.txt\ |
| 52 thaidict.txt |
| 57 | 53 |
| 58 | 54 |
| 59 # List of break iterator files (brk). | 55 # List of break iterator files (brk). |
| 60 -BRK_SOURCE = char.txt line.txt line_fi.txt sent.txt sent_el.txt title.txt word.txt | 56 BRK_SOURCE = char.txt line.txt line_fi.txt\ |
| 61 +BRK_SOURCE = char.txt line.txt line_fi.txt sent.txt sent_el.txt title.txt word.txt word_ja.txt | 57 - sent.txt sent_el.txt title.txt word.txt |
| 58 + sent.txt sent_el.txt title.txt word.txt word_ja.txt |
| 62 | 59 |
| 63 | 60 |
| 64 # Ordinary resources | 61 # Ordinary resources |
| 65 -BRK_RES_SOURCE = el.txt en.txt en_US.txt fi.txt | 62 BRK_RES_SOURCE = de.txt el.txt en.txt en_US.txt\ |
| 66 +BRK_RES_SOURCE = el.txt en.txt en_US.txt fi.txt ja.txt | 63 en_US_POSIX.txt es.txt fi.txt fr.txt it.txt\ |
| 64 - pt.txt ru.txt |
| 65 + pt.txt ru.txt ja.txt |
| 67 | 66 |
| 68 Index: source/data/brkitr/root.txt | 67 diff --git a/source/data/brkitr/root.txt b/source/data/brkitr/root.txt |
| 69 =================================================================== | 68 index 2dd1fdc..1d066be 100644 |
| 70 --- source/data/brkitr/root.txt»(revision 264859) | 69 --- a/source/data/brkitr/root.txt |
| 71 +++ source/data/brkitr/root.txt»(working copy) | 70 +++ b/source/data/brkitr/root.txt |
| 72 @@ -16,9 +16,6 @@ | 71 @@ -16,9 +16,6 @@ root{ |
| 73 word:process(dependency){"word.brk"} | 72 word:process(dependency){"word.brk"} |
| 74 } | 73 } |
| 75 dictionaries{ | 74 dictionaries{ |
| 76 - Hani:process(dependency){"cjdict.dict"} | 75 - Hani:process(dependency){"cjdict.dict"} |
| 77 - Hira:process(dependency){"cjdict.dict"} | 76 - Hira:process(dependency){"cjdict.dict"} |
| 78 - Kata:process(dependency){"cjdict.dict"} | 77 - Kata:process(dependency){"cjdict.dict"} |
| 79 Khmr:process(dependency){"khmerdict.dict"} | 78 Khmr:process(dependency){"khmerdict.dict"} |
| 80 Laoo:process(dependency){"laodict.dict"} | 79 Laoo:process(dependency){"laodict.dict"} |
| 81 Thai:process(dependency){"thaidict.dict"} | 80 Mymr:process(dependency){"burmesedict.dict"} |
| 82 Index: source/data/brkitr/ja.txt | 81 diff --git a/source/data/brkitr/ja.txt b/source/data/brkitr/ja.txt |
| 83 =================================================================== | 82 index 3eb81d0..ffdbc6d 100644 |
| 84 --- source/data/brkitr/ja.txt» (revision 264859) | 83 --- a/source/data/brkitr/ja.txt |
| 85 +++ source/data/brkitr/ja.txt» (working copy) | 84 +++ b/source/data/brkitr/ja.txt |
| 86 @@ -9,6 +9,6 @@ | 85 @@ -9,6 +9,6 @@ |
| 87 ja{ | 86 ja{ |
| 88 Version{"1.1"} | 87 Version{"2.0.82.42"} |
| 89 boundaries{ | 88 boundaries{ |
| 90 - line:process(dependency){"line_ja.brk"} | 89 - line:process(dependency){"line_ja.brk"} |
| 91 + word:process(dependency){"word_ja.brk"} | 90 + word:process(dependency){"word_ja.brk"} |
| 92 } | 91 } |
| 93 } | 92 } |
| OLD | NEW |