| OLD | NEW |
| 1 diff --git a/source/data/brkitr/brklocal.mk b/source/data/brkitr/brklocal.mk | 1 Index: source/data/brkitr/word.txt |
| 2 index 91754f1..ccac4d1 100644 | 2 =================================================================== |
| 3 --- a/source/data/brkitr/brklocal.mk | 3 --- source/data/brkitr/word.txt»(revision 264859) |
| 4 +++ b/source/data/brkitr/brklocal.mk | 4 +++ source/data/brkitr/word.txt»(working copy) |
| 5 @@ -34,15 +34,15 @@ BRK_RES_ALIAS_SOURCE = $(BRK_RES_SYNTHETIC_ALIAS) | 5 @@ -56,15 +56,13 @@ |
| 6 # 5.0 or later as the definition of Complex_Context was corrected to include
all |
| 7 # characters requiring dictionary break. |
| 6 | 8 |
| 7 | 9 -$Control = [\p{Grapheme_Cluster_Break = Control}]; |
| 8 # List of compact trie dictionary files (ctd). | 10 +$Control = [\p{Grapheme_Cluster_Break = Control}]; |
| 9 -BRK_CTD_SOURCE = thaidict.txt cjdict.txt | |
| 10 +BRK_CTD_SOURCE = thaidict.txt | |
| 11 | |
| 12 | |
| 13 # List of break iterator files (brk). | |
| 14 -# Chrome change: remove word_ja.txt and line_he.txt | |
| 15 -BRK_SOURCE = sent_el.txt word_POSIX.txt line_fi.txt char.txt word.txt line.txt
sent.txt title.txt char_th.txt | |
| 16 +# Chrome change: remove line_he.txt | |
| 17 +BRK_SOURCE = sent_el.txt word_POSIX.txt line_fi.txt word_ja.txt char.txt word.
txt line.txt sent.txt title.txt char_th.txt | |
| 18 | |
| 19 | |
| 20 # Ordinary resources | |
| 21 -# Chrome change: remove ja.txt and he.txt | |
| 22 +# Chrome change: remove he.txt | |
| 23 BRK_RES_SOURCE = el.txt en.txt en_US.txt en_US_POSIX.txt\ | |
| 24 - fi.txt th.txt | |
| 25 + fi.txt ja.txt th.txt | |
| 26 diff --git a/source/data/brkitr/root.txt b/source/data/brkitr/root.txt | |
| 27 index fb83ac3..5d839bd 100644 | |
| 28 --- a/source/data/brkitr/root.txt | |
| 29 +++ b/source/data/brkitr/root.txt | |
| 30 @@ -17,8 +17,5 @@ root{ | |
| 31 } | |
| 32 dictionaries{ | |
| 33 Thai:process(dependency){"thaidict.ctd"} | |
| 34 - Hani:process(dependency){"cjdict.ctd"} | |
| 35 - Hira:process(dependency){"cjdict.ctd"} | |
| 36 - Kata:process(dependency){"cjdict.ctd"} | |
| 37 } | |
| 38 } | |
| 39 diff --git a/source/data/brkitr/word.txt b/source/data/brkitr/word.txt | |
| 40 index 0b49377..a0e1ceb 100644 | |
| 41 --- a/source/data/brkitr/word.txt | |
| 42 +++ b/source/data/brkitr/word.txt | |
| 43 @@ -60,11 +60,10 @@ $Control = [\p{Grapheme_Cluster_Break = Control}]; | |
| 44 $HangulSyllable = [\uac00-\ud7a3]; | 11 $HangulSyllable = [\uac00-\ud7a3]; |
| 45 $ComplexContext = [:LineBreak = Complex_Context:]; | 12 $ComplexContext = [:LineBreak = Complex_Context:]; |
| 46 $KanaKanji = [$Han $Hiragana $Katakana]; | 13 $KanaKanji = [$Han $Hiragana $Katakana]; |
| 47 -$dictionaryCJK = [$KanaKanji $HangulSyllable]; | 14 -$dictionaryCJK = [$KanaKanji $HangulSyllable]; |
| 48 -$dictionary = [$ComplexContext $dictionaryCJK]; | 15 -$dictionary = [$ComplexContext $dictionaryCJK]; |
| 49 +$dictionary = [:LineBreak = Complex_Context:]; | 16 +$dictionary = [$ComplexContext]; |
| 50 | 17 |
| 51 -# leave CJK scripts out of ALetterPlus | 18 -# leave CJK scripts out of ALetterPlus |
| 52 -$ALetterPlus = [$ALetter-$dictionaryCJK [$ComplexContext-$Extend-$Control]]; | 19 -$ALetterPlus = [$ALetter-$dictionaryCJK [$ComplexContext-$Extend-$Control]]; |
| 53 +$ALetterPlus = [$ALetter [$dictionary-$Extend-$Control]]; # Note: default A
Letter does not | 20 +$ALetterPlus = [$ALetter [$ComplexContext-$Extend-$Control]]; |
| 54 + # include the dic
tionary characters. | |
| 55 | 21 |
| 56 | 22 |
| 57 # | 23 # |
| 58 @@ -99,8 +98,7 @@ $CR $LF; | 24 @@ -166,11 +164,6 @@ |
| 59 # begins with a group of Format chars, or with a "word" consisting of
a single | |
| 60 # char that is not in any of the listed word break categories followed
by | |
| 61 # format char(s). | |
| 62 - # format char(s), or is not a CJK dictionary character. | |
| 63 -[^$CR $LF $Newline $dictionaryCJK]? ($Extend | $Format)+; | |
| 64 +[^$CR $LF $Newline]? ($Extend | $Format)+; | |
| 65 | 25 |
| 66 $NumericEx {100}; | 26 $Regional_IndicatorEx $Regional_IndicatorEx; |
| 67 $ALetterEx {200}; | |
| 68 @@ -155,9 +153,6 @@ $ExtendNumLetEx $ALetterEx {200}; # (13b) | |
| 69 $ExtendNumLetEx $NumericEx {100}; # (13b) | |
| 70 $ExtendNumLetEx $KatakanaEx {400}; # (13b) | |
| 71 | 27 |
| 72 -# special handling for CJK characters: chain for later dictionary segmentation | 28 -# special handling for CJK characters: chain for later dictionary segmentation |
| 73 -$HangulSyllable $HangulSyllable {200}; | 29 -$HangulSyllable $HangulSyllable {200}; |
| 74 -$KanaKanji $KanaKanji {400}; #different rule status if both kanji and kana foun
d | 30 -$KanaKanji $KanaKanji {400}; # different rule status if both kana and kanji fou
nd |
| 31 - |
| 32 - |
| 33 ## ------------------------------------------------- |
| 75 | 34 |
| 35 !!reverse; |
| 36 @@ -237,10 +230,6 @@ |
| 76 | 37 |
| 77 ## ------------------------------------------------- | 38 $BackRegional_IndicatorEx $BackRegional_IndicatorEx; |
| 78 @@ -179,7 +174,7 @@ $BackHebrewLetEx = ($Format | $Extend)* $HebrewLet; | |
| 79 $LF $CR; | |
| 80 | |
| 81 # rule 4 | |
| 82 -($Format | $Extend)* [^$CR $LF $Newline $dictionaryCJK]?; | |
| 83 +($Format | $Extend)* [^$CR $LF $Newline]?; | |
| 84 | |
| 85 # rule 5 | |
| 86 | |
| 87 @@ -217,10 +212,6 @@ $BackKatakanaEx $BackKatakanaEx; | |
| 88 $BackExtendNumLetEx ($BackALetterEx | $BackNumericEx | $BackKatakanaEx | $BackE
xtendNumLetEx); | |
| 89 ($BackALetterEx | $BackNumericEx | $BackKatakanaEx) $BackExtendNumLetEx; | |
| 90 | 39 |
| 91 -# special handling for CJK characters: chain for later dictionary segmentation | 40 -# special handling for CJK characters: chain for later dictionary segmentation |
| 92 -$HangulSyllable $HangulSyllable; | 41 -$HangulSyllable $HangulSyllable; |
| 93 -$KanaKanji $KanaKanji; #different rule status if both kanji and kana found | 42 -$KanaKanji $KanaKanji; #different rule status if both kanji and kana found |
| 94 - | 43 - |
| 95 ## ------------------------------------------------- | 44 ## ------------------------------------------------- |
| 96 | 45 |
| 97 !!safe_reverse; | 46 !!safe_reverse; |
| 47 Index: source/data/brkitr/brklocal.mk |
| 48 =================================================================== |
| 49 --- source/data/brkitr/brklocal.mk (revision 264859) |
| 50 +++ source/data/brkitr/brklocal.mk (working copy) |
| 51 @@ -34,13 +34,13 @@ |
| 52 |
| 53 |
| 54 # List of dictionary files (dict). |
| 55 -BRK_DICT_SOURCE = cjdict.txt khmerdict.txt laodict.txt thaidict.txt |
| 56 +BRK_DICT_SOURCE = khmerdict.txt laodict.txt thaidict.txt |
| 57 |
| 58 |
| 59 # List of break iterator files (brk). |
| 60 -BRK_SOURCE = char.txt line.txt line_fi.txt sent.txt sent_el.txt title.txt word.
txt |
| 61 +BRK_SOURCE = char.txt line.txt line_fi.txt sent.txt sent_el.txt title.txt word.
txt word_ja.txt |
| 62 |
| 63 |
| 64 # Ordinary resources |
| 65 -BRK_RES_SOURCE = el.txt en.txt en_US.txt fi.txt |
| 66 +BRK_RES_SOURCE = el.txt en.txt en_US.txt fi.txt ja.txt |
| 67 |
| 68 Index: source/data/brkitr/root.txt |
| 69 =================================================================== |
| 70 --- source/data/brkitr/root.txt (revision 264859) |
| 71 +++ source/data/brkitr/root.txt (working copy) |
| 72 @@ -16,9 +16,6 @@ |
| 73 word:process(dependency){"word.brk"} |
| 74 } |
| 75 dictionaries{ |
| 76 - Hani:process(dependency){"cjdict.dict"} |
| 77 - Hira:process(dependency){"cjdict.dict"} |
| 78 - Kata:process(dependency){"cjdict.dict"} |
| 79 Khmr:process(dependency){"khmerdict.dict"} |
| 80 Laoo:process(dependency){"laodict.dict"} |
| 81 Thai:process(dependency){"thaidict.dict"} |
| 82 Index: source/data/brkitr/ja.txt |
| 83 =================================================================== |
| 84 --- source/data/brkitr/ja.txt (revision 264859) |
| 85 +++ source/data/brkitr/ja.txt (working copy) |
| 86 @@ -9,6 +9,6 @@ |
| 87 ja{ |
| 88 Version{"1.1"} |
| 89 boundaries{ |
| 90 - line:process(dependency){"line_ja.brk"} |
| 91 + word:process(dependency){"word_ja.brk"} |
| 92 } |
| 93 } |
| OLD | NEW |