| Index: chrome/renderer/spellchecker/spellcheck_worditerator.cc
|
| ===================================================================
|
| --- chrome/renderer/spellchecker/spellcheck_worditerator.cc (revision 56338)
|
| +++ chrome/renderer/spellchecker/spellcheck_worditerator.cc (working copy)
|
| @@ -60,15 +60,19 @@
|
| "$Extend = [\\p{Word_Break = Extend}];"
|
| "$Format = [\\p{Word_Break = Format}];"
|
| "$Katakana = [\\p{Word_Break = Katakana}];"
|
| + // Not all the characters in a given script are ALetter.
|
| + // For instance, U+05F4 is MidLetter. So, the rule below may be
|
| + // better, but it leads to an empty set error in Thai.
|
| + // "$ALetter = [[\\p{script=%s}] & [\\p{Word_Break = ALetter}]];"
|
| "$ALetter = [\\p{script=%s}];"
|
| "$MidNumLet = [\\p{Word_Break = MidNumLet}];"
|
| - "$MidLetter = [\\p{Word_Break = MidLetter}];"
|
| + "$MidLetter = [\\p{Word_Break = MidLetter}%s];"
|
| "$MidNum = [\\p{Word_Break = MidNum}];"
|
| "$Numeric = [\\p{Word_Break = Numeric}];"
|
| "$ExtendNumLet = [\\p{Word_Break = ExtendNumLet}];"
|
|
|
| "$Control = [\\p{Grapheme_Cluster_Break = Control}]; "
|
| - "%s"
|
| + "%s" // ALetterPlus
|
|
|
| "$KatakanaEx = $Katakana ($Extend | $Format)*;"
|
| "$ALetterEx = $ALetterPlus ($Extend | $Format)*;"
|
| @@ -88,7 +92,7 @@
|
| "[^$CR $LF $Newline]? ($Extend | $Format)+;"
|
| "$ALetterEx {200};"
|
| "$ALetterEx $ALetterEx {200};"
|
| - "%s"
|
| + "%s" // (Allow|Disallow) Contraction
|
|
|
| "!!reverse;"
|
| "$BackALetterEx = ($Format | $Extend)* $ALetterPlus;"
|
| @@ -150,6 +154,13 @@
|
| if (script_code_ == USCRIPT_HANGUL || script_code_ == USCRIPT_THAI)
|
| aletter_plus = kWithDictionary;
|
|
|
| + const char kMidLetterExtra[] = "";
|
| + // For Hebrew, treat single/double quotation marks as MidLetter.
|
| + const char kMidLetterExtraHebrew[] = "\"'";
|
| + const char* midletter_extra = kMidLetterExtra;
|
| + if (script_code_ == USCRIPT_HEBREW)
|
| + midletter_extra = kMidLetterExtraHebrew;
|
| +
|
| // Create two custom rule-sets: one allows contraction and the other does not.
|
| // We save these strings in UTF-16 so we can use it without conversions. (ICU
|
| // needs UTF-16 strings.)
|
| @@ -158,9 +169,9 @@
|
| const char kDisallowContraction[] = "";
|
|
|
| ruleset_allow_contraction_ = ASCIIToUTF16(StringPrintf(kRuleTemplate,
|
| - aletter, aletter_plus, kAllowContraction));
|
| + aletter, midletter_extra, aletter_plus, kAllowContraction));
|
| ruleset_disallow_contraction_ = ASCIIToUTF16(StringPrintf(kRuleTemplate,
|
| - aletter, aletter_plus, kDisallowContraction));
|
| + aletter, midletter_extra, aletter_plus, kDisallowContraction));
|
| }
|
|
|
| bool SpellcheckCharAttribute::OutputChar(UChar c, string16* output) const {
|
| @@ -245,7 +256,10 @@
|
| // niqquds as misspelled. (Same as Arabic vowel marks, we need to check
|
| // niqquds manually and filter them out since their script codes are
|
| // USCRIPT_HEBREW.)
|
| - if (0x05D0 <= c && c <= 0x05EA)
|
| + // Pass through ASCII single/double quotation marks and Hebrew Geresh and
|
| + // Gershayim.
|
| + if ((0x05D0 <= c && c <= 0x05EA) || c == 0x22 || c == 0x27 ||
|
| + c == 0x05F4 || c == 0x05F3)
|
| output->push_back(c);
|
| return true;
|
| }
|
|
|