Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(9683)

Unified Diff: chrome/renderer/spellchecker/spellcheck_worditerator.cc

Issue 3112015: Customize Hebrew spellcheck word break iterator... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 10 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: chrome/renderer/spellchecker/spellcheck_worditerator.cc
===================================================================
--- chrome/renderer/spellchecker/spellcheck_worditerator.cc (revision 56338)
+++ chrome/renderer/spellchecker/spellcheck_worditerator.cc (working copy)
@@ -60,15 +60,19 @@
"$Extend = [\\p{Word_Break = Extend}];"
"$Format = [\\p{Word_Break = Format}];"
"$Katakana = [\\p{Word_Break = Katakana}];"
+ // Not all the characters in a given script are ALetter.
+ // For instance, U+05F4 is MidLetter. So, the commented-out rule below
+ // may be better, but it leads to an empty set error in Thai.
+ // "$ALetter = [[\\p{script=%s}] & [\\p{Word_Break = ALetter}]];"
"$ALetter = [\\p{script=%s}];"
"$MidNumLet = [\\p{Word_Break = MidNumLet}];"
- "$MidLetter = [\\p{Word_Break = MidLetter}];"
+ "$MidLetter = [\\p{Word_Break = MidLetter}%s];"
"$MidNum = [\\p{Word_Break = MidNum}];"
"$Numeric = [\\p{Word_Break = Numeric}];"
"$ExtendNumLet = [\\p{Word_Break = ExtendNumLet}];"
"$Control = [\\p{Grapheme_Cluster_Break = Control}]; "
- "%s"
+ "%s" // ALetterPlus
"$KatakanaEx = $Katakana ($Extend | $Format)*;"
"$ALetterEx = $ALetterPlus ($Extend | $Format)*;"
@@ -88,7 +92,7 @@
"[^$CR $LF $Newline]? ($Extend | $Format)+;"
"$ALetterEx {200};"
"$ALetterEx $ALetterEx {200};"
- "%s"
+ "%s" // (Allow|Disallow) Contraction
"!!reverse;"
"$BackALetterEx = ($Format | $Extend)* $ALetterPlus;"
@@ -150,6 +154,13 @@
if (script_code_ == USCRIPT_HANGUL || script_code_ == USCRIPT_THAI)
aletter_plus = kWithDictionary;
+ const char kMidLetterExtra[] = "";
+ // For Hebrew, treat single/double quotation marks as MidLetter.
+ const char kMidLetterExtraHebrew[] = "\"'";
+ const char* midletter_extra = kMidLetterExtra;
+ if (script_code_ == USCRIPT_HEBREW)
+ midletter_extra = kMidLetterExtraHebrew;
+
// Create two custom rule-sets: one allows contraction and the other does not.
// We save these strings in UTF-16 so we can use it without conversions. (ICU
// needs UTF-16 strings.)
@@ -158,9 +169,9 @@
const char kDisallowContraction[] = "";
ruleset_allow_contraction_ = ASCIIToUTF16(StringPrintf(kRuleTemplate,
- aletter, aletter_plus, kAllowContraction));
+ aletter, midletter_extra, aletter_plus, kAllowContraction));
ruleset_disallow_contraction_ = ASCIIToUTF16(StringPrintf(kRuleTemplate,
- aletter, aletter_plus, kDisallowContraction));
+ aletter, midletter_extra, aletter_plus, kDisallowContraction));
}
bool SpellcheckCharAttribute::OutputChar(UChar c, string16* output) const {
@@ -245,7 +256,10 @@
// niqquds as misspelled. (Same as Arabic vowel marks, we need to check
// niqquds manually and filter them out since their script codes are
// USCRIPT_HEBREW.)
- if (0x05D0 <= c && c <= 0x05EA)
+ // Pass through ASCII single/double quotation marks and Hebrew Geresh and
+ // Gershayim.
+ if ((0x05D0 <= c && c <= 0x05EA) || c == 0x22 || c == 0x27 ||
+ c == 0x05F4 || c == 0x05F3)
output->push_back(c);
return true;
}
« no previous file with comments | « no previous file | chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698