chrome/renderer/spellchecker/spellcheck_worditerator.cc - Issue 3112015: Customize Hebrew spellcheck word break iterator...

Unified Diff: chrome/renderer/spellchecker/spellcheck_worditerator.cc

Issue 3112015: Customize Hebrew spellcheck word break iterator... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: '' Created 10 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: chrome/renderer/spellchecker/spellcheck_worditerator.cc

===================================================================

--- chrome/renderer/spellchecker/spellcheck_worditerator.cc (revision 56338)

+++ chrome/renderer/spellchecker/spellcheck_worditerator.cc (working copy)

@@ -60,15 +60,19 @@

"$Extend = [\\p{Word_Break = Extend}];"

"$Format = [\\p{Word_Break = Format}];"

"$Katakana = [\\p{Word_Break = Katakana}];"

+ // Not all the characters in a given script are ALetter.

+ // For instance, U+05F4 is MidLetter. So, the commented-out rule below may be

+ // better, but it leads to an empty set error in Thai.

+ // "$ALetter = [[\\p{script=%s}] & [\\p{Word_Break = ALetter}]];"

"$ALetter = [\\p{script=%s}];"

"$MidNumLet = [\\p{Word_Break = MidNumLet}];"

- "$MidLetter = [\\p{Word_Break = MidLetter}];"

+ "$MidLetter = [\\p{Word_Break = MidLetter}%s];"

"$MidNum = [\\p{Word_Break = MidNum}];"

"$Numeric = [\\p{Word_Break = Numeric}];"

"$ExtendNumLet = [\\p{Word_Break = ExtendNumLet}];"

"$Control = [\\p{Grapheme_Cluster_Break = Control}]; "

- "%s"

+ "%s" // ALetterPlus

"$KatakanaEx = $Katakana ($Extend | $Format)*;"

"$ALetterEx = $ALetterPlus ($Extend | $Format)*;"

@@ -88,7 +92,7 @@

"[^$CR $LF $Newline]? ($Extend | $Format)+;"

"$ALetterEx {200};"

"$ALetterEx $ALetterEx {200};"

- "%s"

+ "%s" // (Allow|Disallow) Contraction

"!!reverse;"

"$BackALetterEx = ($Format | $Extend)* $ALetterPlus;"

@@ -150,6 +154,13 @@

if (script_code_ == USCRIPT_HANGUL || script_code_ == USCRIPT_THAI)

aletter_plus = kWithDictionary;

+ const char kMidLetterExtra[] = "";

+ // For Hebrew, treat single/double quotation marks as MidLetter.

+ const char kMidLetterExtraHebrew[] = "\"'";

+ const char* midletter_extra = kMidLetterExtra;

+ if (script_code_ == USCRIPT_HEBREW)

+ midletter_extra = kMidLetterExtraHebrew;

// Create two custom rule-sets: one allows contraction and the other does not.

// We save these strings in UTF-16 so we can use it without conversions. (ICU

// needs UTF-16 strings.)

@@ -158,9 +169,9 @@

const char kDisallowContraction[] = "";

ruleset_allow_contraction_ = ASCIIToUTF16(StringPrintf(kRuleTemplate,

- aletter, aletter_plus, kAllowContraction));

+ aletter, midletter_extra, aletter_plus, kAllowContraction));

ruleset_disallow_contraction_ = ASCIIToUTF16(StringPrintf(kRuleTemplate,

- aletter, aletter_plus, kDisallowContraction));

+ aletter, midletter_extra, aletter_plus, kDisallowContraction));

}

bool SpellcheckCharAttribute::OutputChar(UChar c, string16* output) const {

@@ -245,7 +256,10 @@

// niqquds as misspelled. (Same as Arabic vowel marks, we need to check

// niqquds manually and filter them out since their script codes are

// USCRIPT_HEBREW.)

- if (0x05D0 <= c && c <= 0x05EA)

+ // Pass through ASCII single/double quotation marks and Hebrew Geresh and

+ // Gershayim.

+ if ((0x05D0 <= c && c <= 0x05EA) || c == 0x22 || c == 0x27 ||

+ c == 0x05F4 || c == 0x05F3)

output->push_back(c);

return true;

}

« no previous file with comments | « no previous file | chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc » ('j') | no next file with comments »