Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(709)

Side by Side Diff: trunk/src/chrome/renderer/spellchecker/spellcheck_worditerator.cc

Issue 336793004: Revert 277111 "Roll ICU to icu52" (Closed) Base URL: svn://svn.chromium.org/chrome/
Patch Set: Created 6 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Implements a custom word iterator used for our spellchecker. 5 // Implements a custom word iterator used for our spellchecker.
6 6
7 #include "chrome/renderer/spellchecker/spellcheck_worditerator.h" 7 #include "chrome/renderer/spellchecker/spellcheck_worditerator.h"
8 8
9 #include <map> 9 #include <map>
10 #include <string> 10 #include <string>
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after
60 "$LF = [\\p{Word_Break = LF}];" 60 "$LF = [\\p{Word_Break = LF}];"
61 "$Newline = [\\p{Word_Break = Newline}];" 61 "$Newline = [\\p{Word_Break = Newline}];"
62 "$Extend = [\\p{Word_Break = Extend}];" 62 "$Extend = [\\p{Word_Break = Extend}];"
63 "$Format = [\\p{Word_Break = Format}];" 63 "$Format = [\\p{Word_Break = Format}];"
64 "$Katakana = [\\p{Word_Break = Katakana}];" 64 "$Katakana = [\\p{Word_Break = Katakana}];"
65 // Not all the characters in a given script are ALetter. 65 // Not all the characters in a given script are ALetter.
66 // For instance, U+05F4 is MidLetter. So, this may be 66 // For instance, U+05F4 is MidLetter. So, this may be
67 // better, but it leads to an empty set error in Thai. 67 // better, but it leads to an empty set error in Thai.
68 // "$ALetter = [[\\p{script=%s}] & [\\p{Word_Break = ALetter}]];" 68 // "$ALetter = [[\\p{script=%s}] & [\\p{Word_Break = ALetter}]];"
69 "$ALetter = [\\p{script=%s}%s];" 69 "$ALetter = [\\p{script=%s}%s];"
70 // U+0027 (single quote/apostrophe) is not in MidNumLet any more 70 "$MidNumLet = [\\p{Word_Break = MidNumLet}];"
71 // in UAX 29 rev 21 or later. For our purpose, U+0027
72 // has to be treated as MidNumLet. ( http://crbug.com/364072 )
73 "$MidNumLet = [\\p{Word_Break = MidNumLet} \\u0027];"
74 "$MidLetter = [\\p{Word_Break = MidLetter}%s];" 71 "$MidLetter = [\\p{Word_Break = MidLetter}%s];"
75 "$MidNum = [\\p{Word_Break = MidNum}];" 72 "$MidNum = [\\p{Word_Break = MidNum}];"
76 "$Numeric = [\\p{Word_Break = Numeric}];" 73 "$Numeric = [\\p{Word_Break = Numeric}];"
77 "$ExtendNumLet = [\\p{Word_Break = ExtendNumLet}];" 74 "$ExtendNumLet = [\\p{Word_Break = ExtendNumLet}];"
78 75
79 "$Control = [\\p{Grapheme_Cluster_Break = Control}]; " 76 "$Control = [\\p{Grapheme_Cluster_Break = Control}]; "
80 "%s" // ALetterPlus 77 "%s" // ALetterPlus
81 78
82 "$KatakanaEx = $Katakana ($Extend | $Format)*;" 79 "$KatakanaEx = $Katakana ($Extend | $Format)*;"
83 "$ALetterEx = $ALetterPlus ($Extend | $Format)*;" 80 "$ALetterEx = $ALetterPlus ($Extend | $Format)*;"
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after
149 // which don't need them. 146 // which don't need them.
150 const char* aletter = uscript_getName(script_code_); 147 const char* aletter = uscript_getName(script_code_);
151 if (!aletter) 148 if (!aletter)
152 aletter = "Latin"; 149 aletter = "Latin";
153 150
154 const char kWithDictionary[] = 151 const char kWithDictionary[] =
155 "$dictionary = [:LineBreak = Complex_Context:];" 152 "$dictionary = [:LineBreak = Complex_Context:];"
156 "$ALetterPlus = [$ALetter [$dictionary-$Extend-$Control]];"; 153 "$ALetterPlus = [$ALetter [$dictionary-$Extend-$Control]];";
157 const char kWithoutDictionary[] = "$ALetterPlus = $ALetter;"; 154 const char kWithoutDictionary[] = "$ALetterPlus = $ALetter;";
158 const char* aletter_plus = kWithoutDictionary; 155 const char* aletter_plus = kWithoutDictionary;
159 if (script_code_ == USCRIPT_HANGUL || script_code_ == USCRIPT_THAI || 156 if (script_code_ == USCRIPT_HANGUL || script_code_ == USCRIPT_THAI)
160 script_code_ == USCRIPT_LAO || script_code_ == USCRIPT_KHMER)
161 aletter_plus = kWithDictionary; 157 aletter_plus = kWithDictionary;
162 158
163 // Treat numbers as word characters except for Arabic and Hebrew. 159 // Treat numbers as word characters except for Arabic and Hebrew.
164 const char* aletter_extra = " [0123456789]"; 160 const char* aletter_extra = " [0123456789]";
165 if (script_code_ == USCRIPT_HEBREW || script_code_ == USCRIPT_ARABIC) 161 if (script_code_ == USCRIPT_HEBREW || script_code_ == USCRIPT_ARABIC)
166 aletter_extra = ""; 162 aletter_extra = "";
167 163
168 const char kMidLetterExtra[] = ""; 164 const char kMidLetterExtra[] = "";
169 // For Hebrew, treat single/double quotation marks as MidLetter. 165 // For Hebrew, treat single/double quotation marks as MidLetter.
170 const char kMidLetterExtraHebrew[] = "\"'"; 166 const char kMidLetterExtraHebrew[] = "\"'";
(...skipping 243 matching lines...) Expand 10 before | Expand all | Expand 10 after
414 if (status != U_ZERO_ERROR && status != U_STRING_NOT_TERMINATED_WARNING) 410 if (status != U_ZERO_ERROR && status != U_STRING_NOT_TERMINATED_WARNING)
415 return false; 411 return false;
416 412
417 // Copy the normalized text to the output. 413 // Copy the normalized text to the output.
418 icu::StringCharacterIterator it(output); 414 icu::StringCharacterIterator it(output);
419 for (UChar c = it.first(); c != icu::CharacterIterator::DONE; c = it.next()) 415 for (UChar c = it.first(); c != icu::CharacterIterator::DONE; c = it.next())
420 attribute_->OutputChar(c, output_string); 416 attribute_->OutputChar(c, output_string);
421 417
422 return !output_string->empty(); 418 return !output_string->empty();
423 } 419 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698