Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(53)

Side by Side Diff: chrome/renderer/spellchecker/spellcheck_worditerator.cc

Issue 265613002: Roll ICU to icu52 (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Implements a custom word iterator used for our spellchecker. 5 // Implements a custom word iterator used for our spellchecker.
6 6
7 #include "chrome/renderer/spellchecker/spellcheck_worditerator.h" 7 #include "chrome/renderer/spellchecker/spellcheck_worditerator.h"
8 8
9 #include <map> 9 #include <map>
10 #include <string> 10 #include <string>
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after
60 "$LF = [\\p{Word_Break = LF}];" 60 "$LF = [\\p{Word_Break = LF}];"
61 "$Newline = [\\p{Word_Break = Newline}];" 61 "$Newline = [\\p{Word_Break = Newline}];"
62 "$Extend = [\\p{Word_Break = Extend}];" 62 "$Extend = [\\p{Word_Break = Extend}];"
63 "$Format = [\\p{Word_Break = Format}];" 63 "$Format = [\\p{Word_Break = Format}];"
64 "$Katakana = [\\p{Word_Break = Katakana}];" 64 "$Katakana = [\\p{Word_Break = Katakana}];"
65 // Not all the characters in a given script are ALetter. 65 // Not all the characters in a given script are ALetter.
66 // For instance, U+05F4 is MidLetter. So, this may be 66 // For instance, U+05F4 is MidLetter. So, this may be
67 // better, but it leads to an empty set error in Thai. 67 // better, but it leads to an empty set error in Thai.
68 // "$ALetter = [[\\p{script=%s}] & [\\p{Word_Break = ALetter}]];" 68 // "$ALetter = [[\\p{script=%s}] & [\\p{Word_Break = ALetter}]];"
69 "$ALetter = [\\p{script=%s}%s];" 69 "$ALetter = [\\p{script=%s}%s];"
70 "$MidNumLet = [\\p{Word_Break = MidNumLet}];" 70 // U+0027 (single quote/apostrophe) is not in MidNumLet any more
71 // in UAX 29 rev 21 or later. For our purpose, U+0027
72 // has to be treated as MidNumLet. ( http://crbug.com/364072 )
73 "$MidNumLet = [\\p{Word_Break = MidNumLet} \\u0027];"
71 "$MidLetter = [\\p{Word_Break = MidLetter}%s];" 74 "$MidLetter = [\\p{Word_Break = MidLetter}%s];"
72 "$MidNum = [\\p{Word_Break = MidNum}];" 75 "$MidNum = [\\p{Word_Break = MidNum}];"
73 "$Numeric = [\\p{Word_Break = Numeric}];" 76 "$Numeric = [\\p{Word_Break = Numeric}];"
74 "$ExtendNumLet = [\\p{Word_Break = ExtendNumLet}];" 77 "$ExtendNumLet = [\\p{Word_Break = ExtendNumLet}];"
75 78
76 "$Control = [\\p{Grapheme_Cluster_Break = Control}]; " 79 "$Control = [\\p{Grapheme_Cluster_Break = Control}]; "
77 "%s" // ALetterPlus 80 "%s" // ALetterPlus
78 81
79 "$KatakanaEx = $Katakana ($Extend | $Format)*;" 82 "$KatakanaEx = $Katakana ($Extend | $Format)*;"
80 "$ALetterEx = $ALetterPlus ($Extend | $Format)*;" 83 "$ALetterEx = $ALetterPlus ($Extend | $Format)*;"
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after
146 // which don't need them. 149 // which don't need them.
147 const char* aletter = uscript_getName(script_code_); 150 const char* aletter = uscript_getName(script_code_);
148 if (!aletter) 151 if (!aletter)
149 aletter = "Latin"; 152 aletter = "Latin";
150 153
151 const char kWithDictionary[] = 154 const char kWithDictionary[] =
152 "$dictionary = [:LineBreak = Complex_Context:];" 155 "$dictionary = [:LineBreak = Complex_Context:];"
153 "$ALetterPlus = [$ALetter [$dictionary-$Extend-$Control]];"; 156 "$ALetterPlus = [$ALetter [$dictionary-$Extend-$Control]];";
154 const char kWithoutDictionary[] = "$ALetterPlus = $ALetter;"; 157 const char kWithoutDictionary[] = "$ALetterPlus = $ALetter;";
155 const char* aletter_plus = kWithoutDictionary; 158 const char* aletter_plus = kWithoutDictionary;
156 if (script_code_ == USCRIPT_HANGUL || script_code_ == USCRIPT_THAI) 159 if (script_code_ == USCRIPT_HANGUL || script_code_ == USCRIPT_THAI ||
160 script_code_ == USCRIPT_LAO || script_code_ == USCRIPT_KHMER)
157 aletter_plus = kWithDictionary; 161 aletter_plus = kWithDictionary;
158 162
159 // Treat numbers as word characters except for Arabic and Hebrew. 163 // Treat numbers as word characters except for Arabic and Hebrew.
160 const char* aletter_extra = " [0123456789]"; 164 const char* aletter_extra = " [0123456789]";
161 if (script_code_ == USCRIPT_HEBREW || script_code_ == USCRIPT_ARABIC) 165 if (script_code_ == USCRIPT_HEBREW || script_code_ == USCRIPT_ARABIC)
162 aletter_extra = ""; 166 aletter_extra = "";
163 167
164 const char kMidLetterExtra[] = ""; 168 const char kMidLetterExtra[] = "";
165 // For Hebrew, treat single/double quotation marks as MidLetter. 169 // For Hebrew, treat single/double quotation marks as MidLetter.
166 const char kMidLetterExtraHebrew[] = "\"'"; 170 const char kMidLetterExtraHebrew[] = "\"'";
(...skipping 243 matching lines...) Expand 10 before | Expand all | Expand 10 after
410 if (status != U_ZERO_ERROR && status != U_STRING_NOT_TERMINATED_WARNING) 414 if (status != U_ZERO_ERROR && status != U_STRING_NOT_TERMINATED_WARNING)
411 return false; 415 return false;
412 416
413 // Copy the normalized text to the output. 417 // Copy the normalized text to the output.
414 icu::StringCharacterIterator it(output); 418 icu::StringCharacterIterator it(output);
415 for (UChar c = it.first(); c != icu::CharacterIterator::DONE; c = it.next()) 419 for (UChar c = it.first(); c != icu::CharacterIterator::DONE; c = it.next())
416 attribute_->OutputChar(c, output_string); 420 attribute_->OutputChar(c, output_string);
417 421
418 return !output_string->empty(); 422 return !output_string->empty();
419 } 423 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698