Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(135)

Side by Side Diff: chrome/renderer/spellchecker/spellcheck_worditerator.cc

Issue 265613002: Roll ICU to icu52 (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: Created 6 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Implements a custom word iterator used for our spellchecker. 5 // Implements a custom word iterator used for our spellchecker.
6 6
7 #include "chrome/renderer/spellchecker/spellcheck_worditerator.h" 7 #include "chrome/renderer/spellchecker/spellcheck_worditerator.h"
8 8
9 #include <map> 9 #include <map>
10 #include <string> 10 #include <string>
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
59 "$LF = [\\p{Word_Break = LF}];" 59 "$LF = [\\p{Word_Break = LF}];"
60 "$Newline = [\\p{Word_Break = Newline}];" 60 "$Newline = [\\p{Word_Break = Newline}];"
61 "$Extend = [\\p{Word_Break = Extend}];" 61 "$Extend = [\\p{Word_Break = Extend}];"
62 "$Format = [\\p{Word_Break = Format}];" 62 "$Format = [\\p{Word_Break = Format}];"
63 "$Katakana = [\\p{Word_Break = Katakana}];" 63 "$Katakana = [\\p{Word_Break = Katakana}];"
64 // Not all the characters in a given script are ALetter. 64 // Not all the characters in a given script are ALetter.
65 // For instance, U+05F4 is MidLetter. So, this may be 65 // For instance, U+05F4 is MidLetter. So, this may be
66 // better, but it leads to an empty set error in Thai. 66 // better, but it leads to an empty set error in Thai.
67 // "$ALetter = [[\\p{script=%s}] & [\\p{Word_Break = ALetter}]];" 67 // "$ALetter = [[\\p{script=%s}] & [\\p{Word_Break = ALetter}]];"
68 "$ALetter = [\\p{script=%s}%s];" 68 "$ALetter = [\\p{script=%s}%s];"
69 "$MidNumLet = [\\p{Word_Break = MidNumLet}];" 69 // U+0027 (single quote/apostrophe) is not in MidNumLet any more
70 // in UAX 29 rev 21 or later. For our purpose, U+0027
71 // has to be treated as MidNumLet.
groby-ooo-7-16 2014/06/10 20:18:11 nit: Pointer to bug would be appreciated, so futur
jungshik at Google 2014/06/10 20:54:21 Done.
72 "$MidNumLet = [\\p{Word_Break = MidNumLet} \\u0027];"
70 "$MidLetter = [\\p{Word_Break = MidLetter}%s];" 73 "$MidLetter = [\\p{Word_Break = MidLetter}%s];"
71 "$MidNum = [\\p{Word_Break = MidNum}];" 74 "$MidNum = [\\p{Word_Break = MidNum}];"
72 "$Numeric = [\\p{Word_Break = Numeric}];" 75 "$Numeric = [\\p{Word_Break = Numeric}];"
73 "$ExtendNumLet = [\\p{Word_Break = ExtendNumLet}];" 76 "$ExtendNumLet = [\\p{Word_Break = ExtendNumLet}];"
74 77
75 "$Control = [\\p{Grapheme_Cluster_Break = Control}]; " 78 "$Control = [\\p{Grapheme_Cluster_Break = Control}]; "
76 "%s" // ALetterPlus 79 "%s" // ALetterPlus
77 80
78 "$KatakanaEx = $Katakana ($Extend | $Format)*;" 81 "$KatakanaEx = $Katakana ($Extend | $Format)*;"
79 "$ALetterEx = $ALetterPlus ($Extend | $Format)*;" 82 "$ALetterEx = $ALetterPlus ($Extend | $Format)*;"
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after
145 // which don't need them. 148 // which don't need them.
146 const char* aletter = uscript_getName(script_code_); 149 const char* aletter = uscript_getName(script_code_);
147 if (!aletter) 150 if (!aletter)
148 aletter = "Latin"; 151 aletter = "Latin";
149 152
150 const char kWithDictionary[] = 153 const char kWithDictionary[] =
151 "$dictionary = [:LineBreak = Complex_Context:];" 154 "$dictionary = [:LineBreak = Complex_Context:];"
152 "$ALetterPlus = [$ALetter [$dictionary-$Extend-$Control]];"; 155 "$ALetterPlus = [$ALetter [$dictionary-$Extend-$Control]];";
153 const char kWithoutDictionary[] = "$ALetterPlus = $ALetter;"; 156 const char kWithoutDictionary[] = "$ALetterPlus = $ALetter;";
154 const char* aletter_plus = kWithoutDictionary; 157 const char* aletter_plus = kWithoutDictionary;
155 if (script_code_ == USCRIPT_HANGUL || script_code_ == USCRIPT_THAI) 158 if (script_code_ == USCRIPT_HANGUL || script_code_ == USCRIPT_THAI ||
159 script_code_ == USCRIPT_LAO || script_code_ == USCRIPT_KHMER)
156 aletter_plus = kWithDictionary; 160 aletter_plus = kWithDictionary;
157 161
158 // Treat numbers as word characters except for Arabic and Hebrew. 162 // Treat numbers as word characters except for Arabic and Hebrew.
159 const char* aletter_extra = " [0123456789]"; 163 const char* aletter_extra = " [0123456789]";
160 if (script_code_ == USCRIPT_HEBREW || script_code_ == USCRIPT_ARABIC) 164 if (script_code_ == USCRIPT_HEBREW || script_code_ == USCRIPT_ARABIC)
161 aletter_extra = ""; 165 aletter_extra = "";
162 166
163 const char kMidLetterExtra[] = ""; 167 const char kMidLetterExtra[] = "";
164 // For Hebrew, treat single/double quotation marks as MidLetter. 168 // For Hebrew, treat single/double quotation marks as MidLetter.
165 const char kMidLetterExtraHebrew[] = "\"'"; 169 const char kMidLetterExtraHebrew[] = "\"'";
(...skipping 256 matching lines...) Expand 10 before | Expand all | Expand 10 after
422 if (status != U_ZERO_ERROR && status != U_STRING_NOT_TERMINATED_WARNING) 426 if (status != U_ZERO_ERROR && status != U_STRING_NOT_TERMINATED_WARNING)
423 return false; 427 return false;
424 428
425 // Copy the normalized text to the output. 429 // Copy the normalized text to the output.
426 icu::StringCharacterIterator it(output); 430 icu::StringCharacterIterator it(output);
427 for (UChar c = it.first(); c != icu::CharacterIterator::DONE; c = it.next()) 431 for (UChar c = it.first(); c != icu::CharacterIterator::DONE; c = it.next())
428 attribute_->OutputChar(c, output_string); 432 attribute_->OutputChar(c, output_string);
429 433
430 return !output_string->empty(); 434 return !output_string->empty();
431 } 435 }
OLDNEW
« no previous file with comments | « chrome/browser/ui/webui/identity_internals_ui_browsertest.js ('k') | components/query_parser/snippet.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698