chrome/renderer/spellchecker/spellcheck_worditerator.cc - Issue 265613002: Roll ICU to icu52

Side by Side Diff: chrome/renderer/spellchecker/spellcheck_worditerator.cc

Issue 265613002: Roll ICU to icu52 (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/

Patch Set: Created 6 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« no previous file with comments | « chrome/browser/ui/webui/identity_internals_ui_browsertest.js ('k') | chrome/test/data/extensions/api_test/file_manager_browsertest/test_util.js » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Implements a custom word iterator used for our spellchecker.	5 // Implements a custom word iterator used for our spellchecker.

6	6

7 #include "chrome/renderer/spellchecker/spellcheck_worditerator.h"	7 #include "chrome/renderer/spellchecker/spellcheck_worditerator.h"

8	8

9 #include <map>	9 #include <map>

10 #include <string>	10 #include <string>

(...skipping 49 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
60 "$LF = [\\p{Word_Break = LF}];"	60 "$LF = [\\p{Word_Break = LF}];"

61 "$Newline = [\\p{Word_Break = Newline}];"	61 "$Newline = [\\p{Word_Break = Newline}];"

62 "$Extend = [\\p{Word_Break = Extend}];"	62 "$Extend = [\\p{Word_Break = Extend}];"

63 "$Format = [\\p{Word_Break = Format}];"	63 "$Format = [\\p{Word_Break = Format}];"

64 "$Katakana = [\\p{Word_Break = Katakana}];"	64 "$Katakana = [\\p{Word_Break = Katakana}];"

65 // Not all the characters in a given script are ALetter.	65 // Not all the characters in a given script are ALetter.

66 // For instance, U+05F4 is MidLetter. So, this may be	66 // For instance, U+05F4 is MidLetter. So, this may be

67 // better, but it leads to an empty set error in Thai.	67 // better, but it leads to an empty set error in Thai.

68 // "$ALetter = [[\\p{script=%s}] & [\\p{Word_Break = ALetter}]];"	68 // "$ALetter = [[\\p{script=%s}] & [\\p{Word_Break = ALetter}]];"

69 "$ALetter = [\\p{script=%s}%s];"	69 "$ALetter = [\\p{script=%s}%s];"

70 "$MidNumLet = [\\p{Word_Break = MidNumLet}];"	70 // U+0027 (single quote/apostrophe) is not in MidNumLet any more

	71 // in UAX 29 rev 21 or later. For our purpose, U+0027

	72 // has to be treated as MidNumLet. ( http://crbug.com/364072 )

	73 "$MidNumLet = [\\p{Word_Break = MidNumLet} \\u0027];"

71 "$MidLetter = [\\p{Word_Break = MidLetter}%s];"	74 "$MidLetter = [\\p{Word_Break = MidLetter}%s];"

72 "$MidNum = [\\p{Word_Break = MidNum}];"	75 "$MidNum = [\\p{Word_Break = MidNum}];"

73 "$Numeric = [\\p{Word_Break = Numeric}];"	76 "$Numeric = [\\p{Word_Break = Numeric}];"

74 "$ExtendNumLet = [\\p{Word_Break = ExtendNumLet}];"	77 "$ExtendNumLet = [\\p{Word_Break = ExtendNumLet}];"

75	78

76 "$Control = [\\p{Grapheme_Cluster_Break = Control}]; "	79 "$Control = [\\p{Grapheme_Cluster_Break = Control}]; "

77 "%s" // ALetterPlus	80 "%s" // ALetterPlus

78	81

79 "$KatakanaEx = $Katakana ($Extend \| $Format)*;"	82 "$KatakanaEx = $Katakana ($Extend \| $Format)*;"

80 "$ALetterEx = $ALetterPlus ($Extend \| $Format)*;"	83 "$ALetterEx = $ALetterPlus ($Extend \| $Format)*;"

(...skipping 65 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
146 // which don't need them.	149 // which don't need them.

147 const char* aletter = uscript_getName(script_code_);	150 const char* aletter = uscript_getName(script_code_);

148 if (!aletter)	151 if (!aletter)

149 aletter = "Latin";	152 aletter = "Latin";

150	153

151 const char kWithDictionary[] =	154 const char kWithDictionary[] =

152 "$dictionary = [:LineBreak = Complex_Context:];"	155 "$dictionary = [:LineBreak = Complex_Context:];"

153 "$ALetterPlus = [$ALetter [$dictionary-$Extend-$Control]];";	156 "$ALetterPlus = [$ALetter [$dictionary-$Extend-$Control]];";

154 const char kWithoutDictionary[] = "$ALetterPlus = $ALetter;";	157 const char kWithoutDictionary[] = "$ALetterPlus = $ALetter;";

155 const char* aletter_plus = kWithoutDictionary;	158 const char* aletter_plus = kWithoutDictionary;

156 if (script_code_ == USCRIPT_HANGUL \|\| script_code_ == USCRIPT_THAI)	159 if (script_code_ == USCRIPT_HANGUL \|\| script_code_ == USCRIPT_THAI \|\|

	160 script_code_ == USCRIPT_LAO \|\| script_code_ == USCRIPT_KHMER)

157 aletter_plus = kWithDictionary;	161 aletter_plus = kWithDictionary;

158	162

159 // Treat numbers as word characters except for Arabic and Hebrew.	163 // Treat numbers as word characters except for Arabic and Hebrew.

160 const char* aletter_extra = " [0123456789]";	164 const char* aletter_extra = " [0123456789]";

161 if (script_code_ == USCRIPT_HEBREW \|\| script_code_ == USCRIPT_ARABIC)	165 if (script_code_ == USCRIPT_HEBREW \|\| script_code_ == USCRIPT_ARABIC)

162 aletter_extra = "";	166 aletter_extra = "";

163	167

164 const char kMidLetterExtra[] = "";	168 const char kMidLetterExtra[] = "";

165 // For Hebrew, treat single/double quotation marks as MidLetter.	169 // For Hebrew, treat single/double quotation marks as MidLetter.

166 const char kMidLetterExtraHebrew[] = "\"'";	170 const char kMidLetterExtraHebrew[] = "\"'";

(...skipping 243 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
410 if (status != U_ZERO_ERROR && status != U_STRING_NOT_TERMINATED_WARNING)	414 if (status != U_ZERO_ERROR && status != U_STRING_NOT_TERMINATED_WARNING)

411 return false;	415 return false;

412	416

413 // Copy the normalized text to the output.	417 // Copy the normalized text to the output.

414 icu::StringCharacterIterator it(output);	418 icu::StringCharacterIterator it(output);

415 for (UChar c = it.first(); c != icu::CharacterIterator::DONE; c = it.next())	419 for (UChar c = it.first(); c != icu::CharacterIterator::DONE; c = it.next())

416 attribute_->OutputChar(c, output_string);	420 attribute_->OutputChar(c, output_string);

417	421

418 return !output_string->empty();	422 return !output_string->empty();

419 }	423 }

OLD	NEW