chrome/renderer/spellchecker/spellcheck_worditerator.cc - Issue 265613002: Roll ICU to icu52

Side by Side Diff: chrome/renderer/spellchecker/spellcheck_worditerator.cc

Issue 265613002: Roll ICU to icu52 (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/

Patch Set: update expectation in photo gallery app test Created 6 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« no previous file with comments | « chrome/browser/ui/webui/identity_internals_ui_browsertest.js ('k') | chrome/test/data/extensions/api_test/file_manager_browsertest/test_util.js » ('j') | chrome/test/data/extensions/api_test/file_manager_browsertest/test_util.js » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Implements a custom word iterator used for our spellchecker.	5 // Implements a custom word iterator used for our spellchecker.

6	6

7 #include "chrome/renderer/spellchecker/spellcheck_worditerator.h"	7 #include "chrome/renderer/spellchecker/spellcheck_worditerator.h"

8	8

9 #include <map>	9 #include <map>

10 #include <string>	10 #include <string>

(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
59 "$LF = [\\p{Word_Break = LF}];"	59 "$LF = [\\p{Word_Break = LF}];"

60 "$Newline = [\\p{Word_Break = Newline}];"	60 "$Newline = [\\p{Word_Break = Newline}];"

61 "$Extend = [\\p{Word_Break = Extend}];"	61 "$Extend = [\\p{Word_Break = Extend}];"

62 "$Format = [\\p{Word_Break = Format}];"	62 "$Format = [\\p{Word_Break = Format}];"

63 "$Katakana = [\\p{Word_Break = Katakana}];"	63 "$Katakana = [\\p{Word_Break = Katakana}];"

64 // Not all the characters in a given script are ALetter.	64 // Not all the characters in a given script are ALetter.

65 // For instance, U+05F4 is MidLetter. So, this may be	65 // For instance, U+05F4 is MidLetter. So, this may be

66 // better, but it leads to an empty set error in Thai.	66 // better, but it leads to an empty set error in Thai.

67 // "$ALetter = [[\\p{script=%s}] & [\\p{Word_Break = ALetter}]];"	67 // "$ALetter = [[\\p{script=%s}] & [\\p{Word_Break = ALetter}]];"

68 "$ALetter = [\\p{script=%s}%s];"	68 "$ALetter = [\\p{script=%s}%s];"

69 "$MidNumLet = [\\p{Word_Break = MidNumLet}];"	69 // U+0027 (single quote/apostrophe) is not in MidNumLet any more

	70 // in UAX 29 rev 21 or later. For our purpose, U+0027

	71 // has to be treated as MidNumLet. ( http://crbug.com/364072 )

	72 "$MidNumLet = [\\p{Word_Break = MidNumLet} \\u0027];"

70 "$MidLetter = [\\p{Word_Break = MidLetter}%s];"	73 "$MidLetter = [\\p{Word_Break = MidLetter}%s];"

71 "$MidNum = [\\p{Word_Break = MidNum}];"	74 "$MidNum = [\\p{Word_Break = MidNum}];"

72 "$Numeric = [\\p{Word_Break = Numeric}];"	75 "$Numeric = [\\p{Word_Break = Numeric}];"

73 "$ExtendNumLet = [\\p{Word_Break = ExtendNumLet}];"	76 "$ExtendNumLet = [\\p{Word_Break = ExtendNumLet}];"

74	77

75 "$Control = [\\p{Grapheme_Cluster_Break = Control}]; "	78 "$Control = [\\p{Grapheme_Cluster_Break = Control}]; "

76 "%s" // ALetterPlus	79 "%s" // ALetterPlus

77	80

78 "$KatakanaEx = $Katakana ($Extend | $Format)*;"	81 "$KatakanaEx = $Katakana ($Extend | $Format)*;"

79 "$ALetterEx = $ALetterPlus ($Extend | $Format)*;"	82 "$ALetterEx = $ALetterPlus ($Extend | $Format)*;"

(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
145 // which don't need them.	148 // which don't need them.

146 const char* aletter = uscript_getName(script_code_);	149 const char* aletter = uscript_getName(script_code_);

147 if (!aletter)	150 if (!aletter)

148 aletter = "Latin";	151 aletter = "Latin";

149	152

150 const char kWithDictionary[] =	153 const char kWithDictionary[] =

151 "$dictionary = [:LineBreak = Complex_Context:];"	154 "$dictionary = [:LineBreak = Complex_Context:];"

152 "$ALetterPlus = [$ALetter [$dictionary-$Extend-$Control]];";	155 "$ALetterPlus = [$ALetter [$dictionary-$Extend-$Control]];";

153 const char kWithoutDictionary[] = "$ALetterPlus = $ALetter;";	156 const char kWithoutDictionary[] = "$ALetterPlus = $ALetter;";

154 const char* aletter_plus = kWithoutDictionary;	157 const char* aletter_plus = kWithoutDictionary;

155 if (script_code_ == USCRIPT_HANGUL || script_code_ == USCRIPT_THAI)	158 if (script_code_ == USCRIPT_HANGUL || script_code_ == USCRIPT_THAI ||

	159 script_code_ == USCRIPT_LAO || script_code_ == USCRIPT_KHMER)

156 aletter_plus = kWithDictionary;	160 aletter_plus = kWithDictionary;

157	161

158 // Treat numbers as word characters except for Arabic and Hebrew.	162 // Treat numbers as word characters except for Arabic and Hebrew.

159 const char* aletter_extra = " [0123456789]";	163 const char* aletter_extra = " [0123456789]";

160 if (script_code_ == USCRIPT_HEBREW \|\| script_code_ == USCRIPT_ARABIC)	164 if (script_code_ == USCRIPT_HEBREW \|\| script_code_ == USCRIPT_ARABIC)

161 aletter_extra = "";	165 aletter_extra = "";

162	166

163 const char kMidLetterExtra[] = "";	167 const char kMidLetterExtra[] = "";

164 // For Hebrew, treat single/double quotation marks as MidLetter.	168 // For Hebrew, treat single/double quotation marks as MidLetter.

165 const char kMidLetterExtraHebrew[] = "\"'";	169 const char kMidLetterExtraHebrew[] = "\"'";

(...skipping 256 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
422 if (status != U_ZERO_ERROR && status != U_STRING_NOT_TERMINATED_WARNING)	426 if (status != U_ZERO_ERROR && status != U_STRING_NOT_TERMINATED_WARNING)

423 return false;	427 return false;

424	428

425 // Copy the normalized text to the output.	429 // Copy the normalized text to the output.

426 icu::StringCharacterIterator it(output);	430 icu::StringCharacterIterator it(output);

427 for (UChar c = it.first(); c != icu::CharacterIterator::DONE; c = it.next())	431 for (UChar c = it.first(); c != icu::CharacterIterator::DONE; c = it.next())

428 attribute_->OutputChar(c, output_string);	432 attribute_->OutputChar(c, output_string);

429	433

430 return !output_string->empty();	434 return !output_string->empty();

431 }	435 }

OLD	NEW