OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include <stddef.h> | 5 #include <stddef.h> |
6 | 6 |
7 #include <string> | 7 #include <string> |
8 #include <vector> | 8 #include <vector> |
9 | 9 |
10 #include "base/format_macros.h" | 10 #include "base/format_macros.h" |
11 #include "base/i18n/break_iterator.h" | 11 #include "base/i18n/break_iterator.h" |
12 #include "base/macros.h" | 12 #include "base/macros.h" |
13 #include "base/strings/string_split.h" | 13 #include "base/strings/string_split.h" |
14 #include "base/strings/stringprintf.h" | 14 #include "base/strings/stringprintf.h" |
15 #include "base/strings/utf_string_conversions.h" | 15 #include "base/strings/utf_string_conversions.h" |
16 #include "chrome/renderer/spellchecker/spellcheck_worditerator.h" | 16 #include "chrome/renderer/spellchecker/spellcheck_worditerator.h" |
17 #include "testing/gtest/include/gtest/gtest.h" | 17 #include "testing/gtest/include/gtest/gtest.h" |
18 | 18 |
19 using base::i18n::BreakIterator; | 19 using base::i18n::BreakIterator; |
20 using WordIteratorStatus = SpellcheckWordIterator::WordIteratorStatus; | |
20 | 21 |
21 namespace { | 22 namespace { |
22 | 23 |
23 struct TestCase { | 24 struct TestCase { |
24 const char* language; | 25 const char* language; |
25 bool allow_contraction; | 26 bool allow_contraction; |
26 const wchar_t* expected_words; | 27 const wchar_t* expected_words; |
27 }; | 28 }; |
28 | 29 |
29 base::string16 GetRulesForLanguage(const std::string& language) { | 30 base::string16 GetRulesForLanguage(const std::string& language) { |
30 SpellcheckCharAttribute attribute; | 31 SpellcheckCharAttribute attribute; |
31 attribute.SetDefaultLanguage(language); | 32 attribute.SetDefaultLanguage(language); |
32 return attribute.GetRuleSet(true); | 33 return attribute.GetRuleSet(true); |
33 } | 34 } |
34 | 35 |
36 WordIteratorStatus GetNextNonSkippableWord(SpellcheckWordIterator* iterator, | |
37 base::string16* word_string, | |
38 int* word_start, | |
39 int* word_length) { | |
40 WordIteratorStatus status = SpellcheckWordIterator::IS_SKIPPABLE; | |
41 while (status == SpellcheckWordIterator::IS_SKIPPABLE) | |
42 status = iterator->GetNextWord(word_string, word_start, word_length); | |
Nico
2016/01/06 01:38:26
with a do-while loop you don't need to initialize
groby-ooo-7-16
2016/01/06 01:48:37
True, but it's one more line :) See below:
Status
Nico
2016/01/06 02:36:16
<s>More horrible</s>You don't need the parens:
| |
43 return status; | |
44 } | |
45 | |
35 } // namespace | 46 } // namespace |
36 | 47 |
37 // Tests whether or not our SpellcheckWordIterator can extract words used by the | 48 // Tests whether or not our SpellcheckWordIterator can extract words used by the |
38 // specified language from a multi-language text. | 49 // specified language from a multi-language text. |
39 TEST(SpellcheckWordIteratorTest, SplitWord) { | 50 TEST(SpellcheckWordIteratorTest, SplitWord) { |
40 // An input text. This text includes words of several languages. (Some words | 51 // An input text. This text includes words of several languages. (Some words |
41 // are not separated with whitespace characters.) Our SpellcheckWordIterator | 52 // are not separated with whitespace characters.) Our SpellcheckWordIterator |
42 // should extract the words used by the specified language from this text and | 53 // should extract the words used by the specified language from this text and |
43 // normalize them so our spell-checker can check their spellings. If | 54 // normalize them so our spell-checker can check their spellings. If |
44 // characters are found that are not from the specified language the test | 55 // characters are found that are not from the specified language the test |
(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
137 SpellcheckWordIterator iterator; | 148 SpellcheckWordIterator iterator; |
138 EXPECT_TRUE(iterator.Initialize(&attributes, | 149 EXPECT_TRUE(iterator.Initialize(&attributes, |
139 kTestCases[i].allow_contraction)); | 150 kTestCases[i].allow_contraction)); |
140 EXPECT_TRUE(iterator.SetText(input.c_str(), input.length())); | 151 EXPECT_TRUE(iterator.SetText(input.c_str(), input.length())); |
141 | 152 |
142 std::vector<base::string16> expected_words = base::SplitString( | 153 std::vector<base::string16> expected_words = base::SplitString( |
143 base::WideToUTF16(kTestCases[i].expected_words), | 154 base::WideToUTF16(kTestCases[i].expected_words), |
144 base::string16(1, ' '), base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL); | 155 base::string16(1, ' '), base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL); |
145 | 156 |
146 base::string16 actual_word; | 157 base::string16 actual_word; |
147 int actual_start, actual_end; | 158 int actual_start, actual_len; |
148 size_t index = 0; | 159 size_t index = 0; |
149 for (SpellcheckWordIterator::WordIteratorStatus status = | 160 for (SpellcheckWordIterator::WordIteratorStatus status = |
150 iterator.GetNextWord(&actual_word, &actual_start, &actual_end); | 161 iterator.GetNextWord(&actual_word, &actual_start, &actual_len); |
151 status != SpellcheckWordIterator::IS_END_OF_TEXT; | 162 status != SpellcheckWordIterator::IS_END_OF_TEXT; |
152 status = | 163 status = |
153 iterator.GetNextWord(&actual_word, &actual_start, &actual_end)) { | 164 iterator.GetNextWord(&actual_word, &actual_start, &actual_len)) { |
154 if (status == SpellcheckWordIterator::WordIteratorStatus::IS_SKIPPABLE) | 165 if (status == SpellcheckWordIterator::WordIteratorStatus::IS_SKIPPABLE) |
155 continue; | 166 continue; |
156 | 167 |
157 EXPECT_TRUE(index < expected_words.size()); | 168 EXPECT_TRUE(index < expected_words.size()); |
158 if (index < expected_words.size()) | 169 if (index < expected_words.size()) |
159 EXPECT_EQ(expected_words[index], actual_word); | 170 EXPECT_EQ(expected_words[index], actual_word); |
160 ++index; | 171 ++index; |
161 } | 172 } |
162 } | 173 } |
163 } | 174 } |
164 | 175 |
165 // Tests whether our SpellcheckWordIterator extracts an empty word without | 176 // Tests whether our SpellcheckWordIterator extracts an empty word without |
166 // getting stuck in an infinite loop when inputting a Khmer text. (This is a | 177 // getting stuck in an infinite loop when inputting a Khmer text. (This is a |
167 // regression test for Issue 46278.) | 178 // regression test for Issue 46278.) |
168 TEST(SpellcheckWordIteratorTest, RuleSetConsistency) { | 179 TEST(SpellcheckWordIteratorTest, RuleSetConsistency) { |
169 SpellcheckCharAttribute attributes; | 180 SpellcheckCharAttribute attributes; |
170 attributes.SetDefaultLanguage("en-US"); | 181 attributes.SetDefaultLanguage("en-US"); |
171 | 182 |
172 const wchar_t kTestText[] = L"\x1791\x17c1\x002e"; | 183 const wchar_t kTestText[] = L"\x1791\x17c1\x002e"; |
173 base::string16 input(base::WideToUTF16(kTestText)); | 184 base::string16 input(base::WideToUTF16(kTestText)); |
174 | 185 |
175 SpellcheckWordIterator iterator; | 186 SpellcheckWordIterator iterator; |
176 EXPECT_TRUE(iterator.Initialize(&attributes, true)); | 187 EXPECT_TRUE(iterator.Initialize(&attributes, true)); |
177 EXPECT_TRUE(iterator.SetText(input.c_str(), input.length())); | 188 EXPECT_TRUE(iterator.SetText(input.c_str(), input.length())); |
178 | 189 |
179 // When SpellcheckWordIterator uses an inconsistent ICU ruleset, the following | 190 // When SpellcheckWordIterator uses an inconsistent ICU ruleset, the following |
180 // iterator.GetNextWord() calls get stuck in an infinite loop. Therefore, this | 191 // iterator.GetNextWord() calls get stuck in an infinite loop. Therefore, this |
181 // test succeeds if this call returns without timeouts. | 192 // test succeeds if this call returns without timeouts. |
182 base::string16 actual_word; | 193 base::string16 actual_word; |
183 int actual_start, actual_end; | 194 int actual_start, actual_len; |
184 SpellcheckWordIterator::WordIteratorStatus status; | 195 WordIteratorStatus status = GetNextNonSkippableWord( |
185 for (status = iterator.GetNextWord(&actual_word, &actual_start, &actual_end); | 196 &iterator, &actual_word, &actual_start, &actual_len); |
186 status == SpellcheckWordIterator::IS_SKIPPABLE; | |
187 status = | |
188 iterator.GetNextWord(&actual_word, &actual_start, &actual_end)) { | |
189 continue; | |
190 } | |
191 | 197 |
192 EXPECT_EQ(SpellcheckWordIterator::WordIteratorStatus::IS_END_OF_TEXT, status); | 198 EXPECT_EQ(SpellcheckWordIterator::WordIteratorStatus::IS_END_OF_TEXT, status); |
193 EXPECT_EQ(0, actual_start); | 199 EXPECT_EQ(0, actual_start); |
194 EXPECT_EQ(0, actual_end); | 200 EXPECT_EQ(0, actual_len); |
195 } | 201 } |
196 | 202 |
197 // Verify our SpellcheckWordIterator can treat ASCII numbers as word characters | 203 // Verify our SpellcheckWordIterator can treat ASCII numbers as word characters |
198 // on LTR languages. On the other hand, it should not treat ASCII numbers as | 204 // on LTR languages. On the other hand, it should not treat ASCII numbers as |
199 // word characters on RTL languages because they change the text direction from | 205 // word characters on RTL languages because they change the text direction from |
200 // RTL to LTR. | 206 // RTL to LTR. |
201 TEST(SpellcheckWordIteratorTest, TreatNumbersAsWordCharacters) { | 207 TEST(SpellcheckWordIteratorTest, TreatNumbersAsWordCharacters) { |
202 // A set of a language, a dummy word, and a text direction used in this test. | 208 // A set of a language, a dummy word, and a text direction used in this test. |
203 // For each language, this test splits a dummy word, which consists of ASCII | 209 // For each language, this test splits a dummy word, which consists of ASCII |
204 // numbers and an alphabet of the language, into words. When ASCII numbers are | 210 // numbers and an alphabet of the language, into words. When ASCII numbers are |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
242 | 248 |
243 SpellcheckCharAttribute attributes; | 249 SpellcheckCharAttribute attributes; |
244 attributes.SetDefaultLanguage(kTestCases[i].language); | 250 attributes.SetDefaultLanguage(kTestCases[i].language); |
245 | 251 |
246 base::string16 input_word(base::WideToUTF16(kTestCases[i].text)); | 252 base::string16 input_word(base::WideToUTF16(kTestCases[i].text)); |
247 SpellcheckWordIterator iterator; | 253 SpellcheckWordIterator iterator; |
248 EXPECT_TRUE(iterator.Initialize(&attributes, true)); | 254 EXPECT_TRUE(iterator.Initialize(&attributes, true)); |
249 EXPECT_TRUE(iterator.SetText(input_word.c_str(), input_word.length())); | 255 EXPECT_TRUE(iterator.SetText(input_word.c_str(), input_word.length())); |
250 | 256 |
251 base::string16 actual_word; | 257 base::string16 actual_word; |
252 int actual_start, actual_end; | 258 int actual_start, actual_len; |
253 SpellcheckWordIterator::WordIteratorStatus status; | 259 WordIteratorStatus status = GetNextNonSkippableWord( |
254 for (status = | 260 &iterator, &actual_word, &actual_start, &actual_len); |
255 iterator.GetNextWord(&actual_word, &actual_start, &actual_end); | |
256 status == SpellcheckWordIterator::IS_SKIPPABLE; | |
257 status = | |
258 iterator.GetNextWord(&actual_word, &actual_start, &actual_end)) { | |
259 continue; | |
260 } | |
261 | 261 |
262 EXPECT_EQ(SpellcheckWordIterator::WordIteratorStatus::IS_WORD, status); | 262 EXPECT_EQ(SpellcheckWordIterator::WordIteratorStatus::IS_WORD, status); |
263 if (kTestCases[i].left_to_right) | 263 if (kTestCases[i].left_to_right) |
264 EXPECT_EQ(input_word, actual_word); | 264 EXPECT_EQ(input_word, actual_word); |
265 else | 265 else |
266 EXPECT_NE(input_word, actual_word); | 266 EXPECT_NE(input_word, actual_word); |
267 } | 267 } |
268 } | 268 } |
269 | 269 |
270 // Vertify SpellcheckWordIterator treats typographical apostrophe as a part of | 270 // Verify SpellcheckWordIterator treats typographical apostrophe as a part of |
271 // the word. | 271 // the word. |
272 TEST(SpellcheckWordIteratorTest, TypographicalApostropheIsPartOfWord) { | 272 TEST(SpellcheckWordIteratorTest, TypographicalApostropheIsPartOfWord) { |
273 static const struct { | 273 static const struct { |
274 const char* language; | 274 const char* language; |
275 const wchar_t* word; | 275 const wchar_t* input; |
276 const wchar_t* expected; | |
276 } kTestCases[] = { | 277 } kTestCases[] = { |
277 // Typewriter apostrophe: | 278 // Typewriter apostrophe: |
278 { | 279 {"en-AU", L"you're", L"you're"}, |
279 "en-AU", L"you're" | 280 {"en-CA", L"you're", L"you're"}, |
280 }, { | 281 {"en-GB", L"you're", L"you're"}, |
281 "en-CA", L"you're" | 282 {"en-US", L"you're", L"you're"}, |
282 }, { | 283 {"en-US", L"!!!!you're", L"you're"}, |
283 "en-GB", L"you're" | 284 // Typographical apostrophe: |
284 }, { | 285 {"en-AU", L"you\x2019re", L"you\x2019re"}, |
285 "en-US", L"you're" | 286 {"en-CA", L"you\x2019re", L"you\x2019re"}, |
286 }, | 287 {"en-GB", L"you\x2019re", L"you\x2019re"}, |
287 // Typographical apostrophe: | 288 {"en-US", L"you\x2019re", L"you\x2019re"}, |
288 { | 289 {"en-US", L"....you\x2019re", L"you\x2019re"}, |
289 "en-AU", L"you\x2019re" | |
290 }, { | |
291 "en-CA", L"you\x2019re" | |
292 }, { | |
293 "en-GB", L"you\x2019re" | |
294 }, { | |
295 "en-US", L"you\x2019re" | |
296 }, | |
297 }; | 290 }; |
298 | 291 |
299 for (size_t i = 0; i < arraysize(kTestCases); ++i) { | 292 for (size_t i = 0; i < arraysize(kTestCases); ++i) { |
300 SpellcheckCharAttribute attributes; | 293 SpellcheckCharAttribute attributes; |
301 attributes.SetDefaultLanguage(kTestCases[i].language); | 294 attributes.SetDefaultLanguage(kTestCases[i].language); |
302 | 295 |
303 base::string16 input_word(base::WideToUTF16(kTestCases[i].word)); | 296 base::string16 input_word(base::WideToUTF16(kTestCases[i].input)); |
297 base::string16 expected_word(base::WideToUTF16(kTestCases[i].expected)); | |
304 SpellcheckWordIterator iterator; | 298 SpellcheckWordIterator iterator; |
305 EXPECT_TRUE(iterator.Initialize(&attributes, true)); | 299 EXPECT_TRUE(iterator.Initialize(&attributes, true)); |
306 EXPECT_TRUE(iterator.SetText(input_word.c_str(), input_word.length())); | 300 EXPECT_TRUE(iterator.SetText(input_word.c_str(), input_word.length())); |
307 | 301 |
308 base::string16 actual_word; | 302 base::string16 actual_word; |
309 int actual_start, actual_end; | 303 int actual_start, actual_len; |
310 SpellcheckWordIterator::WordIteratorStatus status; | 304 WordIteratorStatus status = GetNextNonSkippableWord( |
311 for (status = | 305 &iterator, &actual_word, &actual_start, &actual_len); |
312 iterator.GetNextWord(&actual_word, &actual_start, &actual_end); | |
313 status == SpellcheckWordIterator::IS_SKIPPABLE; | |
314 iterator.GetNextWord(&actual_word, &actual_start, &actual_end)) { | |
315 continue; | |
316 } | |
317 | 306 |
318 EXPECT_EQ(SpellcheckWordIterator::WordIteratorStatus::IS_WORD, status); | 307 EXPECT_EQ(SpellcheckWordIterator::WordIteratorStatus::IS_WORD, status); |
319 EXPECT_EQ(input_word, actual_word); | 308 EXPECT_EQ(expected_word, actual_word); |
320 EXPECT_EQ(0, actual_start); | 309 EXPECT_LE(0, actual_start); |
321 EXPECT_EQ(input_word.length(), | 310 EXPECT_EQ(expected_word.length(), |
322 static_cast<base::string16::size_type>(actual_end)); | 311 static_cast<base::string16::size_type>(actual_len)); |
323 } | 312 } |
324 } | 313 } |
325 | 314 |
326 TEST(SpellcheckWordIteratorTest, Initialization) { | 315 TEST(SpellcheckWordIteratorTest, Initialization) { |
327 // Test initialization works when a default language is set. | 316 // Test initialization works when a default language is set. |
328 { | 317 { |
329 SpellcheckCharAttribute attributes; | 318 SpellcheckCharAttribute attributes; |
330 attributes.SetDefaultLanguage("en-US"); | 319 attributes.SetDefaultLanguage("en-US"); |
331 | 320 |
332 SpellcheckWordIterator iterator; | 321 SpellcheckWordIterator iterator; |
(...skipping 158 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
491 EXPECT_EQ(base::UTF8ToUTF16("."), iter.GetString()); | 480 EXPECT_EQ(base::UTF8ToUTF16("."), iter.GetString()); |
492 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD); | 481 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD); |
493 EXPECT_TRUE(iter.Advance()); | 482 EXPECT_TRUE(iter.Advance()); |
494 EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString()); | 483 EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString()); |
495 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD); | 484 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD); |
496 EXPECT_TRUE(iter.Advance()); | 485 EXPECT_TRUE(iter.Advance()); |
497 EXPECT_EQ(base::UTF8ToUTF16(","), iter.GetString()); | 486 EXPECT_EQ(base::UTF8ToUTF16(","), iter.GetString()); |
498 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD); | 487 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD); |
499 EXPECT_FALSE(iter.Advance()); | 488 EXPECT_FALSE(iter.Advance()); |
500 } | 489 } |
OLD | NEW |