chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc - Issue 1269343005: Updates SpellcheckWordIterator::GetNextWord to return an enum.

Side by Side Diff: chrome/renderer/spellchecker/spellcheck_worditerator_unittest.cc

Issue 1269343005: Updates SpellcheckWordIterator::GetNextWord to return an enum. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@break-iter

Patch Set: Addressed comments. Created 5 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include <string>	5 #include <string>

6 #include <vector>	6 #include <vector>

7	7

8 #include "base/format_macros.h"	8 #include "base/format_macros.h"

9 #include "base/i18n/break_iterator.h"	9 #include "base/i18n/break_iterator.h"

10 #include "base/strings/string_split.h"	10 #include "base/strings/string_split.h"

(...skipping 13 matching lines...) Expand all Loading...
24 };	24 };

25	25

26 base::string16 GetRulesForLanguage(const std::string& language) {	26 base::string16 GetRulesForLanguage(const std::string& language) {

27 SpellcheckCharAttribute attribute;	27 SpellcheckCharAttribute attribute;

28 attribute.SetDefaultLanguage(language);	28 attribute.SetDefaultLanguage(language);

29 return attribute.GetRuleSet(true);	29 return attribute.GetRuleSet(true);

30 }	30 }

31	31

32 } // namespace	32 } // namespace

33	33

34 // Tests whether or not our SpellcheckWordIterator can extract only words used	34 // Tests whether or not our SpellcheckWordIterator can extract words used by the

35 // by the specified language from a multi-language text.	35 // specified language from a multi-language text.

36 TEST(SpellcheckWordIteratorTest, SplitWord) {	36 TEST(SpellcheckWordIteratorTest, SplitWord) {

37 // An input text. This text includes words of several languages. (Some words	37 // An input text. This text includes words of several languages. (Some words

38 // are not separated with whitespace characters.) Our SpellcheckWordIterator	38 // are not separated with whitespace characters.) Our SpellcheckWordIterator

39 // should extract only the words used by the specified language from this text	39 // should extract the words used by the specified language from this text and

40 // and normalize them so our spell-checker can check their spellings.	40 // normalize them so our spell-checker can check their spellings. If

	41 // characters are found that are not from the specified language we skip them.
	please use gerrit instead 2015/08/13 00:13:28 "we" can be confusing in code. Specify the subject. I think it's "the test" that skips the characters. Julius 2015/08/13 01:32:04 On 2015/08/13 00:13:28, Rouslan wrote: > "we" can be confusing in code. Specify the subject. I think it's "the test" that skips the characters. Done.
41 const wchar_t kTestText[] =	42 const wchar_t kTestText[] =

42 // Graphic characters	43 // Graphic characters

43 L"!@#$%^&*()"	44 L"!@#$%^&*()"

44 // Latin (including a contraction character and a ligature).	45 // Latin (including a contraction character and a ligature).

45 L"hello:hello a\xFB03x"	46 L"hello:hello a\xFB03x"

46 // Greek	47 // Greek

47 L"\x03B3\x03B5\x03B9\x03AC\x0020\x03C3\x03BF\x03C5"	48 L"\x03B3\x03B5\x03B9\x03AC\x0020\x03C3\x03BF\x03C5"

48 // Cyrillic	49 // Cyrillic

49 L"\x0437\x0434\x0440\x0430\x0432\x0441\x0442\x0432"	50 L"\x0437\x0434\x0440\x0430\x0432\x0441\x0442\x0432"

50 L"\x0443\x0439\x0442\x0435"	51 L"\x0443\x0439\x0442\x0435"

(...skipping 83 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
134 kTestCases[i].allow_contraction));	135 kTestCases[i].allow_contraction));

135 EXPECT_TRUE(iterator.SetText(input.c_str(), input.length()));	136 EXPECT_TRUE(iterator.SetText(input.c_str(), input.length()));

136	137

137 std::vector<base::string16> expected_words = base::SplitString(	138 std::vector<base::string16> expected_words = base::SplitString(

138 base::WideToUTF16(kTestCases[i].expected_words),	139 base::WideToUTF16(kTestCases[i].expected_words),

139 base::string16(1, ' '), base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);	140 base::string16(1, ' '), base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);

140	141

141 base::string16 actual_word;	142 base::string16 actual_word;

142 int actual_start, actual_end;	143 int actual_start, actual_end;

143 size_t index = 0;	144 size_t index = 0;

144 while (iterator.GetNextWord(&actual_word, &actual_start, &actual_end)) {	145 for (SpellcheckWordIterator::WordIteratorStatus status =

	146 iterator.GetNextWord(&actual_word, &actual_start, &actual_end);

	147 status != SpellcheckWordIterator::IS_END_OF_TEXT;

	148 status =

	149 iterator.GetNextWord(&actual_word, &actual_start, &actual_end)) {

	150 if (status == SpellcheckWordIterator::WordIteratorStatus::IS_SKIPPABLE)

	151 continue;

	152

145 EXPECT_TRUE(index < expected_words.size());	153 EXPECT_TRUE(index < expected_words.size());

146 if (index < expected_words.size())	154 if (index < expected_words.size())

147 EXPECT_EQ(expected_words[index], actual_word);	155 EXPECT_EQ(expected_words[index], actual_word);

148 ++index;	156 ++index;

149 }	157 }

150 }	158 }

151 }	159 }

152	160

153 // Tests whether our SpellcheckWordIterator extracts an empty word without	161 // Tests whether our SpellcheckWordIterator extracts an empty word without

154 // getting stuck in an infinite loop when inputting a Khmer text. (This is a	162 // getting stuck in an infinite loop when inputting a Khmer text. (This is a

155 // regression test for Issue 46278.)	163 // regression test for Issue 46278.)

156 TEST(SpellcheckWordIteratorTest, RuleSetConsistency) {	164 TEST(SpellcheckWordIteratorTest, RuleSetConsistency) {

157 SpellcheckCharAttribute attributes;	165 SpellcheckCharAttribute attributes;

158 attributes.SetDefaultLanguage("en-US");	166 attributes.SetDefaultLanguage("en-US");

159	167

160 const wchar_t kTestText[] = L"\x1791\x17c1\x002e";	168 const wchar_t kTestText[] = L"\x1791\x17c1\x002e";

161 base::string16 input(base::WideToUTF16(kTestText));	169 base::string16 input(base::WideToUTF16(kTestText));

162	170

163 SpellcheckWordIterator iterator;	171 SpellcheckWordIterator iterator;

164 EXPECT_TRUE(iterator.Initialize(&attributes, true));	172 EXPECT_TRUE(iterator.Initialize(&attributes, true));

165 EXPECT_TRUE(iterator.SetText(input.c_str(), input.length()));	173 EXPECT_TRUE(iterator.SetText(input.c_str(), input.length()));

166	174

167 // When SpellcheckWordIterator uses an inconsistent ICU ruleset, the following	175 // When SpellcheckWordIterator uses an inconsistent ICU ruleset, the following

168 // iterator.GetNextWord() call gets stuck in an infinite loop. Therefore, this	176 // iterator.GetNextWord() calls get stuck in an infinite loop. Therefore, this

169 // test succeeds if this call returns without timeouts.	177 // test succeeds if these calls return without timeouts.

170 base::string16 actual_word;	178 base::string16 actual_word;

171 int actual_start, actual_end;	179 int actual_start, actual_end;

172 EXPECT_FALSE(iterator.GetNextWord(&actual_word, &actual_start, &actual_end));	180 SpellcheckWordIterator::WordIteratorStatus status;

	181 for (status = iterator.GetNextWord(&actual_word, &actual_start, &actual_end);

	182 status == SpellcheckWordIterator::IS_SKIPPABLE;

	183 status =

	184 iterator.GetNextWord(&actual_word, &actual_start, &actual_end)) {

	185 continue;

	186 }

	187

	188 EXPECT_EQ(SpellcheckWordIterator::WordIteratorStatus::IS_END_OF_TEXT, status);

173 EXPECT_EQ(0, actual_start);	189 EXPECT_EQ(0, actual_start);

174 EXPECT_EQ(0, actual_end);	190 EXPECT_EQ(0, actual_end);

175 }	191 }

176	192

177 // Verify our SpellcheckWordIterator can treat ASCII numbers as word characters	193 // Verify our SpellcheckWordIterator can treat ASCII numbers as word characters

178 // on LTR languages. On the other hand, it should not treat ASCII numbers as	194 // on LTR languages. On the other hand, it should not treat ASCII numbers as

179 // word characters on RTL languages because they change the text direction from	195 // word characters on RTL languages because they change the text direction from

180 // RTL to LTR.	196 // RTL to LTR.

181 TEST(SpellcheckWordIteratorTest, TreatNumbersAsWordCharacters) {	197 TEST(SpellcheckWordIteratorTest, TreatNumbersAsWordCharacters) {

182 // A set of a language, a dummy word, and a text direction used in this test.	198 // A set of a language, a dummy word, and a text direction used in this test.

(...skipping 40 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
223 SpellcheckCharAttribute attributes;	239 SpellcheckCharAttribute attributes;

224 attributes.SetDefaultLanguage(kTestCases[i].language);	240 attributes.SetDefaultLanguage(kTestCases[i].language);

225	241

226 base::string16 input_word(base::WideToUTF16(kTestCases[i].text));	242 base::string16 input_word(base::WideToUTF16(kTestCases[i].text));

227 SpellcheckWordIterator iterator;	243 SpellcheckWordIterator iterator;

228 EXPECT_TRUE(iterator.Initialize(&attributes, true));	244 EXPECT_TRUE(iterator.Initialize(&attributes, true));

229 EXPECT_TRUE(iterator.SetText(input_word.c_str(), input_word.length()));	245 EXPECT_TRUE(iterator.SetText(input_word.c_str(), input_word.length()));

230	246

231 base::string16 actual_word;	247 base::string16 actual_word;

232 int actual_start, actual_end;	248 int actual_start, actual_end;

233 EXPECT_TRUE(iterator.GetNextWord(&actual_word, &actual_start, &actual_end));	249 SpellcheckWordIterator::WordIteratorStatus status;

	250 for (status =

	251 iterator.GetNextWord(&actual_word, &actual_start, &actual_end);

	252 status == SpellcheckWordIterator::IS_SKIPPABLE;

	253 status =

	254 iterator.GetNextWord(&actual_word, &actual_start, &actual_end)) {

	255 continue;

	256 }

	257

	258 EXPECT_EQ(SpellcheckWordIterator::WordIteratorStatus::IS_WORD, status);

234 if (kTestCases[i].left_to_right)	259 if (kTestCases[i].left_to_right)

235 EXPECT_EQ(input_word, actual_word);	260 EXPECT_EQ(input_word, actual_word);

236 else	261 else

237 EXPECT_NE(input_word, actual_word);	262 EXPECT_NE(input_word, actual_word);

238 }	263 }

239 }	264 }

240	265

241 // Verify SpellcheckWordIterator treats typographical apostrophe as a part of	266 // Verify SpellcheckWordIterator treats typographical apostrophe as a part of

242 // the word.	267 // the word.

243 TEST(SpellcheckWordIteratorTest, TypographicalApostropheIsPartOfWord) {	268 TEST(SpellcheckWordIteratorTest, TypographicalApostropheIsPartOfWord) {

(...skipping 27 matching lines...) Expand all Loading...
271 SpellcheckCharAttribute attributes;	296 SpellcheckCharAttribute attributes;

272 attributes.SetDefaultLanguage(kTestCases[i].language);	297 attributes.SetDefaultLanguage(kTestCases[i].language);

273	298

274 base::string16 input_word(base::WideToUTF16(kTestCases[i].word));	299 base::string16 input_word(base::WideToUTF16(kTestCases[i].word));

275 SpellcheckWordIterator iterator;	300 SpellcheckWordIterator iterator;

276 EXPECT_TRUE(iterator.Initialize(&attributes, true));	301 EXPECT_TRUE(iterator.Initialize(&attributes, true));

277 EXPECT_TRUE(iterator.SetText(input_word.c_str(), input_word.length()));	302 EXPECT_TRUE(iterator.SetText(input_word.c_str(), input_word.length()));

278	303

279 base::string16 actual_word;	304 base::string16 actual_word;

280 int actual_start, actual_end;	305 int actual_start, actual_end;

281 EXPECT_TRUE(iterator.GetNextWord(&actual_word, &actual_start, &actual_end));	306 SpellcheckWordIterator::WordIteratorStatus status;

	307 for (status =

	308 iterator.GetNextWord(&actual_word, &actual_start, &actual_end);

	309 status == SpellcheckWordIterator::IS_SKIPPABLE;

	310 status = iterator.GetNextWord(&actual_word, &actual_start, &actual_end)) {

	311 continue;

	312 }

	313

	314 EXPECT_EQ(SpellcheckWordIterator::WordIteratorStatus::IS_WORD, status);

282 EXPECT_EQ(input_word, actual_word);	315 EXPECT_EQ(input_word, actual_word);

283 EXPECT_EQ(0, actual_start);	316 EXPECT_EQ(0, actual_start);

284 EXPECT_EQ(input_word.length(),	317 EXPECT_EQ(input_word.length(),

285 static_cast<base::string16::size_type>(actual_end));	318 static_cast<base::string16::size_type>(actual_end));

286 }	319 }

287 }	320 }

288	321

289 TEST(SpellcheckWordIteratorTest, Initialization) {	322 TEST(SpellcheckWordIteratorTest, Initialization) {

290 // Test initialization works when a default language is set.	323 // Test initialization works when a default language is set.

291 {	324 {

(...skipping 167 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
459 EXPECT_EQ(base::UTF8ToUTF16("."), iter.GetString());	492 EXPECT_EQ(base::UTF8ToUTF16("."), iter.GetString());

460 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);	493 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);

461 EXPECT_TRUE(iter.Advance());	494 EXPECT_TRUE(iter.Advance());

462 EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());	495 EXPECT_EQ(base::UTF8ToUTF16(" "), iter.GetString());

463 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);	496 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);

464 EXPECT_TRUE(iter.Advance());	497 EXPECT_TRUE(iter.Advance());

465 EXPECT_EQ(base::UTF8ToUTF16(","), iter.GetString());	498 EXPECT_EQ(base::UTF8ToUTF16(","), iter.GetString());

466 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);	499 EXPECT_EQ(iter.GetWordBreakStatus(), BreakIterator::IS_SKIPPABLE_WORD);

467 EXPECT_FALSE(iter.Advance());	500 EXPECT_FALSE(iter.Advance());

468 }	501 }

OLD	NEW

« chrome/renderer/spellchecker/spellcheck_worditerator.cc ('K') | « chrome/renderer/spellchecker/spellcheck_worditerator.cc ('k') | no next file » | no next file with comments »