chrome/tools/convert_dict/aff_reader.cc - Issue 11776032: Unit test for spellchecking 96- through 102-character words

Side by Side Diff: chrome/tools/convert_dict/aff_reader.cc

Issue 11776032: Unit test for spellchecking 96- through 102-character words (Closed) Base URL: http://git.chromium.org/chromium/src.git@master

Patch Set: Merge master Created 7 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "chrome/tools/convert_dict/aff_reader.h"	5 #include "chrome/tools/convert_dict/aff_reader.h"

6	6

7 #include <algorithm>	7 #include <algorithm>

8	8

9 #include "base/file_util.h"	9 #include "base/file_util.h"

10 #include "base/i18n/icu_string_conversions.h"	10 #include "base/i18n/icu_string_conversions.h"

(...skipping 236 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
247 std::string before_flags = part.substr(0, slash_index + 1);	247 std::string before_flags = part.substr(0, slash_index + 1);

248	248

249 // After the slash are both the flags, then whitespace, then the part	249 // After the slash are both the flags, then whitespace, then the part

250 // that tells us what to strip.	250 // that tells us what to strip.

251 std::vector<std::string> after_slash;	251 std::vector<std::string> after_slash;

252 base::SplitString(part.substr(slash_index + 1), ' ', &after_slash);	252 base::SplitString(part.substr(slash_index + 1), ' ', &after_slash);

253 if (after_slash.size() == 0) {	253 if (after_slash.size() == 0) {

254 printf("ERROR: Found 0 terms after slash in affix rule '%s', "	254 printf("ERROR: Found 0 terms after slash in affix rule '%s', "

255 "but need at least 2.\n",	255 "but need at least 2.\n",

256 part.c_str());	256 part.c_str());

257 return;	257 exit(1);

258 }	258 }

259 if (after_slash.size() == 1) {	259 if (after_slash.size() == 1) {

260 printf("WARNING: Found 1 term after slash in affix rule '%s', "	260 printf("WARNING: Found 1 term after slash in affix rule '%s', "

261 "but expected at least 2. Adding '.'.\n",	261 "but expected at least 2. Adding '.'.\n",

262 part.c_str());	262 part.c_str());

263 after_slash.push_back(".");	263 after_slash.push_back(".");

264 }	264 }

265 // Note that we may get a third term here which is the morphological	265 // Note that we may get a third term here which is the morphological

266 // description of this rule. This happens in the tests only, so we can	266 // description of this rule. This happens in the tests only, so we can

267 // just ignore it.	267 // just ignore it.

268	268

269 part = base::StringPrintf("%s%d %s",	269 part = base::StringPrintf("%s%d %s",

270 before_flags.c_str(),	270 before_flags.c_str(),

271 GetAFIndexForAFString(after_slash[0]),	271 GetAFIndexForAFString(after_slash[0]),

272 after_slash[1].c_str());	272 after_slash[1].c_str());

273 }	273 }

274	274

275 // Reencode from here	275 // Reencode from here

276 std::string reencoded;	276 std::string reencoded;

277 if (!EncodingToUTF8(part, &reencoded)) {	277 if (!EncodingToUTF8(part, &reencoded)) {

278 printf("ERROR: Cannot encode affix rule part '%s' to utf8.\n",	278 printf("ERROR: Cannot encode affix rule part '%s' to utf8.\n",

279 part.c_str());	279 part.c_str());

280 break;	280 exit(1);

281 }	281 }

282	282

283 *rule = rule->substr(0, part_start) + reencoded;	283 *rule = rule->substr(0, part_start) + reencoded;

284 break;	284 break;

285 }	285 }

286 token.clear();	286 token.clear();

287 } else {	287 } else {

288 token.push_back((*rule)[i]);	288 token.push_back((*rule)[i]);

289 }	289 }

290 }	290 }

291	291

292 affix_rules_.push_back(*rule);	292 affix_rules_.push_back(*rule);

293 }	293 }

294	294

295 void AffReader::AddReplacement(std::string* rule) {	295 void AffReader::AddReplacement(std::string* rule) {

296 TrimLine(rule);	296 TrimLine(rule);

297 CollapseDuplicateSpaces(rule);	297 CollapseDuplicateSpaces(rule);

298	298

299 std::string utf8rule;	299 std::string utf8rule;

300 if (!EncodingToUTF8(*rule, &utf8rule)) {	300 if (!EncodingToUTF8(*rule, &utf8rule)) {

301 printf("ERROR: Cannot encode replacement rule '%s' to utf8.\n",	301 printf("ERROR: Cannot encode replacement rule '%s' to utf8.\n",

302 rule->c_str());	302 rule->c_str());

303 return;	303 exit(1);

304 }	304 }

305	305

306 // The first space separates key and value.	306 // The first space separates key and value.

307 size_t space_index = utf8rule.find(' ');	307 size_t space_index = utf8rule.find(' ');

308 if (space_index == std::string::npos) {	308 if (space_index == std::string::npos) {

309 printf("ERROR: Did not find a space in '%s'.\n", utf8rule.c_str());	309 printf("ERROR: Did not find a space in '%s'.\n", utf8rule.c_str());

310 return;	310 exit(1);

311 }	311 }

312 std::vector<std::string> split;	312 std::vector<std::string> split;

313 split.push_back(utf8rule.substr(0, space_index));	313 split.push_back(utf8rule.substr(0, space_index));

314 split.push_back(utf8rule.substr(space_index + 1));	314 split.push_back(utf8rule.substr(space_index + 1));

315	315

316 // Underscores are used to represent spaces in most aff files	316 // Underscores are used to represent spaces in most aff files

317 // (since the line is parsed on spaces).	317 // (since the line is parsed on spaces).

318 std::replace(split[0].begin(), split[0].end(), '_', ' ');	318 std::replace(split[0].begin(), split[0].end(), '_', ' ');

319 std::replace(split[1].begin(), split[1].end(), '_', ' ');	319 std::replace(split[1].begin(), split[1].end(), '_', ' ');

320	320

321 replacements_.push_back(std::make_pair(split[0], split[1]));	321 replacements_.push_back(std::make_pair(split[0], split[1]));

322 }	322 }

323	323

324 void AffReader::HandleRawCommand(const std::string& line) {	324 void AffReader::HandleRawCommand(const std::string& line) {

325 other_commands_.push_back(line);	325 other_commands_.push_back(line);

326 }	326 }

327	327

328 void AffReader::HandleEncodedCommand(const std::string& line) {	328 void AffReader::HandleEncodedCommand(const std::string& line) {

329 std::string utf8;	329 std::string utf8;

330 if (!EncodingToUTF8(line, &utf8)) {	330 if (!EncodingToUTF8(line, &utf8)) {

331 printf("ERROR: Cannot encode command '%s' to utf8.\n", line.c_str());	331 printf("ERROR: Cannot encode command '%s' to utf8.\n", line.c_str());

332 return;	332 exit(1);

333 }	333 }

334 other_commands_.push_back(utf8);	334 other_commands_.push_back(utf8);

335 }	335 }

336	336

337 } // namespace convert_dict	337 } // namespace convert_dict

OLD	NEW

« no previous file with comments | « chrome/renderer/spellchecker/spellcheck_unittest.cc ('k') | chrome/tools/convert_dict/dic_reader.cc » ('j') | no next file with comments »