chrome/tools/convert_dict/aff_reader.cc - Issue 11776032: Unit test for spellchecking 96- through 102-character words

Unified Diff: chrome/tools/convert_dict/aff_reader.cc

Issue 11776032: Unit test for spellchecking 96- through 102-character words (Closed) Base URL: http://git.chromium.org/chromium/src.git@master

Patch Set: Created 7 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« chrome/renderer/spellchecker/spellcheck_unittest.cc ('K') | « chrome/renderer/spellchecker/spellcheck_unittest.cc ('k') | chrome/tools/convert_dict/dic_reader.cc » ('j') | chrome/tools/convert_dict/dic_reader.cc » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: chrome/tools/convert_dict/aff_reader.cc

diff --git a/chrome/tools/convert_dict/aff_reader.cc b/chrome/tools/convert_dict/aff_reader.cc

index b3b0381b98e00366d8f30ed4116565268c42e52b..b24a0d87a4e79885fa2054ad6b752937d84f0f0d 100644

--- a/chrome/tools/convert_dict/aff_reader.cc

+++ b/chrome/tools/convert_dict/aff_reader.cc

@@ -122,7 +122,7 @@ bool AffReader::Read() {

exit(1);

} else if (StringBeginsWith(line, "COMPLEXPREFIXES ")) {

printf("We don't support the COMPLEXPREFIXES command yet. This would "

- "mean we have to insert words backwords as well (I think)\n");

+ "mean we have to insert words backwards as well (I think)\n");

exit(1);

} else {

// All other commands get stored in the other commands list.

@@ -241,7 +241,7 @@ void AffReader::AddAffix(std::string* rule) {

// so that means that this prefix would be a compound one.

// It expects these rules to use the same alias rules as the .dic

- // file. We've forced it to use aliases, which is a numberical index

+ // file. We've forced it to use aliases, which is a numerical index

// instead of these character flags, and this needs to be consistent.

std::string before_flags = part.substr(0, slash_index + 1);

@@ -250,13 +250,21 @@ void AffReader::AddAffix(std::string* rule) {

// that tells us what to strip.

std::vector<std::string> after_slash;

base::SplitString(part.substr(slash_index + 1), ' ', &after_slash);

- if (after_slash.size() < 2) {

- // Note that we may get a third term here which is the

- // morphological description of this rule. This happens in the tests

- // only, so we can just ignore it.

- printf("ERROR: Didn't get enough after the slash\n");

+ if (after_slash.size() == 0) {

+ printf("ERROR: Found 0 terms after slash in affix rule '%s', "

+ "but need at least 2.\n",

+ part.c_str());

return;

}

+ if (after_slash.size() == 1) {

+ printf("WARNING: Found 1 term after slash in affix rule '%s', "

+ "but expected at least 2. Adding '.'.\n",

+ part.c_str());

+ after_slash.push_back(".");

+ }

+ // Note that we may get a third term here which is the morphological

+ // description of this rule. This happens in the tests only, so we can

+ // just ignore it.

part = base::StringPrintf("%s%d %s",

before_flags.c_str(),

@@ -266,8 +274,11 @@ void AffReader::AddAffix(std::string* rule) {

// Reencode from here

std::string reencoded;

- if (!EncodingToUTF8(part, &reencoded))

+ if (!EncodingToUTF8(part, &reencoded)) {

+ printf("ERROR: Cannot encode affix rule part '%s' to utf8.\n",

+ part.c_str());

break;

+ }

*rule = rule->substr(0, part_start) + reencoded;

break;

@@ -283,19 +294,26 @@ void AffReader::AddAffix(std::string* rule) {

void AffReader::AddReplacement(std::string* rule) {

TrimLine(rule);

+ CollapseDuplicateSpaces(rule);

std::string utf8rule;

- if (!EncodingToUTF8(*rule, &utf8rule))

+ if (!EncodingToUTF8(*rule, &utf8rule)) {

+ printf("ERROR: Cannot encode replacement rule '%s' to utf8.\n",

groby-ooo-7-16 2013/01/10 01:35:46 I wonder if all printf("ERROR...") statements should…

please use gerrit instead 2013/01/10 19:07:49 Added exit(1) after every printf("ERROR:...").

+ rule->c_str());

return;

+ }

- std::vector<std::string> split;

- base::SplitString(utf8rule, ' ', &split);

- // There should be two parts.

- if (split.size() != 2)

+ // The first space separates key and value.

+ size_t space_index = utf8rule.find(' ');

+ if (space_index == std::string::npos) {

+ printf("ERROR: Did not find a space in '%s'.\n", utf8rule.c_str());

return;

+ }

+ std::vector<std::string> split;

+ split.push_back(utf8rule.substr(0, space_index));

+ split.push_back(utf8rule.substr(space_index + 1));

- // Underscores are used to represent spaces

+ // Underscores are used to represent spaces in most aff files

// (since the line is parsed on spaces).

std::replace(split[0].begin(), split[0].end(), '_', ' ');

std::replace(split[1].begin(), split[1].end(), '_', ' ');

@@ -309,8 +327,11 @@ void AffReader::HandleRawCommand(const std::string& line) {

void AffReader::HandleEncodedCommand(const std::string& line) {

std::string utf8;

- if (EncodingToUTF8(line, &utf8))

- other_commands_.push_back(utf8);

+ if (!EncodingToUTF8(line, &utf8)) {

+ printf("ERROR: Cannot encode command '%s' to utf8.\n", line.c_str());

+ return;

+ }

+ other_commands_.push_back(utf8);

}

} // namespace convert_dict