Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(166)

Side by Side Diff: chrome/tools/convert_dict/aff_reader.cc

Issue 11776032: Unit test for spellchecking 96- through 102-character words (Closed) Base URL: http://git.chromium.org/chromium/src.git@master
Patch Set: Merge master Created 7 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/tools/convert_dict/aff_reader.h" 5 #include "chrome/tools/convert_dict/aff_reader.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 8
9 #include "base/file_util.h" 9 #include "base/file_util.h"
10 #include "base/i18n/icu_string_conversions.h" 10 #include "base/i18n/icu_string_conversions.h"
(...skipping 236 matching lines...) Expand 10 before | Expand all | Expand 10 after
247 std::string before_flags = part.substr(0, slash_index + 1); 247 std::string before_flags = part.substr(0, slash_index + 1);
248 248
249 // After the slash are both the flags, then whitespace, then the part 249 // After the slash are both the flags, then whitespace, then the part
250 // that tells us what to strip. 250 // that tells us what to strip.
251 std::vector<std::string> after_slash; 251 std::vector<std::string> after_slash;
252 base::SplitString(part.substr(slash_index + 1), ' ', &after_slash); 252 base::SplitString(part.substr(slash_index + 1), ' ', &after_slash);
253 if (after_slash.size() == 0) { 253 if (after_slash.size() == 0) {
254 printf("ERROR: Found 0 terms after slash in affix rule '%s', " 254 printf("ERROR: Found 0 terms after slash in affix rule '%s', "
255 "but need at least 2.\n", 255 "but need at least 2.\n",
256 part.c_str()); 256 part.c_str());
257 return; 257 exit(1);
258 } 258 }
259 if (after_slash.size() == 1) { 259 if (after_slash.size() == 1) {
260 printf("WARNING: Found 1 term after slash in affix rule '%s', " 260 printf("WARNING: Found 1 term after slash in affix rule '%s', "
261 "but expected at least 2. Adding '.'.\n", 261 "but expected at least 2. Adding '.'.\n",
262 part.c_str()); 262 part.c_str());
263 after_slash.push_back("."); 263 after_slash.push_back(".");
264 } 264 }
265 // Note that we may get a third term here which is the morphological 265 // Note that we may get a third term here which is the morphological
266 // description of this rule. This happens in the tests only, so we can 266 // description of this rule. This happens in the tests only, so we can
267 // just ignore it. 267 // just ignore it.
268 268
269 part = base::StringPrintf("%s%d %s", 269 part = base::StringPrintf("%s%d %s",
270 before_flags.c_str(), 270 before_flags.c_str(),
271 GetAFIndexForAFString(after_slash[0]), 271 GetAFIndexForAFString(after_slash[0]),
272 after_slash[1].c_str()); 272 after_slash[1].c_str());
273 } 273 }
274 274
275 // Reencode from here 275 // Reencode from here
276 std::string reencoded; 276 std::string reencoded;
277 if (!EncodingToUTF8(part, &reencoded)) { 277 if (!EncodingToUTF8(part, &reencoded)) {
278 printf("ERROR: Cannot encode affix rule part '%s' to utf8.\n", 278 printf("ERROR: Cannot encode affix rule part '%s' to utf8.\n",
279 part.c_str()); 279 part.c_str());
280 break; 280 exit(1);
281 } 281 }
282 282
283 *rule = rule->substr(0, part_start) + reencoded; 283 *rule = rule->substr(0, part_start) + reencoded;
284 break; 284 break;
285 } 285 }
286 token.clear(); 286 token.clear();
287 } else { 287 } else {
288 token.push_back((*rule)[i]); 288 token.push_back((*rule)[i]);
289 } 289 }
290 } 290 }
291 291
292 affix_rules_.push_back(*rule); 292 affix_rules_.push_back(*rule);
293 } 293 }
294 294
295 void AffReader::AddReplacement(std::string* rule) { 295 void AffReader::AddReplacement(std::string* rule) {
296 TrimLine(rule); 296 TrimLine(rule);
297 CollapseDuplicateSpaces(rule); 297 CollapseDuplicateSpaces(rule);
298 298
299 std::string utf8rule; 299 std::string utf8rule;
300 if (!EncodingToUTF8(*rule, &utf8rule)) { 300 if (!EncodingToUTF8(*rule, &utf8rule)) {
301 printf("ERROR: Cannot encode replacement rule '%s' to utf8.\n", 301 printf("ERROR: Cannot encode replacement rule '%s' to utf8.\n",
302 rule->c_str()); 302 rule->c_str());
303 return; 303 exit(1);
304 } 304 }
305 305
306 // The first space separates key and value. 306 // The first space separates key and value.
307 size_t space_index = utf8rule.find(' '); 307 size_t space_index = utf8rule.find(' ');
308 if (space_index == std::string::npos) { 308 if (space_index == std::string::npos) {
309 printf("ERROR: Did not find a space in '%s'.\n", utf8rule.c_str()); 309 printf("ERROR: Did not find a space in '%s'.\n", utf8rule.c_str());
310 return; 310 exit(1);
311 } 311 }
312 std::vector<std::string> split; 312 std::vector<std::string> split;
313 split.push_back(utf8rule.substr(0, space_index)); 313 split.push_back(utf8rule.substr(0, space_index));
314 split.push_back(utf8rule.substr(space_index + 1)); 314 split.push_back(utf8rule.substr(space_index + 1));
315 315
316 // Underscores are used to represent spaces in most aff files 316 // Underscores are used to represent spaces in most aff files
317 // (since the line is parsed on spaces). 317 // (since the line is parsed on spaces).
318 std::replace(split[0].begin(), split[0].end(), '_', ' '); 318 std::replace(split[0].begin(), split[0].end(), '_', ' ');
319 std::replace(split[1].begin(), split[1].end(), '_', ' '); 319 std::replace(split[1].begin(), split[1].end(), '_', ' ');
320 320
321 replacements_.push_back(std::make_pair(split[0], split[1])); 321 replacements_.push_back(std::make_pair(split[0], split[1]));
322 } 322 }
323 323
324 void AffReader::HandleRawCommand(const std::string& line) { 324 void AffReader::HandleRawCommand(const std::string& line) {
325 other_commands_.push_back(line); 325 other_commands_.push_back(line);
326 } 326 }
327 327
328 void AffReader::HandleEncodedCommand(const std::string& line) { 328 void AffReader::HandleEncodedCommand(const std::string& line) {
329 std::string utf8; 329 std::string utf8;
330 if (!EncodingToUTF8(line, &utf8)) { 330 if (!EncodingToUTF8(line, &utf8)) {
331 printf("ERROR: Cannot encode command '%s' to utf8.\n", line.c_str()); 331 printf("ERROR: Cannot encode command '%s' to utf8.\n", line.c_str());
332 return; 332 exit(1);
333 } 333 }
334 other_commands_.push_back(utf8); 334 other_commands_.push_back(utf8);
335 } 335 }
336 336
337 } // namespace convert_dict 337 } // namespace convert_dict
OLDNEW
« no previous file with comments | « chrome/renderer/spellchecker/spellcheck_unittest.cc ('k') | chrome/tools/convert_dict/dic_reader.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698