OLD | NEW |
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/tools/convert_dict/aff_reader.h" | 5 #include "chrome/tools/convert_dict/aff_reader.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 | 8 |
9 #include "base/file_util.h" | 9 #include "base/file_util.h" |
10 #include "base/i18n/icu_string_conversions.h" | 10 #include "base/i18n/icu_string_conversions.h" |
(...skipping 236 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
247 std::string before_flags = part.substr(0, slash_index + 1); | 247 std::string before_flags = part.substr(0, slash_index + 1); |
248 | 248 |
249 // After the slash are both the flags, then whitespace, then the part | 249 // After the slash are both the flags, then whitespace, then the part |
250 // that tells us what to strip. | 250 // that tells us what to strip. |
251 std::vector<std::string> after_slash; | 251 std::vector<std::string> after_slash; |
252 base::SplitString(part.substr(slash_index + 1), ' ', &after_slash); | 252 base::SplitString(part.substr(slash_index + 1), ' ', &after_slash); |
253 if (after_slash.size() == 0) { | 253 if (after_slash.size() == 0) { |
254 printf("ERROR: Found 0 terms after slash in affix rule '%s', " | 254 printf("ERROR: Found 0 terms after slash in affix rule '%s', " |
255 "but need at least 2.\n", | 255 "but need at least 2.\n", |
256 part.c_str()); | 256 part.c_str()); |
257 return; | 257 exit(1); |
258 } | 258 } |
259 if (after_slash.size() == 1) { | 259 if (after_slash.size() == 1) { |
260 printf("WARNING: Found 1 term after slash in affix rule '%s', " | 260 printf("WARNING: Found 1 term after slash in affix rule '%s', " |
261 "but expected at least 2. Adding '.'.\n", | 261 "but expected at least 2. Adding '.'.\n", |
262 part.c_str()); | 262 part.c_str()); |
263 after_slash.push_back("."); | 263 after_slash.push_back("."); |
264 } | 264 } |
265 // Note that we may get a third term here which is the morphological | 265 // Note that we may get a third term here which is the morphological |
266 // description of this rule. This happens in the tests only, so we can | 266 // description of this rule. This happens in the tests only, so we can |
267 // just ignore it. | 267 // just ignore it. |
268 | 268 |
269 part = base::StringPrintf("%s%d %s", | 269 part = base::StringPrintf("%s%d %s", |
270 before_flags.c_str(), | 270 before_flags.c_str(), |
271 GetAFIndexForAFString(after_slash[0]), | 271 GetAFIndexForAFString(after_slash[0]), |
272 after_slash[1].c_str()); | 272 after_slash[1].c_str()); |
273 } | 273 } |
274 | 274 |
275 // Reencode from here | 275 // Reencode from here |
276 std::string reencoded; | 276 std::string reencoded; |
277 if (!EncodingToUTF8(part, &reencoded)) { | 277 if (!EncodingToUTF8(part, &reencoded)) { |
278 printf("ERROR: Cannot encode affix rule part '%s' to utf8.\n", | 278 printf("ERROR: Cannot encode affix rule part '%s' to utf8.\n", |
279 part.c_str()); | 279 part.c_str()); |
280 break; | 280 exit(1); |
281 } | 281 } |
282 | 282 |
283 *rule = rule->substr(0, part_start) + reencoded; | 283 *rule = rule->substr(0, part_start) + reencoded; |
284 break; | 284 break; |
285 } | 285 } |
286 token.clear(); | 286 token.clear(); |
287 } else { | 287 } else { |
288 token.push_back((*rule)[i]); | 288 token.push_back((*rule)[i]); |
289 } | 289 } |
290 } | 290 } |
291 | 291 |
292 affix_rules_.push_back(*rule); | 292 affix_rules_.push_back(*rule); |
293 } | 293 } |
294 | 294 |
295 void AffReader::AddReplacement(std::string* rule) { | 295 void AffReader::AddReplacement(std::string* rule) { |
296 TrimLine(rule); | 296 TrimLine(rule); |
297 CollapseDuplicateSpaces(rule); | 297 CollapseDuplicateSpaces(rule); |
298 | 298 |
299 std::string utf8rule; | 299 std::string utf8rule; |
300 if (!EncodingToUTF8(*rule, &utf8rule)) { | 300 if (!EncodingToUTF8(*rule, &utf8rule)) { |
301 printf("ERROR: Cannot encode replacement rule '%s' to utf8.\n", | 301 printf("ERROR: Cannot encode replacement rule '%s' to utf8.\n", |
302 rule->c_str()); | 302 rule->c_str()); |
303 return; | 303 exit(1); |
304 } | 304 } |
305 | 305 |
306 // The first space separates key and value. | 306 // The first space separates key and value. |
307 size_t space_index = utf8rule.find(' '); | 307 size_t space_index = utf8rule.find(' '); |
308 if (space_index == std::string::npos) { | 308 if (space_index == std::string::npos) { |
309 printf("ERROR: Did not find a space in '%s'.\n", utf8rule.c_str()); | 309 printf("ERROR: Did not find a space in '%s'.\n", utf8rule.c_str()); |
310 return; | 310 exit(1); |
311 } | 311 } |
312 std::vector<std::string> split; | 312 std::vector<std::string> split; |
313 split.push_back(utf8rule.substr(0, space_index)); | 313 split.push_back(utf8rule.substr(0, space_index)); |
314 split.push_back(utf8rule.substr(space_index + 1)); | 314 split.push_back(utf8rule.substr(space_index + 1)); |
315 | 315 |
316 // Underscores are used to represent spaces in most aff files | 316 // Underscores are used to represent spaces in most aff files |
317 // (since the line is parsed on spaces). | 317 // (since the line is parsed on spaces). |
318 std::replace(split[0].begin(), split[0].end(), '_', ' '); | 318 std::replace(split[0].begin(), split[0].end(), '_', ' '); |
319 std::replace(split[1].begin(), split[1].end(), '_', ' '); | 319 std::replace(split[1].begin(), split[1].end(), '_', ' '); |
320 | 320 |
321 replacements_.push_back(std::make_pair(split[0], split[1])); | 321 replacements_.push_back(std::make_pair(split[0], split[1])); |
322 } | 322 } |
323 | 323 |
324 void AffReader::HandleRawCommand(const std::string& line) { | 324 void AffReader::HandleRawCommand(const std::string& line) { |
325 other_commands_.push_back(line); | 325 other_commands_.push_back(line); |
326 } | 326 } |
327 | 327 |
328 void AffReader::HandleEncodedCommand(const std::string& line) { | 328 void AffReader::HandleEncodedCommand(const std::string& line) { |
329 std::string utf8; | 329 std::string utf8; |
330 if (!EncodingToUTF8(line, &utf8)) { | 330 if (!EncodingToUTF8(line, &utf8)) { |
331 printf("ERROR: Cannot encode command '%s' to utf8.\n", line.c_str()); | 331 printf("ERROR: Cannot encode command '%s' to utf8.\n", line.c_str()); |
332 return; | 332 exit(1); |
333 } | 333 } |
334 other_commands_.push_back(utf8); | 334 other_commands_.push_back(utf8); |
335 } | 335 } |
336 | 336 |
337 } // namespace convert_dict | 337 } // namespace convert_dict |
OLD | NEW |