| OLD | NEW |
| 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/tools/convert_dict/aff_reader.h" | 5 #include "chrome/tools/convert_dict/aff_reader.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 | 8 |
| 9 #include "base/files/file_util.h" | 9 #include "base/files/file_util.h" |
| 10 #include "base/i18n/icu_string_conversions.h" | 10 #include "base/i18n/icu_string_conversions.h" |
| (...skipping 211 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 222 } | 222 } |
| 223 part = rule->substr(part_start); // From here to end. | 223 part = rule->substr(part_start); // From here to end. |
| 224 | 224 |
| 225 if (part.find('-') != std::string::npos) { | 225 if (part.find('-') != std::string::npos) { |
| 226 // This rule has a morph rule used by old Hungarian dictionaries. | 226 // This rule has a morph rule used by old Hungarian dictionaries. |
| 227 // When a line has a morph rule, its format becomes as listed below. | 227 // When a line has a morph rule, its format becomes as listed below. |
| 228 // AFX D 0 d e - M | 228 // AFX D 0 d e - M |
| 229 // To make hunspell work more happily, replace this morph rule with | 229 // To make hunspell work more happily, replace this morph rule with |
| 230 // a compound flag as listed below. | 230 // a compound flag as listed below. |
| 231 // AFX D 0 d/M e | 231 // AFX D 0 d/M e |
| 232 std::vector<std::string> tokens; | 232 std::vector<std::string> tokens = base::SplitString( |
| 233 base::SplitString(part, ' ', &tokens); | 233 part, " ", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL); |
| 234 if (tokens.size() >= 5) { | 234 if (tokens.size() >= 5) { |
| 235 part = base::StringPrintf("%s %s/%s %s", | 235 part = base::StringPrintf("%s %s/%s %s", |
| 236 tokens[0].c_str(), | 236 tokens[0].c_str(), |
| 237 tokens[1].c_str(), | 237 tokens[1].c_str(), |
| 238 tokens[4].c_str(), | 238 tokens[4].c_str(), |
| 239 tokens[2].c_str()); | 239 tokens[2].c_str()); |
| 240 } | 240 } |
| 241 } | 241 } |
| 242 | 242 |
| 243 size_t slash_index = part.find('/'); | 243 size_t slash_index = part.find('/'); |
| 244 if (slash_index != std::string::npos && !has_indexed_affixes()) { | 244 if (slash_index != std::string::npos && !has_indexed_affixes()) { |
| 245 // This can also have a rule string associated with it following a | 245 // This can also have a rule string associated with it following a |
| 246 // slash. For example: | 246 // slash. For example: |
| 247 // PFX P 0 foo/Y . | 247 // PFX P 0 foo/Y . |
| 248 // The "Y" is a flag. For example, the aff file might have a line: | 248 // The "Y" is a flag. For example, the aff file might have a line: |
| 249 // COMPOUNDFLAG Y | 249 // COMPOUNDFLAG Y |
| 250 // so that means that this prefix would be a compound one. | 250 // so that means that this prefix would be a compound one. |
| 251 // | 251 // |
| 252 // It expects these rules to use the same alias rules as the .dic | 252 // It expects these rules to use the same alias rules as the .dic |
| 253 // file. We've forced it to use aliases, which is a numerical index | 253 // file. We've forced it to use aliases, which is a numerical index |
| 254 // instead of these character flags, and this needs to be consistent. | 254 // instead of these character flags, and this needs to be consistent. |
| 255 | 255 |
| 256 std::string before_flags = part.substr(0, slash_index + 1); | 256 std::string before_flags = part.substr(0, slash_index + 1); |
| 257 | 257 |
| 258 // After the slash are both the flags, then whitespace, then the part | 258 // After the slash are both the flags, then whitespace, then the part |
| 259 // that tells us what to strip. | 259 // that tells us what to strip. |
| 260 std::vector<std::string> after_slash; | 260 std::vector<std::string> after_slash = base::SplitString( |
| 261 base::SplitString(part.substr(slash_index + 1), ' ', &after_slash); | 261 part.substr(slash_index + 1), " ", |
| 262 base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL); |
| 262 if (after_slash.size() == 0) { | 263 if (after_slash.size() == 0) { |
| 263 Panic("Found 0 terms after slash in affix rule '%s', " | 264 Panic("Found 0 terms after slash in affix rule '%s', " |
| 264 "but need at least 2.", | 265 "but need at least 2.", |
| 265 part.c_str()); | 266 part.c_str()); |
| 266 } | 267 } |
| 267 if (after_slash.size() == 1) { | 268 if (after_slash.size() == 1) { |
| 268 printf("WARNING: Found 1 term after slash in affix rule '%s', " | 269 printf("WARNING: Found 1 term after slash in affix rule '%s', " |
| 269 "but expected at least 2. Adding '.'.\n", | 270 "but expected at least 2. Adding '.'.\n", |
| 270 part.c_str()); | 271 part.c_str()); |
| 271 after_slash.push_back("."); | 272 after_slash.push_back("."); |
| (...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 327 } | 328 } |
| 328 | 329 |
| 329 void AffReader::HandleEncodedCommand(const std::string& line) { | 330 void AffReader::HandleEncodedCommand(const std::string& line) { |
| 330 std::string utf8; | 331 std::string utf8; |
| 331 if (!EncodingToUTF8(line, &utf8)) | 332 if (!EncodingToUTF8(line, &utf8)) |
| 332 Panic("Cannot encode command '%s' to utf8.", line.c_str()); | 333 Panic("Cannot encode command '%s' to utf8.", line.c_str()); |
| 333 other_commands_.push_back(utf8); | 334 other_commands_.push_back(utf8); |
| 334 } | 335 } |
| 335 | 336 |
| 336 } // namespace convert_dict | 337 } // namespace convert_dict |
| OLD | NEW |