| OLD | NEW |
| 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/tools/convert_dict/aff_reader.h" | 5 #include "chrome/tools/convert_dict/aff_reader.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 | 8 |
| 9 #include "base/file_util.h" | 9 #include "base/file_util.h" |
| 10 #include "base/i18n/icu_string_conversions.h" | 10 #include "base/i18n/icu_string_conversions.h" |
| (...skipping 193 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 204 size_t part_start = i; | 204 size_t part_start = i; |
| 205 std::string part; | 205 std::string part; |
| 206 if (token[0] != 'Y' && token[0] != 'N') { | 206 if (token[0] != 'Y' && token[0] != 'N') { |
| 207 // This token represents a stripping prefix or suffix, which is | 207 // This token represents a stripping prefix or suffix, which is |
| 208 // either a length or a string to be replaced. | 208 // either a length or a string to be replaced. |
| 209 // We also reencode them to UTF-8. | 209 // We also reencode them to UTF-8. |
| 210 part_start = i - token.length(); | 210 part_start = i - token.length(); |
| 211 } | 211 } |
| 212 part = rule->substr(part_start); // From here to end. | 212 part = rule->substr(part_start); // From here to end. |
| 213 | 213 |
| 214 if (part.find('-') != std::string::npos) { |
| 215 // This rule has a morph rule used by old Hungarian dictionaries. |
| 216 // When a line has a morph rule, its format becomes as listed below. |
| 217 // AFX D 0 d e - M |
| 218 // To make hunspell work more happily, replace this morph rule with |
| 219 // a compound flag as listed below. |
| 220 // AFX D 0 d/M e |
| 221 std::vector<std::string> tokens; |
| 222 SplitString(part, ' ', &tokens); |
| 223 if (tokens.size() >= 5) { |
| 224 part = StringPrintf("%s %s/%s %s", |
| 225 tokens[0].c_str(), |
| 226 tokens[1].c_str(), tokens[4].c_str(), |
| 227 tokens[2].c_str()); |
| 228 } |
| 229 } |
| 230 |
| 214 size_t slash_index = part.find('/'); | 231 size_t slash_index = part.find('/'); |
| 215 if (slash_index != std::string::npos && !has_indexed_affixes()) { | 232 if (slash_index != std::string::npos && !has_indexed_affixes()) { |
| 216 // This can also have a rule string associated with it following a | 233 // This can also have a rule string associated with it following a |
| 217 // slash. For example: | 234 // slash. For example: |
| 218 // PFX P 0 foo/Y . | 235 // PFX P 0 foo/Y . |
| 219 // The "Y" is a flag. For example, the aff file might have a line: | 236 // The "Y" is a flag. For example, the aff file might have a line: |
| 220 // COMPOUNDFLAG Y | 237 // COMPOUNDFLAG Y |
| 221 // so that means that this prefix would be a compound one. | 238 // so that means that this prefix would be a compound one. |
| 222 // | 239 // |
| 223 // It expects these rules to use the same alias rules as the .dic | 240 // It expects these rules to use the same alias rules as the .dic |
| (...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 286 other_commands_.push_back(line); | 303 other_commands_.push_back(line); |
| 287 } | 304 } |
| 288 | 305 |
| 289 void AffReader::HandleEncodedCommand(const std::string& line) { | 306 void AffReader::HandleEncodedCommand(const std::string& line) { |
| 290 std::string utf8; | 307 std::string utf8; |
| 291 if (EncodingToUTF8(line, &utf8)) | 308 if (EncodingToUTF8(line, &utf8)) |
| 292 other_commands_.push_back(utf8); | 309 other_commands_.push_back(utf8); |
| 293 } | 310 } |
| 294 | 311 |
| 295 } // namespace convert_dict | 312 } // namespace convert_dict |
| OLD | NEW |