OLD | NEW |
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/tools/convert_dict/aff_reader.h" | 5 #include "chrome/tools/convert_dict/aff_reader.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 | 8 |
9 #include "base/file_util.h" | 9 #include "base/file_util.h" |
10 #include "base/i18n/icu_string_conversions.h" | 10 #include "base/i18n/icu_string_conversions.h" |
(...skipping 193 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
204 size_t part_start = i; | 204 size_t part_start = i; |
205 std::string part; | 205 std::string part; |
206 if (token[0] != 'Y' && token[0] != 'N') { | 206 if (token[0] != 'Y' && token[0] != 'N') { |
207 // This token represents a stripping prefix or suffix, which is | 207 // This token represents a stripping prefix or suffix, which is |
208 // either a length or a string to be replaced. | 208 // either a length or a string to be replaced. |
209 // We also reencode them to UTF-8. | 209 // We also reencode them to UTF-8. |
210 part_start = i - token.length(); | 210 part_start = i - token.length(); |
211 } | 211 } |
212 part = rule->substr(part_start); // From here to end. | 212 part = rule->substr(part_start); // From here to end. |
213 | 213 |
| 214 if (part.find('-') != std::string::npos) { |
| 215 // This rule has a morph rule used by old Hungarian dictionaries. |
| 216 // When a line has a morph rule, its format becomes as listed below. |
| 217 // AFX D 0 d e - M |
| 218 // To make hunspell work more happily, replace this morph rule with |
| 219 // a compound flag as listed below. |
| 220 // AFX D 0 d/M e |
| 221 std::vector<std::string> tokens; |
| 222 SplitString(part, ' ', &tokens); |
| 223 if (tokens.size() >= 5) { |
| 224 part = StringPrintf("%s %s/%s %s", |
| 225 tokens[0].c_str(), |
| 226 tokens[1].c_str(), tokens[4].c_str(), |
| 227 tokens[2].c_str()); |
| 228 } |
| 229 } |
| 230 |
214 size_t slash_index = part.find('/'); | 231 size_t slash_index = part.find('/'); |
215 if (slash_index != std::string::npos && !has_indexed_affixes()) { | 232 if (slash_index != std::string::npos && !has_indexed_affixes()) { |
216 // This can also have a rule string associated with it following a | 233 // This can also have a rule string associated with it following a |
217 // slash. For example: | 234 // slash. For example: |
218 // PFX P 0 foo/Y . | 235 // PFX P 0 foo/Y . |
219 // The "Y" is a flag. For example, the aff file might have a line: | 236 // The "Y" is a flag. For example, the aff file might have a line: |
220 // COMPOUNDFLAG Y | 237 // COMPOUNDFLAG Y |
221 // so that means that this prefix would be a compound one. | 238 // so that means that this prefix would be a compound one. |
222 // | 239 // |
223 // It expects these rules to use the same alias rules as the .dic | 240 // It expects these rules to use the same alias rules as the .dic |
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
286 other_commands_.push_back(line); | 303 other_commands_.push_back(line); |
287 } | 304 } |
288 | 305 |
289 void AffReader::HandleEncodedCommand(const std::string& line) { | 306 void AffReader::HandleEncodedCommand(const std::string& line) { |
290 std::string utf8; | 307 std::string utf8; |
291 if (EncodingToUTF8(line, &utf8)) | 308 if (EncodingToUTF8(line, &utf8)) |
292 other_commands_.push_back(utf8); | 309 other_commands_.push_back(utf8); |
293 } | 310 } |
294 | 311 |
295 } // namespace convert_dict | 312 } // namespace convert_dict |
OLD | NEW |