OLD | NEW |
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/tools/convert_dict/aff_reader.h" | 5 #include "chrome/tools/convert_dict/aff_reader.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 | 8 |
9 #include "base/files/file_util.h" | 9 #include "base/files/file_util.h" |
10 #include "base/i18n/icu_string_conversions.h" | 10 #include "base/i18n/icu_string_conversions.h" |
(...skipping 211 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
222 } | 222 } |
223 part = rule->substr(part_start); // From here to end. | 223 part = rule->substr(part_start); // From here to end. |
224 | 224 |
225 if (part.find('-') != std::string::npos) { | 225 if (part.find('-') != std::string::npos) { |
226 // This rule has a morph rule used by old Hungarian dictionaries. | 226 // This rule has a morph rule used by old Hungarian dictionaries. |
227 // When a line has a morph rule, its format becomes as listed below. | 227 // When a line has a morph rule, its format becomes as listed below. |
228 // AFX D 0 d e - M | 228 // AFX D 0 d e - M |
229 // To make hunspell work more happily, replace this morph rule with | 229 // To make hunspell work more happily, replace this morph rule with |
230 // a compound flag as listed below. | 230 // a compound flag as listed below. |
231 // AFX D 0 d/M e | 231 // AFX D 0 d/M e |
232 std::vector<std::string> tokens; | 232 std::vector<std::string> tokens = base::SplitString( |
233 base::SplitString(part, ' ', &tokens); | 233 part, " ", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL); |
234 if (tokens.size() >= 5) { | 234 if (tokens.size() >= 5) { |
235 part = base::StringPrintf("%s %s/%s %s", | 235 part = base::StringPrintf("%s %s/%s %s", |
236 tokens[0].c_str(), | 236 tokens[0].c_str(), |
237 tokens[1].c_str(), | 237 tokens[1].c_str(), |
238 tokens[4].c_str(), | 238 tokens[4].c_str(), |
239 tokens[2].c_str()); | 239 tokens[2].c_str()); |
240 } | 240 } |
241 } | 241 } |
242 | 242 |
243 size_t slash_index = part.find('/'); | 243 size_t slash_index = part.find('/'); |
244 if (slash_index != std::string::npos && !has_indexed_affixes()) { | 244 if (slash_index != std::string::npos && !has_indexed_affixes()) { |
245 // This can also have a rule string associated with it following a | 245 // This can also have a rule string associated with it following a |
246 // slash. For example: | 246 // slash. For example: |
247 // PFX P 0 foo/Y . | 247 // PFX P 0 foo/Y . |
248 // The "Y" is a flag. For example, the aff file might have a line: | 248 // The "Y" is a flag. For example, the aff file might have a line: |
249 // COMPOUNDFLAG Y | 249 // COMPOUNDFLAG Y |
250 // so that means that this prefix would be a compound one. | 250 // so that means that this prefix would be a compound one. |
251 // | 251 // |
252 // It expects these rules to use the same alias rules as the .dic | 252 // It expects these rules to use the same alias rules as the .dic |
253 // file. We've forced it to use aliases, which is a numerical index | 253 // file. We've forced it to use aliases, which is a numerical index |
254 // instead of these character flags, and this needs to be consistent. | 254 // instead of these character flags, and this needs to be consistent. |
255 | 255 |
256 std::string before_flags = part.substr(0, slash_index + 1); | 256 std::string before_flags = part.substr(0, slash_index + 1); |
257 | 257 |
258 // After the slash are both the flags, then whitespace, then the part | 258 // After the slash are both the flags, then whitespace, then the part |
259 // that tells us what to strip. | 259 // that tells us what to strip. |
260 std::vector<std::string> after_slash; | 260 std::vector<std::string> after_slash = base::SplitString( |
261 base::SplitString(part.substr(slash_index + 1), ' ', &after_slash); | 261 part.substr(slash_index + 1), " ", |
| 262 base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL); |
262 if (after_slash.size() == 0) { | 263 if (after_slash.size() == 0) { |
263 Panic("Found 0 terms after slash in affix rule '%s', " | 264 Panic("Found 0 terms after slash in affix rule '%s', " |
264 "but need at least 2.", | 265 "but need at least 2.", |
265 part.c_str()); | 266 part.c_str()); |
266 } | 267 } |
267 if (after_slash.size() == 1) { | 268 if (after_slash.size() == 1) { |
268 printf("WARNING: Found 1 term after slash in affix rule '%s', " | 269 printf("WARNING: Found 1 term after slash in affix rule '%s', " |
269 "but expected at least 2. Adding '.'.\n", | 270 "but expected at least 2. Adding '.'.\n", |
270 part.c_str()); | 271 part.c_str()); |
271 after_slash.push_back("."); | 272 after_slash.push_back("."); |
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
327 } | 328 } |
328 | 329 |
329 void AffReader::HandleEncodedCommand(const std::string& line) { | 330 void AffReader::HandleEncodedCommand(const std::string& line) { |
330 std::string utf8; | 331 std::string utf8; |
331 if (!EncodingToUTF8(line, &utf8)) | 332 if (!EncodingToUTF8(line, &utf8)) |
332 Panic("Cannot encode command '%s' to utf8.", line.c_str()); | 333 Panic("Cannot encode command '%s' to utf8.", line.c_str()); |
333 other_commands_.push_back(utf8); | 334 other_commands_.push_back(utf8); |
334 } | 335 } |
335 | 336 |
336 } // namespace convert_dict | 337 } // namespace convert_dict |
OLD | NEW |