| OLD | NEW |
| 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/tools/convert_dict/hunspell_reader.h" | 5 #include "chrome/tools/convert_dict/hunspell_reader.h" |
| 6 | 6 |
| 7 #include "base/string_util.h" | 7 #include "base/string_util.h" |
| 8 | 8 |
| 9 namespace convert_dict { | 9 namespace convert_dict { |
| 10 | 10 |
// Scratch buffer that ReadLine() passes to fgets() for every line it reads.
// This silly 64K buffer is just copied from Hunspell's way of parsing.
const int kLineBufferLen = 65535;
char line_buffer[kLineBufferLen];
| 14 | 14 |
| 15 // Shortcut for trimming whitespace from both ends of the line. | 15 // Shortcut for trimming whitespace from both ends of the line. |
| 16 void TrimLine(std::string* line) { | 16 void TrimLine(std::string* line) { |
| 17 if (line->size() > 3 && | 17 if (line->size() > 3 && |
| 18 static_cast<unsigned char>((*line)[0]) == 0xef && | 18 static_cast<unsigned char>((*line)[0]) == 0xef && |
| 19 static_cast<unsigned char>((*line)[1]) == 0xbb && | 19 static_cast<unsigned char>((*line)[1]) == 0xbb && |
| 20 static_cast<unsigned char>((*line)[2]) == 0xbf) | 20 static_cast<unsigned char>((*line)[2]) == 0xbf) |
| 21 *line = line->substr(3); | 21 *line = line->substr(3); |
| 22 | 22 |
| 23 std::wstring line_input_wide = UTF8ToWide(*line); | 23 // Treat this text as an ASCII text and trim whitespace characters as |
| 24 std::wstring line_output_wide; | 24 // hunspell does. The returned text is to be converted into UTF-8 text with |
| 25 TrimWhitespace(line_input_wide, TRIM_ALL, &line_output_wide); | 25 // the encoding defined in an affix file. |
| 26 *line = WideToUTF8(line_output_wide); | 26 TrimWhitespace(*line, TRIM_ALL, line); |
| 27 } | 27 } |
| 28 | 28 |
| 29 std::string ReadLine(FILE* file) { | 29 std::string ReadLine(FILE* file) { |
| 30 const char* line = fgets(line_buffer, kLineBufferLen - 1, file); | 30 const char* line = fgets(line_buffer, kLineBufferLen - 1, file); |
| 31 if (!line) | 31 if (!line) |
| 32 return std::string(); | 32 return std::string(); |
| 33 | 33 |
| 34 std::string str = line; | 34 std::string str = line; |
| 35 TrimLine(&str); | 35 TrimLine(&str); |
| 36 return str; | 36 return str; |
| 37 } | 37 } |
| 38 | 38 |
| 39 void StripComment(std::string* line) { | 39 void StripComment(std::string* line) { |
| 40 for (size_t i = 0; i < line->size(); i++) { | 40 for (size_t i = 0; i < line->size(); i++) { |
| 41 if ((*line)[i] == '#') { | 41 if ((*line)[i] == '#') { |
| 42 line->resize(i); | 42 line->resize(i); |
| 43 TrimLine(line); | 43 TrimLine(line); |
| 44 return; | 44 return; |
| 45 } | 45 } |
| 46 } | 46 } |
| 47 } | 47 } |
| 48 | 48 |
| 49 } // namespace convert_dict | 49 } // namespace convert_dict |
| OLD | NEW |