OLD | NEW |
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/tools/convert_dict/hunspell_reader.h" | 5 #include "chrome/tools/convert_dict/hunspell_reader.h" |
6 | 6 |
7 #include "base/string_util.h" | 7 #include "base/string_util.h" |
8 | 8 |
9 namespace convert_dict { | 9 namespace convert_dict { |
10 | 10 |
// Scratch buffer that ReadLine() fills with fgets() before copying the text
// into a std::string. This silly 64K buffer is just copied from Hunspell's way
// of parsing. It is a single global, so every ReadLine() call reuses the same
// storage.
const int kLineBufferLen = 65535;
char line_buffer[kLineBufferLen];
14 | 14 |
15 // Shortcut for trimming whitespace from both ends of the line. | 15 // Shortcut for trimming whitespace from both ends of the line. |
16 void TrimLine(std::string* line) { | 16 void TrimLine(std::string* line) { |
17 if (line->size() > 3 && | 17 if (line->size() > 3 && |
18 static_cast<unsigned char>((*line)[0]) == 0xef && | 18 static_cast<unsigned char>((*line)[0]) == 0xef && |
19 static_cast<unsigned char>((*line)[1]) == 0xbb && | 19 static_cast<unsigned char>((*line)[1]) == 0xbb && |
20 static_cast<unsigned char>((*line)[2]) == 0xbf) | 20 static_cast<unsigned char>((*line)[2]) == 0xbf) |
21 *line = line->substr(3); | 21 *line = line->substr(3); |
22 | 22 |
23 std::wstring line_input_wide = UTF8ToWide(*line); | 23 // Treat this text as an ASCII text and trim whitespace characters as |
24 std::wstring line_output_wide; | 24 // hunspell does. The returned text is to be converted into UTF-8 text with |
25 TrimWhitespace(line_input_wide, TRIM_ALL, &line_output_wide); | 25 // the encoding defined in an affix file. |
26 *line = WideToUTF8(line_output_wide); | 26 TrimWhitespace(*line, TRIM_ALL, line); |
27 } | 27 } |
28 | 28 |
29 std::string ReadLine(FILE* file) { | 29 std::string ReadLine(FILE* file) { |
30 const char* line = fgets(line_buffer, kLineBufferLen - 1, file); | 30 const char* line = fgets(line_buffer, kLineBufferLen - 1, file); |
31 if (!line) | 31 if (!line) |
32 return std::string(); | 32 return std::string(); |
33 | 33 |
34 std::string str = line; | 34 std::string str = line; |
35 TrimLine(&str); | 35 TrimLine(&str); |
36 return str; | 36 return str; |
37 } | 37 } |
38 | 38 |
39 void StripComment(std::string* line) { | 39 void StripComment(std::string* line) { |
40 for (size_t i = 0; i < line->size(); i++) { | 40 for (size_t i = 0; i < line->size(); i++) { |
41 if ((*line)[i] == '#') { | 41 if ((*line)[i] == '#') { |
42 line->resize(i); | 42 line->resize(i); |
43 TrimLine(line); | 43 TrimLine(line); |
44 return; | 44 return; |
45 } | 45 } |
46 } | 46 } |
47 } | 47 } |
48 | 48 |
49 } // namespace convert_dict | 49 } // namespace convert_dict |
OLD | NEW |