OLD | NEW |
---|---|
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/tools/convert_dict/dic_reader.h" | 5 #include "chrome/tools/convert_dict/dic_reader.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <set> | 8 #include <set> |
9 | 9 |
10 #include "base/file_util.h" | 10 #include "base/file_util.h" |
(... 99 matching lines skipped ...) | (... 99 matching lines skipped ...) |
110 // token. (It is attached to the first token if a word doesn't have affix | 110 // token. (It is attached to the first token if a word doesn't have affix |
111 // rules.) | 111 // rules.) |
112 size_t word_tab_offset = utf8word.find('\t'); | 112 size_t word_tab_offset = utf8word.find('\t'); |
113 if (word_tab_offset != std::string::npos) | 113 if (word_tab_offset != std::string::npos) |
114 utf8word = utf8word.substr(0, word_tab_offset); | 114 utf8word = utf8word.substr(0, word_tab_offset); |
115 | 115 |
116 WordSet::iterator found = word_set->find(utf8word); | 116 WordSet::iterator found = word_set->find(utf8word); |
117 std::set<int> affix_vector; | 117 std::set<int> affix_vector; |
118 affix_vector.insert(affix_index); | 118 affix_vector.insert(affix_index); |
119 | 119 |
120 if (found == word_set->end()) { | 120 if (found == word_set->end()) |
121 word_set->insert(std::make_pair(utf8word, affix_vector)); | 121 word_set->insert(std::make_pair(utf8word, affix_vector)); |
122 } else { | 122 else |
123 // The affixes of the delta file should override those in the | 123 found->second.insert(affix_index); |
groby-ooo-7-16 (2013/01/10 01:35:46): I'm worried that the previous behavior was needed. | |
124 // dictionary file. | |
125 found->second.swap(affix_vector); | |
126 } | |
127 } | 124 } |
128 | 125 |
129 return true; | 126 return true; |
130 } | 127 } |
131 | 128 |
132 } // namespace | 129 } // namespace |
133 | 130 |
134 DicReader::DicReader(const FilePath& path) { | 131 DicReader::DicReader(const FilePath& path) { |
135 file_ = file_util::OpenFile(path, "r"); | 132 file_ = file_util::OpenFile(path, "r"); |
136 | 133 |
(... 26 matching lines skipped ...) | (... 26 matching lines skipped ...) |
163 aff_reader->encoding(), true)) | 160 aff_reader->encoding(), true)) |
164 return false; | 161 return false; |
165 | 162 |
166 // Add words from the .dic_delta file to the word set, if it exists. | 163 // Add words from the .dic_delta file to the word set, if it exists. |
167 // The first line is the first word to add. Word count line is not present. | 164 // The first line is the first word to add. Word count line is not present. |
168 // NOTE: These additional words should be encoded as UTF-8. | 165 // NOTE: These additional words should be encoded as UTF-8. |
169 if (additional_words_file_ != NULL) { | 166 if (additional_words_file_ != NULL) { |
170 PopulateWordSet(&word_set, additional_words_file_, aff_reader, "dic delta", | 167 PopulateWordSet(&word_set, additional_words_file_, aff_reader, "dic delta", |
171 "UTF-8", false); | 168 "UTF-8", false); |
172 } | 169 } |
173 | |
174 // Make sure the words are sorted, they may be unsorted in the input. | 170 // Make sure the words are sorted, they may be unsorted in the input. |
175 for (WordSet::iterator word = word_set.begin(); word != word_set.end(); | 171 for (WordSet::iterator word = word_set.begin(); word != word_set.end(); |
176 ++word) { | 172 ++word) { |
177 std::vector<int> affixes; | 173 std::vector<int> affixes; |
178 for (std::set<int>::iterator aff = word->second.begin(); | 174 for (std::set<int>::iterator aff = word->second.begin(); |
179 aff != word->second.end(); ++aff) | 175 aff != word->second.end(); ++aff) |
180 affixes.push_back(*aff); | 176 affixes.push_back(*aff); |
181 | 177 |
182 // Double check that the affixes are sorted. This isn't strictly necessary | 178 // Double check that the affixes are sorted. This isn't strictly necessary |
183 // but it's nice for the file to have a fixed layout. | 179 // but it's nice for the file to have a fixed layout. |
184 std::sort(affixes.begin(), affixes.end()); | 180 std::sort(affixes.begin(), affixes.end()); |
181 std::reverse(affixes.begin(), affixes.end()); | |
185 words_.push_back(std::make_pair(word->first, affixes)); | 182 words_.push_back(std::make_pair(word->first, affixes)); |
186 } | 183 } |
187 | 184 |
188 // Double-check that the words are sorted. | 185 // Double-check that the words are sorted. |
189 std::sort(words_.begin(), words_.end()); | 186 std::sort(words_.begin(), words_.end()); |
190 return true; | 187 return true; |
191 } | 188 } |
192 | 189 |
193 } // namespace convert_dict | 190 } // namespace convert_dict |
OLD | NEW |