OLD | NEW |
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/tools/convert_dict/dic_reader.h" | 5 #include "chrome/tools/convert_dict/dic_reader.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <set> | 8 #include <set> |
9 | 9 |
10 #include "base/file_util.h" | 10 #include "base/file_util.h" |
(...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
99 size_t split1_tab_offset = split[1].find('\t'); | 99 size_t split1_tab_offset = split[1].find('\t'); |
100 if (split1_tab_offset != std::string::npos) | 100 if (split1_tab_offset != std::string::npos) |
101 split[1] = split[1].substr(0, split1_tab_offset); | 101 split[1] = split[1].substr(0, split1_tab_offset); |
102 | 102 |
103 if (aff_reader->has_indexed_affixes()) | 103 if (aff_reader->has_indexed_affixes()) |
104 affix_index = atoi(split[1].c_str()); | 104 affix_index = atoi(split[1].c_str()); |
105 else | 105 else |
106 affix_index = aff_reader->GetAFIndexForAFString(split[1]); | 106 affix_index = aff_reader->GetAFIndexForAFString(split[1]); |
107 } | 107 } |
108 | 108 |
| 109 // Discard the morphological description if it is attached to the first |
| 110 // token. (It is attached to the first token if a word doesn't have affix |
| 111 // rules.) |
| 112 size_t word_tab_offset = utf8word.find('\t'); |
| 113 if (word_tab_offset != std::string::npos) |
| 114 utf8word = utf8word.substr(0, word_tab_offset); |
| 115 |
109 WordSet::iterator found = word_set->find(utf8word); | 116 WordSet::iterator found = word_set->find(utf8word); |
110 if (found == word_set->end()) { | 117 if (found == word_set->end()) { |
111 std::set<int> affix_vector; | 118 std::set<int> affix_vector; |
112 affix_vector.insert(affix_index); | 119 affix_vector.insert(affix_index); |
113 word_set->insert(std::make_pair(utf8word, affix_vector)); | 120 word_set->insert(std::make_pair(utf8word, affix_vector)); |
114 } else { | 121 } else { |
115 found->second.insert(affix_index); | 122 found->second.insert(affix_index); |
116 } | 123 } |
117 } | 124 } |
118 | 125 |
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
166 std::sort(affixes.begin(), affixes.end()); | 173 std::sort(affixes.begin(), affixes.end()); |
167 words_.push_back(std::make_pair(word->first, affixes)); | 174 words_.push_back(std::make_pair(word->first, affixes)); |
168 } | 175 } |
169 | 176 |
170 // Double-check that the words are sorted. | 177 // Double-check that the words are sorted. |
171 std::sort(words_.begin(), words_.end()); | 178 std::sort(words_.begin(), words_.end()); |
172 return true; | 179 return true; |
173 } | 180 } |
174 | 181 |
175 } // namespace convert_dict | 182 } // namespace convert_dict |
OLD | NEW |