OLD | NEW |
1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/bookmarks/browser/bookmark_index.h" | 5 #include "components/bookmarks/browser/titled_url_index.h" |
6 | 6 |
7 #include <stdint.h> | 7 #include <stdint.h> |
8 | 8 |
9 #include "base/i18n/case_conversion.h" | 9 #include "base/i18n/case_conversion.h" |
10 #include "base/logging.h" | 10 #include "base/logging.h" |
11 #include "base/stl_util.h" | 11 #include "base/stl_util.h" |
12 #include "base/strings/utf_offset_string_conversions.h" | 12 #include "base/strings/utf_offset_string_conversions.h" |
13 #include "build/build_config.h" | 13 #include "build/build_config.h" |
14 #include "components/bookmarks/browser/bookmark_match.h" | |
15 #include "components/bookmarks/browser/bookmark_utils.h" | 14 #include "components/bookmarks/browser/bookmark_utils.h" |
| 15 #include "components/bookmarks/browser/titled_url_match.h" |
16 #include "components/bookmarks/browser/titled_url_node.h" | 16 #include "components/bookmarks/browser/titled_url_node.h" |
17 #include "components/bookmarks/browser/titled_url_node_sorter.h" | 17 #include "components/bookmarks/browser/titled_url_node_sorter.h" |
18 #include "components/query_parser/snippet.h" | 18 #include "components/query_parser/snippet.h" |
19 #include "third_party/icu/source/common/unicode/normalizer2.h" | 19 #include "third_party/icu/source/common/unicode/normalizer2.h" |
20 #include "third_party/icu/source/common/unicode/utypes.h" | 20 #include "third_party/icu/source/common/unicode/utypes.h" |
21 | 21 |
22 namespace bookmarks { | 22 namespace bookmarks { |
23 | 23 |
24 namespace { | 24 namespace { |
25 | 25 |
(...skipping 15 matching lines...) |
41 // This should not happen. Log the error and fall back. | 41 // This should not happen. Log the error and fall back. |
42 LOG(ERROR) << "normalization failed: " << u_errorName(status); | 42 LOG(ERROR) << "normalization failed: " << u_errorName(status); |
43 return text; | 43 return text; |
44 } | 44 } |
45 return base::string16(unicode_normalized_text.getBuffer(), | 45 return base::string16(unicode_normalized_text.getBuffer(), |
46 unicode_normalized_text.length()); | 46 unicode_normalized_text.length()); |
47 } | 47 } |
48 | 48 |
49 } // namespace | 49 } // namespace |
50 | 50 |
51 BookmarkIndex::BookmarkIndex(std::unique_ptr<TitledUrlNodeSorter> sorter) | 51 TitledUrlIndex::TitledUrlIndex(std::unique_ptr<TitledUrlNodeSorter> sorter) |
52 : sorter_(std::move(sorter)) { | 52 : sorter_(std::move(sorter)) { |
53 } | 53 } |
54 | 54 |
55 BookmarkIndex::~BookmarkIndex() { | 55 TitledUrlIndex::~TitledUrlIndex() { |
56 } | 56 } |
57 | 57 |
58 void BookmarkIndex::Add(const TitledUrlNode* node) { | 58 void TitledUrlIndex::Add(const TitledUrlNode* node) { |
59 std::vector<base::string16> terms = | 59 std::vector<base::string16> terms = |
60 ExtractQueryWords(Normalize(node->GetTitledUrlNodeTitle())); | 60 ExtractQueryWords(Normalize(node->GetTitledUrlNodeTitle())); |
61 for (size_t i = 0; i < terms.size(); ++i) | 61 for (size_t i = 0; i < terms.size(); ++i) |
62 RegisterNode(terms[i], node); | 62 RegisterNode(terms[i], node); |
63 terms = ExtractQueryWords( | 63 terms = ExtractQueryWords( |
64 CleanUpUrlForMatching(node->GetTitledUrlNodeUrl(), nullptr)); | 64 CleanUpUrlForMatching(node->GetTitledUrlNodeUrl(), nullptr)); |
65 for (size_t i = 0; i < terms.size(); ++i) | 65 for (size_t i = 0; i < terms.size(); ++i) |
66 RegisterNode(terms[i], node); | 66 RegisterNode(terms[i], node); |
67 } | 67 } |
68 | 68 |
69 void BookmarkIndex::Remove(const TitledUrlNode* node) { | 69 void TitledUrlIndex::Remove(const TitledUrlNode* node) { |
70 std::vector<base::string16> terms = | 70 std::vector<base::string16> terms = |
71 ExtractQueryWords(Normalize(node->GetTitledUrlNodeTitle())); | 71 ExtractQueryWords(Normalize(node->GetTitledUrlNodeTitle())); |
72 for (size_t i = 0; i < terms.size(); ++i) | 72 for (size_t i = 0; i < terms.size(); ++i) |
73 UnregisterNode(terms[i], node); | 73 UnregisterNode(terms[i], node); |
74 terms = ExtractQueryWords( | 74 terms = ExtractQueryWords( |
75 CleanUpUrlForMatching(node->GetTitledUrlNodeUrl(), nullptr)); | 75 CleanUpUrlForMatching(node->GetTitledUrlNodeUrl(), nullptr)); |
76 for (size_t i = 0; i < terms.size(); ++i) | 76 for (size_t i = 0; i < terms.size(); ++i) |
77 UnregisterNode(terms[i], node); | 77 UnregisterNode(terms[i], node); |
78 } | 78 } |
79 | 79 |
80 void BookmarkIndex::GetResultsMatching( | 80 void TitledUrlIndex::GetResultsMatching( |
81 const base::string16& input_query, | 81 const base::string16& input_query, |
82 size_t max_count, | 82 size_t max_count, |
83 query_parser::MatchingAlgorithm matching_algorithm, | 83 query_parser::MatchingAlgorithm matching_algorithm, |
84 std::vector<BookmarkMatch>* results) { | 84 std::vector<TitledUrlMatch>* results) { |
85 const base::string16 query = Normalize(input_query); | 85 const base::string16 query = Normalize(input_query); |
86 std::vector<base::string16> terms = ExtractQueryWords(query); | 86 std::vector<base::string16> terms = ExtractQueryWords(query); |
87 if (terms.empty()) | 87 if (terms.empty()) |
88 return; | 88 return; |
89 | 89 |
90 TitledUrlNodeSet matches; | 90 TitledUrlNodeSet matches; |
91 for (size_t i = 0; i < terms.size(); ++i) { | 91 for (size_t i = 0; i < terms.size(); ++i) { |
92 if (!GetResultsMatchingTerm(terms[i], i == 0, matching_algorithm, | 92 if (!GetResultsMatchingTerm(terms[i], i == 0, matching_algorithm, |
93 &matches)) { | 93 &matches)) { |
94 return; | 94 return; |
(...skipping 14 matching lines...) |
109 // so that the best matches will always be included in the results. The loop | 109 // so that the best matches will always be included in the results. The loop |
110 // that calculates result relevance in HistoryContentsProvider::ConvertResults | 110 // that calculates result relevance in HistoryContentsProvider::ConvertResults |
111 // will run backwards to assure higher relevance will be attributed to the | 111 // will run backwards to assure higher relevance will be attributed to the |
112 // best matches. | 112 // best matches. |
113 for (TitledUrlNodes::const_iterator i = sorted_nodes.begin(); | 113 for (TitledUrlNodes::const_iterator i = sorted_nodes.begin(); |
114 i != sorted_nodes.end() && results->size() < max_count; | 114 i != sorted_nodes.end() && results->size() < max_count; |
115 ++i) | 115 ++i) |
116 AddMatchToResults(*i, &parser, query_nodes, results); | 116 AddMatchToResults(*i, &parser, query_nodes, results); |
117 } | 117 } |
118 | 118 |
119 void BookmarkIndex::SortMatches(const TitledUrlNodeSet& matches, | 119 void TitledUrlIndex::SortMatches(const TitledUrlNodeSet& matches, |
120 TitledUrlNodes* sorted_nodes) const { | 120 TitledUrlNodes* sorted_nodes) const { |
121 if (sorter_) { | 121 if (sorter_) { |
122 sorter_->SortMatches(matches, sorted_nodes); | 122 sorter_->SortMatches(matches, sorted_nodes); |
123 } else { | 123 } else { |
124 sorted_nodes->insert(sorted_nodes->end(), matches.begin(), matches.end()); | 124 sorted_nodes->insert(sorted_nodes->end(), matches.begin(), matches.end()); |
125 } | 125 } |
126 } | 126 } |
127 | 127 |
128 void BookmarkIndex::AddMatchToResults( | 128 void TitledUrlIndex::AddMatchToResults( |
129 const TitledUrlNode* node, | 129 const TitledUrlNode* node, |
130 query_parser::QueryParser* parser, | 130 query_parser::QueryParser* parser, |
131 const query_parser::QueryNodeVector& query_nodes, | 131 const query_parser::QueryNodeVector& query_nodes, |
132 std::vector<BookmarkMatch>* results) { | 132 std::vector<TitledUrlMatch>* results) { |
133 if (!node) { | 133 if (!node) { |
134 return; | 134 return; |
135 } | 135 } |
136 // Check that the result matches the query. The previous search | 136 // Check that the result matches the query. The previous search |
137 // was a simple per-word search, while the more complex matching | 137 // was a simple per-word search, while the more complex matching |
138 // of QueryParser may filter it out. For example, the query | 138 // of QueryParser may filter it out. For example, the query |
139 // ["thi"] will match the title [Thinking], but since | 139 // ["thi"] will match the title [Thinking], but since |
140 // ["thi"] is quoted we don't want to do a prefix match. | 140 // ["thi"] is quoted we don't want to do a prefix match. |
141 query_parser::QueryWordVector title_words, url_words; | 141 query_parser::QueryWordVector title_words, url_words; |
142 const base::string16 lower_title = | 142 const base::string16 lower_title = |
143 base::i18n::ToLower(Normalize(node->GetTitledUrlNodeTitle())); | 143 base::i18n::ToLower(Normalize(node->GetTitledUrlNodeTitle())); |
144 parser->ExtractQueryWords(lower_title, &title_words); | 144 parser->ExtractQueryWords(lower_title, &title_words); |
145 base::OffsetAdjuster::Adjustments adjustments; | 145 base::OffsetAdjuster::Adjustments adjustments; |
146 parser->ExtractQueryWords( | 146 parser->ExtractQueryWords( |
147 CleanUpUrlForMatching(node->GetTitledUrlNodeUrl(), &adjustments), | 147 CleanUpUrlForMatching(node->GetTitledUrlNodeUrl(), &adjustments), |
148 &url_words); | 148 &url_words); |
149 query_parser::Snippet::MatchPositions title_matches, url_matches; | 149 query_parser::Snippet::MatchPositions title_matches, url_matches; |
150 for (const auto& node : query_nodes) { | 150 for (const auto& node : query_nodes) { |
151 const bool has_title_matches = | 151 const bool has_title_matches = |
152 node->HasMatchIn(title_words, &title_matches); | 152 node->HasMatchIn(title_words, &title_matches); |
153 const bool has_url_matches = node->HasMatchIn(url_words, &url_matches); | 153 const bool has_url_matches = node->HasMatchIn(url_words, &url_matches); |
154 if (!has_title_matches && !has_url_matches) | 154 if (!has_title_matches && !has_url_matches) |
155 return; | 155 return; |
156 query_parser::QueryParser::SortAndCoalesceMatchPositions(&title_matches); | 156 query_parser::QueryParser::SortAndCoalesceMatchPositions(&title_matches); |
157 query_parser::QueryParser::SortAndCoalesceMatchPositions(&url_matches); | 157 query_parser::QueryParser::SortAndCoalesceMatchPositions(&url_matches); |
158 } | 158 } |
159 BookmarkMatch match; | 159 TitledUrlMatch match; |
160 if (lower_title.length() == node->GetTitledUrlNodeTitle().length()) { | 160 if (lower_title.length() == node->GetTitledUrlNodeTitle().length()) { |
161 // Only use title matches if the lowercase string is the same length | 161 // Only use title matches if the lowercase string is the same length |
162 // as the original string, otherwise the matches are meaningless. | 162 // as the original string, otherwise the matches are meaningless. |
163 // TODO(mpearson): revise match positions appropriately. | 163 // TODO(mpearson): revise match positions appropriately. |
164 match.title_match_positions.swap(title_matches); | 164 match.title_match_positions.swap(title_matches); |
165 } | 165 } |
166 // Now that we're done processing this entry, correct the offsets of the | 166 // Now that we're done processing this entry, correct the offsets of the |
167 // matches in |url_matches| so they point to offsets in the original URL | 167 // matches in |url_matches| so they point to offsets in the original URL |
168 // spec, not the cleaned-up URL string that we used for matching. | 168 // spec, not the cleaned-up URL string that we used for matching. |
169 std::vector<size_t> offsets = | 169 std::vector<size_t> offsets = |
170 BookmarkMatch::OffsetsFromMatchPositions(url_matches); | 170 TitledUrlMatch::OffsetsFromMatchPositions(url_matches); |
171 base::OffsetAdjuster::UnadjustOffsets(adjustments, &offsets); | 171 base::OffsetAdjuster::UnadjustOffsets(adjustments, &offsets); |
172 url_matches = | 172 url_matches = |
173 BookmarkMatch::ReplaceOffsetsInMatchPositions(url_matches, offsets); | 173 TitledUrlMatch::ReplaceOffsetsInMatchPositions(url_matches, offsets); |
174 match.url_match_positions.swap(url_matches); | 174 match.url_match_positions.swap(url_matches); |
175 match.node = node; | 175 match.node = node; |
176 results->push_back(match); | 176 results->push_back(match); |
177 } | 177 } |
178 | 178 |
179 bool BookmarkIndex::GetResultsMatchingTerm( | 179 bool TitledUrlIndex::GetResultsMatchingTerm( |
180 const base::string16& term, | 180 const base::string16& term, |
181 bool first_term, | 181 bool first_term, |
182 query_parser::MatchingAlgorithm matching_algorithm, | 182 query_parser::MatchingAlgorithm matching_algorithm, |
183 TitledUrlNodeSet* matches) { | 183 TitledUrlNodeSet* matches) { |
184 Index::const_iterator i = index_.lower_bound(term); | 184 Index::const_iterator i = index_.lower_bound(term); |
185 if (i == index_.end()) | 185 if (i == index_.end()) |
186 return false; | 186 return false; |
187 | 187 |
188 if (!query_parser::QueryParser::IsWordLongEnoughForPrefixSearch( | 188 if (!query_parser::QueryParser::IsWordLongEnoughForPrefixSearch( |
189 term, matching_algorithm)) { | 189 term, matching_algorithm)) { |
(...skipping 30 matching lines...) |
220 ++i; | 220 ++i; |
221 } | 221 } |
222 if (!first_term) { | 222 if (!first_term) { |
223 *matches = | 223 *matches = |
224 base::STLSetIntersection<TitledUrlNodeSet>(*prefix_matches, *matches); | 224 base::STLSetIntersection<TitledUrlNodeSet>(*prefix_matches, *matches); |
225 } | 225 } |
226 } | 226 } |
227 return !matches->empty(); | 227 return !matches->empty(); |
228 } | 228 } |
229 | 229 |
230 std::vector<base::string16> BookmarkIndex::ExtractQueryWords( | 230 std::vector<base::string16> TitledUrlIndex::ExtractQueryWords( |
231 const base::string16& query) { | 231 const base::string16& query) { |
232 std::vector<base::string16> terms; | 232 std::vector<base::string16> terms; |
233 if (query.empty()) | 233 if (query.empty()) |
234 return std::vector<base::string16>(); | 234 return std::vector<base::string16>(); |
235 query_parser::QueryParser parser; | 235 query_parser::QueryParser parser; |
236 parser.ParseQueryWords(base::i18n::ToLower(query), | 236 parser.ParseQueryWords(base::i18n::ToLower(query), |
237 query_parser::MatchingAlgorithm::DEFAULT, | 237 query_parser::MatchingAlgorithm::DEFAULT, |
238 &terms); | 238 &terms); |
239 return terms; | 239 return terms; |
240 } | 240 } |
241 | 241 |
242 void BookmarkIndex::RegisterNode(const base::string16& term, | 242 void TitledUrlIndex::RegisterNode(const base::string16& term, |
243 const TitledUrlNode* node) { | 243 const TitledUrlNode* node) { |
244 index_[term].insert(node); | 244 index_[term].insert(node); |
245 } | 245 } |
246 | 246 |
247 void BookmarkIndex::UnregisterNode(const base::string16& term, | 247 void TitledUrlIndex::UnregisterNode(const base::string16& term, |
248 const TitledUrlNode* node) { | 248 const TitledUrlNode* node) { |
249 Index::iterator i = index_.find(term); | 249 Index::iterator i = index_.find(term); |
250 if (i == index_.end()) { | 250 if (i == index_.end()) { |
251 // We can get here if the node has the same term more than once. For | 251 // We can get here if the node has the same term more than once. For |
252 // example, a node with the title 'foo foo' would end up here. | 252 // example, a node with the title 'foo foo' would end up here. |
253 return; | 253 return; |
254 } | 254 } |
255 i->second.erase(node); | 255 i->second.erase(node); |
256 if (i->second.empty()) | 256 if (i->second.empty()) |
257 index_.erase(i); | 257 index_.erase(i); |
258 } | 258 } |
259 | 259 |
260 } // namespace bookmarks | 260 } // namespace bookmarks |
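
For orientation, a minimal sketch of how a caller might drive the renamed index, using only the signatures visible in this diff (TitledUrlIndex::Add/Remove/GetResultsMatching and the TitledUrlMatch fields). The ExampleQuery helper is hypothetical and not part of this CL; it assumes |node| is some concrete TitledUrlNode implementation owned by the caller, and that MatchingAlgorithm lives in components/query_parser/query_parser.h.

// Illustrative only; not part of this change.
#include <vector>

#include "base/logging.h"
#include "base/strings/utf_string_conversions.h"
#include "components/bookmarks/browser/titled_url_index.h"
#include "components/bookmarks/browser/titled_url_match.h"
#include "components/bookmarks/browser/titled_url_node.h"
#include "components/query_parser/query_parser.h"

namespace bookmarks {

void ExampleQuery(TitledUrlIndex* index, const TitledUrlNode* node) {
  // Index the terms extracted from the node's title and cleaned-up URL.
  index->Add(node);

  // Query for matching nodes; results are capped at |max_count| and sorted
  // by the index's TitledUrlNodeSorter, if one was supplied.
  std::vector<TitledUrlMatch> results;
  index->GetResultsMatching(base::ASCIIToUTF16("chromium"),
                            /*max_count=*/10,
                            query_parser::MatchingAlgorithm::DEFAULT,
                            &results);
  for (const TitledUrlMatch& match : results) {
    // |match.node| is the matching TitledUrlNode; title_match_positions and
    // url_match_positions locate the matched terms (URL offsets refer to the
    // original spec, per AddMatchToResults above).
    DCHECK(match.node);
  }

  // Unregister the node's terms before the node goes away.
  index->Remove(node);
}

}  // namespace bookmarks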