OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/bookmarks/bookmark_index.h" | 5 #include "chrome/browser/bookmarks/bookmark_index.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <functional> | 8 #include <functional> |
9 #include <iterator> | 9 #include <iterator> |
10 #include <list> | 10 #include <list> |
11 | 11 |
12 #include "base/i18n/case_conversion.h" | 12 #include "base/i18n/case_conversion.h" |
13 #include "base/logging.h" | 13 #include "base/logging.h" |
14 #include "base/strings/string16.h" | 14 #include "base/strings/string16.h" |
| 15 #include "base/strings/utf_offset_string_conversions.h" |
15 #include "chrome/browser/bookmarks/bookmark_utils.h" | 16 #include "chrome/browser/bookmarks/bookmark_utils.h" |
16 #include "components/bookmarks/core/browser/bookmark_client.h" | 17 #include "components/bookmarks/core/browser/bookmark_client.h" |
17 #include "components/bookmarks/core/browser/bookmark_match.h" | 18 #include "components/bookmarks/core/browser/bookmark_match.h" |
18 #include "components/bookmarks/core/browser/bookmark_node.h" | 19 #include "components/bookmarks/core/browser/bookmark_node.h" |
19 #include "components/query_parser/query_parser.h" | 20 #include "components/query_parser/query_parser.h" |
20 #include "components/query_parser/snippet.h" | 21 #include "components/query_parser/snippet.h" |
21 #include "third_party/icu/source/common/unicode/normalizer2.h" | 22 #include "third_party/icu/source/common/unicode/normalizer2.h" |
22 | 23 |
23 typedef BookmarkClient::NodeTypedCountPair NodeTypedCountPair; | 24 typedef BookmarkClient::NodeTypedCountPair NodeTypedCountPair; |
24 typedef BookmarkClient::NodeTypedCountPairs NodeTypedCountPairs; | 25 typedef BookmarkClient::NodeTypedCountPairs NodeTypedCountPairs; |
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
107 } | 108 } |
108 | 109 |
109 void BookmarkIndex::Add(const BookmarkNode* node) { | 110 void BookmarkIndex::Add(const BookmarkNode* node) { |
110 if (!node->is_url()) | 111 if (!node->is_url()) |
111 return; | 112 return; |
112 std::vector<base::string16> terms = | 113 std::vector<base::string16> terms = |
113 ExtractQueryWords(Normalize(node->GetTitle())); | 114 ExtractQueryWords(Normalize(node->GetTitle())); |
114 for (size_t i = 0; i < terms.size(); ++i) | 115 for (size_t i = 0; i < terms.size(); ++i) |
115 RegisterNode(terms[i], node); | 116 RegisterNode(terms[i], node); |
116 if (index_urls_) { | 117 if (index_urls_) { |
117 terms = ExtractQueryWords( | 118 terms = ExtractQueryWords(bookmark_utils::CleanUpUrlForMatching( |
118 bookmark_utils::CleanUpUrlForMatching(node->url(), languages_)); | 119 node->url(), languages_, NULL)); |
119 for (size_t i = 0; i < terms.size(); ++i) | 120 for (size_t i = 0; i < terms.size(); ++i) |
120 RegisterNode(terms[i], node); | 121 RegisterNode(terms[i], node); |
121 } | 122 } |
122 } | 123 } |
123 | 124 |
124 void BookmarkIndex::Remove(const BookmarkNode* node) { | 125 void BookmarkIndex::Remove(const BookmarkNode* node) { |
125 if (!node->is_url()) | 126 if (!node->is_url()) |
126 return; | 127 return; |
127 | 128 |
128 std::vector<base::string16> terms = | 129 std::vector<base::string16> terms = |
129 ExtractQueryWords(Normalize(node->GetTitle())); | 130 ExtractQueryWords(Normalize(node->GetTitle())); |
130 for (size_t i = 0; i < terms.size(); ++i) | 131 for (size_t i = 0; i < terms.size(); ++i) |
131 UnregisterNode(terms[i], node); | 132 UnregisterNode(terms[i], node); |
132 if (index_urls_) { | 133 if (index_urls_) { |
133 terms = ExtractQueryWords( | 134 terms = ExtractQueryWords(bookmark_utils::CleanUpUrlForMatching( |
134 bookmark_utils::CleanUpUrlForMatching(node->url(), languages_)); | 135 node->url(), languages_, NULL)); |
135 for (size_t i = 0; i < terms.size(); ++i) | 136 for (size_t i = 0; i < terms.size(); ++i) |
136 UnregisterNode(terms[i], node); | 137 UnregisterNode(terms[i], node); |
137 } | 138 } |
138 } | 139 } |
139 | 140 |
140 void BookmarkIndex::GetBookmarksMatching(const base::string16& input_query, | 141 void BookmarkIndex::GetBookmarksMatching(const base::string16& input_query, |
141 size_t max_count, | 142 size_t max_count, |
142 std::vector<BookmarkMatch>* results) { | 143 std::vector<BookmarkMatch>* results) { |
143 const base::string16 query = Normalize(input_query); | 144 const base::string16 query = Normalize(input_query); |
144 std::vector<base::string16> terms = ExtractQueryWords(query); | 145 std::vector<base::string16> terms = ExtractQueryWords(query); |
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
208 std::vector<BookmarkMatch>* results) { | 209 std::vector<BookmarkMatch>* results) { |
209 // Check that the result matches the query. The previous search | 210 // Check that the result matches the query. The previous search |
210 // was a simple per-word search, while the more complex matching | 211 // was a simple per-word search, while the more complex matching |
211 // of QueryParser may filter it out. For example, the query | 212 // of QueryParser may filter it out. For example, the query |
212 // ["thi"] will match the bookmark titled [Thinking], but since | 213 // ["thi"] will match the bookmark titled [Thinking], but since |
213 // ["thi"] is quoted we don't want to do a prefix match. | 214 // ["thi"] is quoted we don't want to do a prefix match. |
214 query_parser::QueryWordVector title_words, url_words; | 215 query_parser::QueryWordVector title_words, url_words; |
215 const base::string16 lower_title = | 216 const base::string16 lower_title = |
216 base::i18n::ToLower(Normalize(node->GetTitle())); | 217 base::i18n::ToLower(Normalize(node->GetTitle())); |
217 parser->ExtractQueryWords(lower_title, &title_words); | 218 parser->ExtractQueryWords(lower_title, &title_words); |
| 219 base::OffsetAdjuster::Adjustments adjustments; |
218 if (index_urls_) { | 220 if (index_urls_) { |
219 parser->ExtractQueryWords( | 221 parser->ExtractQueryWords(bookmark_utils::CleanUpUrlForMatching( |
220 bookmark_utils::CleanUpUrlForMatching(node->url(), languages_), | 222 node->url(), languages_, &adjustments), &url_words); |
221 &url_words); | |
222 } | 223 } |
223 query_parser::Snippet::MatchPositions title_matches, url_matches; | 224 query_parser::Snippet::MatchPositions title_matches, url_matches; |
224 for (size_t i = 0; i < query_nodes.size(); ++i) { | 225 for (size_t i = 0; i < query_nodes.size(); ++i) { |
225 const bool has_title_matches = | 226 const bool has_title_matches = |
226 query_nodes[i]->HasMatchIn(title_words, &title_matches); | 227 query_nodes[i]->HasMatchIn(title_words, &title_matches); |
227 const bool has_url_matches = index_urls_ && | 228 const bool has_url_matches = index_urls_ && |
228 query_nodes[i]->HasMatchIn(url_words, &url_matches); | 229 query_nodes[i]->HasMatchIn(url_words, &url_matches); |
229 if (!has_title_matches && !has_url_matches) | 230 if (!has_title_matches && !has_url_matches) |
230 return; | 231 return; |
231 query_parser::QueryParser::SortAndCoalesceMatchPositions(&title_matches); | 232 query_parser::QueryParser::SortAndCoalesceMatchPositions(&title_matches); |
232 if (index_urls_) | 233 if (index_urls_) |
233 query_parser::QueryParser::SortAndCoalesceMatchPositions(&url_matches); | 234 query_parser::QueryParser::SortAndCoalesceMatchPositions(&url_matches); |
234 } | 235 } |
235 BookmarkMatch match; | 236 BookmarkMatch match; |
236 if (lower_title.length() == node->GetTitle().length()) { | 237 if (lower_title.length() == node->GetTitle().length()) { |
237 // Only use title matches if the lowercase string is the same length | 238 // Only use title matches if the lowercase string is the same length |
238 // as the original string, otherwise the matches are meaningless. | 239 // as the original string, otherwise the matches are meaningless. |
239 // TODO(mpearson): revise match positions appropriately. | 240 // TODO(mpearson): revise match positions appropriately. |
240 match.title_match_positions.swap(title_matches); | 241 match.title_match_positions.swap(title_matches); |
241 } | 242 } |
242 if (index_urls_) | 243 if (index_urls_) { |
| 244 // Now that we're done processing this entry, correct the offsets of the |
| 245 // matches in |url_matches| so they point to offsets in the original URL |
| 246 // spec, not the cleaned-up URL string that we used for matching. |
| 247 std::vector<size_t> offsets = |
| 248 BookmarkMatch::OffsetsFromMatchPositions(url_matches); |
| 249 base::OffsetAdjuster::UnadjustOffsets(adjustments, &offsets); |
| 250 url_matches = |
| 251 BookmarkMatch::ReplaceOffsetsInMatchPositions(url_matches, offsets); |
243 match.url_match_positions.swap(url_matches); | 252 match.url_match_positions.swap(url_matches); |
| 253 } |
244 match.node = node; | 254 match.node = node; |
245 results->push_back(match); | 255 results->push_back(match); |
246 } | 256 } |
247 | 257 |
248 bool BookmarkIndex::GetBookmarksMatchingTerm(const base::string16& term, | 258 bool BookmarkIndex::GetBookmarksMatchingTerm(const base::string16& term, |
249 bool first_term, | 259 bool first_term, |
250 Matches* matches) { | 260 Matches* matches) { |
251 Index::const_iterator i = index_.lower_bound(term); | 261 Index::const_iterator i = index_.lower_bound(term); |
252 if (i == index_.end()) | 262 if (i == index_.end()) |
253 return false; | 263 return false; |
(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
347 Index::iterator i = index_.find(term); | 357 Index::iterator i = index_.find(term); |
348 if (i == index_.end()) { | 358 if (i == index_.end()) { |
349 // We can get here if the node has the same term more than once. For | 359 // We can get here if the node has the same term more than once. For |
350 // example, a bookmark with the title 'foo foo' would end up here. | 360 // example, a bookmark with the title 'foo foo' would end up here. |
351 return; | 361 return; |
352 } | 362 } |
353 i->second.erase(node); | 363 i->second.erase(node); |
354 if (i->second.empty()) | 364 if (i->second.empty()) |
355 index_.erase(i); | 365 index_.erase(i); |
356 } | 366 } |
OLD | NEW |