OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/bookmarks/bookmark_index.h" | 5 #include "chrome/browser/bookmarks/bookmark_index.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <iterator> | 8 #include <iterator> |
9 #include <list> | 9 #include <list> |
10 | 10 |
11 #include "base/i18n/case_conversion.h" | 11 #include "base/i18n/case_conversion.h" |
12 #include "base/strings/string16.h" | 12 #include "base/strings/string16.h" |
| 13 #include "base/strings/utf_offset_string_conversions.h" |
13 #include "chrome/browser/bookmarks/bookmark_model.h" | 14 #include "chrome/browser/bookmarks/bookmark_model.h" |
14 #include "chrome/browser/bookmarks/bookmark_utils.h" | 15 #include "chrome/browser/bookmarks/bookmark_utils.h" |
15 #include "chrome/browser/history/history_service.h" | 16 #include "chrome/browser/history/history_service.h" |
16 #include "chrome/browser/history/history_service_factory.h" | 17 #include "chrome/browser/history/history_service_factory.h" |
17 #include "chrome/browser/history/url_database.h" | 18 #include "chrome/browser/history/url_database.h" |
18 #include "components/bookmarks/core/browser/bookmark_match.h" | 19 #include "components/bookmarks/core/browser/bookmark_match.h" |
19 #include "components/query_parser/query_parser.h" | 20 #include "components/query_parser/query_parser.h" |
20 #include "components/query_parser/snippet.h" | 21 #include "components/query_parser/snippet.h" |
21 #include "third_party/icu/source/common/unicode/normalizer2.h" | 22 #include "third_party/icu/source/common/unicode/normalizer2.h" |
22 | 23 |
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
85 } | 86 } |
86 | 87 |
87 void BookmarkIndex::Add(const BookmarkNode* node) { | 88 void BookmarkIndex::Add(const BookmarkNode* node) { |
88 if (!node->is_url()) | 89 if (!node->is_url()) |
89 return; | 90 return; |
90 std::vector<base::string16> terms = | 91 std::vector<base::string16> terms = |
91 ExtractQueryWords(Normalize(node->GetTitle())); | 92 ExtractQueryWords(Normalize(node->GetTitle())); |
92 for (size_t i = 0; i < terms.size(); ++i) | 93 for (size_t i = 0; i < terms.size(); ++i) |
93 RegisterNode(terms[i], node); | 94 RegisterNode(terms[i], node); |
94 if (index_urls_) { | 95 if (index_urls_) { |
95 terms = ExtractQueryWords( | 96 terms = ExtractQueryWords(bookmark_utils::CleanUpUrlForMatching( |
96 bookmark_utils::CleanUpUrlForMatching(node->url(), languages_)); | 97 node->url(), languages_, NULL)); |
97 for (size_t i = 0; i < terms.size(); ++i) | 98 for (size_t i = 0; i < terms.size(); ++i) |
98 RegisterNode(terms[i], node); | 99 RegisterNode(terms[i], node); |
99 } | 100 } |
100 } | 101 } |
101 | 102 |
102 void BookmarkIndex::Remove(const BookmarkNode* node) { | 103 void BookmarkIndex::Remove(const BookmarkNode* node) { |
103 if (!node->is_url()) | 104 if (!node->is_url()) |
104 return; | 105 return; |
105 | 106 |
106 std::vector<base::string16> terms = | 107 std::vector<base::string16> terms = |
107 ExtractQueryWords(Normalize(node->GetTitle())); | 108 ExtractQueryWords(Normalize(node->GetTitle())); |
108 for (size_t i = 0; i < terms.size(); ++i) | 109 for (size_t i = 0; i < terms.size(); ++i) |
109 UnregisterNode(terms[i], node); | 110 UnregisterNode(terms[i], node); |
110 if (index_urls_) { | 111 if (index_urls_) { |
111 terms = ExtractQueryWords( | 112 terms = ExtractQueryWords(bookmark_utils::CleanUpUrlForMatching( |
112 bookmark_utils::CleanUpUrlForMatching(node->url(), languages_)); | 113 node->url(), languages_, NULL)); |
113 for (size_t i = 0; i < terms.size(); ++i) | 114 for (size_t i = 0; i < terms.size(); ++i) |
114 UnregisterNode(terms[i], node); | 115 UnregisterNode(terms[i], node); |
115 } | 116 } |
116 } | 117 } |
117 | 118 |
118 void BookmarkIndex::GetBookmarksMatching(const base::string16& input_query, | 119 void BookmarkIndex::GetBookmarksMatching(const base::string16& input_query, |
119 size_t max_count, | 120 size_t max_count, |
120 std::vector<BookmarkMatch>* results) { | 121 std::vector<BookmarkMatch>* results) { |
121 const base::string16 query = Normalize(input_query); | 122 const base::string16 query = Normalize(input_query); |
122 std::vector<base::string16> terms = ExtractQueryWords(query); | 123 std::vector<base::string16> terms = ExtractQueryWords(query); |
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
198 std::vector<BookmarkMatch>* results) { | 199 std::vector<BookmarkMatch>* results) { |
199 // Check that the result matches the query. The previous search | 200 // Check that the result matches the query. The previous search |
200 // was a simple per-word search, while the more complex matching | 201 // was a simple per-word search, while the more complex matching |
201 // of QueryParser may filter it out. For example, the query | 202 // of QueryParser may filter it out. For example, the query |
202 // ["thi"] will match the bookmark titled [Thinking], but since | 203 // ["thi"] will match the bookmark titled [Thinking], but since |
203 // ["thi"] is quoted we don't want to do a prefix match. | 204 // ["thi"] is quoted we don't want to do a prefix match. |
204 query_parser::QueryWordVector title_words, url_words; | 205 query_parser::QueryWordVector title_words, url_words; |
205 const base::string16 lower_title = | 206 const base::string16 lower_title = |
206 base::i18n::ToLower(Normalize(node->GetTitle())); | 207 base::i18n::ToLower(Normalize(node->GetTitle())); |
207 parser->ExtractQueryWords(lower_title, &title_words); | 208 parser->ExtractQueryWords(lower_title, &title_words); |
| 209 base::OffsetAdjuster::Adjustments adjustments; |
208 if (index_urls_) { | 210 if (index_urls_) { |
209 parser->ExtractQueryWords( | 211 parser->ExtractQueryWords(bookmark_utils::CleanUpUrlForMatching( |
210 bookmark_utils::CleanUpUrlForMatching(node->url(), languages_), | 212 node->url(), languages_, &adjustments), &url_words); |
211 &url_words); | |
212 } | 213 } |
213 query_parser::Snippet::MatchPositions title_matches, url_matches; | 214 query_parser::Snippet::MatchPositions title_matches, url_matches; |
214 for (size_t i = 0; i < query_nodes.size(); ++i) { | 215 for (size_t i = 0; i < query_nodes.size(); ++i) { |
215 const bool has_title_matches = | 216 const bool has_title_matches = |
216 query_nodes[i]->HasMatchIn(title_words, &title_matches); | 217 query_nodes[i]->HasMatchIn(title_words, &title_matches); |
217 const bool has_url_matches = index_urls_ && | 218 const bool has_url_matches = index_urls_ && |
218 query_nodes[i]->HasMatchIn(url_words, &url_matches); | 219 query_nodes[i]->HasMatchIn(url_words, &url_matches); |
219 if (!has_title_matches && !has_url_matches) | 220 if (!has_title_matches && !has_url_matches) |
220 return; | 221 return; |
221 query_parser::QueryParser::SortAndCoalesceMatchPositions(&title_matches); | 222 query_parser::QueryParser::SortAndCoalesceMatchPositions(&title_matches); |
222 if (index_urls_) | 223 if (index_urls_) |
223 query_parser::QueryParser::SortAndCoalesceMatchPositions(&url_matches); | 224 query_parser::QueryParser::SortAndCoalesceMatchPositions(&url_matches); |
224 } | 225 } |
225 BookmarkMatch match; | 226 BookmarkMatch match; |
226 if (lower_title.length() == node->GetTitle().length()) { | 227 if (lower_title.length() == node->GetTitle().length()) { |
227 // Only use title matches if the lowercase string is the same length | 228 // Only use title matches if the lowercase string is the same length |
228 // as the original string, otherwise the matches are meaningless. | 229 // as the original string, otherwise the matches are meaningless. |
229 // TODO(mpearson): revise match positions appropriately. | 230 // TODO(mpearson): revise match positions appropriately. |
230 match.title_match_positions.swap(title_matches); | 231 match.title_match_positions.swap(title_matches); |
231 } | 232 } |
232 if (index_urls_) | 233 if (index_urls_) { |
| 234 // Now that we're done processing this entry, correct the offsets of the |
| 235 // matches in |url_matches| so they point to offsets in the original URL |
| 236 // spec, not the cleaned-up URL string that we used for matching. |
| 237 std::vector<size_t> offsets = |
| 238 BookmarkMatch::OffsetsFromMatchPositions(url_matches); |
| 239 base::OffsetAdjuster::UnadjustOffsets(adjustments, &offsets); |
| 240 url_matches = |
| 241 BookmarkMatch::ReplaceOffsetsInMatchPositions(url_matches, offsets); |
233 match.url_match_positions.swap(url_matches); | 242 match.url_match_positions.swap(url_matches); |
| 243 } |
234 match.node = node; | 244 match.node = node; |
235 results->push_back(match); | 245 results->push_back(match); |
236 } | 246 } |
237 | 247 |
238 bool BookmarkIndex::GetBookmarksMatchingTerm(const base::string16& term, | 248 bool BookmarkIndex::GetBookmarksMatchingTerm(const base::string16& term, |
239 bool first_term, | 249 bool first_term, |
240 Matches* matches) { | 250 Matches* matches) { |
241 Index::const_iterator i = index_.lower_bound(term); | 251 Index::const_iterator i = index_.lower_bound(term); |
242 if (i == index_.end()) | 252 if (i == index_.end()) |
243 return false; | 253 return false; |
(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
337 Index::iterator i = index_.find(term); | 347 Index::iterator i = index_.find(term); |
338 if (i == index_.end()) { | 348 if (i == index_.end()) { |
339 // We can get here if the node has the same term more than once. For | 349 // We can get here if the node has the same term more than once. For |
340 // example, a bookmark with the title 'foo foo' would end up here. | 350 // example, a bookmark with the title 'foo foo' would end up here. |
341 return; | 351 return; |
342 } | 352 } |
343 i->second.erase(node); | 353 i->second.erase(node); |
344 if (i->second.empty()) | 354 if (i->second.empty()) |
345 index_.erase(i); | 355 index_.erase(i); |
346 } | 356 } |
OLD | NEW |