| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/browser/bookmarks/bookmark_index.h" | 5 #include "chrome/browser/bookmarks/bookmark_index.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <functional> | 8 #include <functional> |
| 9 #include <iterator> | 9 #include <iterator> |
| 10 #include <list> | 10 #include <list> |
| 11 | 11 |
| 12 #include "base/i18n/case_conversion.h" | 12 #include "base/i18n/case_conversion.h" |
| 13 #include "base/logging.h" | 13 #include "base/logging.h" |
| 14 #include "base/strings/string16.h" | 14 #include "base/strings/string16.h" |
| 15 #include "base/strings/utf_offset_string_conversions.h" |
| 15 #include "chrome/browser/bookmarks/bookmark_utils.h" | 16 #include "chrome/browser/bookmarks/bookmark_utils.h" |
| 16 #include "components/bookmarks/core/browser/bookmark_client.h" | 17 #include "components/bookmarks/core/browser/bookmark_client.h" |
| 17 #include "components/bookmarks/core/browser/bookmark_match.h" | 18 #include "components/bookmarks/core/browser/bookmark_match.h" |
| 18 #include "components/bookmarks/core/browser/bookmark_node.h" | 19 #include "components/bookmarks/core/browser/bookmark_node.h" |
| 19 #include "components/query_parser/query_parser.h" | 20 #include "components/query_parser/query_parser.h" |
| 20 #include "components/query_parser/snippet.h" | 21 #include "components/query_parser/snippet.h" |
| 21 #include "third_party/icu/source/common/unicode/normalizer2.h" | 22 #include "third_party/icu/source/common/unicode/normalizer2.h" |
| 22 | 23 |
| 23 typedef BookmarkClient::NodeTypedCountPair NodeTypedCountPair; | 24 typedef BookmarkClient::NodeTypedCountPair NodeTypedCountPair; |
| 24 typedef BookmarkClient::NodeTypedCountPairs NodeTypedCountPairs; | 25 typedef BookmarkClient::NodeTypedCountPairs NodeTypedCountPairs; |
| (...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 107 } | 108 } |
| 108 | 109 |
| 109 void BookmarkIndex::Add(const BookmarkNode* node) { | 110 void BookmarkIndex::Add(const BookmarkNode* node) { |
| 110 if (!node->is_url()) | 111 if (!node->is_url()) |
| 111 return; | 112 return; |
| 112 std::vector<base::string16> terms = | 113 std::vector<base::string16> terms = |
| 113 ExtractQueryWords(Normalize(node->GetTitle())); | 114 ExtractQueryWords(Normalize(node->GetTitle())); |
| 114 for (size_t i = 0; i < terms.size(); ++i) | 115 for (size_t i = 0; i < terms.size(); ++i) |
| 115 RegisterNode(terms[i], node); | 116 RegisterNode(terms[i], node); |
| 116 if (index_urls_) { | 117 if (index_urls_) { |
| 117 terms = ExtractQueryWords( | 118 terms = ExtractQueryWords(bookmark_utils::CleanUpUrlForMatching( |
| 118 bookmark_utils::CleanUpUrlForMatching(node->url(), languages_)); | 119 node->url(), languages_, NULL)); |
| 119 for (size_t i = 0; i < terms.size(); ++i) | 120 for (size_t i = 0; i < terms.size(); ++i) |
| 120 RegisterNode(terms[i], node); | 121 RegisterNode(terms[i], node); |
| 121 } | 122 } |
| 122 } | 123 } |
| 123 | 124 |
| 124 void BookmarkIndex::Remove(const BookmarkNode* node) { | 125 void BookmarkIndex::Remove(const BookmarkNode* node) { |
| 125 if (!node->is_url()) | 126 if (!node->is_url()) |
| 126 return; | 127 return; |
| 127 | 128 |
| 128 std::vector<base::string16> terms = | 129 std::vector<base::string16> terms = |
| 129 ExtractQueryWords(Normalize(node->GetTitle())); | 130 ExtractQueryWords(Normalize(node->GetTitle())); |
| 130 for (size_t i = 0; i < terms.size(); ++i) | 131 for (size_t i = 0; i < terms.size(); ++i) |
| 131 UnregisterNode(terms[i], node); | 132 UnregisterNode(terms[i], node); |
| 132 if (index_urls_) { | 133 if (index_urls_) { |
| 133 terms = ExtractQueryWords( | 134 terms = ExtractQueryWords(bookmark_utils::CleanUpUrlForMatching( |
| 134 bookmark_utils::CleanUpUrlForMatching(node->url(), languages_)); | 135 node->url(), languages_, NULL)); |
| 135 for (size_t i = 0; i < terms.size(); ++i) | 136 for (size_t i = 0; i < terms.size(); ++i) |
| 136 UnregisterNode(terms[i], node); | 137 UnregisterNode(terms[i], node); |
| 137 } | 138 } |
| 138 } | 139 } |
| 139 | 140 |
| 140 void BookmarkIndex::GetBookmarksMatching(const base::string16& input_query, | 141 void BookmarkIndex::GetBookmarksMatching(const base::string16& input_query, |
| 141 size_t max_count, | 142 size_t max_count, |
| 142 std::vector<BookmarkMatch>* results) { | 143 std::vector<BookmarkMatch>* results) { |
| 143 const base::string16 query = Normalize(input_query); | 144 const base::string16 query = Normalize(input_query); |
| 144 std::vector<base::string16> terms = ExtractQueryWords(query); | 145 std::vector<base::string16> terms = ExtractQueryWords(query); |
| (...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 208 std::vector<BookmarkMatch>* results) { | 209 std::vector<BookmarkMatch>* results) { |
| 209 // Check that the result matches the query. The previous search | 210 // Check that the result matches the query. The previous search |
| 210 // was a simple per-word search, while the more complex matching | 211 // was a simple per-word search, while the more complex matching |
| 211 // of QueryParser may filter it out. For example, the query | 212 // of QueryParser may filter it out. For example, the query |
| 212 // ["thi"] will match the bookmark titled [Thinking], but since | 213 // ["thi"] will match the bookmark titled [Thinking], but since |
| 213 // ["thi"] is quoted we don't want to do a prefix match. | 214 // ["thi"] is quoted we don't want to do a prefix match. |
| 214 query_parser::QueryWordVector title_words, url_words; | 215 query_parser::QueryWordVector title_words, url_words; |
| 215 const base::string16 lower_title = | 216 const base::string16 lower_title = |
| 216 base::i18n::ToLower(Normalize(node->GetTitle())); | 217 base::i18n::ToLower(Normalize(node->GetTitle())); |
| 217 parser->ExtractQueryWords(lower_title, &title_words); | 218 parser->ExtractQueryWords(lower_title, &title_words); |
| 219 base::OffsetAdjuster::Adjustments adjustments; |
| 218 if (index_urls_) { | 220 if (index_urls_) { |
| 219 parser->ExtractQueryWords( | 221 parser->ExtractQueryWords(bookmark_utils::CleanUpUrlForMatching( |
| 220 bookmark_utils::CleanUpUrlForMatching(node->url(), languages_), | 222 node->url(), languages_, &adjustments), &url_words); |
| 221 &url_words); | |
| 222 } | 223 } |
| 223 query_parser::Snippet::MatchPositions title_matches, url_matches; | 224 query_parser::Snippet::MatchPositions title_matches, url_matches; |
| 224 for (size_t i = 0; i < query_nodes.size(); ++i) { | 225 for (size_t i = 0; i < query_nodes.size(); ++i) { |
| 225 const bool has_title_matches = | 226 const bool has_title_matches = |
| 226 query_nodes[i]->HasMatchIn(title_words, &title_matches); | 227 query_nodes[i]->HasMatchIn(title_words, &title_matches); |
| 227 const bool has_url_matches = index_urls_ && | 228 const bool has_url_matches = index_urls_ && |
| 228 query_nodes[i]->HasMatchIn(url_words, &url_matches); | 229 query_nodes[i]->HasMatchIn(url_words, &url_matches); |
| 229 if (!has_title_matches && !has_url_matches) | 230 if (!has_title_matches && !has_url_matches) |
| 230 return; | 231 return; |
| 231 query_parser::QueryParser::SortAndCoalesceMatchPositions(&title_matches); | 232 query_parser::QueryParser::SortAndCoalesceMatchPositions(&title_matches); |
| 232 if (index_urls_) | 233 if (index_urls_) |
| 233 query_parser::QueryParser::SortAndCoalesceMatchPositions(&url_matches); | 234 query_parser::QueryParser::SortAndCoalesceMatchPositions(&url_matches); |
| 234 } | 235 } |
| 235 BookmarkMatch match; | 236 BookmarkMatch match; |
| 236 if (lower_title.length() == node->GetTitle().length()) { | 237 if (lower_title.length() == node->GetTitle().length()) { |
| 237 // Only use title matches if the lowercase string is the same length | 238 // Only use title matches if the lowercase string is the same length |
| 238 // as the original string, otherwise the matches are meaningless. | 239 // as the original string, otherwise the matches are meaningless. |
| 239 // TODO(mpearson): revise match positions appropriately. | 240 // TODO(mpearson): revise match positions appropriately. |
| 240 match.title_match_positions.swap(title_matches); | 241 match.title_match_positions.swap(title_matches); |
| 241 } | 242 } |
| 242 if (index_urls_) | 243 if (index_urls_) { |
| 244 // Now that we're done processing this entry, correct the offsets of the |
| 245 // matches in |url_matches| so they point to offsets in the original URL |
| 246 // spec, not the cleaned-up URL string that we used for matching. |
| 247 std::vector<size_t> offsets = |
| 248 BookmarkMatch::OffsetsFromMatchPositions(url_matches); |
| 249 base::OffsetAdjuster::UnadjustOffsets(adjustments, &offsets); |
| 250 url_matches = |
| 251 BookmarkMatch::ReplaceOffsetsInMatchPositions(url_matches, offsets); |
| 243 match.url_match_positions.swap(url_matches); | 252 match.url_match_positions.swap(url_matches); |
| 253 } |
| 244 match.node = node; | 254 match.node = node; |
| 245 results->push_back(match); | 255 results->push_back(match); |
| 246 } | 256 } |
| 247 | 257 |
| 248 bool BookmarkIndex::GetBookmarksMatchingTerm(const base::string16& term, | 258 bool BookmarkIndex::GetBookmarksMatchingTerm(const base::string16& term, |
| 249 bool first_term, | 259 bool first_term, |
| 250 Matches* matches) { | 260 Matches* matches) { |
| 251 Index::const_iterator i = index_.lower_bound(term); | 261 Index::const_iterator i = index_.lower_bound(term); |
| 252 if (i == index_.end()) | 262 if (i == index_.end()) |
| 253 return false; | 263 return false; |
| (...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 347 Index::iterator i = index_.find(term); | 357 Index::iterator i = index_.find(term); |
| 348 if (i == index_.end()) { | 358 if (i == index_.end()) { |
| 349 // We can get here if the node has the same term more than once. For | 359 // We can get here if the node has the same term more than once. For |
| 350 // example, a bookmark with the title 'foo foo' would end up here. | 360 // example, a bookmark with the title 'foo foo' would end up here. |
| 351 return; | 361 return; |
| 352 } | 362 } |
| 353 i->second.erase(node); | 363 i->second.erase(node); |
| 354 if (i->second.empty()) | 364 if (i->second.empty()) |
| 355 index_.erase(i); | 365 index_.erase(i); |
| 356 } | 366 } |
| OLD | NEW |