Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/browser/bookmarks/bookmark_index.h" | 5 #include "chrome/browser/bookmarks/bookmark_index.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <iterator> | 8 #include <iterator> |
| 9 #include <list> | 9 #include <list> |
| 10 | 10 |
| 11 #include "base/i18n/case_conversion.h" | 11 #include "base/i18n/case_conversion.h" |
| 12 #include "base/strings/string16.h" | 12 #include "base/strings/string16.h" |
| 13 #include "chrome/browser/bookmarks/bookmark_model.h" | 13 #include "chrome/browser/bookmarks/bookmark_model.h" |
| 14 #include "chrome/browser/bookmarks/bookmark_utils.h" | |
| 14 #include "chrome/browser/history/history_service.h" | 15 #include "chrome/browser/history/history_service.h" |
| 15 #include "chrome/browser/history/history_service_factory.h" | 16 #include "chrome/browser/history/history_service_factory.h" |
| 16 #include "chrome/browser/history/url_database.h" | 17 #include "chrome/browser/history/url_database.h" |
| 17 #include "components/bookmarks/core/browser/bookmark_title_match.h" | 18 #include "chrome/browser/omnibox/omnibox_field_trial.h" |
| 19 #include "components/bookmarks/core/browser/bookmark_match.h" | |
| 18 #include "components/query_parser/query_parser.h" | 20 #include "components/query_parser/query_parser.h" |
| 21 #include "components/query_parser/snippet.h" | |
| 19 #include "third_party/icu/source/common/unicode/normalizer2.h" | 22 #include "third_party/icu/source/common/unicode/normalizer2.h" |
| 20 | 23 |
| 21 namespace { | 24 namespace { |
| 22 | 25 |
| 23 // Returns a normalized version of the UTF16 string |text|. If it fails to | 26 // Returns a normalized version of the UTF16 string |text|. If it fails to |
| 24 // normalize the string, returns |text| itself as a best-effort. | 27 // normalize the string, returns |text| itself as a best-effort. |
| 25 base::string16 Normalize(const base::string16& text) { | 28 base::string16 Normalize(const base::string16& text) { |
| 26 UErrorCode status = U_ZERO_ERROR; | 29 UErrorCode status = U_ZERO_ERROR; |
| 27 const icu::Normalizer2* normalizer2 = | 30 const icu::Normalizer2* normalizer2 = |
| 28 icu::Normalizer2::getInstance(NULL, "nfkc", UNORM2_COMPOSE, status); | 31 icu::Normalizer2::getInstance(NULL, "nfkc", UNORM2_COMPOSE, status); |
| (...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 64 | 67 |
| 65 BookmarkIndex::NodeSet::const_iterator | 68 BookmarkIndex::NodeSet::const_iterator |
| 66 BookmarkIndex::Match::nodes_begin() const { | 69 BookmarkIndex::Match::nodes_begin() const { |
| 67 return nodes.empty() ? terms.front()->second.begin() : nodes.begin(); | 70 return nodes.empty() ? terms.front()->second.begin() : nodes.begin(); |
| 68 } | 71 } |
| 69 | 72 |
| 70 BookmarkIndex::NodeSet::const_iterator BookmarkIndex::Match::nodes_end() const { | 73 BookmarkIndex::NodeSet::const_iterator BookmarkIndex::Match::nodes_end() const { |
| 71 return nodes.empty() ? terms.front()->second.end() : nodes.end(); | 74 return nodes.empty() ? terms.front()->second.end() : nodes.end(); |
| 72 } | 75 } |
| 73 | 76 |
| 74 BookmarkIndex::BookmarkIndex(content::BrowserContext* browser_context) | 77 BookmarkIndex::BookmarkIndex(content::BrowserContext* browser_context, |
| 75 : browser_context_(browser_context) { | 78 const std::string& languages) |
| 79 : index_urls_(OmniboxFieldTrial::BookmarksIndexURLsValue()), | |
| 80 browser_context_(browser_context), | |
| 81 languages_(languages) { | |
| 76 } | 82 } |
| 77 | 83 |
| 78 BookmarkIndex::~BookmarkIndex() { | 84 BookmarkIndex::~BookmarkIndex() { |
| 79 } | 85 } |
| 80 | 86 |
| 81 void BookmarkIndex::Add(const BookmarkNode* node) { | 87 void BookmarkIndex::Add(const BookmarkNode* node) { |
| 82 if (!node->is_url()) | 88 if (!node->is_url()) |
| 83 return; | 89 return; |
| 84 std::vector<base::string16> terms = | 90 std::vector<base::string16> terms = |
| 85 ExtractQueryWords(Normalize(node->GetTitle())); | 91 ExtractQueryWords(Normalize(node->GetTitle())); |
| 86 for (size_t i = 0; i < terms.size(); ++i) | 92 for (size_t i = 0; i < terms.size(); ++i) |
| 87 RegisterNode(terms[i], node); | 93 RegisterNode(terms[i], node); |
| 94 if (index_urls_) { | |
| 95 terms = ExtractQueryWords( | |
| 96 bookmark_utils::CleanUpUrlForMatching(node->url(), languages_)); | |
| 97 for (size_t i = 0; i < terms.size(); ++i) | |
| 98 RegisterNode(terms[i], node); | |
| 99 } | |
| 88 } | 100 } |
| 89 | 101 |
| 90 void BookmarkIndex::Remove(const BookmarkNode* node) { | 102 void BookmarkIndex::Remove(const BookmarkNode* node) { |
| 91 if (!node->is_url()) | 103 if (!node->is_url()) |
| 92 return; | 104 return; |
| 93 | 105 |
| 94 std::vector<base::string16> terms = | 106 std::vector<base::string16> terms = |
| 95 ExtractQueryWords(Normalize(node->GetTitle())); | 107 ExtractQueryWords(Normalize(node->GetTitle())); |
| 96 for (size_t i = 0; i < terms.size(); ++i) | 108 for (size_t i = 0; i < terms.size(); ++i) |
| 97 UnregisterNode(terms[i], node); | 109 UnregisterNode(terms[i], node); |
| 110 if (index_urls_) { | |
| 111 terms = ExtractQueryWords( | |
| 112 bookmark_utils::CleanUpUrlForMatching(node->url(), languages_)); | |
| 113 for (size_t i = 0; i < terms.size(); ++i) | |
| 114 UnregisterNode(terms[i], node); | |
| 115 } | |
| 98 } | 116 } |
| 99 | 117 |
| 100 void BookmarkIndex::GetBookmarksWithTitlesMatching( | 118 void BookmarkIndex::GetBookmarksMatching(const base::string16& input_query, |
| 101 const base::string16& input_query, | 119 size_t max_count, |
| 102 size_t max_count, | 120 std::vector<BookmarkMatch>* results) { |
| 103 std::vector<BookmarkTitleMatch>* results) { | |
| 104 const base::string16 query = Normalize(input_query); | 121 const base::string16 query = Normalize(input_query); |
| 105 std::vector<base::string16> terms = ExtractQueryWords(query); | 122 std::vector<base::string16> terms = ExtractQueryWords(query); |
| 106 if (terms.empty()) | 123 if (terms.empty()) |
| 107 return; | 124 return; |
| 108 | 125 |
| 109 Matches matches; | 126 Matches matches; |
| 110 for (size_t i = 0; i < terms.size(); ++i) { | 127 for (size_t i = 0; i < terms.size(); ++i) { |
| 111 if (!GetBookmarksWithTitleMatchingTerm(terms[i], i == 0, &matches)) | 128 if (!GetBookmarksMatchingTerm(terms[i], i == 0, &matches)) |
| 112 return; | 129 return; |
| 113 } | 130 } |
| 114 | 131 |
| 115 NodeTypedCountPairs node_typed_counts; | 132 NodeTypedCountPairs node_typed_counts; |
| 116 SortMatches(matches, &node_typed_counts); | 133 SortMatches(matches, &node_typed_counts); |
| 117 | 134 |
| 118 // We use a QueryParser to fill in match positions for us. It's not the most | 135 // We use a QueryParser to fill in match positions for us. It's not the most |
| 119 // efficient way to go about this, but by the time we get here we know what | 136 // efficient way to go about this, but by the time we get here we know what |
| 120 // matches and so this shouldn't be performance critical. | 137 // matches and so this shouldn't be performance critical. |
| 121 query_parser::QueryParser parser; | 138 query_parser::QueryParser parser; |
| (...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 171 | 188 |
| 172 NodeTypedCountPair pair(*i, typed_count); | 189 NodeTypedCountPair pair(*i, typed_count); |
| 173 node_typed_counts->push_back(pair); | 190 node_typed_counts->push_back(pair); |
| 174 } | 191 } |
| 175 } | 192 } |
| 176 | 193 |
| 177 void BookmarkIndex::AddMatchToResults( | 194 void BookmarkIndex::AddMatchToResults( |
| 178 const BookmarkNode* node, | 195 const BookmarkNode* node, |
| 179 query_parser::QueryParser* parser, | 196 query_parser::QueryParser* parser, |
| 180 const std::vector<query_parser::QueryNode*>& query_nodes, | 197 const std::vector<query_parser::QueryNode*>& query_nodes, |
| 181 std::vector<BookmarkTitleMatch>* results) { | 198 std::vector<BookmarkMatch>* results) { |
| 182 BookmarkTitleMatch title_match; | |
| 183 // Check that the result matches the query. The previous search | 199 // Check that the result matches the query. The previous search |
| 184 // was a simple per-word search, while the more complex matching | 200 // was a simple per-word search, while the more complex matching |
| 185 // of QueryParser may filter it out. For example, the query | 201 // of QueryParser may filter it out. For example, the query |
| 186 // ["thi"] will match the bookmark titled [Thinking], but since | 202 // ["thi"] will match the bookmark titled [Thinking], but since |
| 187 // ["thi"] is quoted we don't want to do a prefix match. | 203 // ["thi"] is quoted we don't want to do a prefix match. |
| 188 if (parser->DoesQueryMatch(Normalize(node->GetTitle()), query_nodes, | 204 std::vector<query_parser::QueryWord> title_words, url_words; |
|
Peter Kasting
2014/04/17 22:30:37
Nit: Again, the more we can use typedefs in place
Mark P
2014/04/18 14:52:43
Done. This mostly helped throughout query_parser.
| |
| 189 &(title_match.match_positions))) { | 205 const base::string16 lower_title = |
| 190 title_match.node = node; | 206 base::i18n::ToLower(Normalize(node->GetTitle())); |
| 191 results->push_back(title_match); | 207 parser->ExtractQueryWords(lower_title, &title_words); |
| 208 if (index_urls_) { | |
| 209 parser->ExtractQueryWords( | |
| 210 bookmark_utils::CleanUpUrlForMatching(node->url(), languages_), | |
| 211 &url_words); | |
| 192 } | 212 } |
| 213 query_parser::Snippet::MatchPositions title_matches, url_matches; | |
| 214 for (size_t i = 0; i < query_nodes.size(); ++i) { | |
| 215 const bool has_title_matches = | |
| 216 query_nodes[i]->HasMatchIn(title_words, &title_matches); | |
| 217 const bool has_url_matches = index_urls_ && | |
| 218 query_nodes[i]->HasMatchIn(url_words, &url_matches); | |
| 219 if (!has_title_matches && !has_url_matches) | |
| 220 return; | |
| 221 query_parser::QueryParser::SortAndCoalesceMatchPositions(&title_matches); | |
| 222 if (index_urls_) | |
| 223 query_parser::QueryParser::SortAndCoalesceMatchPositions(&url_matches); | |
| 224 } | |
| 225 BookmarkMatch match; | |
| 226 if (lower_title.length() == node->GetTitle().length()) { | |
| 227 // Only use title matches if the lowercase string is the same length | |
| 228 // as the original string, otherwise the matches are meaningless. | |
| 229 // TODO(mpearson): revise match positions appropriately. | |
| 230 match.title_match_positions.swap(title_matches); | |
| 231 } | |
| 232 if (index_urls_) | |
| 233 match.url_match_positions.swap(url_matches); | |
| 234 match.node = node; | |
| 235 results->push_back(match); | |
| 193 } | 236 } |
| 194 | 237 |
| 195 bool BookmarkIndex::GetBookmarksWithTitleMatchingTerm(const base::string16& term , | 238 bool BookmarkIndex::GetBookmarksMatchingTerm(const base::string16& term, |
| 196 bool first_term, | 239 bool first_term, |
| 197 Matches* matches) { | 240 Matches* matches) { |
| 198 Index::const_iterator i = index_.lower_bound(term); | 241 Index::const_iterator i = index_.lower_bound(term); |
| 199 if (i == index_.end()) | 242 if (i == index_.end()) |
| 200 return false; | 243 return false; |
| 201 | 244 |
| 202 if (!query_parser::QueryParser::IsWordLongEnoughForPrefixSearch(term)) { | 245 if (!query_parser::QueryParser::IsWordLongEnoughForPrefixSearch(term)) { |
| 203 // Term is too short for prefix match, compare using exact match. | 246 // Term is too short for prefix match, compare using exact match. |
| 204 if (i->first != term) | 247 if (i->first != term) |
| 205 return false; // No bookmarks with this term. | 248 return false; // No bookmarks with this term. |
| (...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 294 Index::iterator i = index_.find(term); | 337 Index::iterator i = index_.find(term); |
| 295 if (i == index_.end()) { | 338 if (i == index_.end()) { |
| 296 // We can get here if the node has the same term more than once. For | 339 // We can get here if the node has the same term more than once. For |
| 297 // example, a bookmark with the title 'foo foo' would end up here. | 340 // example, a bookmark with the title 'foo foo' would end up here. |
| 298 return; | 341 return; |
| 299 } | 342 } |
| 300 i->second.erase(node); | 343 i->second.erase(node); |
| 301 if (i->second.empty()) | 344 if (i->second.empty()) |
| 302 index_.erase(i); | 345 index_.erase(i); |
| 303 } | 346 } |
| OLD | NEW |