| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/browser/bookmarks/bookmark_index.h" | 5 #include "chrome/browser/bookmarks/bookmark_index.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <iterator> | 8 #include <iterator> |
| 9 #include <list> | 9 #include <list> |
| 10 | 10 |
| 11 #include "base/i18n/case_conversion.h" | 11 #include "base/i18n/case_conversion.h" |
| 12 #include "base/strings/string16.h" | 12 #include "base/strings/string16.h" |
| 13 #include "chrome/browser/bookmarks/bookmark_model.h" | 13 #include "components/bookmarks/core/browser/bookmark_client.h" |
| 14 #include "chrome/browser/history/history_service.h" | 14 #include "components/bookmarks/core/browser/bookmark_node.h" |
| 15 #include "chrome/browser/history/history_service_factory.h" | |
| 16 #include "chrome/browser/history/url_database.h" | |
| 17 #include "components/bookmarks/core/browser/bookmark_title_match.h" | 15 #include "components/bookmarks/core/browser/bookmark_title_match.h" |
| 18 #include "components/query_parser/query_parser.h" | 16 #include "components/query_parser/query_parser.h" |
| 19 #include "third_party/icu/source/common/unicode/normalizer2.h" | 17 #include "third_party/icu/source/common/unicode/normalizer2.h" |
| 20 | 18 |
| 21 namespace { | 19 namespace { |
| 22 | 20 |
| 23 // Returns a normalized version of the UTF16 string |text|. If it fails to | 21 // Returns a normalized version of the UTF16 string |text|. If it fails to |
| 24 // normalize the string, returns |text| itself as a best-effort. | 22 // normalize the string, returns |text| itself as a best-effort. |
| 25 base::string16 Normalize(const base::string16& text) { | 23 base::string16 Normalize(const base::string16& text) { |
| 26 UErrorCode status = U_ZERO_ERROR; | 24 UErrorCode status = U_ZERO_ERROR; |
| (...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 64 | 62 |
| 65 BookmarkIndex::NodeSet::const_iterator | 63 BookmarkIndex::NodeSet::const_iterator |
| 66 BookmarkIndex::Match::nodes_begin() const { | 64 BookmarkIndex::Match::nodes_begin() const { |
| 67 return nodes.empty() ? terms.front()->second.begin() : nodes.begin(); | 65 return nodes.empty() ? terms.front()->second.begin() : nodes.begin(); |
| 68 } | 66 } |
| 69 | 67 |
| 70 BookmarkIndex::NodeSet::const_iterator BookmarkIndex::Match::nodes_end() const { | 68 BookmarkIndex::NodeSet::const_iterator BookmarkIndex::Match::nodes_end() const { |
| 71 return nodes.empty() ? terms.front()->second.end() : nodes.end(); | 69 return nodes.empty() ? terms.front()->second.end() : nodes.end(); |
| 72 } | 70 } |
| 73 | 71 |
| 74 BookmarkIndex::BookmarkIndex(content::BrowserContext* browser_context) | 72 BookmarkIndex::BookmarkIndex() { |
| 75 : browser_context_(browser_context) { | |
| 76 } | 73 } |
| 77 | 74 |
| 78 BookmarkIndex::~BookmarkIndex() { | 75 BookmarkIndex::~BookmarkIndex() { |
| 79 } | 76 } |
| 80 | 77 |
| 81 void BookmarkIndex::Add(const BookmarkNode* node) { | 78 void BookmarkIndex::Add(const BookmarkNode* node) { |
| 82 if (!node->is_url()) | 79 if (!node->is_url()) |
| 83 return; | 80 return; |
| 84 std::vector<base::string16> terms = | 81 std::vector<base::string16> terms = |
| 85 ExtractQueryWords(Normalize(node->GetTitle())); | 82 ExtractQueryWords(Normalize(node->GetTitle())); |
| 86 for (size_t i = 0; i < terms.size(); ++i) | 83 for (size_t i = 0; i < terms.size(); ++i) |
| 87 RegisterNode(terms[i], node); | 84 RegisterNode(terms[i], node); |
| 88 } | 85 } |
| 89 | 86 |
| 90 void BookmarkIndex::Remove(const BookmarkNode* node) { | 87 void BookmarkIndex::Remove(const BookmarkNode* node) { |
| 91 if (!node->is_url()) | 88 if (!node->is_url()) |
| 92 return; | 89 return; |
| 93 | 90 |
| 94 std::vector<base::string16> terms = | 91 std::vector<base::string16> terms = |
| 95 ExtractQueryWords(Normalize(node->GetTitle())); | 92 ExtractQueryWords(Normalize(node->GetTitle())); |
| 96 for (size_t i = 0; i < terms.size(); ++i) | 93 for (size_t i = 0; i < terms.size(); ++i) |
| 97 UnregisterNode(terms[i], node); | 94 UnregisterNode(terms[i], node); |
| 98 } | 95 } |
| 99 | 96 |
| 100 void BookmarkIndex::GetBookmarksWithTitlesMatching( | 97 void BookmarkIndex::GetBookmarksWithTitlesMatching( |
| 98 BookmarkClient* client, |
| 101 const base::string16& input_query, | 99 const base::string16& input_query, |
| 102 size_t max_count, | 100 size_t max_count, |
| 103 std::vector<BookmarkTitleMatch>* results) { | 101 std::vector<BookmarkTitleMatch>* results) { |
| 104 const base::string16 query = Normalize(input_query); | 102 const base::string16 query = Normalize(input_query); |
| 105 std::vector<base::string16> terms = ExtractQueryWords(query); | 103 std::vector<base::string16> terms = ExtractQueryWords(query); |
| 106 if (terms.empty()) | 104 if (terms.empty()) |
| 107 return; | 105 return; |
| 108 | 106 |
| 109 Matches matches; | 107 Matches matches; |
| 110 for (size_t i = 0; i < terms.size(); ++i) { | 108 for (size_t i = 0; i < terms.size(); ++i) { |
| 111 if (!GetBookmarksWithTitleMatchingTerm(terms[i], i == 0, &matches)) | 109 if (!GetBookmarksWithTitleMatchingTerm(terms[i], i == 0, &matches)) |
| 112 return; | 110 return; |
| 113 } | 111 } |
| 114 | 112 |
| 115 NodeTypedCountPairs node_typed_counts; | 113 NodeTypedCountPairs node_typed_counts; |
| 116 SortMatches(matches, &node_typed_counts); | 114 SortMatches(client, matches, &node_typed_counts); |
| 117 | 115 |
| 118 // We use a QueryParser to fill in match positions for us. It's not the most | 116 // We use a QueryParser to fill in match positions for us. It's not the most |
| 119 // efficient way to go about this, but by the time we get here we know what | 117 // efficient way to go about this, but by the time we get here we know what |
| 120 // matches and so this shouldn't be performance critical. | 118 // matches and so this shouldn't be performance critical. |
| 121 query_parser::QueryParser parser; | 119 query_parser::QueryParser parser; |
| 122 ScopedVector<query_parser::QueryNode> query_nodes; | 120 ScopedVector<query_parser::QueryNode> query_nodes; |
| 123 parser.ParseQueryNodes(query, &query_nodes.get()); | 121 parser.ParseQueryNodes(query, &query_nodes.get()); |
| 124 | 122 |
| 125 // The highest typed counts should be at the beginning of the results vector | 123 // The highest typed counts should be at the beginning of the results vector |
| 126 // so that the best matches will always be included in the results. The loop | 124 // so that the best matches will always be included in the results. The loop |
| 127 // that calculates result relevance in HistoryContentsProvider::ConvertResults | 125 // that calculates result relevance in HistoryContentsProvider::ConvertResults |
| 128 // will run backwards to assure higher relevance will be attributed to the | 126 // will run backwards to assure higher relevance will be attributed to the |
| 129 // best matches. | 127 // best matches. |
| 130 for (NodeTypedCountPairs::const_iterator i = node_typed_counts.begin(); | 128 for (NodeTypedCountPairs::const_iterator i = node_typed_counts.begin(); |
| 131 i != node_typed_counts.end() && results->size() < max_count; ++i) | 129 i != node_typed_counts.end() && results->size() < max_count; ++i) |
| 132 AddMatchToResults(i->first, &parser, query_nodes.get(), results); | 130 AddMatchToResults(i->first, &parser, query_nodes.get(), results); |
| 133 } | 131 } |
| 134 | 132 |
| 135 void BookmarkIndex::SortMatches(const Matches& matches, | 133 void BookmarkIndex::SortMatches(BookmarkClient* client, |
| 134 const Matches& matches, |
| 136 NodeTypedCountPairs* node_typed_counts) const { | 135 NodeTypedCountPairs* node_typed_counts) const { |
| 137 HistoryService* const history_service = browser_context_ ? | 136 for (Matches::const_iterator i = matches.begin(); i != matches.end(); ++i) { |
| 138 HistoryServiceFactory::GetForProfile( | 137 const NodeSet& nodes = |
| 139 Profile::FromBrowserContext(browser_context_), | 138 i->nodes.empty() ? i->terms.front()->second : i->nodes; |
| 140 Profile::EXPLICIT_ACCESS) : NULL; | 139 client->ExtractBookmarkNodePairs(nodes, node_typed_counts); |
| 141 | 140 } |
| 142 history::URLDatabase* url_db = history_service ? | |
| 143 history_service->InMemoryDatabase() : NULL; | |
| 144 | |
| 145 for (Matches::const_iterator i = matches.begin(); i != matches.end(); ++i) | |
| 146 ExtractBookmarkNodePairs(url_db, *i, node_typed_counts); | |
| 147 | 141 |
| 148 std::sort(node_typed_counts->begin(), node_typed_counts->end(), | 142 std::sort(node_typed_counts->begin(), node_typed_counts->end(), |
| 149 &NodeTypedCountPairSortFunc); | 143 &NodeTypedCountPairSortFunc); |
| 150 // Eliminate duplicates. | 144 // Eliminate duplicates. |
| 151 node_typed_counts->erase(std::unique(node_typed_counts->begin(), | 145 node_typed_counts->erase(std::unique(node_typed_counts->begin(), |
| 152 node_typed_counts->end()), | 146 node_typed_counts->end()), |
| 153 node_typed_counts->end()); | 147 node_typed_counts->end()); |
| 154 } | 148 } |
| 155 | 149 |
| 156 void BookmarkIndex::ExtractBookmarkNodePairs( | |
| 157 history::URLDatabase* url_db, | |
| 158 const Match& match, | |
| 159 NodeTypedCountPairs* node_typed_counts) const { | |
| 160 | |
| 161 for (NodeSet::const_iterator i = match.nodes_begin(); | |
| 162 i != match.nodes_end(); ++i) { | |
| 163 int typed_count = 0; | |
| 164 | |
| 165 // If |url_db| is the InMemoryDatabase, it might not cache all URLRows, but | |
| 166 // it guarantees to contain those with |typed_count| > 0. Thus, if we cannot | |
| 167 // fetch the URLRow, it is safe to assume that its |typed_count| is 0. | |
| 168 history::URLRow url; | |
| 169 if (url_db && url_db->GetRowForURL((*i)->url(), &url)) | |
| 170 typed_count = url.typed_count(); | |
| 171 | |
| 172 NodeTypedCountPair pair(*i, typed_count); | |
| 173 node_typed_counts->push_back(pair); | |
| 174 } | |
| 175 } | |
| 176 | |
| 177 void BookmarkIndex::AddMatchToResults( | 150 void BookmarkIndex::AddMatchToResults( |
| 178 const BookmarkNode* node, | 151 const BookmarkNode* node, |
| 179 query_parser::QueryParser* parser, | 152 query_parser::QueryParser* parser, |
| 180 const std::vector<query_parser::QueryNode*>& query_nodes, | 153 const std::vector<query_parser::QueryNode*>& query_nodes, |
| 181 std::vector<BookmarkTitleMatch>* results) { | 154 std::vector<BookmarkTitleMatch>* results) { |
| 182 BookmarkTitleMatch title_match; | 155 BookmarkTitleMatch title_match; |
| 183 // Check that the result matches the query. The previous search | 156 // Check that the result matches the query. The previous search |
| 184 // was a simple per-word search, while the more complex matching | 157 // was a simple per-word search, while the more complex matching |
| 185 // of QueryParser may filter it out. For example, the query | 158 // of QueryParser may filter it out. For example, the query |
| 186 // ["thi"] will match the bookmark titled [Thinking], but since | 159 // ["thi"] will match the bookmark titled [Thinking], but since |
| (...skipping 107 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 294 Index::iterator i = index_.find(term); | 267 Index::iterator i = index_.find(term); |
| 295 if (i == index_.end()) { | 268 if (i == index_.end()) { |
| 296 // We can get here if the node has the same term more than once. For | 269 // We can get here if the node has the same term more than once. For |
| 297 // example, a bookmark with the title 'foo foo' would end up here. | 270 // example, a bookmark with the title 'foo foo' would end up here. |
| 298 return; | 271 return; |
| 299 } | 272 } |
| 300 i->second.erase(node); | 273 i->second.erase(node); |
| 301 if (i->second.empty()) | 274 if (i->second.empty()) |
| 302 index_.erase(i); | 275 index_.erase(i); |
| 303 } | 276 } |
| OLD | NEW |