| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/browser/bookmarks/bookmark_index.h" | 5 #include "chrome/browser/bookmarks/bookmark_index.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <functional> |
| 8 #include <iterator> | 9 #include <iterator> |
| 9 #include <list> | 10 #include <list> |
| 10 | 11 |
| 11 #include "base/i18n/case_conversion.h" | 12 #include "base/i18n/case_conversion.h" |
| 12 #include "base/strings/string16.h" | 13 #include "base/strings/string16.h" |
| 13 #include "chrome/browser/bookmarks/bookmark_model.h" | 14 #include "components/bookmarks/core/browser/bookmark_client.h" |
| 14 #include "chrome/browser/history/history_service.h" | 15 #include "components/bookmarks/core/browser/bookmark_node.h" |
| 15 #include "chrome/browser/history/history_service_factory.h" | |
| 16 #include "chrome/browser/history/url_database.h" | |
| 17 #include "components/bookmarks/core/browser/bookmark_title_match.h" | 16 #include "components/bookmarks/core/browser/bookmark_title_match.h" |
| 18 #include "components/query_parser/query_parser.h" | 17 #include "components/query_parser/query_parser.h" |
| 19 #include "third_party/icu/source/common/unicode/normalizer2.h" | 18 #include "third_party/icu/source/common/unicode/normalizer2.h" |
| 20 | 19 |
| 21 namespace { | 20 namespace { |
| 22 | 21 |
| 23 // Returns a normalized version of the UTF16 string |text|. If it fails to | 22 // Returns a normalized version of the UTF16 string |text|. If it fails to |
| 24 // normalize the string, returns |text| itself as a best-effort. | 23 // normalize the string, returns |text| itself as a best-effort. |
| 25 base::string16 Normalize(const base::string16& text) { | 24 base::string16 Normalize(const base::string16& text) { |
| 26 UErrorCode status = U_ZERO_ERROR; | 25 UErrorCode status = U_ZERO_ERROR; |
| 27 const icu::Normalizer2* normalizer2 = | 26 const icu::Normalizer2* normalizer2 = |
| 28 icu::Normalizer2::getInstance(NULL, "nfkc", UNORM2_COMPOSE, status); | 27 icu::Normalizer2::getInstance(NULL, "nfkc", UNORM2_COMPOSE, status); |
| 29 icu::UnicodeString unicode_text(text.data(), text.length()); | 28 icu::UnicodeString unicode_text(text.data(), text.length()); |
| 30 icu::UnicodeString unicode_normalized_text; | 29 icu::UnicodeString unicode_normalized_text; |
| 31 normalizer2->normalize(unicode_text, unicode_normalized_text, status); | 30 normalizer2->normalize(unicode_text, unicode_normalized_text, status); |
| 32 if (U_FAILURE(status)) | 31 if (U_FAILURE(status)) |
| 33 return text; | 32 return text; |
| 34 return base::string16(unicode_normalized_text.getBuffer(), | 33 return base::string16(unicode_normalized_text.getBuffer(), |
| 35 unicode_normalized_text.length()); | 34 unicode_normalized_text.length()); |
| 36 } | 35 } |
| 37 | 36 |
| 37 // Sort functor for NodeTypedCountPairs. We sort in decreasing order of typed |
| 38 // count so that the best matches will always be added to the results. |
| 39 struct NodeTypedCountPairSortFunctor |
| 40 : std::binary_function<BookmarkClient::NodeTypedCountPair, |
| 41 BookmarkClient::NodeTypedCountPair, |
| 42 bool> { |
| 43 bool operator()(const BookmarkClient::NodeTypedCountPair& a, |
| 44 const BookmarkClient::NodeTypedCountPair& b) const { |
| 45 return a.second > b.second; |
| 46 } |
| 47 }; |
| 48 |
| 49 // Extract the const Node* stored in a NodeTypedCountPair. |
| 50 struct NodeFromNodeTypedCountPairFunctor |
| 51 : std::unary_function<BookmarkClient::NodeTypedCountPair, |
| 52 const BookmarkNode*> { |
| 53 const BookmarkNode* operator()( |
| 54 const BookmarkClient::NodeTypedCountPair& pair) const { |
| 55 return pair.first; |
| 56 } |
| 57 }; |
| 58 |
| 38 } // namespace | 59 } // namespace |
| 39 | 60 |
| 40 // Used when finding the set of bookmarks that match a query. Each match | 61 // Used when finding the set of bookmarks that match a query. Each match |
| 41 // represents a set of terms (as an interator into the Index) matching the | 62 // represents a set of terms (as an interator into the Index) matching the |
| 42 // query as well as the set of nodes that contain those terms in their titles. | 63 // query as well as the set of nodes that contain those terms in their titles. |
| 43 struct BookmarkIndex::Match { | 64 struct BookmarkIndex::Match { |
| 44 // List of terms matching the query. | 65 // List of terms matching the query. |
| 45 std::list<Index::const_iterator> terms; | 66 std::list<Index::const_iterator> terms; |
| 46 | 67 |
| 47 // The set of nodes matching the terms. As an optimization this is empty | 68 // The set of nodes matching the terms. As an optimization this is empty |
| (...skipping 16 matching lines...) Expand all Loading... |
| 64 | 85 |
| 65 BookmarkIndex::NodeSet::const_iterator | 86 BookmarkIndex::NodeSet::const_iterator |
| 66 BookmarkIndex::Match::nodes_begin() const { | 87 BookmarkIndex::Match::nodes_begin() const { |
| 67 return nodes.empty() ? terms.front()->second.begin() : nodes.begin(); | 88 return nodes.empty() ? terms.front()->second.begin() : nodes.begin(); |
| 68 } | 89 } |
| 69 | 90 |
| 70 BookmarkIndex::NodeSet::const_iterator BookmarkIndex::Match::nodes_end() const { | 91 BookmarkIndex::NodeSet::const_iterator BookmarkIndex::Match::nodes_end() const { |
| 71 return nodes.empty() ? terms.front()->second.end() : nodes.end(); | 92 return nodes.empty() ? terms.front()->second.end() : nodes.end(); |
| 72 } | 93 } |
| 73 | 94 |
| 74 BookmarkIndex::BookmarkIndex(content::BrowserContext* browser_context) | 95 BookmarkIndex::BookmarkIndex(BookmarkClient* client) : client_(client) { |
| 75 : browser_context_(browser_context) { | |
| 76 } | 96 } |
| 77 | 97 |
| 78 BookmarkIndex::~BookmarkIndex() { | 98 BookmarkIndex::~BookmarkIndex() { |
| 79 } | 99 } |
| 80 | 100 |
| 81 void BookmarkIndex::Add(const BookmarkNode* node) { | 101 void BookmarkIndex::Add(const BookmarkNode* node) { |
| 82 if (!node->is_url()) | 102 if (!node->is_url()) |
| 83 return; | 103 return; |
| 84 std::vector<base::string16> terms = | 104 std::vector<base::string16> terms = |
| 85 ExtractQueryWords(Normalize(node->GetTitle())); | 105 ExtractQueryWords(Normalize(node->GetTitle())); |
| (...skipping 19 matching lines...) Expand all Loading... |
| 105 std::vector<base::string16> terms = ExtractQueryWords(query); | 125 std::vector<base::string16> terms = ExtractQueryWords(query); |
| 106 if (terms.empty()) | 126 if (terms.empty()) |
| 107 return; | 127 return; |
| 108 | 128 |
| 109 Matches matches; | 129 Matches matches; |
| 110 for (size_t i = 0; i < terms.size(); ++i) { | 130 for (size_t i = 0; i < terms.size(); ++i) { |
| 111 if (!GetBookmarksWithTitleMatchingTerm(terms[i], i == 0, &matches)) | 131 if (!GetBookmarksWithTitleMatchingTerm(terms[i], i == 0, &matches)) |
| 112 return; | 132 return; |
| 113 } | 133 } |
| 114 | 134 |
| 115 NodeTypedCountPairs node_typed_counts; | 135 Nodes sorted_nodes; |
| 116 SortMatches(matches, &node_typed_counts); | 136 SortMatches(matches, &sorted_nodes); |
| 117 | 137 |
| 118 // We use a QueryParser to fill in match positions for us. It's not the most | 138 // We use a QueryParser to fill in match positions for us. It's not the most |
| 119 // efficient way to go about this, but by the time we get here we know what | 139 // efficient way to go about this, but by the time we get here we know what |
| 120 // matches and so this shouldn't be performance critical. | 140 // matches and so this shouldn't be performance critical. |
| 121 query_parser::QueryParser parser; | 141 query_parser::QueryParser parser; |
| 122 ScopedVector<query_parser::QueryNode> query_nodes; | 142 ScopedVector<query_parser::QueryNode> query_nodes; |
| 123 parser.ParseQueryNodes(query, &query_nodes.get()); | 143 parser.ParseQueryNodes(query, &query_nodes.get()); |
| 124 | 144 |
| 125 // The highest typed counts should be at the beginning of the results vector | 145 // The highest typed counts should be at the beginning of the results vector |
| 126 // so that the best matches will always be included in the results. The loop | 146 // so that the best matches will always be included in the results. The loop |
| 127 // that calculates result relevance in HistoryContentsProvider::ConvertResults | 147 // that calculates result relevance in HistoryContentsProvider::ConvertResults |
| 128 // will run backwards to assure higher relevance will be attributed to the | 148 // will run backwards to assure higher relevance will be attributed to the |
| 129 // best matches. | 149 // best matches. |
| 130 for (NodeTypedCountPairs::const_iterator i = node_typed_counts.begin(); | 150 for (Nodes::const_iterator i = sorted_nodes.begin(); |
| 131 i != node_typed_counts.end() && results->size() < max_count; ++i) | 151 i != sorted_nodes.end() && results->size() < max_count; |
| 132 AddMatchToResults(i->first, &parser, query_nodes.get(), results); | 152 ++i) |
| 153 AddMatchToResults(*i, &parser, query_nodes.get(), results); |
| 133 } | 154 } |
| 134 | 155 |
| 135 void BookmarkIndex::SortMatches(const Matches& matches, | 156 void BookmarkIndex::SortMatches(const Matches& matches, |
| 136 NodeTypedCountPairs* node_typed_counts) const { | 157 Nodes* sorted_nodes) const { |
| 137 HistoryService* const history_service = browser_context_ ? | 158 NodeSet nodes; |
| 138 HistoryServiceFactory::GetForProfile( | 159 for (Matches::const_iterator i = matches.begin(); i != matches.end(); ++i) { |
| 139 Profile::FromBrowserContext(browser_context_), | 160 nodes.insert(i->nodes_begin(), i->nodes_end()); |
| 140 Profile::EXPLICIT_ACCESS) : NULL; | 161 } |
| 141 | 162 if (client_->SupportsTypedCountForNodes()) { |
| 142 history::URLDatabase* url_db = history_service ? | 163 BookmarkClient::NodeTypedCountPairs node_typed_counts; |
| 143 history_service->InMemoryDatabase() : NULL; | 164 client_->GetTypedCountForNodes(nodes, &node_typed_counts); |
| 144 | 165 std::sort(node_typed_counts.begin(), |
| 145 for (Matches::const_iterator i = matches.begin(); i != matches.end(); ++i) | 166 node_typed_counts.end(), |
| 146 ExtractBookmarkNodePairs(url_db, *i, node_typed_counts); | 167 NodeTypedCountPairSortFunctor()); |
| 147 | 168 sorted_nodes->reserve(sorted_nodes->size() + node_typed_counts.size()); |
| 148 std::sort(node_typed_counts->begin(), node_typed_counts->end(), | 169 std::transform(node_typed_counts.begin(), |
| 149 &NodeTypedCountPairSortFunc); | 170 node_typed_counts.end(), |
| 150 // Eliminate duplicates. | 171 std::back_inserter(*sorted_nodes), |
| 151 node_typed_counts->erase(std::unique(node_typed_counts->begin(), | 172 NodeFromNodeTypedCountPairFunctor()); |
| 152 node_typed_counts->end()), | 173 } else { |
| 153 node_typed_counts->end()); | 174 sorted_nodes->insert(sorted_nodes->end(), nodes.begin(), nodes.end()); |
| 154 } | |
| 155 | |
| 156 void BookmarkIndex::ExtractBookmarkNodePairs( | |
| 157 history::URLDatabase* url_db, | |
| 158 const Match& match, | |
| 159 NodeTypedCountPairs* node_typed_counts) const { | |
| 160 | |
| 161 for (NodeSet::const_iterator i = match.nodes_begin(); | |
| 162 i != match.nodes_end(); ++i) { | |
| 163 int typed_count = 0; | |
| 164 | |
| 165 // If |url_db| is the InMemoryDatabase, it might not cache all URLRows, but | |
| 166 // it guarantees to contain those with |typed_count| > 0. Thus, if we cannot | |
| 167 // fetch the URLRow, it is safe to assume that its |typed_count| is 0. | |
| 168 history::URLRow url; | |
| 169 if (url_db && url_db->GetRowForURL((*i)->url(), &url)) | |
| 170 typed_count = url.typed_count(); | |
| 171 | |
| 172 NodeTypedCountPair pair(*i, typed_count); | |
| 173 node_typed_counts->push_back(pair); | |
| 174 } | 175 } |
| 175 } | 176 } |
| 176 | 177 |
| 177 void BookmarkIndex::AddMatchToResults( | 178 void BookmarkIndex::AddMatchToResults( |
| 178 const BookmarkNode* node, | 179 const BookmarkNode* node, |
| 179 query_parser::QueryParser* parser, | 180 query_parser::QueryParser* parser, |
| 180 const std::vector<query_parser::QueryNode*>& query_nodes, | 181 const std::vector<query_parser::QueryNode*>& query_nodes, |
| 181 std::vector<BookmarkTitleMatch>* results) { | 182 std::vector<BookmarkTitleMatch>* results) { |
| 182 BookmarkTitleMatch title_match; | 183 BookmarkTitleMatch title_match; |
| 183 // Check that the result matches the query. The previous search | 184 // Check that the result matches the query. The previous search |
| (...skipping 110 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 294 Index::iterator i = index_.find(term); | 295 Index::iterator i = index_.find(term); |
| 295 if (i == index_.end()) { | 296 if (i == index_.end()) { |
| 296 // We can get here if the node has the same term more than once. For | 297 // We can get here if the node has the same term more than once. For |
| 297 // example, a bookmark with the title 'foo foo' would end up here. | 298 // example, a bookmark with the title 'foo foo' would end up here. |
| 298 return; | 299 return; |
| 299 } | 300 } |
| 300 i->second.erase(node); | 301 i->second.erase(node); |
| 301 if (i->second.empty()) | 302 if (i->second.empty()) |
| 302 index_.erase(i); | 303 index_.erase(i); |
| 303 } | 304 } |
| OLD | NEW |