OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/bookmarks/bookmark_index.h" | 5 #include "chrome/browser/bookmarks/bookmark_index.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <functional> |
8 #include <iterator> | 9 #include <iterator> |
9 #include <list> | 10 #include <list> |
10 | 11 |
11 #include "base/i18n/case_conversion.h" | 12 #include "base/i18n/case_conversion.h" |
12 #include "base/strings/string16.h" | 13 #include "base/strings/string16.h" |
13 #include "chrome/browser/bookmarks/bookmark_model.h" | 14 #include "components/bookmarks/core/browser/bookmark_client.h" |
14 #include "chrome/browser/history/history_service.h" | 15 #include "components/bookmarks/core/browser/bookmark_node.h" |
15 #include "chrome/browser/history/history_service_factory.h" | |
16 #include "chrome/browser/history/url_database.h" | |
17 #include "components/bookmarks/core/browser/bookmark_title_match.h" | 16 #include "components/bookmarks/core/browser/bookmark_title_match.h" |
18 #include "components/query_parser/query_parser.h" | 17 #include "components/query_parser/query_parser.h" |
19 #include "third_party/icu/source/common/unicode/normalizer2.h" | 18 #include "third_party/icu/source/common/unicode/normalizer2.h" |
20 | 19 |
21 namespace { | 20 namespace { |
22 | 21 |
23 // Returns a normalized version of the UTF16 string |text|. If it fails to | 22 // Returns a normalized version of the UTF16 string |text|. If it fails to |
24 // normalize the string, returns |text| itself as a best-effort. | 23 // normalize the string, returns |text| itself as a best-effort. |
25 base::string16 Normalize(const base::string16& text) { | 24 base::string16 Normalize(const base::string16& text) { |
26 UErrorCode status = U_ZERO_ERROR; | 25 UErrorCode status = U_ZERO_ERROR; |
27 const icu::Normalizer2* normalizer2 = | 26 const icu::Normalizer2* normalizer2 = |
28 icu::Normalizer2::getInstance(NULL, "nfkc", UNORM2_COMPOSE, status); | 27 icu::Normalizer2::getInstance(NULL, "nfkc", UNORM2_COMPOSE, status); |
29 icu::UnicodeString unicode_text(text.data(), text.length()); | 28 icu::UnicodeString unicode_text(text.data(), text.length()); |
30 icu::UnicodeString unicode_normalized_text; | 29 icu::UnicodeString unicode_normalized_text; |
31 normalizer2->normalize(unicode_text, unicode_normalized_text, status); | 30 normalizer2->normalize(unicode_text, unicode_normalized_text, status); |
32 if (U_FAILURE(status)) | 31 if (U_FAILURE(status)) |
33 return text; | 32 return text; |
34 return base::string16(unicode_normalized_text.getBuffer(), | 33 return base::string16(unicode_normalized_text.getBuffer(), |
35 unicode_normalized_text.length()); | 34 unicode_normalized_text.length()); |
36 } | 35 } |
37 | 36 |
| 37 // Sort functor for NodeTypedCountPairs. We sort in decreasing order of typed |
| 38 // count so that the best matches will always be added to the results. |
| 39 struct NodeTypedCountPairSortFunctor |
| 40 : std::binary_function<BookmarkClient::NodeTypedCountPair, |
| 41 BookmarkClient::NodeTypedCountPair, |
| 42 bool> { |
| 43 bool operator()(const BookmarkClient::NodeTypedCountPair& a, |
| 44 const BookmarkClient::NodeTypedCountPair& b) const { |
| 45 return a.second > b.second; |
| 46 } |
| 47 }; |
| 48 |
| 49 // Extract the const Node* stored in a NodeTypedCountPair. |
| 50 struct NodeFromNodeTypedCountPairFunctor |
| 51 : std::unary_function<BookmarkClient::NodeTypedCountPair, |
| 52 const BookmarkNode*> { |
| 53 const BookmarkNode* operator()( |
| 54 const BookmarkClient::NodeTypedCountPair& pair) const { |
| 55 return pair.first; |
| 56 } |
| 57 }; |
| 58 |
38 } // namespace | 59 } // namespace |
39 | 60 |
40 // Used when finding the set of bookmarks that match a query. Each match | 61 // Used when finding the set of bookmarks that match a query. Each match |
41 // represents a set of terms (as an interator into the Index) matching the | 62 // represents a set of terms (as an interator into the Index) matching the |
42 // query as well as the set of nodes that contain those terms in their titles. | 63 // query as well as the set of nodes that contain those terms in their titles. |
43 struct BookmarkIndex::Match { | 64 struct BookmarkIndex::Match { |
44 // List of terms matching the query. | 65 // List of terms matching the query. |
45 std::list<Index::const_iterator> terms; | 66 std::list<Index::const_iterator> terms; |
46 | 67 |
47 // The set of nodes matching the terms. As an optimization this is empty | 68 // The set of nodes matching the terms. As an optimization this is empty |
(...skipping 16 matching lines...) Expand all Loading... |
64 | 85 |
65 BookmarkIndex::NodeSet::const_iterator | 86 BookmarkIndex::NodeSet::const_iterator |
66 BookmarkIndex::Match::nodes_begin() const { | 87 BookmarkIndex::Match::nodes_begin() const { |
67 return nodes.empty() ? terms.front()->second.begin() : nodes.begin(); | 88 return nodes.empty() ? terms.front()->second.begin() : nodes.begin(); |
68 } | 89 } |
69 | 90 |
70 BookmarkIndex::NodeSet::const_iterator BookmarkIndex::Match::nodes_end() const { | 91 BookmarkIndex::NodeSet::const_iterator BookmarkIndex::Match::nodes_end() const { |
71 return nodes.empty() ? terms.front()->second.end() : nodes.end(); | 92 return nodes.empty() ? terms.front()->second.end() : nodes.end(); |
72 } | 93 } |
73 | 94 |
74 BookmarkIndex::BookmarkIndex(content::BrowserContext* browser_context) | 95 BookmarkIndex::BookmarkIndex(BookmarkClient* client) : client_(client) { |
75 : browser_context_(browser_context) { | |
76 } | 96 } |
77 | 97 |
78 BookmarkIndex::~BookmarkIndex() { | 98 BookmarkIndex::~BookmarkIndex() { |
79 } | 99 } |
80 | 100 |
81 void BookmarkIndex::Add(const BookmarkNode* node) { | 101 void BookmarkIndex::Add(const BookmarkNode* node) { |
82 if (!node->is_url()) | 102 if (!node->is_url()) |
83 return; | 103 return; |
84 std::vector<base::string16> terms = | 104 std::vector<base::string16> terms = |
85 ExtractQueryWords(Normalize(node->GetTitle())); | 105 ExtractQueryWords(Normalize(node->GetTitle())); |
(...skipping 19 matching lines...) Expand all Loading... |
105 std::vector<base::string16> terms = ExtractQueryWords(query); | 125 std::vector<base::string16> terms = ExtractQueryWords(query); |
106 if (terms.empty()) | 126 if (terms.empty()) |
107 return; | 127 return; |
108 | 128 |
109 Matches matches; | 129 Matches matches; |
110 for (size_t i = 0; i < terms.size(); ++i) { | 130 for (size_t i = 0; i < terms.size(); ++i) { |
111 if (!GetBookmarksWithTitleMatchingTerm(terms[i], i == 0, &matches)) | 131 if (!GetBookmarksWithTitleMatchingTerm(terms[i], i == 0, &matches)) |
112 return; | 132 return; |
113 } | 133 } |
114 | 134 |
115 NodeTypedCountPairs node_typed_counts; | 135 Nodes sorted_nodes; |
116 SortMatches(matches, &node_typed_counts); | 136 SortMatches(matches, &sorted_nodes); |
117 | 137 |
118 // We use a QueryParser to fill in match positions for us. It's not the most | 138 // We use a QueryParser to fill in match positions for us. It's not the most |
119 // efficient way to go about this, but by the time we get here we know what | 139 // efficient way to go about this, but by the time we get here we know what |
120 // matches and so this shouldn't be performance critical. | 140 // matches and so this shouldn't be performance critical. |
121 query_parser::QueryParser parser; | 141 query_parser::QueryParser parser; |
122 ScopedVector<query_parser::QueryNode> query_nodes; | 142 ScopedVector<query_parser::QueryNode> query_nodes; |
123 parser.ParseQueryNodes(query, &query_nodes.get()); | 143 parser.ParseQueryNodes(query, &query_nodes.get()); |
124 | 144 |
125 // The highest typed counts should be at the beginning of the results vector | 145 // The highest typed counts should be at the beginning of the results vector |
126 // so that the best matches will always be included in the results. The loop | 146 // so that the best matches will always be included in the results. The loop |
127 // that calculates result relevance in HistoryContentsProvider::ConvertResults | 147 // that calculates result relevance in HistoryContentsProvider::ConvertResults |
128 // will run backwards to assure higher relevance will be attributed to the | 148 // will run backwards to assure higher relevance will be attributed to the |
129 // best matches. | 149 // best matches. |
130 for (NodeTypedCountPairs::const_iterator i = node_typed_counts.begin(); | 150 for (Nodes::const_iterator i = sorted_nodes.begin(); |
131 i != node_typed_counts.end() && results->size() < max_count; ++i) | 151 i != sorted_nodes.end() && results->size() < max_count; |
132 AddMatchToResults(i->first, &parser, query_nodes.get(), results); | 152 ++i) |
| 153 AddMatchToResults(*i, &parser, query_nodes.get(), results); |
133 } | 154 } |
134 | 155 |
135 void BookmarkIndex::SortMatches(const Matches& matches, | 156 void BookmarkIndex::SortMatches(const Matches& matches, |
136 NodeTypedCountPairs* node_typed_counts) const { | 157 Nodes* sorted_nodes) const { |
137 HistoryService* const history_service = browser_context_ ? | 158 NodeSet nodes; |
138 HistoryServiceFactory::GetForProfile( | 159 for (Matches::const_iterator i = matches.begin(); i != matches.end(); ++i) { |
139 Profile::FromBrowserContext(browser_context_), | 160 nodes.insert(i->nodes_begin(), i->nodes_end()); |
140 Profile::EXPLICIT_ACCESS) : NULL; | 161 } |
141 | 162 if (client_->SupportsTypedCountForNodes()) { |
142 history::URLDatabase* url_db = history_service ? | 163 BookmarkClient::NodeTypedCountPairs node_typed_counts; |
143 history_service->InMemoryDatabase() : NULL; | 164 client_->GetTypedCountForNodes(nodes, &node_typed_counts); |
144 | 165 std::sort(node_typed_counts.begin(), |
145 for (Matches::const_iterator i = matches.begin(); i != matches.end(); ++i) | 166 node_typed_counts.end(), |
146 ExtractBookmarkNodePairs(url_db, *i, node_typed_counts); | 167 NodeTypedCountPairSortFunctor()); |
147 | 168 sorted_nodes->reserve(sorted_nodes->size() + node_typed_counts.size()); |
148 std::sort(node_typed_counts->begin(), node_typed_counts->end(), | 169 std::transform(node_typed_counts.begin(), |
149 &NodeTypedCountPairSortFunc); | 170 node_typed_counts.end(), |
150 // Eliminate duplicates. | 171 std::back_inserter(*sorted_nodes), |
151 node_typed_counts->erase(std::unique(node_typed_counts->begin(), | 172 NodeFromNodeTypedCountPairFunctor()); |
152 node_typed_counts->end()), | 173 } else { |
153 node_typed_counts->end()); | 174 sorted_nodes->insert(sorted_nodes->end(), nodes.begin(), nodes.end()); |
154 } | |
155 | |
156 void BookmarkIndex::ExtractBookmarkNodePairs( | |
157 history::URLDatabase* url_db, | |
158 const Match& match, | |
159 NodeTypedCountPairs* node_typed_counts) const { | |
160 | |
161 for (NodeSet::const_iterator i = match.nodes_begin(); | |
162 i != match.nodes_end(); ++i) { | |
163 int typed_count = 0; | |
164 | |
165 // If |url_db| is the InMemoryDatabase, it might not cache all URLRows, but | |
166 // it guarantees to contain those with |typed_count| > 0. Thus, if we cannot | |
167 // fetch the URLRow, it is safe to assume that its |typed_count| is 0. | |
168 history::URLRow url; | |
169 if (url_db && url_db->GetRowForURL((*i)->url(), &url)) | |
170 typed_count = url.typed_count(); | |
171 | |
172 NodeTypedCountPair pair(*i, typed_count); | |
173 node_typed_counts->push_back(pair); | |
174 } | 175 } |
175 } | 176 } |
176 | 177 |
177 void BookmarkIndex::AddMatchToResults( | 178 void BookmarkIndex::AddMatchToResults( |
178 const BookmarkNode* node, | 179 const BookmarkNode* node, |
179 query_parser::QueryParser* parser, | 180 query_parser::QueryParser* parser, |
180 const std::vector<query_parser::QueryNode*>& query_nodes, | 181 const std::vector<query_parser::QueryNode*>& query_nodes, |
181 std::vector<BookmarkTitleMatch>* results) { | 182 std::vector<BookmarkTitleMatch>* results) { |
182 BookmarkTitleMatch title_match; | 183 BookmarkTitleMatch title_match; |
183 // Check that the result matches the query. The previous search | 184 // Check that the result matches the query. The previous search |
(...skipping 110 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
294 Index::iterator i = index_.find(term); | 295 Index::iterator i = index_.find(term); |
295 if (i == index_.end()) { | 296 if (i == index_.end()) { |
296 // We can get here if the node has the same term more than once. For | 297 // We can get here if the node has the same term more than once. For |
297 // example, a bookmark with the title 'foo foo' would end up here. | 298 // example, a bookmark with the title 'foo foo' would end up here. |
298 return; | 299 return; |
299 } | 300 } |
300 i->second.erase(node); | 301 i->second.erase(node); |
301 if (i->second.empty()) | 302 if (i->second.empty()) |
302 index_.erase(i); | 303 index_.erase(i); |
303 } | 304 } |
OLD | NEW |