OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/bookmarks/bookmark_index.h" | 5 #include "chrome/browser/bookmarks/bookmark_index.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <functional> |
8 #include <iterator> | 9 #include <iterator> |
9 #include <list> | 10 #include <list> |
10 | 11 |
11 #include "base/i18n/case_conversion.h" | 12 #include "base/i18n/case_conversion.h" |
| 13 #include "base/logging.h" |
12 #include "base/strings/string16.h" | 14 #include "base/strings/string16.h" |
13 #include "chrome/browser/bookmarks/bookmark_model.h" | |
14 #include "chrome/browser/bookmarks/bookmark_utils.h" | 15 #include "chrome/browser/bookmarks/bookmark_utils.h" |
15 #include "chrome/browser/history/history_service.h" | 16 #include "components/bookmarks/core/browser/bookmark_client.h" |
16 #include "chrome/browser/history/history_service_factory.h" | |
17 #include "chrome/browser/history/url_database.h" | |
18 #include "components/bookmarks/core/browser/bookmark_match.h" | 17 #include "components/bookmarks/core/browser/bookmark_match.h" |
| 18 #include "components/bookmarks/core/browser/bookmark_node.h" |
19 #include "components/query_parser/query_parser.h" | 19 #include "components/query_parser/query_parser.h" |
20 #include "components/query_parser/snippet.h" | 20 #include "components/query_parser/snippet.h" |
21 #include "third_party/icu/source/common/unicode/normalizer2.h" | 21 #include "third_party/icu/source/common/unicode/normalizer2.h" |
22 | 22 |
| 23 typedef BookmarkClient::NodeTypedCountPair NodeTypedCountPair; |
| 24 typedef BookmarkClient::NodeTypedCountPairs NodeTypedCountPairs; |
| 25 |
23 namespace { | 26 namespace { |
24 | 27 |
25 // Returns a normalized version of the UTF16 string |text|. If it fails to | 28 // Returns a normalized version of the UTF16 string |text|. If it fails to |
26 // normalize the string, returns |text| itself as a best-effort. | 29 // normalize the string, returns |text| itself as a best-effort. |
27 base::string16 Normalize(const base::string16& text) { | 30 base::string16 Normalize(const base::string16& text) { |
28 UErrorCode status = U_ZERO_ERROR; | 31 UErrorCode status = U_ZERO_ERROR; |
29 const icu::Normalizer2* normalizer2 = | 32 const icu::Normalizer2* normalizer2 = |
30 icu::Normalizer2::getInstance(NULL, "nfkc", UNORM2_COMPOSE, status); | 33 icu::Normalizer2::getInstance(NULL, "nfkc", UNORM2_COMPOSE, status); |
31 icu::UnicodeString unicode_text(text.data(), text.length()); | 34 icu::UnicodeString unicode_text(text.data(), text.length()); |
32 icu::UnicodeString unicode_normalized_text; | 35 icu::UnicodeString unicode_normalized_text; |
33 normalizer2->normalize(unicode_text, unicode_normalized_text, status); | 36 normalizer2->normalize(unicode_text, unicode_normalized_text, status); |
34 if (U_FAILURE(status)) | 37 if (U_FAILURE(status)) |
35 return text; | 38 return text; |
36 return base::string16(unicode_normalized_text.getBuffer(), | 39 return base::string16(unicode_normalized_text.getBuffer(), |
37 unicode_normalized_text.length()); | 40 unicode_normalized_text.length()); |
38 } | 41 } |
39 | 42 |
| 43 // Sort functor for NodeTypedCountPairs. We sort in decreasing order of typed |
| 44 // count so that the best matches will always be added to the results. |
| 45 struct NodeTypedCountPairSortFunctor |
| 46 : std::binary_function<NodeTypedCountPair, NodeTypedCountPair, bool> { |
| 47 bool operator()(const NodeTypedCountPair& a, |
| 48 const NodeTypedCountPair& b) const { |
| 49 return a.second > b.second; |
| 50 } |
| 51 }; |
| 52 |
| 53 // Extract the const Node* stored in a BookmarkClient::NodeTypedCountPair. |
| 54 struct NodeTypedCountPairExtractNodeFunctor |
| 55 : std::unary_function<NodeTypedCountPair, const BookmarkNode*> { |
| 56 const BookmarkNode* operator()(const NodeTypedCountPair& pair) const { |
| 57 return pair.first; |
| 58 } |
| 59 }; |
| 60 |
40 } // namespace | 61 } // namespace |
41 | 62 |
42 // Used when finding the set of bookmarks that match a query. Each match | 63 // Used when finding the set of bookmarks that match a query. Each match |
43 // represents a set of terms (as an interator into the Index) matching the | 64 // represents a set of terms (as an interator into the Index) matching the |
44 // query as well as the set of nodes that contain those terms in their titles. | 65 // query as well as the set of nodes that contain those terms in their titles. |
45 struct BookmarkIndex::Match { | 66 struct BookmarkIndex::Match { |
46 // List of terms matching the query. | 67 // List of terms matching the query. |
47 std::list<Index::const_iterator> terms; | 68 std::list<Index::const_iterator> terms; |
48 | 69 |
49 // The set of nodes matching the terms. As an optimization this is empty | 70 // The set of nodes matching the terms. As an optimization this is empty |
(...skipping 16 matching lines...) Expand all Loading... |
66 | 87 |
67 BookmarkIndex::NodeSet::const_iterator | 88 BookmarkIndex::NodeSet::const_iterator |
68 BookmarkIndex::Match::nodes_begin() const { | 89 BookmarkIndex::Match::nodes_begin() const { |
69 return nodes.empty() ? terms.front()->second.begin() : nodes.begin(); | 90 return nodes.empty() ? terms.front()->second.begin() : nodes.begin(); |
70 } | 91 } |
71 | 92 |
72 BookmarkIndex::NodeSet::const_iterator BookmarkIndex::Match::nodes_end() const { | 93 BookmarkIndex::NodeSet::const_iterator BookmarkIndex::Match::nodes_end() const { |
73 return nodes.empty() ? terms.front()->second.end() : nodes.end(); | 94 return nodes.empty() ? terms.front()->second.end() : nodes.end(); |
74 } | 95 } |
75 | 96 |
76 BookmarkIndex::BookmarkIndex(content::BrowserContext* browser_context, | 97 BookmarkIndex::BookmarkIndex(BookmarkClient* client, |
77 bool index_urls, | 98 bool index_urls, |
78 const std::string& languages) | 99 const std::string& languages) |
79 : index_urls_(index_urls), | 100 : client_(client), |
80 browser_context_(browser_context), | 101 languages_(languages), |
81 languages_(languages) { | 102 index_urls_(index_urls) { |
| 103 DCHECK(client_); |
82 } | 104 } |
83 | 105 |
84 BookmarkIndex::~BookmarkIndex() { | 106 BookmarkIndex::~BookmarkIndex() { |
85 } | 107 } |
86 | 108 |
87 void BookmarkIndex::Add(const BookmarkNode* node) { | 109 void BookmarkIndex::Add(const BookmarkNode* node) { |
88 if (!node->is_url()) | 110 if (!node->is_url()) |
89 return; | 111 return; |
90 std::vector<base::string16> terms = | 112 std::vector<base::string16> terms = |
91 ExtractQueryWords(Normalize(node->GetTitle())); | 113 ExtractQueryWords(Normalize(node->GetTitle())); |
(...skipping 30 matching lines...) Expand all Loading... |
122 std::vector<base::string16> terms = ExtractQueryWords(query); | 144 std::vector<base::string16> terms = ExtractQueryWords(query); |
123 if (terms.empty()) | 145 if (terms.empty()) |
124 return; | 146 return; |
125 | 147 |
126 Matches matches; | 148 Matches matches; |
127 for (size_t i = 0; i < terms.size(); ++i) { | 149 for (size_t i = 0; i < terms.size(); ++i) { |
128 if (!GetBookmarksMatchingTerm(terms[i], i == 0, &matches)) | 150 if (!GetBookmarksMatchingTerm(terms[i], i == 0, &matches)) |
129 return; | 151 return; |
130 } | 152 } |
131 | 153 |
132 NodeTypedCountPairs node_typed_counts; | 154 Nodes sorted_nodes; |
133 SortMatches(matches, &node_typed_counts); | 155 SortMatches(matches, &sorted_nodes); |
134 | 156 |
135 // We use a QueryParser to fill in match positions for us. It's not the most | 157 // We use a QueryParser to fill in match positions for us. It's not the most |
136 // efficient way to go about this, but by the time we get here we know what | 158 // efficient way to go about this, but by the time we get here we know what |
137 // matches and so this shouldn't be performance critical. | 159 // matches and so this shouldn't be performance critical. |
138 query_parser::QueryParser parser; | 160 query_parser::QueryParser parser; |
139 ScopedVector<query_parser::QueryNode> query_nodes; | 161 ScopedVector<query_parser::QueryNode> query_nodes; |
140 parser.ParseQueryNodes(query, &query_nodes.get()); | 162 parser.ParseQueryNodes(query, &query_nodes.get()); |
141 | 163 |
142 // The highest typed counts should be at the beginning of the results vector | 164 // The highest typed counts should be at the beginning of the results vector |
143 // so that the best matches will always be included in the results. The loop | 165 // so that the best matches will always be included in the results. The loop |
144 // that calculates result relevance in HistoryContentsProvider::ConvertResults | 166 // that calculates result relevance in HistoryContentsProvider::ConvertResults |
145 // will run backwards to assure higher relevance will be attributed to the | 167 // will run backwards to assure higher relevance will be attributed to the |
146 // best matches. | 168 // best matches. |
147 for (NodeTypedCountPairs::const_iterator i = node_typed_counts.begin(); | 169 for (Nodes::const_iterator i = sorted_nodes.begin(); |
148 i != node_typed_counts.end() && results->size() < max_count; ++i) | 170 i != sorted_nodes.end() && results->size() < max_count; |
149 AddMatchToResults(i->first, &parser, query_nodes.get(), results); | 171 ++i) |
| 172 AddMatchToResults(*i, &parser, query_nodes.get(), results); |
150 } | 173 } |
151 | 174 |
152 void BookmarkIndex::SortMatches(const Matches& matches, | 175 void BookmarkIndex::SortMatches(const Matches& matches, |
153 NodeTypedCountPairs* node_typed_counts) const { | 176 Nodes* sorted_nodes) const { |
154 HistoryService* const history_service = browser_context_ ? | 177 NodeSet nodes; |
155 HistoryServiceFactory::GetForProfile( | 178 for (Matches::const_iterator i = matches.begin(); i != matches.end(); ++i) { |
156 Profile::FromBrowserContext(browser_context_), | 179 #if !defined(OS_ANDROID) |
157 Profile::EXPLICIT_ACCESS) : NULL; | 180 nodes.insert(i->nodes_begin(), i->nodes_end()); |
158 | 181 #else |
159 history::URLDatabase* url_db = history_service ? | 182 // Work around a bug in the implementation of std::set::insert in the STL |
160 history_service->InMemoryDatabase() : NULL; | 183 // used on android (http://crbug.com/367050). |
161 | 184 for (NodeSet::const_iterator n = i->nodes_begin(); n != i->nodes_end(); ++n) |
162 for (Matches::const_iterator i = matches.begin(); i != matches.end(); ++i) | 185 nodes.insert(nodes.end(), *n); |
163 ExtractBookmarkNodePairs(url_db, *i, node_typed_counts); | 186 #endif |
164 | 187 } |
165 std::sort(node_typed_counts->begin(), node_typed_counts->end(), | 188 sorted_nodes->reserve(sorted_nodes->size() + nodes.size()); |
166 &NodeTypedCountPairSortFunc); | 189 if (client_->SupportsTypedCountForNodes()) { |
167 // Eliminate duplicates. | 190 NodeTypedCountPairs node_typed_counts; |
168 node_typed_counts->erase(std::unique(node_typed_counts->begin(), | 191 client_->GetTypedCountForNodes(nodes, &node_typed_counts); |
169 node_typed_counts->end()), | 192 std::sort(node_typed_counts.begin(), |
170 node_typed_counts->end()); | 193 node_typed_counts.end(), |
171 } | 194 NodeTypedCountPairSortFunctor()); |
172 | 195 std::transform(node_typed_counts.begin(), |
173 void BookmarkIndex::ExtractBookmarkNodePairs( | 196 node_typed_counts.end(), |
174 history::URLDatabase* url_db, | 197 std::back_inserter(*sorted_nodes), |
175 const Match& match, | 198 NodeTypedCountPairExtractNodeFunctor()); |
176 NodeTypedCountPairs* node_typed_counts) const { | 199 } else { |
177 | 200 sorted_nodes->insert(sorted_nodes->end(), nodes.begin(), nodes.end()); |
178 for (NodeSet::const_iterator i = match.nodes_begin(); | |
179 i != match.nodes_end(); ++i) { | |
180 int typed_count = 0; | |
181 | |
182 // If |url_db| is the InMemoryDatabase, it might not cache all URLRows, but | |
183 // it guarantees to contain those with |typed_count| > 0. Thus, if we cannot | |
184 // fetch the URLRow, it is safe to assume that its |typed_count| is 0. | |
185 history::URLRow url; | |
186 if (url_db && url_db->GetRowForURL((*i)->url(), &url)) | |
187 typed_count = url.typed_count(); | |
188 | |
189 NodeTypedCountPair pair(*i, typed_count); | |
190 node_typed_counts->push_back(pair); | |
191 } | 201 } |
192 } | 202 } |
193 | 203 |
194 void BookmarkIndex::AddMatchToResults( | 204 void BookmarkIndex::AddMatchToResults( |
195 const BookmarkNode* node, | 205 const BookmarkNode* node, |
196 query_parser::QueryParser* parser, | 206 query_parser::QueryParser* parser, |
197 const query_parser::QueryNodeStarVector& query_nodes, | 207 const query_parser::QueryNodeStarVector& query_nodes, |
198 std::vector<BookmarkMatch>* results) { | 208 std::vector<BookmarkMatch>* results) { |
199 // Check that the result matches the query. The previous search | 209 // Check that the result matches the query. The previous search |
200 // was a simple per-word search, while the more complex matching | 210 // was a simple per-word search, while the more complex matching |
(...skipping 136 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
337 Index::iterator i = index_.find(term); | 347 Index::iterator i = index_.find(term); |
338 if (i == index_.end()) { | 348 if (i == index_.end()) { |
339 // We can get here if the node has the same term more than once. For | 349 // We can get here if the node has the same term more than once. For |
340 // example, a bookmark with the title 'foo foo' would end up here. | 350 // example, a bookmark with the title 'foo foo' would end up here. |
341 return; | 351 return; |
342 } | 352 } |
343 i->second.erase(node); | 353 i->second.erase(node); |
344 if (i->second.empty()) | 354 if (i->second.empty()) |
345 index_.erase(i); | 355 index_.erase(i); |
346 } | 356 } |
OLD | NEW |