OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/bookmarks/bookmark_index.h" | 5 #include "chrome/browser/bookmarks/bookmark_index.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <functional> | |
8 #include <iterator> | 9 #include <iterator> |
9 #include <list> | 10 #include <list> |
10 | 11 |
11 #include "base/i18n/case_conversion.h" | 12 #include "base/i18n/case_conversion.h" |
13 #include "base/logging.h" | |
12 #include "base/strings/string16.h" | 14 #include "base/strings/string16.h" |
13 #include "chrome/browser/bookmarks/bookmark_model.h" | |
14 #include "chrome/browser/bookmarks/bookmark_utils.h" | 15 #include "chrome/browser/bookmarks/bookmark_utils.h" |
15 #include "chrome/browser/history/history_service.h" | 16 #include "components/bookmarks/core/browser/bookmark_client.h" |
16 #include "chrome/browser/history/history_service_factory.h" | |
17 #include "chrome/browser/history/url_database.h" | |
18 #include "components/bookmarks/core/browser/bookmark_match.h" | 17 #include "components/bookmarks/core/browser/bookmark_match.h" |
18 #include "components/bookmarks/core/browser/bookmark_node.h" | |
19 #include "components/query_parser/query_parser.h" | 19 #include "components/query_parser/query_parser.h" |
20 #include "components/query_parser/snippet.h" | 20 #include "components/query_parser/snippet.h" |
21 #include "third_party/icu/source/common/unicode/normalizer2.h" | 21 #include "third_party/icu/source/common/unicode/normalizer2.h" |
22 | 22 |
23 typedef BookmarkClient::NodeTypedCountPair NodeTypedCountPair; | |
24 typedef BookmarkClient::NodeTypedCountPairs NodeTypedCountPairs; | |
25 | |
23 namespace { | 26 namespace { |
24 | 27 |
25 // Returns a normalized version of the UTF16 string |text|. If it fails to | 28 // Returns a normalized version of the UTF16 string |text|. If it fails to |
26 // normalize the string, returns |text| itself as a best-effort. | 29 // normalize the string, returns |text| itself as a best-effort. |
27 base::string16 Normalize(const base::string16& text) { | 30 base::string16 Normalize(const base::string16& text) { |
28 UErrorCode status = U_ZERO_ERROR; | 31 UErrorCode status = U_ZERO_ERROR; |
29 const icu::Normalizer2* normalizer2 = | 32 const icu::Normalizer2* normalizer2 = |
30 icu::Normalizer2::getInstance(NULL, "nfkc", UNORM2_COMPOSE, status); | 33 icu::Normalizer2::getInstance(NULL, "nfkc", UNORM2_COMPOSE, status); |
31 icu::UnicodeString unicode_text(text.data(), text.length()); | 34 icu::UnicodeString unicode_text(text.data(), text.length()); |
32 icu::UnicodeString unicode_normalized_text; | 35 icu::UnicodeString unicode_normalized_text; |
33 normalizer2->normalize(unicode_text, unicode_normalized_text, status); | 36 normalizer2->normalize(unicode_text, unicode_normalized_text, status); |
34 if (U_FAILURE(status)) | 37 if (U_FAILURE(status)) |
35 return text; | 38 return text; |
36 return base::string16(unicode_normalized_text.getBuffer(), | 39 return base::string16(unicode_normalized_text.getBuffer(), |
37 unicode_normalized_text.length()); | 40 unicode_normalized_text.length()); |
38 } | 41 } |
39 | 42 |
43 // Sort functor for NodeTypedCountPairs. We sort in decreasing order of typed | |
44 // count so that the best matches will always be added to the results. | |
45 struct NodeTypedCountPairSortFunctor | |
46 : std::binary_function<NodeTypedCountPair, NodeTypedCountPair, bool> { | |
47 bool operator()(const NodeTypedCountPair& a, | |
48 const NodeTypedCountPair& b) const { | |
49 return a.second > b.second; | |
50 } | |
51 }; | |
52 | |
53 // Extract the const Node* stored in a BookmarkClient::NodeTypedCountPair. | |
54 struct NodeTypedCountPairExtractNodeFunctor | |
55 : std::unary_function<NodeTypedCountPair, const BookmarkNode*> { | |
56 const BookmarkNode* operator()(const NodeTypedCountPair& pair) const { | |
57 return pair.first; | |
58 } | |
59 }; | |
60 | |
40 } // namespace | 61 } // namespace |
41 | 62 |
42 // Used when finding the set of bookmarks that match a query. Each match | 63 // Used when finding the set of bookmarks that match a query. Each match |
43 // represents a set of terms (as an interator into the Index) matching the | 64 // represents a set of terms (as an interator into the Index) matching the |
44 // query as well as the set of nodes that contain those terms in their titles. | 65 // query as well as the set of nodes that contain those terms in their titles. |
45 struct BookmarkIndex::Match { | 66 struct BookmarkIndex::Match { |
46 // List of terms matching the query. | 67 // List of terms matching the query. |
47 std::list<Index::const_iterator> terms; | 68 std::list<Index::const_iterator> terms; |
48 | 69 |
49 // The set of nodes matching the terms. As an optimization this is empty | 70 // The set of nodes matching the terms. As an optimization this is empty |
(...skipping 16 matching lines...) Expand all Loading... | |
66 | 87 |
67 BookmarkIndex::NodeSet::const_iterator | 88 BookmarkIndex::NodeSet::const_iterator |
68 BookmarkIndex::Match::nodes_begin() const { | 89 BookmarkIndex::Match::nodes_begin() const { |
69 return nodes.empty() ? terms.front()->second.begin() : nodes.begin(); | 90 return nodes.empty() ? terms.front()->second.begin() : nodes.begin(); |
70 } | 91 } |
71 | 92 |
72 BookmarkIndex::NodeSet::const_iterator BookmarkIndex::Match::nodes_end() const { | 93 BookmarkIndex::NodeSet::const_iterator BookmarkIndex::Match::nodes_end() const { |
73 return nodes.empty() ? terms.front()->second.end() : nodes.end(); | 94 return nodes.empty() ? terms.front()->second.end() : nodes.end(); |
74 } | 95 } |
75 | 96 |
76 BookmarkIndex::BookmarkIndex(content::BrowserContext* browser_context, | 97 BookmarkIndex::BookmarkIndex(BookmarkClient* client, |
77 bool index_urls, | 98 bool index_urls, |
78 const std::string& languages) | 99 const std::string& languages) |
79 : index_urls_(index_urls), | 100 : client_(client), languages_(languages), index_urls_(index_urls) { |
sky
2014/04/23 20:32:07
nit: one member per line, like BookmarkModel.
sdefresne
2014/04/24 12:30:15
Done.
| |
80 browser_context_(browser_context), | 101 DCHECK(client_); |
81 languages_(languages) { | |
82 } | 102 } |
83 | 103 |
84 BookmarkIndex::~BookmarkIndex() { | 104 BookmarkIndex::~BookmarkIndex() { |
85 } | 105 } |
86 | 106 |
87 void BookmarkIndex::Add(const BookmarkNode* node) { | 107 void BookmarkIndex::Add(const BookmarkNode* node) { |
88 if (!node->is_url()) | 108 if (!node->is_url()) |
89 return; | 109 return; |
90 std::vector<base::string16> terms = | 110 std::vector<base::string16> terms = |
91 ExtractQueryWords(Normalize(node->GetTitle())); | 111 ExtractQueryWords(Normalize(node->GetTitle())); |
(...skipping 30 matching lines...) Expand all Loading... | |
122 std::vector<base::string16> terms = ExtractQueryWords(query); | 142 std::vector<base::string16> terms = ExtractQueryWords(query); |
123 if (terms.empty()) | 143 if (terms.empty()) |
124 return; | 144 return; |
125 | 145 |
126 Matches matches; | 146 Matches matches; |
127 for (size_t i = 0; i < terms.size(); ++i) { | 147 for (size_t i = 0; i < terms.size(); ++i) { |
128 if (!GetBookmarksMatchingTerm(terms[i], i == 0, &matches)) | 148 if (!GetBookmarksMatchingTerm(terms[i], i == 0, &matches)) |
129 return; | 149 return; |
130 } | 150 } |
131 | 151 |
132 NodeTypedCountPairs node_typed_counts; | 152 Nodes sorted_nodes; |
133 SortMatches(matches, &node_typed_counts); | 153 SortMatches(matches, &sorted_nodes); |
134 | 154 |
135 // We use a QueryParser to fill in match positions for us. It's not the most | 155 // We use a QueryParser to fill in match positions for us. It's not the most |
136 // efficient way to go about this, but by the time we get here we know what | 156 // efficient way to go about this, but by the time we get here we know what |
137 // matches and so this shouldn't be performance critical. | 157 // matches and so this shouldn't be performance critical. |
138 query_parser::QueryParser parser; | 158 query_parser::QueryParser parser; |
139 ScopedVector<query_parser::QueryNode> query_nodes; | 159 ScopedVector<query_parser::QueryNode> query_nodes; |
140 parser.ParseQueryNodes(query, &query_nodes.get()); | 160 parser.ParseQueryNodes(query, &query_nodes.get()); |
141 | 161 |
142 // The highest typed counts should be at the beginning of the results vector | 162 // The highest typed counts should be at the beginning of the results vector |
143 // so that the best matches will always be included in the results. The loop | 163 // so that the best matches will always be included in the results. The loop |
144 // that calculates result relevance in HistoryContentsProvider::ConvertResults | 164 // that calculates result relevance in HistoryContentsProvider::ConvertResults |
145 // will run backwards to assure higher relevance will be attributed to the | 165 // will run backwards to assure higher relevance will be attributed to the |
146 // best matches. | 166 // best matches. |
147 for (NodeTypedCountPairs::const_iterator i = node_typed_counts.begin(); | 167 for (Nodes::const_iterator i = sorted_nodes.begin(); |
148 i != node_typed_counts.end() && results->size() < max_count; ++i) | 168 i != sorted_nodes.end() && results->size() < max_count; |
149 AddMatchToResults(i->first, &parser, query_nodes.get(), results); | 169 ++i) |
170 AddMatchToResults(*i, &parser, query_nodes.get(), results); | |
150 } | 171 } |
151 | 172 |
152 void BookmarkIndex::SortMatches(const Matches& matches, | 173 void BookmarkIndex::SortMatches(const Matches& matches, |
153 NodeTypedCountPairs* node_typed_counts) const { | 174 Nodes* sorted_nodes) const { |
154 HistoryService* const history_service = browser_context_ ? | 175 NodeSet nodes; |
155 HistoryServiceFactory::GetForProfile( | 176 for (Matches::const_iterator i = matches.begin(); i != matches.end(); ++i) { |
156 Profile::FromBrowserContext(browser_context_), | 177 nodes.insert(i->nodes_begin(), i->nodes_end()); |
157 Profile::EXPLICIT_ACCESS) : NULL; | 178 } |
158 | 179 sorted_nodes->reserve(sorted_nodes->size() + nodes.size()); |
159 history::URLDatabase* url_db = history_service ? | 180 if (client_->SupportsTypedCountForNodes()) { |
160 history_service->InMemoryDatabase() : NULL; | 181 NodeTypedCountPairs node_typed_counts; |
161 | 182 client_->GetTypedCountForNodes(nodes, &node_typed_counts); |
162 for (Matches::const_iterator i = matches.begin(); i != matches.end(); ++i) | 183 std::sort(node_typed_counts.begin(), |
163 ExtractBookmarkNodePairs(url_db, *i, node_typed_counts); | 184 node_typed_counts.end(), |
164 | 185 NodeTypedCountPairSortFunctor()); |
165 std::sort(node_typed_counts->begin(), node_typed_counts->end(), | 186 std::transform(node_typed_counts.begin(), |
166 &NodeTypedCountPairSortFunc); | 187 node_typed_counts.end(), |
167 // Eliminate duplicates. | 188 std::back_inserter(*sorted_nodes), |
168 node_typed_counts->erase(std::unique(node_typed_counts->begin(), | 189 NodeTypedCountPairExtractNodeFunctor()); |
169 node_typed_counts->end()), | 190 } else { |
170 node_typed_counts->end()); | 191 sorted_nodes->insert(sorted_nodes->end(), nodes.begin(), nodes.end()); |
171 } | |
172 | |
173 void BookmarkIndex::ExtractBookmarkNodePairs( | |
174 history::URLDatabase* url_db, | |
175 const Match& match, | |
176 NodeTypedCountPairs* node_typed_counts) const { | |
177 | |
178 for (NodeSet::const_iterator i = match.nodes_begin(); | |
179 i != match.nodes_end(); ++i) { | |
180 int typed_count = 0; | |
181 | |
182 // If |url_db| is the InMemoryDatabase, it might not cache all URLRows, but | |
183 // it guarantees to contain those with |typed_count| > 0. Thus, if we cannot | |
184 // fetch the URLRow, it is safe to assume that its |typed_count| is 0. | |
185 history::URLRow url; | |
186 if (url_db && url_db->GetRowForURL((*i)->url(), &url)) | |
187 typed_count = url.typed_count(); | |
188 | |
189 NodeTypedCountPair pair(*i, typed_count); | |
190 node_typed_counts->push_back(pair); | |
191 } | 192 } |
192 } | 193 } |
193 | 194 |
194 void BookmarkIndex::AddMatchToResults( | 195 void BookmarkIndex::AddMatchToResults( |
195 const BookmarkNode* node, | 196 const BookmarkNode* node, |
196 query_parser::QueryParser* parser, | 197 query_parser::QueryParser* parser, |
197 const query_parser::QueryNodeStarVector& query_nodes, | 198 const query_parser::QueryNodeStarVector& query_nodes, |
198 std::vector<BookmarkMatch>* results) { | 199 std::vector<BookmarkMatch>* results) { |
199 // Check that the result matches the query. The previous search | 200 // Check that the result matches the query. The previous search |
200 // was a simple per-word search, while the more complex matching | 201 // was a simple per-word search, while the more complex matching |
(...skipping 136 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
337 Index::iterator i = index_.find(term); | 338 Index::iterator i = index_.find(term); |
338 if (i == index_.end()) { | 339 if (i == index_.end()) { |
339 // We can get here if the node has the same term more than once. For | 340 // We can get here if the node has the same term more than once. For |
340 // example, a bookmark with the title 'foo foo' would end up here. | 341 // example, a bookmark with the title 'foo foo' would end up here. |
341 return; | 342 return; |
342 } | 343 } |
343 i->second.erase(node); | 344 i->second.erase(node); |
344 if (i->second.empty()) | 345 if (i->second.empty()) |
345 index_.erase(i); | 346 index_.erase(i); |
346 } | 347 } |
OLD | NEW |