Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(55)

Side by Side Diff: components/bookmarks/browser/bookmark_index.cc

Issue 2537223008: Add TitledUrlIndex for indexing arbitrary title/URL pairs (Closed)
Patch Set: refactor in-place to preserve history Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/bookmarks/browser/bookmark_index.h" 5 #include "components/bookmarks/browser/bookmark_index.h"
6 6
7 #include <stdint.h> 7 #include <stdint.h>
8 8
9 #include <algorithm>
10 #include <functional>
11 #include <iterator>
12 #include <list>
13
14 #include "base/i18n/case_conversion.h" 9 #include "base/i18n/case_conversion.h"
15 #include "base/logging.h" 10 #include "base/logging.h"
16 #include "base/stl_util.h" 11 #include "base/stl_util.h"
17 #include "base/strings/utf_offset_string_conversions.h" 12 #include "base/strings/utf_offset_string_conversions.h"
18 #include "build/build_config.h" 13 #include "build/build_config.h"
19 #include "components/bookmarks/browser/bookmark_client.h" 14 #include "components/bookmarks/browser/bookmark_client.h"
20 #include "components/bookmarks/browser/bookmark_match.h" 15 #include "components/bookmarks/browser/bookmark_match.h"
21 #include "components/bookmarks/browser/bookmark_node.h"
22 #include "components/bookmarks/browser/bookmark_utils.h" 16 #include "components/bookmarks/browser/bookmark_utils.h"
17 #include "components/bookmarks/browser/titled_url_node.h"
23 #include "components/query_parser/snippet.h" 18 #include "components/query_parser/snippet.h"
24 #include "third_party/icu/source/common/unicode/normalizer2.h" 19 #include "third_party/icu/source/common/unicode/normalizer2.h"
25 #include "third_party/icu/source/common/unicode/utypes.h" 20 #include "third_party/icu/source/common/unicode/utypes.h"
26 21
27 namespace bookmarks { 22 namespace bookmarks {
28 23
29 typedef BookmarkClient::NodeTypedCountPair NodeTypedCountPair; 24 typedef BookmarkClient::NodeTypedCountPair NodeTypedCountPair;
30 typedef BookmarkClient::NodeTypedCountPairs NodeTypedCountPairs; 25 typedef BookmarkClient::NodeTypedCountPairs NodeTypedCountPairs;
31 26
32 namespace { 27 namespace {
(...skipping 23 matching lines...) Expand all
56 51
57 // Sort functor for NodeTypedCountPairs. We sort in decreasing order of typed 52 // Sort functor for NodeTypedCountPairs. We sort in decreasing order of typed
58 // count so that the best matches will always be added to the results. 53 // count so that the best matches will always be added to the results.
59 struct NodeTypedCountPairSortFunctor { 54 struct NodeTypedCountPairSortFunctor {
60 bool operator()(const NodeTypedCountPair& a, 55 bool operator()(const NodeTypedCountPair& a,
61 const NodeTypedCountPair& b) const { 56 const NodeTypedCountPair& b) const {
62 return a.second > b.second; 57 return a.second > b.second;
63 } 58 }
64 }; 59 };
65 60
66 // Extract the const Node* stored in a BookmarkClient::NodeTypedCountPair. 61 // Extract the const TitledUrlNode* stored in a NodeTypedCountPair.
67 struct NodeTypedCountPairExtractNodeFunctor { 62 struct NodeTypedCountPairExtractNodeFunctor {
68 const BookmarkNode* operator()(const NodeTypedCountPair& pair) const { 63 const TitledUrlNode* operator()(const NodeTypedCountPair& pair) const {
69 return pair.first; 64 return pair.first;
70 } 65 }
71 }; 66 };
72 67
73 } // namespace 68 } // namespace
74 69
75 BookmarkIndex::BookmarkIndex(BookmarkClient* client) 70 BookmarkIndex::BookmarkIndex(BookmarkClient* client)
76 : client_(client) { 71 : client_(client) {
77 DCHECK(client_); 72 DCHECK(client_);
78 } 73 }
79 74
80 BookmarkIndex::~BookmarkIndex() { 75 BookmarkIndex::~BookmarkIndex() {
81 } 76 }
82 77
83 void BookmarkIndex::Add(const BookmarkNode* node) { 78 void BookmarkIndex::Add(const TitledUrlNode* node) {
84 if (!node->is_url()) 79 if (!node->GetTitledUrlNodeUrl().is_valid())
85 return; 80 return;
86 std::vector<base::string16> terms = 81 std::vector<base::string16> terms =
87 ExtractQueryWords(Normalize(node->GetTitle())); 82 ExtractQueryWords(Normalize(node->GetTitledUrlNodeTitle()));
88 for (size_t i = 0; i < terms.size(); ++i) 83 for (size_t i = 0; i < terms.size(); ++i)
89 RegisterNode(terms[i], node); 84 RegisterNode(terms[i], node);
90 terms = 85 terms = ExtractQueryWords(
91 ExtractQueryWords(CleanUpUrlForMatching(node->url(), nullptr)); 86 CleanUpUrlForMatching(node->GetTitledUrlNodeUrl(), nullptr));
92 for (size_t i = 0; i < terms.size(); ++i) 87 for (size_t i = 0; i < terms.size(); ++i)
93 RegisterNode(terms[i], node); 88 RegisterNode(terms[i], node);
94 } 89 }
95 90
96 void BookmarkIndex::Remove(const BookmarkNode* node) { 91 void BookmarkIndex::Remove(const TitledUrlNode* node) {
97 if (!node->is_url()) 92 if (!node->GetTitledUrlNodeUrl().is_valid())
98 return; 93 return;
99 94
100 std::vector<base::string16> terms = 95 std::vector<base::string16> terms =
101 ExtractQueryWords(Normalize(node->GetTitle())); 96 ExtractQueryWords(Normalize(node->GetTitledUrlNodeTitle()));
102 for (size_t i = 0; i < terms.size(); ++i) 97 for (size_t i = 0; i < terms.size(); ++i)
103 UnregisterNode(terms[i], node); 98 UnregisterNode(terms[i], node);
104 terms = 99 terms = ExtractQueryWords(
105 ExtractQueryWords(CleanUpUrlForMatching(node->url(), nullptr)); 100 CleanUpUrlForMatching(node->GetTitledUrlNodeUrl(), nullptr));
106 for (size_t i = 0; i < terms.size(); ++i) 101 for (size_t i = 0; i < terms.size(); ++i)
107 UnregisterNode(terms[i], node); 102 UnregisterNode(terms[i], node);
108 } 103 }
109 104
110 void BookmarkIndex::GetBookmarksMatching( 105 void BookmarkIndex::GetResultsMatching(
111 const base::string16& input_query, 106 const base::string16& input_query,
112 size_t max_count, 107 size_t max_count,
113 query_parser::MatchingAlgorithm matching_algorithm, 108 query_parser::MatchingAlgorithm matching_algorithm,
114 std::vector<BookmarkMatch>* results) { 109 std::vector<BookmarkMatch>* results) {
115 const base::string16 query = Normalize(input_query); 110 const base::string16 query = Normalize(input_query);
116 std::vector<base::string16> terms = ExtractQueryWords(query); 111 std::vector<base::string16> terms = ExtractQueryWords(query);
117 if (terms.empty()) 112 if (terms.empty())
118 return; 113 return;
119 114
120 NodeSet matches; 115 NodeSet matches;
121 for (size_t i = 0; i < terms.size(); ++i) { 116 for (size_t i = 0; i < terms.size(); ++i) {
122 if (!GetBookmarksMatchingTerm( 117 if (!GetResultsMatchingTerm(terms[i], i == 0, matching_algorithm,
123 terms[i], i == 0, matching_algorithm, &matches)) { 118 &matches)) {
124 return; 119 return;
125 } 120 }
126 } 121 }
127 122
128 Nodes sorted_nodes; 123 Nodes sorted_nodes;
129 SortMatches(matches, &sorted_nodes); 124 SortMatches(matches, &sorted_nodes);
130 125
131 // We use a QueryParser to fill in match positions for us. It's not the most 126 // We use a QueryParser to fill in match positions for us. It's not the most
132 // efficient way to go about this, but by the time we get here we know what 127 // efficient way to go about this, but by the time we get here we know what
133 // matches and so this shouldn't be performance critical. 128 // matches and so this shouldn't be performance critical.
(...skipping 24 matching lines...) Expand all
158 std::transform(node_typed_counts.begin(), 153 std::transform(node_typed_counts.begin(),
159 node_typed_counts.end(), 154 node_typed_counts.end(),
160 std::back_inserter(*sorted_nodes), 155 std::back_inserter(*sorted_nodes),
161 NodeTypedCountPairExtractNodeFunctor()); 156 NodeTypedCountPairExtractNodeFunctor());
162 } else { 157 } else {
163 sorted_nodes->insert(sorted_nodes->end(), matches.begin(), matches.end()); 158 sorted_nodes->insert(sorted_nodes->end(), matches.begin(), matches.end());
164 } 159 }
165 } 160 }
166 161
167 void BookmarkIndex::AddMatchToResults( 162 void BookmarkIndex::AddMatchToResults(
168 const BookmarkNode* node, 163 const TitledUrlNode* node,
169 query_parser::QueryParser* parser, 164 query_parser::QueryParser* parser,
170 const query_parser::QueryNodeVector& query_nodes, 165 const query_parser::QueryNodeVector& query_nodes,
171 std::vector<BookmarkMatch>* results) { 166 std::vector<BookmarkMatch>* results) {
172 // Check that the result matches the query. The previous search 167 // Check that the result matches the query. The previous search
173 // was a simple per-word search, while the more complex matching 168 // was a simple per-word search, while the more complex matching
174 // of QueryParser may filter it out. For example, the query 169 // of QueryParser may filter it out. For example, the query
175 // ["thi"] will match the bookmark titled [Thinking], but since 170 // ["thi"] will match the title [Thinking], but since
176 // ["thi"] is quoted we don't want to do a prefix match. 171 // ["thi"] is quoted we don't want to do a prefix match.
177 query_parser::QueryWordVector title_words, url_words; 172 query_parser::QueryWordVector title_words, url_words;
178 const base::string16 lower_title = 173 const base::string16 lower_title =
179 base::i18n::ToLower(Normalize(node->GetTitle())); 174 base::i18n::ToLower(Normalize(node->GetTitledUrlNodeTitle()));
180 parser->ExtractQueryWords(lower_title, &title_words); 175 parser->ExtractQueryWords(lower_title, &title_words);
181 base::OffsetAdjuster::Adjustments adjustments; 176 base::OffsetAdjuster::Adjustments adjustments;
182 parser->ExtractQueryWords( 177 parser->ExtractQueryWords(
183 CleanUpUrlForMatching(node->url(), &adjustments), 178 CleanUpUrlForMatching(node->GetTitledUrlNodeUrl(), &adjustments),
184 &url_words); 179 &url_words);
185 query_parser::Snippet::MatchPositions title_matches, url_matches; 180 query_parser::Snippet::MatchPositions title_matches, url_matches;
186 for (const auto& node : query_nodes) { 181 for (const auto& node : query_nodes) {
187 const bool has_title_matches = 182 const bool has_title_matches =
188 node->HasMatchIn(title_words, &title_matches); 183 node->HasMatchIn(title_words, &title_matches);
189 const bool has_url_matches = node->HasMatchIn(url_words, &url_matches); 184 const bool has_url_matches = node->HasMatchIn(url_words, &url_matches);
190 if (!has_title_matches && !has_url_matches) 185 if (!has_title_matches && !has_url_matches)
191 return; 186 return;
192 query_parser::QueryParser::SortAndCoalesceMatchPositions(&title_matches); 187 query_parser::QueryParser::SortAndCoalesceMatchPositions(&title_matches);
193 query_parser::QueryParser::SortAndCoalesceMatchPositions(&url_matches); 188 query_parser::QueryParser::SortAndCoalesceMatchPositions(&url_matches);
194 } 189 }
195 BookmarkMatch match; 190 BookmarkMatch match;
196 if (lower_title.length() == node->GetTitle().length()) { 191 if (lower_title.length() == node->GetTitledUrlNodeTitle().length()) {
197 // Only use title matches if the lowercase string is the same length 192 // Only use title matches if the lowercase string is the same length
198 // as the original string, otherwise the matches are meaningless. 193 // as the original string, otherwise the matches are meaningless.
199 // TODO(mpearson): revise match positions appropriately. 194 // TODO(mpearson): revise match positions appropriately.
200 match.title_match_positions.swap(title_matches); 195 match.title_match_positions.swap(title_matches);
201 } 196 }
202 // Now that we're done processing this entry, correct the offsets of the 197 // Now that we're done processing this entry, correct the offsets of the
203 // matches in |url_matches| so they point to offsets in the original URL 198 // matches in |url_matches| so they point to offsets in the original URL
204 // spec, not the cleaned-up URL string that we used for matching. 199 // spec, not the cleaned-up URL string that we used for matching.
205 std::vector<size_t> offsets = 200 std::vector<size_t> offsets =
206 BookmarkMatch::OffsetsFromMatchPositions(url_matches); 201 BookmarkMatch::OffsetsFromMatchPositions(url_matches);
207 base::OffsetAdjuster::UnadjustOffsets(adjustments, &offsets); 202 base::OffsetAdjuster::UnadjustOffsets(adjustments, &offsets);
208 url_matches = 203 url_matches =
209 BookmarkMatch::ReplaceOffsetsInMatchPositions(url_matches, offsets); 204 BookmarkMatch::ReplaceOffsetsInMatchPositions(url_matches, offsets);
210 match.url_match_positions.swap(url_matches); 205 match.url_match_positions.swap(url_matches);
211 match.node = node; 206 match.node = node;
212 results->push_back(match); 207 results->push_back(match);
213 } 208 }
214 209
215 bool BookmarkIndex::GetBookmarksMatchingTerm( 210 bool BookmarkIndex::GetResultsMatchingTerm(
216 const base::string16& term, 211 const base::string16& term,
217 bool first_term, 212 bool first_term,
218 query_parser::MatchingAlgorithm matching_algorithm, 213 query_parser::MatchingAlgorithm matching_algorithm,
219 NodeSet* matches) { 214 NodeSet* matches) {
220 Index::const_iterator i = index_.lower_bound(term); 215 Index::const_iterator i = index_.lower_bound(term);
221 if (i == index_.end()) 216 if (i == index_.end())
222 return false; 217 return false;
223 218
224 if (!query_parser::QueryParser::IsWordLongEnoughForPrefixSearch( 219 if (!query_parser::QueryParser::IsWordLongEnoughForPrefixSearch(
225 term, matching_algorithm)) { 220 term, matching_algorithm)) {
226 // Term is too short for prefix match, compare using exact match. 221 // Term is too short for prefix match, compare using exact match.
227 if (i->first != term) 222 if (i->first != term)
228 return false; // No bookmarks with this term. 223 return false; // No title/URL pairs with this term.
229 224
230 if (first_term) { 225 if (first_term) {
231 (*matches) = i->second; 226 (*matches) = i->second;
232 return true; 227 return true;
233 } 228 }
234 *matches = base::STLSetIntersection<NodeSet>(i->second, *matches); 229 *matches = base::STLSetIntersection<NodeSet>(i->second, *matches);
235 } else { 230 } else {
236 // Loop through index adding all entries that start with term to 231 // Loop through index adding all entries that start with term to
237 // |prefix_matches|. 232 // |prefix_matches|.
238 NodeSet tmp_prefix_matches; 233 NodeSet tmp_prefix_matches;
(...skipping 26 matching lines...) Expand all
265 if (query.empty()) 260 if (query.empty())
266 return std::vector<base::string16>(); 261 return std::vector<base::string16>();
267 query_parser::QueryParser parser; 262 query_parser::QueryParser parser;
268 parser.ParseQueryWords(base::i18n::ToLower(query), 263 parser.ParseQueryWords(base::i18n::ToLower(query),
269 query_parser::MatchingAlgorithm::DEFAULT, 264 query_parser::MatchingAlgorithm::DEFAULT,
270 &terms); 265 &terms);
271 return terms; 266 return terms;
272 } 267 }
273 268
274 void BookmarkIndex::RegisterNode(const base::string16& term, 269 void BookmarkIndex::RegisterNode(const base::string16& term,
275 const BookmarkNode* node) { 270 const TitledUrlNode* node) {
276 index_[term].insert(node); 271 index_[term].insert(node);
277 } 272 }
278 273
279 void BookmarkIndex::UnregisterNode(const base::string16& term, 274 void BookmarkIndex::UnregisterNode(const base::string16& term,
280 const BookmarkNode* node) { 275 const TitledUrlNode* node) {
281 Index::iterator i = index_.find(term); 276 Index::iterator i = index_.find(term);
282 if (i == index_.end()) { 277 if (i == index_.end()) {
283 // We can get here if the node has the same term more than once. For 278 // We can get here if the node has the same term more than once. For
284 // example, a bookmark with the title 'foo foo' would end up here. 279 // example, a node with the title 'foo foo' would end up here.
285 return; 280 return;
286 } 281 }
287 i->second.erase(node); 282 i->second.erase(node);
288 if (i->second.empty()) 283 if (i->second.empty())
289 index_.erase(i); 284 index_.erase(i);
290 } 285 }
291 286
292 } // namespace bookmarks 287 } // namespace bookmarks
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698