components/bookmarks/browser/bookmark_index.cc - Issue 882823004: Omnibox: BookmarksProvider: Make Multiple Prefix Matches Work

Side by Side Diff: components/bookmarks/browser/bookmark_index.cc

Issue 882823004: Omnibox: BookmarksProvider: Make Multiple Prefix Matches Work (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Created 5 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2014 The Chromium Authors. All rights reserved.	1 // Copyright 2014 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "components/bookmarks/browser/bookmark_index.h"	5 #include "components/bookmarks/browser/bookmark_index.h"

6	6

7 #include <algorithm>	7 #include <algorithm>

8 #include <functional>	8 #include <functional>

9 #include <iterator>	9 #include <iterator>

10 #include <list>	10 #include <list>

(...skipping 44 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
55 // Extract the const Node* stored in a BookmarkClient::NodeTypedCountPair.	55 // Extract the const Node* stored in a BookmarkClient::NodeTypedCountPair.

56 struct NodeTypedCountPairExtractNodeFunctor	56 struct NodeTypedCountPairExtractNodeFunctor

57 : std::unary_function<NodeTypedCountPair, const BookmarkNode*> {	57 : std::unary_function<NodeTypedCountPair, const BookmarkNode*> {

58 const BookmarkNode* operator()(const NodeTypedCountPair& pair) const {	58 const BookmarkNode* operator()(const NodeTypedCountPair& pair) const {

59 return pair.first;	59 return pair.first;

60 }	60 }

61 };	61 };

62	62

63 } // namespace	63 } // namespace

64	64

65 // Used when finding the set of bookmarks that match a query. Each match

66 // represents a set of terms (as an interator into the Index) matching the

67 // query as well as the set of nodes that contain those terms in their titles.

68 struct BookmarkIndex::Match {

69 // List of terms matching the query.

70 std::list<Index::const_iterator> terms;

71

72 // The set of nodes matching the terms. As an optimization this is empty

73 // when we match only one term, and is filled in when we get more than one

74 // term. We can do this as when we have only one matching term we know

75 // the set of matching nodes is terms.front()->second.

76 //

77 // Use nodes_begin() and nodes_end() to get an iterator over the set as

78 // it handles the necessary switching between nodes and terms.front().

79 NodeSet nodes;

80

81 // Returns an iterator to the beginning of the matching nodes. See

82 // description of nodes for why this should be used over nodes.begin().

83 NodeSet::const_iterator nodes_begin() const;

84

85 // Returns an iterator to the beginning of the matching nodes. See

86 // description of nodes for why this should be used over nodes.end().

87 NodeSet::const_iterator nodes_end() const;

88 };

89

90 BookmarkIndex::NodeSet::const_iterator

91 BookmarkIndex::Match::nodes_begin() const {

92 return nodes.empty() ? terms.front()->second.begin() : nodes.begin();

93 }

94

95 BookmarkIndex::NodeSet::const_iterator BookmarkIndex::Match::nodes_end() const {

96 return nodes.empty() ? terms.front()->second.end() : nodes.end();

97 }

98

99 BookmarkIndex::BookmarkIndex(BookmarkClient* client,	65 BookmarkIndex::BookmarkIndex(BookmarkClient* client,

100 const std::string& languages)	66 const std::string& languages)

101 : client_(client),	67 : client_(client),

102 languages_(languages) {	68 languages_(languages) {

103 DCHECK(client_);	69 DCHECK(client_);

104 }	70 }

105	71

106 BookmarkIndex::~BookmarkIndex() {	72 BookmarkIndex::~BookmarkIndex() {

107 }	73 }

108	74

(...skipping 27 matching lines...) Expand all Loading...
136 void BookmarkIndex::GetBookmarksMatching(	102 void BookmarkIndex::GetBookmarksMatching(

137 const base::string16& input_query,	103 const base::string16& input_query,

138 size_t max_count,	104 size_t max_count,

139 query_parser::MatchingAlgorithm matching_algorithm,	105 query_parser::MatchingAlgorithm matching_algorithm,

140 std::vector<BookmarkMatch>* results) {	106 std::vector<BookmarkMatch>* results) {

141 const base::string16 query = Normalize(input_query);	107 const base::string16 query = Normalize(input_query);

142 std::vector<base::string16> terms = ExtractQueryWords(query);	108 std::vector<base::string16> terms = ExtractQueryWords(query);

143 if (terms.empty())	109 if (terms.empty())

144 return;	110 return;

145	111

146 Matches matches;	112 NodeSet matches;

147 for (size_t i = 0; i < terms.size(); ++i) {	113 for (size_t i = 0; i < terms.size(); ++i) {

148 if (!GetBookmarksMatchingTerm(	114 if (!GetBookmarksMatchingTerm(

149 terms[i], i == 0, matching_algorithm, &matches)) {	115 terms[i], i == 0, matching_algorithm, &matches)) {

150 return;	116 return;

151 }	117 }

152 }	118 }

153	119

154 Nodes sorted_nodes;	120 Nodes sorted_nodes;

155 SortMatches(matches, &sorted_nodes);	121 SortMatches(matches, &sorted_nodes);

156	122

157 // We use a QueryParser to fill in match positions for us. It's not the most	123 // We use a QueryParser to fill in match positions for us. It's not the most

158 // efficient way to go about this, but by the time we get here we know what	124 // efficient way to go about this, but by the time we get here we know what

159 // matches and so this shouldn't be performance critical.	125 // matches and so this shouldn't be performance critical.

160 query_parser::QueryParser parser;	126 query_parser::QueryParser parser;

161 ScopedVector<query_parser::QueryNode> query_nodes;	127 ScopedVector<query_parser::QueryNode> query_nodes;

162 parser.ParseQueryNodes(query, matching_algorithm, &query_nodes.get());	128 parser.ParseQueryNodes(query, matching_algorithm, &query_nodes.get());

163	129

164 // The highest typed counts should be at the beginning of the results vector	130 // The highest typed counts should be at the beginning of the results vector

165 // so that the best matches will always be included in the results. The loop	131 // so that the best matches will always be included in the results. The loop

166 // that calculates result relevance in HistoryContentsProvider::ConvertResults	132 // that calculates result relevance in HistoryContentsProvider::ConvertResults

167 // will run backwards to assure higher relevance will be attributed to the	133 // will run backwards to assure higher relevance will be attributed to the

168 // best matches.	134 // best matches.

169 for (Nodes::const_iterator i = sorted_nodes.begin();	135 for (Nodes::const_iterator i = sorted_nodes.begin();

170 i != sorted_nodes.end() && results->size() < max_count;	136 i != sorted_nodes.end() && results->size() < max_count;

171 ++i)	137 ++i)

172 AddMatchToResults(*i, &parser, query_nodes.get(), results);	138 AddMatchToResults(*i, &parser, query_nodes.get(), results);

173 }	139 }

174	140

175 void BookmarkIndex::SortMatches(const Matches& matches,	141 void BookmarkIndex::SortMatches(const NodeSet& matches,

176 Nodes* sorted_nodes) const {	142 Nodes* sorted_nodes) const {

177 NodeSet nodes;	143 sorted_nodes->reserve(matches.size());

178 for (Matches::const_iterator i = matches.begin(); i != matches.end(); ++i) {

179 #if !defined(OS_ANDROID)

180 nodes.insert(i->nodes_begin(), i->nodes_end());

181 #else

182 // Work around a bug in the implementation of std::set::insert in the STL

183 // used on android (http://crbug.com/367050).

184 for (NodeSet::const_iterator n = i->nodes_begin(); n != i->nodes_end(); ++n)

185 nodes.insert(nodes.end(), *n);

186 #endif

187 }

188 sorted_nodes->reserve(sorted_nodes->size() + nodes.size());

189 if (client_->SupportsTypedCountForNodes()) {	144 if (client_->SupportsTypedCountForNodes()) {

190 NodeTypedCountPairs node_typed_counts;	145 NodeTypedCountPairs node_typed_counts;

191 client_->GetTypedCountForNodes(nodes, &node_typed_counts);	146 client_->GetTypedCountForNodes(matches, &node_typed_counts);

192 std::sort(node_typed_counts.begin(),	147 std::sort(node_typed_counts.begin(),

193 node_typed_counts.end(),	148 node_typed_counts.end(),

194 NodeTypedCountPairSortFunctor());	149 NodeTypedCountPairSortFunctor());

195 std::transform(node_typed_counts.begin(),	150 std::transform(node_typed_counts.begin(),

196 node_typed_counts.end(),	151 node_typed_counts.end(),

197 std::back_inserter(*sorted_nodes),	152 std::back_inserter(*sorted_nodes),

198 NodeTypedCountPairExtractNodeFunctor());	153 NodeTypedCountPairExtractNodeFunctor());

199 } else {	154 } else {

200 sorted_nodes->insert(sorted_nodes->end(), nodes.begin(), nodes.end());	155 sorted_nodes->insert(sorted_nodes->end(), matches.begin(), matches.end());

201 }	156 }

202 }	157 }

203	158

204 void BookmarkIndex::AddMatchToResults(	159 void BookmarkIndex::AddMatchToResults(

205 const BookmarkNode* node,	160 const BookmarkNode* node,

206 query_parser::QueryParser* parser,	161 query_parser::QueryParser* parser,

207 const query_parser::QueryNodeStarVector& query_nodes,	162 const query_parser::QueryNodeStarVector& query_nodes,

208 std::vector<BookmarkMatch>* results) {	163 std::vector<BookmarkMatch>* results) {

209 // Check that the result matches the query. The previous search	164 // Check that the result matches the query. The previous search

210 // was a simple per-word search, while the more complex matching	165 // was a simple per-word search, while the more complex matching

(...skipping 36 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
247 BookmarkMatch::ReplaceOffsetsInMatchPositions(url_matches, offsets);	202 BookmarkMatch::ReplaceOffsetsInMatchPositions(url_matches, offsets);

248 match.url_match_positions.swap(url_matches);	203 match.url_match_positions.swap(url_matches);

249 match.node = node;	204 match.node = node;

250 results->push_back(match);	205 results->push_back(match);

251 }	206 }

252	207

253 bool BookmarkIndex::GetBookmarksMatchingTerm(	208 bool BookmarkIndex::GetBookmarksMatchingTerm(

254 const base::string16& term,	209 const base::string16& term,

255 bool first_term,	210 bool first_term,

256 query_parser::MatchingAlgorithm matching_algorithm,	211 query_parser::MatchingAlgorithm matching_algorithm,

257 Matches* matches) {	212 NodeSet* matches) {

258 Index::const_iterator i = index_.lower_bound(term);	213 Index::const_iterator i = index_.lower_bound(term);

259 if (i == index_.end())	214 if (i == index_.end())

260 return false;	215 return false;

261	216

262 if (!query_parser::QueryParser::IsWordLongEnoughForPrefixSearch(	217 if (!query_parser::QueryParser::IsWordLongEnoughForPrefixSearch(

263 term, matching_algorithm)) {	218 term, matching_algorithm)) {

264 // Term is too short for prefix match, compare using exact match.	219 // Term is too short for prefix match, compare using exact match.

265 if (i->first != term)	220 if (i->first != term)

266 return false; // No bookmarks with this term.	221 return false; // No bookmarks with this term.

267	222

268 if (first_term) {	223 if (first_term) {

269 Match match;	224 (*matches) = i->second;

270 match.terms.push_back(i);

271 matches->push_back(match);

272 return true;	225 return true;

273 }	226 }

274 CombineMatchesInPlace(i, matches);	227 CombineMatchesInPlace(i->second, matches);
	Peter Kasting 2015/01/28 01:59:40: Is the efficiency gain of this function important, versus doing: matches = STLSetIntersection(i->second, matches); ...using stl_util.h? (2 places)
	Mark P 2015/01/28 18:35:10 (in reply): I don't know anything about the efficiency gain; I merely know this is how the function was before. I'm inclined to go with your cleaner suggestion (both places). Removed the whole CombineMatchesInPlace function.
275 } else if (first_term) {	228 } else {

276 // This is the first term and we're doing a prefix match. Loop through	229 // Loop through index adding all entries that start with term to

277 // index adding all entries that start with term to matches.	230 // |prefix_matches|.

	231 NodeSet tmp_prefix_matches;

	232 NodeSet* prefix_matches = &tmp_prefix_matches;

	233 // If this is the first term, then store the result directly in |matches|

	234 // to avoid calls to CombineMatchesInPlace (which does a copy).

	235 if (first_term)

	236 prefix_matches = matches;
	Peter Kasting 2015/01/28 01:59:40: Nit: NodeSet* prefix_matches = first_term ? matches : &tmp_prefix_matches;
	Mark P 2015/01/28 18:35:10 (in reply): Done.
278 while (i != index_.end() &&	237 while (i != index_.end() &&

279 i->first.size() >= term.size() &&	238 i->first.size() >= term.size() &&

280 term.compare(0, term.size(), i->first, 0, term.size()) == 0) {	239 term.compare(0, term.size(), i->first, 0, term.size()) == 0) {

281 Match match;	240 prefix_matches->insert(i->second.begin(), i->second.end());

282 match.terms.push_back(i);

283 matches->push_back(match);

284 ++i;	241 ++i;

285 }	242 }

286 } else {	243 if (!first_term)

287 // Prefix match and not the first term. Loop through index combining	244 CombineMatchesInPlace(*prefix_matches, matches);

288 // current matches in matches with term, placing result in result.

289 Matches result;

290 while (i != index_.end() &&

291 i->first.size() >= term.size() &&

292 term.compare(0, term.size(), i->first, 0, term.size()) == 0) {

293 CombineMatches(i, *matches, &result);

294 ++i;

295 }

296 matches->swap(result);

297 }	245 }

298 return !matches->empty();	246 return !matches->empty();

299 }	247 }

300	248

301 void BookmarkIndex::CombineMatchesInPlace(const Index::const_iterator& index_i,	249 void BookmarkIndex::CombineMatchesInPlace(const NodeSet& matches_to_incorporate,

302 Matches* matches) {	250 NodeSet* matches) {

303 for (size_t i = 0; i < matches->size(); ) {	251 NodeSet intersection;

304 Match* match = &((*matches)[i]);	252 std::set_intersection(matches->begin(),

305 NodeSet intersection;	253 matches->end(),

306 std::set_intersection(match->nodes_begin(), match->nodes_end(),	254 matches_to_incorporate.begin(),

307 index_i->second.begin(), index_i->second.end(),	255 matches_to_incorporate.end(),

308 std::inserter(intersection, intersection.begin()));	256 std::inserter(intersection, intersection.begin()));

309 if (intersection.empty()) {	257 if (intersection.empty()) {

310 matches->erase(matches->begin() + i);	258 // In case of an empty intersection, skip the swap() for efficiency.
	Peter Kasting 2015/01/28 01:59:40: Nit: Can move this above conditional and eliminate {}
	Mark P 2015/01/28 18:35:10 (in reply): Now obsolete.
311 } else {	259 matches->clear();

312 match->terms.push_back(index_i);	260 } else {

313 match->nodes.swap(intersection);	261 matches->swap(intersection);

314 ++i;

315 }

316 }	262 }

317 }	263 }

318	264

319 void BookmarkIndex::CombineMatches(const Index::const_iterator& index_i,

320 const Matches& current_matches,

321 Matches* result) {

322 for (size_t i = 0; i < current_matches.size(); ++i) {

323 const Match& match = current_matches[i];

324 NodeSet intersection;

325 std::set_intersection(match.nodes_begin(), match.nodes_end(),

326 index_i->second.begin(), index_i->second.end(),

327 std::inserter(intersection, intersection.begin()));

328 if (!intersection.empty()) {

329 result->push_back(Match());

330 Match& combined_match = result->back();

331 combined_match.terms = match.terms;

332 combined_match.terms.push_back(index_i);

333 combined_match.nodes.swap(intersection);

334 }

335 }

336 }

337

338 std::vector<base::string16> BookmarkIndex::ExtractQueryWords(	265 std::vector<base::string16> BookmarkIndex::ExtractQueryWords(

339 const base::string16& query) {	266 const base::string16& query) {

340 std::vector<base::string16> terms;	267 std::vector<base::string16> terms;

341 if (query.empty())	268 if (query.empty())

342 return std::vector<base::string16>();	269 return std::vector<base::string16>();

343 query_parser::QueryParser parser;	270 query_parser::QueryParser parser;

344 parser.ParseQueryWords(base::i18n::ToLower(query),	271 parser.ParseQueryWords(base::i18n::ToLower(query),

345 query_parser::MatchingAlgorithm::DEFAULT,	272 query_parser::MatchingAlgorithm::DEFAULT,

346 &terms);	273 &terms);

347 return terms;	274 return terms;

(...skipping 11 matching lines...) Expand all Loading...
359 // We can get here if the node has the same term more than once. For	286 // We can get here if the node has the same term more than once. For

360 // example, a bookmark with the title 'foo foo' would end up here.	287 // example, a bookmark with the title 'foo foo' would end up here.

361 return;	288 return;

362 }	289 }

363 i->second.erase(node);	290 i->second.erase(node);

364 if (i->second.empty())	291 if (i->second.empty())

365 index_.erase(i);	292 index_.erase(i);

366 }	293 }

367	294

368 } // namespace bookmarks	295 } // namespace bookmarks

OLD	NEW

« no previous file with comments | « components/bookmarks/browser/bookmark_index.h ('k') | components/bookmarks/browser/bookmark_index_unittest.cc » ('j') | no next file with comments »