chrome/browser/bookmarks/bookmark_index.cc - Issue 184663002: Omnibox: Make URLs of Bookmarks Searchable

Side by Side Diff: chrome/browser/bookmarks/bookmark_index.cc

Issue 184663002: Omnibox: Make URLs of Bookmarks Searchable (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Peter's comments Created 6 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "chrome/browser/bookmarks/bookmark_index.h"	5 #include "chrome/browser/bookmarks/bookmark_index.h"

6	6

7 #include <algorithm>	7 #include <algorithm>

8 #include <iterator>	8 #include <iterator>

9 #include <list>	9 #include <list>

10	10

11 #include "base/i18n/case_conversion.h"	11 #include "base/i18n/case_conversion.h"

12 #include "base/strings/string16.h"	12 #include "base/strings/string16.h"

13 #include "chrome/browser/bookmarks/bookmark_model.h"	13 #include "chrome/browser/bookmarks/bookmark_model.h"

	14 #include "chrome/browser/bookmarks/bookmark_utils.h"

14 #include "chrome/browser/history/history_service.h"	15 #include "chrome/browser/history/history_service.h"

15 #include "chrome/browser/history/history_service_factory.h"	16 #include "chrome/browser/history/history_service_factory.h"

16 #include "chrome/browser/history/url_database.h"	17 #include "chrome/browser/history/url_database.h"

17 #include "components/bookmarks/core/browser/bookmark_title_match.h"	18 #include "chrome/browser/omnibox/omnibox_field_trial.h"

	19 #include "components/bookmarks/core/browser/bookmark_match.h"

18 #include "components/query_parser/query_parser.h"	20 #include "components/query_parser/query_parser.h"

	21 #include "components/query_parser/snippet.h"

19 #include "third_party/icu/source/common/unicode/normalizer2.h"	22 #include "third_party/icu/source/common/unicode/normalizer2.h"

20	23

21 namespace {	24 namespace {

22	25

23 // Returns a normalized version of the UTF16 string \|text\|. If it fails to	26 // Returns a normalized version of the UTF16 string \|text\|. If it fails to

24 // normalize the string, returns \|text\| itself as a best-effort.	27 // normalize the string, returns \|text\| itself as a best-effort.

25 base::string16 Normalize(const base::string16& text) {	28 base::string16 Normalize(const base::string16& text) {

26 UErrorCode status = U_ZERO_ERROR;	29 UErrorCode status = U_ZERO_ERROR;

27 const icu::Normalizer2* normalizer2 =	30 const icu::Normalizer2* normalizer2 =

28 icu::Normalizer2::getInstance(NULL, "nfkc", UNORM2_COMPOSE, status);	31 icu::Normalizer2::getInstance(NULL, "nfkc", UNORM2_COMPOSE, status);

(...skipping 35 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
64	67

65 BookmarkIndex::NodeSet::const_iterator	68 BookmarkIndex::NodeSet::const_iterator

66 BookmarkIndex::Match::nodes_begin() const {	69 BookmarkIndex::Match::nodes_begin() const {

67 return nodes.empty() ? terms.front()->second.begin() : nodes.begin();	70 return nodes.empty() ? terms.front()->second.begin() : nodes.begin();

68 }	71 }

69	72

70 BookmarkIndex::NodeSet::const_iterator BookmarkIndex::Match::nodes_end() const {	73 BookmarkIndex::NodeSet::const_iterator BookmarkIndex::Match::nodes_end() const {

71 return nodes.empty() ? terms.front()->second.end() : nodes.end();	74 return nodes.empty() ? terms.front()->second.end() : nodes.end();

72 }	75 }

73	76

74 BookmarkIndex::BookmarkIndex(content::BrowserContext* browser_context)	77 BookmarkIndex::BookmarkIndex(content::BrowserContext* browser_context,

75 : browser_context_(browser_context) {	78 const std::string& languages)

	79 : index_urls_(OmniboxFieldTrial::BookmarksIndexURLsValue()),

	80 browser_context_(browser_context),

	81 languages_(languages) {

76 }	82 }

77	83

78 BookmarkIndex::~BookmarkIndex() {	84 BookmarkIndex::~BookmarkIndex() {

79 }	85 }

80	86

81 void BookmarkIndex::Add(const BookmarkNode* node) {	87 void BookmarkIndex::Add(const BookmarkNode* node) {

82 if (!node->is_url())	88 if (!node->is_url())

83 return;	89 return;

84 std::vector<base::string16> terms =	90 std::vector<base::string16> terms =

85 ExtractQueryWords(Normalize(node->GetTitle()));	91 ExtractQueryWords(Normalize(node->GetTitle()));

86 for (size_t i = 0; i < terms.size(); ++i)	92 for (size_t i = 0; i < terms.size(); ++i)

87 RegisterNode(terms[i], node);	93 RegisterNode(terms[i], node);

	94 if (index_urls_) {

	95 terms = ExtractQueryWords(

	96 bookmark_utils::CleanUpUrlForMatching(node->url(), languages_));

	97 for (size_t i = 0; i < terms.size(); ++i)

	98 RegisterNode(terms[i], node);

	99 }

88 }	100 }

89	101

90 void BookmarkIndex::Remove(const BookmarkNode* node) {	102 void BookmarkIndex::Remove(const BookmarkNode* node) {

91 if (!node->is_url())	103 if (!node->is_url())

92 return;	104 return;

93	105

94 std::vector<base::string16> terms =	106 std::vector<base::string16> terms =

95 ExtractQueryWords(Normalize(node->GetTitle()));	107 ExtractQueryWords(Normalize(node->GetTitle()));

96 for (size_t i = 0; i < terms.size(); ++i)	108 for (size_t i = 0; i < terms.size(); ++i)

97 UnregisterNode(terms[i], node);	109 UnregisterNode(terms[i], node);

	110 if (index_urls_) {

	111 terms = ExtractQueryWords(

	112 bookmark_utils::CleanUpUrlForMatching(node->url(), languages_));

	113 for (size_t i = 0; i < terms.size(); ++i)

	114 UnregisterNode(terms[i], node);

	115 }

98 }	116 }

99	117

100 void BookmarkIndex::GetBookmarksWithTitlesMatching(	118 void BookmarkIndex::GetBookmarksMatching(const base::string16& input_query,

101 const base::string16& input_query,	119 size_t max_count,

102 size_t max_count,	120 std::vector<BookmarkMatch>* results) {

103 std::vector<BookmarkTitleMatch>* results) {

104 const base::string16 query = Normalize(input_query);	121 const base::string16 query = Normalize(input_query);

105 std::vector<base::string16> terms = ExtractQueryWords(query);	122 std::vector<base::string16> terms = ExtractQueryWords(query);

106 if (terms.empty())	123 if (terms.empty())

107 return;	124 return;

108	125

109 Matches matches;	126 Matches matches;

110 for (size_t i = 0; i < terms.size(); ++i) {	127 for (size_t i = 0; i < terms.size(); ++i) {

111 if (!GetBookmarksWithTitleMatchingTerm(terms[i], i == 0, &matches))	128 if (!GetBookmarksMatchingTerm(terms[i], i == 0, &matches))

112 return;	129 return;

113 }	130 }

114	131

115 NodeTypedCountPairs node_typed_counts;	132 NodeTypedCountPairs node_typed_counts;

116 SortMatches(matches, &node_typed_counts);	133 SortMatches(matches, &node_typed_counts);

117	134

118 // We use a QueryParser to fill in match positions for us. It's not the most	135 // We use a QueryParser to fill in match positions for us. It's not the most

119 // efficient way to go about this, but by the time we get here we know what	136 // efficient way to go about this, but by the time we get here we know what

120 // matches and so this shouldn't be performance critical.	137 // matches and so this shouldn't be performance critical.

121 query_parser::QueryParser parser;	138 query_parser::QueryParser parser;

(...skipping 49 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
171	188

172 NodeTypedCountPair pair(*i, typed_count);	189 NodeTypedCountPair pair(*i, typed_count);

173 node_typed_counts->push_back(pair);	190 node_typed_counts->push_back(pair);

174 }	191 }

175 }	192 }

176	193

177 void BookmarkIndex::AddMatchToResults(	194 void BookmarkIndex::AddMatchToResults(

178 const BookmarkNode* node,	195 const BookmarkNode* node,

179 query_parser::QueryParser* parser,	196 query_parser::QueryParser* parser,

180 const std::vector<query_parser::QueryNode*>& query_nodes,	197 const std::vector<query_parser::QueryNode*>& query_nodes,

181 std::vector<BookmarkTitleMatch>* results) {	198 std::vector<BookmarkMatch>* results) {

182 BookmarkTitleMatch title_match;

183 // Check that the result matches the query. The previous search	199 // Check that the result matches the query. The previous search

184 // was a simple per-word search, while the more complex matching	200 // was a simple per-word search, while the more complex matching

185 // of QueryParser may filter it out. For example, the query	201 // of QueryParser may filter it out. For example, the query

186 // ["thi"] will match the bookmark titled [Thinking], but since	202 // ["thi"] will match the bookmark titled [Thinking], but since

187 // ["thi"] is quoted we don't want to do a prefix match.	203 // ["thi"] is quoted we don't want to do a prefix match.

188 if (parser->DoesQueryMatch(Normalize(node->GetTitle()), query_nodes,	204 std::vector<query_parser::QueryWord> title_words, url_words;
	Peter Kasting 2014/04/17 22:30:37 Nit: Again, the more we can use typedefs in place Nit: Again, the more we can use typedefs in place of things like std::vector<query_parser::XXX> the better. Also applies to this function's arguments. Mark P 2014/04/18 14:52:43 Done. This mostly helped throughout query_parser. Show quoted text On 2014/04/17 22:30:37, Peter Kasting wrote: > Nit: Again, the more we can use typedefs in place of things like > std::vector<query_parser::XXX> the better. Also applies to this function's > arguments. Done. This mostly helped throughout query_parser.cc
189 &(title_match.match_positions))) {	205 const base::string16 lower_title =

190 title_match.node = node;	206 base::i18n::ToLower(Normalize(node->GetTitle()));

191 results->push_back(title_match);	207 parser->ExtractQueryWords(lower_title, &title_words);

	208 if (index_urls_) {

	209 parser->ExtractQueryWords(

	210 bookmark_utils::CleanUpUrlForMatching(node->url(), languages_),

	211 &url_words);

192 }	212 }

	213 query_parser::Snippet::MatchPositions title_matches, url_matches;

	214 for (size_t i = 0; i < query_nodes.size(); ++i) {

	215 const bool has_title_matches =

	216 query_nodes[i]->HasMatchIn(title_words, &title_matches);

	217 const bool has_url_matches = index_urls_ &&

	218 query_nodes[i]->HasMatchIn(url_words, &url_matches);

	219 if (!has_title_matches && !has_url_matches)

	220 return;

	221 query_parser::QueryParser::SortAndCoalesceMatchPositions(&title_matches);

	222 if (index_urls_)

	223 query_parser::QueryParser::SortAndCoalesceMatchPositions(&url_matches);

	224 }

	225 BookmarkMatch match;

	226 if (lower_title.length() == node->GetTitle().length()) {

	227 // Only use title matches if the lowercase string is the same length

	228 // as the original string, otherwise the matches are meaningless.

	229 // TODO(mpearson): revise match positions appropriately.

	230 match.title_match_positions.swap(title_matches);

	231 }

	232 if (index_urls_)

	233 match.url_match_positions.swap(url_matches);

	234 match.node = node;

	235 results->push_back(match);

193 }	236 }

194	237

195 bool BookmarkIndex::GetBookmarksWithTitleMatchingTerm(const base::string16& term ,	238 bool BookmarkIndex::GetBookmarksMatchingTerm(const base::string16& term,

196 bool first_term,	239 bool first_term,

197 Matches* matches) {	240 Matches* matches) {

198 Index::const_iterator i = index_.lower_bound(term);	241 Index::const_iterator i = index_.lower_bound(term);

199 if (i == index_.end())	242 if (i == index_.end())

200 return false;	243 return false;

201	244

202 if (!query_parser::QueryParser::IsWordLongEnoughForPrefixSearch(term)) {	245 if (!query_parser::QueryParser::IsWordLongEnoughForPrefixSearch(term)) {

203 // Term is too short for prefix match, compare using exact match.	246 // Term is too short for prefix match, compare using exact match.

204 if (i->first != term)	247 if (i->first != term)

205 return false; // No bookmarks with this term.	248 return false; // No bookmarks with this term.

(...skipping 88 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
294 Index::iterator i = index_.find(term);	337 Index::iterator i = index_.find(term);

295 if (i == index_.end()) {	338 if (i == index_.end()) {

296 // We can get here if the node has the same term more than once. For	339 // We can get here if the node has the same term more than once. For

297 // example, a bookmark with the title 'foo foo' would end up here.	340 // example, a bookmark with the title 'foo foo' would end up here.

298 return;	341 return;

299 }	342 }

300 i->second.erase(node);	343 i->second.erase(node);

301 if (i->second.empty())	344 if (i->second.empty())

302 index_.erase(i);	345 index_.erase(i);

303 }	346 }

OLD	NEW