chrome/browser/bookmarks/bookmark_index.cc - Issue 184663002: Omnibox: Make URLs of Bookmarks Searchable

Side by Side Diff: chrome/browser/bookmarks/bookmark_index.cc

Issue 184663002: Omnibox: Make URLs of Bookmarks Searchable (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: tested; works Created 6 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« chrome/browser/bookmarks/bookmark_index.h ('K') | « chrome/browser/bookmarks/bookmark_index.h ('k') | chrome/browser/bookmarks/bookmark_index_unittest.cc » ('j') | chrome/browser/bookmarks/bookmark_index_unittest.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "chrome/browser/bookmarks/bookmark_index.h"	5 #include "chrome/browser/bookmarks/bookmark_index.h"

6	6

7 #include <algorithm>	7 #include <algorithm>

8 #include <iterator>	8 #include <iterator>

9 #include <list>	9 #include <list>

10	10

11 #include "base/i18n/case_conversion.h"	11 #include "base/i18n/case_conversion.h"

12 #include "base/strings/string16.h"	12 #include "base/strings/string16.h"

	13 #include "chrome/browser/bookmarks/bookmark_match.h"

13 #include "chrome/browser/bookmarks/bookmark_model.h"	14 #include "chrome/browser/bookmarks/bookmark_model.h"

14 #include "chrome/browser/bookmarks/bookmark_title_match.h"	15 #include "chrome/browser/bookmarks/bookmark_utils.h"

15 #include "chrome/browser/history/history_service.h"	16 #include "chrome/browser/history/history_service.h"

16 #include "chrome/browser/history/history_service_factory.h"	17 #include "chrome/browser/history/history_service_factory.h"

17 #include "chrome/browser/history/query_parser.h"	18 #include "chrome/browser/history/query_parser.h"

18 #include "chrome/browser/history/url_database.h"	19 #include "chrome/browser/history/url_database.h"

	20 #include "chrome/browser/omnibox/omnibox_field_trial.h"

19 #include "third_party/icu/source/common/unicode/normalizer2.h"	21 #include "third_party/icu/source/common/unicode/normalizer2.h"

20	22

21 namespace {	23 namespace {

22	24

23 // Returns a normalized version of the UTF16 string \|text\|. If it fails to	25 // Returns a normalized version of the UTF16 string \|text\|. If it fails to

24 // normalize the string, returns \|text\| itself as a best-effort.	26 // normalize the string, returns \|text\| itself as a best-effort.

25 base::string16 Normalize(const base::string16& text) {	27 base::string16 Normalize(const base::string16& text) {

26 UErrorCode status = U_ZERO_ERROR;	28 UErrorCode status = U_ZERO_ERROR;

27 const icu::Normalizer2* normalizer2 =	29 const icu::Normalizer2* normalizer2 =

28 icu::Normalizer2::getInstance(NULL, "nfkc", UNORM2_COMPOSE, status);	30 icu::Normalizer2::getInstance(NULL, "nfkc", UNORM2_COMPOSE, status);

(...skipping 35 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
64	66

65 BookmarkIndex::NodeSet::const_iterator	67 BookmarkIndex::NodeSet::const_iterator

66 BookmarkIndex::Match::nodes_begin() const {	68 BookmarkIndex::Match::nodes_begin() const {

67 return nodes.empty() ? terms.front()->second.begin() : nodes.begin();	69 return nodes.empty() ? terms.front()->second.begin() : nodes.begin();

68 }	70 }

69	71

70 BookmarkIndex::NodeSet::const_iterator BookmarkIndex::Match::nodes_end() const {	72 BookmarkIndex::NodeSet::const_iterator BookmarkIndex::Match::nodes_end() const {

71 return nodes.empty() ? terms.front()->second.end() : nodes.end();	73 return nodes.empty() ? terms.front()->second.end() : nodes.end();

72 }	74 }

73	75

74 BookmarkIndex::BookmarkIndex(content::BrowserContext* browser_context)	76 BookmarkIndex::BookmarkIndex(content::BrowserContext* browser_context,

75 : browser_context_(browser_context) {	77 const std::string& languages)

	78 : index_urls_(OmniboxFieldTrial::BookmarksIndexURLsValue()),

	79 browser_context_(browser_context),

	80 languages_(languages) {

76 }	81 }

77	82

78 BookmarkIndex::~BookmarkIndex() {	83 BookmarkIndex::~BookmarkIndex() {

79 }	84 }

80	85

81 void BookmarkIndex::Add(const BookmarkNode* node) {	86 void BookmarkIndex::Add(const BookmarkNode* node) {

82 if (!node->is_url())	87 if (!node->is_url())

83 return;	88 return;

84 std::vector<base::string16> terms =	89 std::vector<base::string16> terms =

85 ExtractQueryWords(Normalize(node->GetTitle()));	90 ExtractQueryWords(Normalize(node->GetTitle()));

86 for (size_t i = 0; i < terms.size(); ++i)	91 for (size_t i = 0; i < terms.size(); ++i)

87 RegisterNode(terms[i], node);	92 RegisterNode(terms[i], node);

	93 if (index_urls_) {

	94 terms = ExtractQueryWords(

	95 bookmark_utils::CleanUpUrlForMatching(node->url(), languages_));

	96 for (size_t i = 0; i < terms.size(); ++i)

	97 RegisterNode(terms[i], node);

	98 }

88 }	99 }

89	100

90 void BookmarkIndex::Remove(const BookmarkNode* node) {	101 void BookmarkIndex::Remove(const BookmarkNode* node) {

91 if (!node->is_url())	102 if (!node->is_url())

92 return;	103 return;

93	104

94 std::vector<base::string16> terms =	105 std::vector<base::string16> terms =

95 ExtractQueryWords(Normalize(node->GetTitle()));	106 ExtractQueryWords(Normalize(node->GetTitle()));

96 for (size_t i = 0; i < terms.size(); ++i)	107 for (size_t i = 0; i < terms.size(); ++i)

97 UnregisterNode(terms[i], node);	108 UnregisterNode(terms[i], node);

	109 if (index_urls_) {

	110 terms = ExtractQueryWords(

	111 bookmark_utils::CleanUpUrlForMatching(node->url(), languages_));

	112 for (size_t i = 0; i < terms.size(); ++i)

	113 UnregisterNode(terms[i], node);

	114 }

98 }	115 }

99	116

100 void BookmarkIndex::GetBookmarksWithTitlesMatching(	117 void BookmarkIndex::GetBookmarksMatching(const base::string16& input_query,

101 const base::string16& input_query,	118 size_t max_count,

102 size_t max_count,	119 std::vector<BookmarkMatch>* results) {

103 std::vector<BookmarkTitleMatch>* results) {

104 const base::string16 query = Normalize(input_query);	120 const base::string16 query = Normalize(input_query);

105 std::vector<base::string16> terms = ExtractQueryWords(query);	121 std::vector<base::string16> terms = ExtractQueryWords(query);

106 if (terms.empty())	122 if (terms.empty())

107 return;	123 return;

108	124

109 Matches matches;	125 Matches matches;

110 for (size_t i = 0; i < terms.size(); ++i) {	126 for (size_t i = 0; i < terms.size(); ++i) {

111 if (!GetBookmarksWithTitleMatchingTerm(terms[i], i == 0, &matches))	127 if (!GetBookmarksMatchingTerm(terms[i], i == 0, &matches))

112 return;	128 return;

113 }	129 }

114	130

115 NodeTypedCountPairs node_typed_counts;	131 NodeTypedCountPairs node_typed_counts;

116 SortMatches(matches, &node_typed_counts);	132 SortMatches(matches, &node_typed_counts);

117	133

118 // We use a QueryParser to fill in match positions for us. It's not the most	134 // We use a QueryParser to fill in match positions for us. It's not the most

119 // efficient way to go about this, but by the time we get here we know what	135 // efficient way to go about this, but by the time we get here we know what

120 // matches and so this shouldn't be performance critical.	136 // matches and so this shouldn't be performance critical.

121 QueryParser parser;	137 QueryParser parser;

(...skipping 49 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
171	187

172 NodeTypedCountPair pair(*i, typed_count);	188 NodeTypedCountPair pair(*i, typed_count);

173 node_typed_counts->push_back(pair);	189 node_typed_counts->push_back(pair);

174 }	190 }

175 }	191 }

176	192

177 void BookmarkIndex::AddMatchToResults(	193 void BookmarkIndex::AddMatchToResults(

178 const BookmarkNode* node,	194 const BookmarkNode* node,

179 QueryParser* parser,	195 QueryParser* parser,

180 const std::vector<QueryNode*>& query_nodes,	196 const std::vector<QueryNode*>& query_nodes,

181 std::vector<BookmarkTitleMatch>* results) {	197 std::vector<BookmarkMatch>* results) {

182 BookmarkTitleMatch title_match;

183 // Check that the result matches the query. The previous search	198 // Check that the result matches the query. The previous search

184 // was a simple per-word search, while the more complex matching	199 // was a simple per-word search, while the more complex matching

185 // of QueryParser may filter it out. For example, the query	200 // of QueryParser may filter it out. For example, the query

186 // ["thi"] will match the bookmark titled [Thinking], but since	201 // ["thi"] will match the bookmark titled [Thinking], but since

187 // ["thi"] is quoted we don't want to do a prefix match.	202 // ["thi"] is quoted we don't want to do a prefix match.

188 if (parser->DoesQueryMatch(Normalize(node->GetTitle()), query_nodes,	203 std::vector<QueryWord> title_words, url_words;

189 &(title_match.match_positions))) {	204 const base::string16 lower_title =

190 title_match.node = node;	205 base::i18n::ToLower(Normalize(node->GetTitle()));

191 results->push_back(title_match);	206 parser->ExtractQueryWords(lower_title, &title_words);

	207 if (index_urls_) {

	208 parser->ExtractQueryWords(

	209 bookmark_utils::CleanUpUrlForMatching(node->url(), languages_),

	210 &url_words);

192 }	211 }

	212 Snippet::MatchPositions title_matches, url_matches;

	213 for (size_t i = 0; i < query_nodes.size(); ++i) {

	214 const bool has_title_matches =

	215 query_nodes[i]->HasMatchIn(title_words, &title_matches);

	216 const bool has_url_matches = index_urls_ &&

	217 query_nodes[i]->HasMatchIn(url_words, &url_matches);

	218 if (!has_title_matches && !has_url_matches)

	219 return;

	220 QueryParser::CoalesceAndSortMatchPositions(&title_matches);

	221 if (index_urls_)

	222 QueryParser::CoalesceAndSortMatchPositions(&url_matches);

	223 }

	224 BookmarkMatch match;

	225 if (lower_title.length() == node->GetTitle().length()) {

	226 // Only use title matches if the lowercase string is the same length

	227 // as the original string, otherwise the matches are meaningless.

	228 // TODO(mpearson): revise match positions appropriately.

	229 match.title_match_positions.swap(title_matches);

	230 }

	231 if (index_urls_)

	232 match.url_match_positions.swap(url_matches);

	233 match.node = node;

	234 results->push_back(match);

193 }	235 }

194	236

195 bool BookmarkIndex::GetBookmarksWithTitleMatchingTerm(const base::string16& term ,	237 bool BookmarkIndex::GetBookmarksMatchingTerm(const base::string16& term,

196 bool first_term,	238 bool first_term,

197 Matches* matches) {	239 Matches* matches) {

198 Index::const_iterator i = index_.lower_bound(term);	240 Index::const_iterator i = index_.lower_bound(term);

199 if (i == index_.end())	241 if (i == index_.end())

200 return false;	242 return false;

201	243

202 if (!QueryParser::IsWordLongEnoughForPrefixSearch(term)) {	244 if (!QueryParser::IsWordLongEnoughForPrefixSearch(term)) {

203 // Term is too short for prefix match, compare using exact match.	245 // Term is too short for prefix match, compare using exact match.

204 if (i->first != term)	246 if (i->first != term)

205 return false; // No bookmarks with this term.	247 return false; // No bookmarks with this term.

(...skipping 88 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
294 Index::iterator i = index_.find(term);	336 Index::iterator i = index_.find(term);

295 if (i == index_.end()) {	337 if (i == index_.end()) {

296 // We can get here if the node has the same term more than once. For	338 // We can get here if the node has the same term more than once. For

297 // example, a bookmark with the title 'foo foo' would end up here.	339 // example, a bookmark with the title 'foo foo' would end up here.

298 return;	340 return;

299 }	341 }

300 i->second.erase(node);	342 i->second.erase(node);

301 if (i->second.empty())	343 if (i->second.empty())

302 index_.erase(i);	344 index_.erase(i);

303 }	345 }

OLD	NEW