components/omnibox/browser/url_index_private_data.cc - Issue 2364523003: Revert of Generating autocomplete results with and without word breaks in the Omnibox.

Side by Side Diff: components/omnibox/browser/url_index_private_data.cc

Issue 2364523003: Revert of Generating autocomplete results with and without word breaks in the Omnibox. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: revert using git, then upload patch Created 4 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "components/omnibox/browser/url_index_private_data.h"	5 #include "components/omnibox/browser/url_index_private_data.h"

6	6

7 #include <stdint.h>	7 #include <stdint.h>

8	8

9 #include <functional>	9 #include <functional>

10 #include <iterator>	10 #include <iterator>

(...skipping 63 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
74 WordStartsMapEntry;	74 WordStartsMapEntry;

75	75

76 // Algorithm Functions ---------------------------------------------------------	76 // Algorithm Functions ---------------------------------------------------------

77	77

78 // Comparison function for sorting search terms by descending length.	78 // Comparison function for sorting search terms by descending length.

79 bool LengthGreater(const base::string16& string_a,	79 bool LengthGreater(const base::string16& string_a,

80 const base::string16& string_b) {	80 const base::string16& string_b) {

81 return string_a.length() > string_b.length();	81 return string_a.length() > string_b.length();

82 }	82 }

83	83

	84

84 // UpdateRecentVisitsFromHistoryDBTask -----------------------------------------	85 // UpdateRecentVisitsFromHistoryDBTask -----------------------------------------

85	86

86 // HistoryDBTask used to update the recent visit data for a particular	87 // HistoryDBTask used to update the recent visit data for a particular

87 // row from the history database.	88 // row from the history database.

88 class UpdateRecentVisitsFromHistoryDBTask : public history::HistoryDBTask {	89 class UpdateRecentVisitsFromHistoryDBTask : public history::HistoryDBTask {

89 public:	90 public:

90 explicit UpdateRecentVisitsFromHistoryDBTask(	91 explicit UpdateRecentVisitsFromHistoryDBTask(

91 URLIndexPrivateData* private_data,	92 URLIndexPrivateData* private_data,

92 history::URLID url_id);	93 history::URLID url_id);

93	94

(...skipping 56 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
150 post_filter_item_count_(0),	151 post_filter_item_count_(0),

151 post_scoring_item_count_(0) {	152 post_scoring_item_count_(0) {

152 }	153 }

153	154

154 ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(	155 ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(

155 base::string16 search_string,	156 base::string16 search_string,

156 size_t cursor_position,	157 size_t cursor_position,

157 size_t max_matches,	158 size_t max_matches,

158 bookmarks::BookmarkModel* bookmark_model,	159 bookmarks::BookmarkModel* bookmark_model,

159 TemplateURLService* template_url_service) {	160 TemplateURLService* template_url_service) {

	161 // If cursor position is set and useful (not at either end of the

	162 // string), allow the search string to be broken at cursor position.

	163 // We do this by pretending there's a space where the cursor is.

	164 if ((cursor_position != base::string16::npos) &&

	165 (cursor_position < search_string.length()) &&

	166 (cursor_position > 0)) {

	167 search_string.insert(cursor_position, base::ASCIIToUTF16(" "));

	168 }

160 pre_filter_item_count_ = 0;	169 pre_filter_item_count_ = 0;

161 post_filter_item_count_ = 0;	170 post_filter_item_count_ = 0;

162 post_scoring_item_count_ = 0;	171 post_scoring_item_count_ = 0;

163 // The search string we receive may contain escaped characters. For reducing	172 // The search string we receive may contain escaped characters. For reducing

164 // the index we need individual, lower-cased words, ignoring escapings. For	173 // the index we need individual, lower-cased words, ignoring escapings. For

165 // the final filtering we need whitespace separated substrings possibly	174 // the final filtering we need whitespace separated substrings possibly

166 // containing escaped characters.	175 // containing escaped characters.

167 base::string16 lower_raw_string(base::i18n::ToLower(search_string));	176 base::string16 lower_raw_string(base::i18n::ToLower(search_string));

168 base::string16 lower_unescaped_string =	177 base::string16 lower_unescaped_string =

169 net::UnescapeURLComponent(lower_raw_string,	178 net::UnescapeURLComponent(lower_raw_string,

170 net::UnescapeRule::SPACES \| net::UnescapeRule::PATH_SEPARATORS \|	179 net::UnescapeRule::SPACES \| net::UnescapeRule::PATH_SEPARATORS \|

171 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS);	180 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS);

172

173 // Extract individual 'words' (as opposed to 'terms'; see below) from the	181 // Extract individual 'words' (as opposed to 'terms'; see below) from the

174 // search string. When the user types "colspec=ID%20Mstone Release" we get	182 // search string. When the user types "colspec=ID%20Mstone Release" we get

175 // four 'words': "colspec", "id", "mstone" and "release".	183 // four 'words': "colspec", "id", "mstone" and "release".

176 String16Vector lower_words(	184 String16Vector lower_words(

177 String16VectorFromString16(lower_unescaped_string, false, nullptr));	185 String16VectorFromString16(lower_unescaped_string, false, nullptr));

178 ScoredHistoryMatches scored_items;	186 ScoredHistoryMatches scored_items;

179	187

180 // Do nothing if we have indexed no words (probably because we've not been	188 // Do nothing if we have indexed no words (probably because we've not been

181 // initialized yet) or the search string has no words.	189 // initialized yet) or the search string has no words.

182 if (word_list_.empty() \|\| lower_words.empty()) {	190 if (word_list_.empty() \|\| lower_words.empty()) {

(...skipping 42 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
225 // "sort=pri&colspec=ID%20Mstone%20Release" we want to make sure that that	233 // "sort=pri&colspec=ID%20Mstone%20Release" we want to make sure that that

226 // specific substring appears in the URL or page title.	234 // specific substring appears in the URL or page title.

227	235

228 // We call these 'terms' (as opposed to 'words'; see above) as in this case	236 // We call these 'terms' (as opposed to 'words'; see above) as in this case

229 // we only want to break up the search string on 'true' whitespace rather than	237 // we only want to break up the search string on 'true' whitespace rather than

230 // escaped whitespace. When the user types "colspec=ID%20Mstone Release" we	238 // escaped whitespace. When the user types "colspec=ID%20Mstone Release" we

231 // get two 'terms': "colspec=id%20mstone" and "release".	239 // get two 'terms': "colspec=id%20mstone" and "release".

232 String16Vector lower_raw_terms = base::SplitString(	240 String16Vector lower_raw_terms = base::SplitString(

233 lower_raw_string, base::kWhitespaceUTF16, base::KEEP_WHITESPACE,	241 lower_raw_string, base::kWhitespaceUTF16, base::KEEP_WHITESPACE,

234 base::SPLIT_WANT_NONEMPTY);	242 base::SPLIT_WANT_NONEMPTY);

235

236 if (lower_raw_terms.empty()) {	243 if (lower_raw_terms.empty()) {

237 // Don't score matches when there are no terms to score against. (It's	244 // Don't score matches when there are no terms to score against. (It's

238 // possible that the word break iterater that extracts words to search	245 // possible that the word break iterater that extracts words to search

239 // for in the database allows some whitespace "words" whereas SplitString	246 // for in the database allows some whitespace "words" whereas SplitString

240 // excludes a long list of whitespace.) One could write a scoring	247 // excludes a long list of whitespace.) One could write a scoring

241 // function that gives a reasonable order to matches when there	248 // function that gives a reasonable order to matches when there

242 // are no terms (i.e., all the words are some form of whitespace),	249 // are no terms (i.e., all the words are some form of whitespace),

243 // but this is such a rare edge case that it's not worth the time.	250 // but this is such a rare edge case that it's not worth the time.

244 return scored_items;	251 return scored_items;

245 }	252 }

246

247 scored_items =	253 scored_items =

248 std::for_each(	254 std::for_each(

249 history_id_set.begin(), history_id_set.end(),	255 history_id_set.begin(), history_id_set.end(),

250 AddHistoryMatch(bookmark_model, template_url_service, *this,	256 AddHistoryMatch(bookmark_model, template_url_service, *this,

251 lower_raw_string, lower_raw_terms, base::Time::Now()))	257 lower_raw_string, lower_raw_terms,

252 .ScoredMatches();	258 base::Time::Now())).ScoredMatches();

253	259

254 // If cursor position is set and useful (not at either end of the string),

255 // allow the search string to be broken at cursor position. We do this by

256 // pretending there's a space where the cursor is.

257 // This phenomena will be referred to as space ghosting.

258 // Going forward we perform a similar process for obtaining

259 // scored items as above. In fact all of the variable names are the same

260 // except that they are prepended with 'transformed_' reflecting that these

261 // variables are related to space ghosting.

262 if ((cursor_position != base::string16::npos) &&

263 (cursor_position < search_string.length()) && (cursor_position > 0)) {

264 // The original search_string broken at cursor position.

265 base::string16 transformed_search_string(search_string);

266 transformed_search_string.insert(cursor_position, base::ASCIIToUTF16(" "));

267

268 // The same as lower_raw_string and defined for the same reasons.

269 base::string16 transformed_lower_raw_string(

270 base::i18n::ToLower(transformed_search_string));

271 String16Vector transformed_lower_raw_terms =

272 base::SplitString(transformed_lower_raw_string, base::kWhitespaceUTF16,

273 base::KEEP_WHITESPACE, base::SPLIT_WANT_NONEMPTY);

274 if (!transformed_lower_raw_terms.empty()) {

275 base::string16 transormed_lower_unescaped_string =

276 net::UnescapeURLComponent(

277 transformed_lower_raw_string,

278 net::UnescapeRule::SPACES \| net::UnescapeRule::PATH_SEPARATORS \|

279 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS);

280 String16Vector transformed_lower_words(String16VectorFromString16(

281 transormed_lower_unescaped_string, false, nullptr));

282 HistoryIDSet transformed_history_id_set =

283 HistoryIDSetFromWords(transformed_lower_words);

284

285 if (history_id_set != transformed_history_id_set) {

286 pre_filter_item_count_ = history_id_set.size();

287 // Trim the candidate pool if it is large as above.

288 was_trimmed = (pre_filter_item_count_ > kItemsToScoreLimit);

289 if (was_trimmed) {

290 HistoryIDVector transformed_history_ids;

291 std::copy(transformed_history_id_set.begin(),

292 transformed_history_id_set.end(),

293 std::back_inserter(transformed_history_ids));

294 // Trim down the set by sorting by typed-count, visit-count, and last

295 // visit.

296 HistoryItemFactorGreater item_factor_functor(history_info_map_);

297 std::partial_sort(

298 transformed_history_ids.begin(),

299 transformed_history_ids.begin() + kItemsToScoreLimit,

300 transformed_history_ids.end(), item_factor_functor);

301 transformed_history_id_set.clear();

302 std::copy(transformed_history_ids.begin(),

303 transformed_history_ids.begin() + kItemsToScoreLimit,

304 std::inserter(transformed_history_id_set,

305 transformed_history_id_set.end()));

306 }

307 history_id_set.insert(transformed_history_id_set.begin(),

308 transformed_history_id_set.end());

309 ScoredHistoryMatches transformed_scored_items =

310 std::for_each(

311 history_id_set.begin(), history_id_set.end(),

312 AddHistoryMatch(bookmark_model, template_url_service, *this,

313 transformed_lower_raw_string,

314 transformed_lower_raw_terms, base::Time::Now()))

315 .ScoredMatches();

316 scored_items.insert(scored_items.end(),

317 transformed_scored_items.begin(),

318 transformed_scored_items.end());

319 }

320 }

321 }

322 // Select and sort only the top \|max_matches\| results.	260 // Select and sort only the top \|max_matches\| results.

323 if (scored_items.size() > max_matches) {	261 if (scored_items.size() > max_matches) {

324 std::partial_sort(scored_items.begin(), scored_items.begin() + max_matches,	262 std::partial_sort(scored_items.begin(),

	263 scored_items.begin() +

	264 max_matches,

325 scored_items.end(),	265 scored_items.end(),

326 ScoredHistoryMatch::MatchScoreGreater);	266 ScoredHistoryMatch::MatchScoreGreater);

327 scored_items.resize(max_matches);	267 scored_items.resize(max_matches);

328 } else {	268 } else {

329 std::sort(scored_items.begin(), scored_items.end(),	269 std::sort(scored_items.begin(), scored_items.end(),

330 ScoredHistoryMatch::MatchScoreGreater);	270 ScoredHistoryMatch::MatchScoreGreater);

331 }	271 }

332 post_scoring_item_count_ = scored_items.size();	272 post_scoring_item_count_ = scored_items.size();

333	273

334 if (was_trimmed) {	274 if (was_trimmed) {

335 search_term_cache_.clear(); // Invalidate the term cache.	275 search_term_cache_.clear(); // Invalidate the term cache.

336 } else {	276 } else {

337 // Remove any stale SearchTermCacheItems.	277 // Remove any stale SearchTermCacheItems.

(...skipping 966 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1304 return true;	1244 return true;

1305 }	1245 }

1306	1246

1307 // static	1247 // static

1308 bool URLIndexPrivateData::URLSchemeIsWhitelisted(	1248 bool URLIndexPrivateData::URLSchemeIsWhitelisted(

1309 const GURL& gurl,	1249 const GURL& gurl,

1310 const std::set<std::string>& whitelist) {	1250 const std::set<std::string>& whitelist) {

1311 return whitelist.find(gurl.scheme()) != whitelist.end();	1251 return whitelist.find(gurl.scheme()) != whitelist.end();

1312 }	1252 }

1313	1253

	1254

1314 // SearchTermCacheItem ---------------------------------------------------------	1255 // SearchTermCacheItem ---------------------------------------------------------

1315	1256

1316 URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem(	1257 URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem(

1317 const WordIDSet& word_id_set,	1258 const WordIDSet& word_id_set,

1318 const HistoryIDSet& history_id_set)	1259 const HistoryIDSet& history_id_set)

1319 : word_id_set_(word_id_set), history_id_set_(history_id_set), used_(true) {	1260 : word_id_set_(word_id_set), history_id_set_(history_id_set), used_(true) {

1320 }	1261 }

1321	1262

1322 URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem() : used_(true) {	1263 URLIndexPrivateData::SearchTermCacheItem::SearchTermCacheItem() : used_(true) {

1323 }	1264 }

(...skipping 89 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1413 // First cut: typed count, visit count, recency.	1354 // First cut: typed count, visit count, recency.

1414 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks	1355 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks

1415 // recently visited (within the last 12/24 hours) as highly important. Get	1356 // recently visited (within the last 12/24 hours) as highly important. Get

1416 // input from mpearson.	1357 // input from mpearson.

1417 if (r1.typed_count() != r2.typed_count())	1358 if (r1.typed_count() != r2.typed_count())

1418 return (r1.typed_count() > r2.typed_count());	1359 return (r1.typed_count() > r2.typed_count());

1419 if (r1.visit_count() != r2.visit_count())	1360 if (r1.visit_count() != r2.visit_count())

1420 return (r1.visit_count() > r2.visit_count());	1361 return (r1.visit_count() > r2.visit_count());

1421 return (r1.last_visit() > r2.last_visit());	1362 return (r1.last_visit() > r2.last_visit());

1422 }	1363 }

OLD	NEW

« no previous file with comments | « components/omnibox/browser/url_index_private_data.h ('k') | no next file » | no next file with comments »