components/omnibox/browser/url_index_private_data.cc - Issue 1841653003: Drop |languages| from {Format,Elide}Url* and IDNToUnicode

Side by Side Diff: components/omnibox/browser/url_index_private_data.cc

Issue 1841653003: Drop |languages| from {Format,Elide}Url* and IDNToUnicode (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: fix typo in elide_url.cc Created 4 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "components/omnibox/browser/url_index_private_data.h"	5 #include "components/omnibox/browser/url_index_private_data.h"

6	6

7 #include <stdint.h>	7 #include <stdint.h>

8	8

9 #include <functional>	9 #include <functional>

10 #include <iterator>	10 #include <iterator>

(...skipping 138 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
149 saved_cache_version_(kCurrentCacheFileVersion),	149 saved_cache_version_(kCurrentCacheFileVersion),

150 pre_filter_item_count_(0),	150 pre_filter_item_count_(0),

151 post_filter_item_count_(0),	151 post_filter_item_count_(0),

152 post_scoring_item_count_(0) {	152 post_scoring_item_count_(0) {

153 }	153 }

154	154

155 ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(	155 ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(

156 base::string16 search_string,	156 base::string16 search_string,

157 size_t cursor_position,	157 size_t cursor_position,

158 size_t max_matches,	158 size_t max_matches,

159 const std::string& languages,

160 bookmarks::BookmarkModel* bookmark_model,	159 bookmarks::BookmarkModel* bookmark_model,

161 TemplateURLService* template_url_service) {	160 TemplateURLService* template_url_service) {

162 // If cursor position is set and useful (not at either end of the	161 // If cursor position is set and useful (not at either end of the

163 // string), allow the search string to be broken at cursor position.	162 // string), allow the search string to be broken at cursor position.

164 // We do this by pretending there's a space where the cursor is.	163 // We do this by pretending there's a space where the cursor is.

165 if ((cursor_position != base::string16::npos) &&	164 if ((cursor_position != base::string16::npos) &&

166 (cursor_position < search_string.length()) &&	165 (cursor_position < search_string.length()) &&

167 (cursor_position > 0)) {	166 (cursor_position > 0)) {

168 search_string.insert(cursor_position, base::ASCIIToUTF16(" "));	167 search_string.insert(cursor_position, base::ASCIIToUTF16(" "));

169 }	168 }

170 pre_filter_item_count_ = 0;	169 pre_filter_item_count_ = 0;

171 post_filter_item_count_ = 0;	170 post_filter_item_count_ = 0;

172 post_scoring_item_count_ = 0;	171 post_scoring_item_count_ = 0;

173 // The search string we receive may contain escaped characters. For reducing	172 // The search string we receive may contain escaped characters. For reducing

174 // the index we need individual, lower-cased words, ignoring escapings. For	173 // the index we need individual, lower-cased words, ignoring escapings. For

175 // the final filtering we need whitespace separated substrings possibly	174 // the final filtering we need whitespace separated substrings possibly

176 // containing escaped characters.	175 // containing escaped characters.

177 base::string16 lower_raw_string(base::i18n::ToLower(search_string));	176 base::string16 lower_raw_string(base::i18n::ToLower(search_string));

178 base::string16 lower_unescaped_string =	177 base::string16 lower_unescaped_string =

179 net::UnescapeURLComponent(lower_raw_string,	178 net::UnescapeURLComponent(lower_raw_string,

180 net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS |	179 net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS |

181 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS);	180 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS);

182 // Extract individual 'words' (as opposed to 'terms'; see below) from the	181 // Extract individual 'words' (as opposed to 'terms'; see below) from the

183 // search string. When the user types "colspec=ID%20Mstone Release" we get	182 // search string. When the user types "colspec=ID%20Mstone Release" we get

184 // four 'words': "colspec", "id", "mstone" and "release".	183 // four 'words': "colspec", "id", "mstone" and "release".

185 String16Vector lower_words(	184 String16Vector lower_words(

186 String16VectorFromString16(lower_unescaped_string, false, NULL));	185 String16VectorFromString16(lower_unescaped_string, false, nullptr));

187 ScoredHistoryMatches scored_items;	186 ScoredHistoryMatches scored_items;

188	187

189 // Do nothing if we have indexed no words (probably because we've not been	188 // Do nothing if we have indexed no words (probably because we've not been

190 // initialized yet) or the search string has no words.	189 // initialized yet) or the search string has no words.

191 if (word_list_.empty() || lower_words.empty()) {	190 if (word_list_.empty() || lower_words.empty()) {

192 search_term_cache_.clear(); // Invalidate the term cache.	191 search_term_cache_.clear(); // Invalidate the term cache.

193 return scored_items;	192 return scored_items;

194 }	193 }

195	194

196 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep	195 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep

(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
248 // excludes a long list of whitespace.) One could write a scoring	247 // excludes a long list of whitespace.) One could write a scoring

249 // function that gives a reasonable order to matches when there	248 // function that gives a reasonable order to matches when there

250 // are no terms (i.e., all the words are some form of whitespace),	249 // are no terms (i.e., all the words are some form of whitespace),

251 // but this is such a rare edge case that it's not worth the time.	250 // but this is such a rare edge case that it's not worth the time.

252 return scored_items;	251 return scored_items;

253 }	252 }

254 scored_items =	253 scored_items =

255 std::for_each(	254 std::for_each(

256 history_id_set.begin(), history_id_set.end(),	255 history_id_set.begin(), history_id_set.end(),

257 AddHistoryMatch(bookmark_model, template_url_service, *this,	256 AddHistoryMatch(bookmark_model, template_url_service, *this,

258 languages, lower_raw_string, lower_raw_terms,	257 lower_raw_string, lower_raw_terms,

259 base::Time::Now())).ScoredMatches();	258 base::Time::Now())).ScoredMatches();

260	259

261 // Select and sort only the top |max_matches| results.	260 // Select and sort only the top |max_matches| results.

262 if (scored_items.size() > max_matches) {	261 if (scored_items.size() > max_matches) {

263 std::partial_sort(scored_items.begin(),	262 std::partial_sort(scored_items.begin(),

264 scored_items.begin() +	263 scored_items.begin() +

265 max_matches,	264 max_matches,

266 scored_items.end(),	265 scored_items.end(),

267 ScoredHistoryMatch::MatchScoreGreater);	266 ScoredHistoryMatch::MatchScoreGreater);

268 scored_items.resize(max_matches);	267 scored_items.resize(max_matches);

(...skipping 15 matching lines...) Expand all Loading...
284 ++cache_iter;	283 ++cache_iter;

285 }	284 }

286 }	285 }

287	286

288 return scored_items;	287 return scored_items;

289 }	288 }

290	289

291 bool URLIndexPrivateData::UpdateURL(	290 bool URLIndexPrivateData::UpdateURL(

292 history::HistoryService* history_service,	291 history::HistoryService* history_service,

293 const history::URLRow& row,	292 const history::URLRow& row,

294 const std::string& languages,

295 const std::set<std::string>& scheme_whitelist,	293 const std::set<std::string>& scheme_whitelist,

296 base::CancelableTaskTracker* tracker) {	294 base::CancelableTaskTracker* tracker) {

297 // The row may or may not already be in our index. If it is not already	295 // The row may or may not already be in our index. If it is not already

298 // indexed and it qualifies then it gets indexed. If it is already	296 // indexed and it qualifies then it gets indexed. If it is already

299 // indexed and still qualifies then it gets updated, otherwise it	297 // indexed and still qualifies then it gets updated, otherwise it

300 // is deleted from the index.	298 // is deleted from the index.

301 bool row_was_updated = false;	299 bool row_was_updated = false;

302 history::URLID row_id = row.id();	300 history::URLID row_id = row.id();

303 HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id);	301 HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id);

304 if (row_pos == history_info_map_.end()) {	302 if (row_pos == history_info_map_.end()) {

305 // This new row should be indexed if it qualifies.	303 // This new row should be indexed if it qualifies.

306 history::URLRow new_row(row);	304 history::URLRow new_row(row);

307 new_row.set_id(row_id);	305 new_row.set_id(row_id);

308 row_was_updated = RowQualifiesAsSignificant(new_row, base::Time()) &&	306 row_was_updated = RowQualifiesAsSignificant(new_row, base::Time()) &&

309 IndexRow(NULL,	307 IndexRow(nullptr,

310 history_service,	308 history_service,

311 new_row,	309 new_row,

312 languages,

313 scheme_whitelist,	310 scheme_whitelist,

314 tracker);	311 tracker);

315 } else if (RowQualifiesAsSignificant(row, base::Time())) {	312 } else if (RowQualifiesAsSignificant(row, base::Time())) {

316 // This indexed row still qualifies and will be re-indexed.	313 // This indexed row still qualifies and will be re-indexed.

317 // The url won't have changed but the title, visit count, etc.	314 // The url won't have changed but the title, visit count, etc.

318 // might have changed.	315 // might have changed.

319 history::URLRow& row_to_update = row_pos->second.url_row;	316 history::URLRow& row_to_update = row_pos->second.url_row;

320 bool title_updated = row_to_update.title() != row.title();	317 bool title_updated = row_to_update.title() != row.title();

321 if (row_to_update.visit_count() != row.visit_count() ||	318 if (row_to_update.visit_count() != row.visit_count() ||

322 row_to_update.typed_count() != row.typed_count() ||	319 row_to_update.typed_count() != row.typed_count() ||

323 row_to_update.last_visit() != row.last_visit() || title_updated) {	320 row_to_update.last_visit() != row.last_visit() || title_updated) {

324 row_to_update.set_visit_count(row.visit_count());	321 row_to_update.set_visit_count(row.visit_count());

325 row_to_update.set_typed_count(row.typed_count());	322 row_to_update.set_typed_count(row.typed_count());

326 row_to_update.set_last_visit(row.last_visit());	323 row_to_update.set_last_visit(row.last_visit());

327 // If something appears to have changed, update the recent visits	324 // If something appears to have changed, update the recent visits

328 // information.	325 // information.

329 ScheduleUpdateRecentVisits(history_service, row_id, tracker);	326 ScheduleUpdateRecentVisits(history_service, row_id, tracker);

330 // While the URL is guaranteed to remain stable, the title may have	327 // While the URL is guaranteed to remain stable, the title may have

331 // changed. If so, then update the index with the changed words.	328 // changed. If so, then update the index with the changed words.

332 if (title_updated) {	329 if (title_updated) {

333 // Clear all words associated with this row and re-index both the	330 // Clear all words associated with this row and re-index both the

334 // URL and title.	331 // URL and title.

335 RemoveRowWordsFromIndex(row_to_update);	332 RemoveRowWordsFromIndex(row_to_update);

336 row_to_update.set_title(row.title());	333 row_to_update.set_title(row.title());

337 RowWordStarts word_starts;	334 RowWordStarts word_starts;

338 AddRowWordsToIndex(row_to_update, &word_starts, languages);	335 AddRowWordsToIndex(row_to_update, &word_starts);

339 word_starts_map_[row_id] = word_starts;	336 word_starts_map_[row_id] = word_starts;

340 }	337 }

341 row_was_updated = true;	338 row_was_updated = true;

342 }	339 }

343 } else {	340 } else {

344 // This indexed row no longer qualifies and will be de-indexed by	341 // This indexed row no longer qualifies and will be de-indexed by

345 // clearing all words associated with this row.	342 // clearing all words associated with this row.

346 RemoveRowFromIndex(row);	343 RemoveRowFromIndex(row);

347 row_was_updated = true;	344 row_was_updated = true;

348 }	345 }

(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
403 HistoryInfoMapItemHasURL(url));	400 HistoryInfoMapItemHasURL(url));

404 if (pos == history_info_map_.end())	401 if (pos == history_info_map_.end())

405 return false;	402 return false;

406 RemoveRowFromIndex(pos->second.url_row);	403 RemoveRowFromIndex(pos->second.url_row);

407 search_term_cache_.clear(); // This invalidates the cache.	404 search_term_cache_.clear(); // This invalidates the cache.

408 return true;	405 return true;

409 }	406 }

410	407

411 // static	408 // static

412 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RestoreFromFile(	409 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RestoreFromFile(

413 const base::FilePath& file_path,	410 const base::FilePath& file_path) {

414 const std::string& languages) {

415 base::TimeTicks beginning_time = base::TimeTicks::Now();	411 base::TimeTicks beginning_time = base::TimeTicks::Now();

416 if (!base::PathExists(file_path))	412 if (!base::PathExists(file_path))

417 return NULL;	413 return nullptr;

418 std::string data;	414 std::string data;

419 // If there is no cache file then simply give up. This will cause us to	415 // If there is no cache file then simply give up. This will cause us to

420 // attempt to rebuild from the history database.	416 // attempt to rebuild from the history database.

421 if (!base::ReadFileToString(file_path, &data))	417 if (!base::ReadFileToString(file_path, &data))

422 return NULL;	418 return nullptr;

423	419

424 scoped_refptr<URLIndexPrivateData> restored_data(new URLIndexPrivateData);	420 scoped_refptr<URLIndexPrivateData> restored_data(new URLIndexPrivateData);

425 InMemoryURLIndexCacheItem index_cache;	421 InMemoryURLIndexCacheItem index_cache;

426 if (!index_cache.ParseFromArray(data.c_str(), data.size())) {	422 if (!index_cache.ParseFromArray(data.c_str(), data.size())) {

427 LOG(WARNING) << "Failed to parse URLIndexPrivateData cache data read from "	423 LOG(WARNING) << "Failed to parse URLIndexPrivateData cache data read from "

428 << file_path.value();	424 << file_path.value();

429 return restored_data;	425 return restored_data;

430 }	426 }

431	427

432 if (!restored_data->RestorePrivateData(index_cache, languages))	428 if (!restored_data->RestorePrivateData(index_cache))

433 return NULL;	429 return nullptr;

434	430

435 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime",	431 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime",

436 base::TimeTicks::Now() - beginning_time);	432 base::TimeTicks::Now() - beginning_time);

437 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",	433 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",

438 restored_data->history_id_word_map_.size());	434 restored_data->history_id_word_map_.size());

439 UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size());	435 UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size());

440 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",	436 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",

441 restored_data->word_map_.size());	437 restored_data->word_map_.size());

442 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",	438 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",

443 restored_data->char_word_map_.size());	439 restored_data->char_word_map_.size());

444 if (restored_data->Empty())	440 if (restored_data->Empty())

445 return NULL; // 'No data' is the same as a failed reload.	441 return nullptr; // 'No data' is the same as a failed reload.

446 return restored_data;	442 return restored_data;

447 }	443 }

448	444

449 // static	445 // static

450 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RebuildFromHistory(	446 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RebuildFromHistory(

451 history::HistoryDatabase* history_db,	447 history::HistoryDatabase* history_db,

452 const std::string& languages,

453 const std::set<std::string>& scheme_whitelist) {	448 const std::set<std::string>& scheme_whitelist) {

454 if (!history_db)	449 if (!history_db)

455 return NULL;	450 return nullptr;

456	451

457 base::TimeTicks beginning_time = base::TimeTicks::Now();	452 base::TimeTicks beginning_time = base::TimeTicks::Now();

458	453

459 scoped_refptr<URLIndexPrivateData>	454 scoped_refptr<URLIndexPrivateData>

460 rebuilt_data(new URLIndexPrivateData);	455 rebuilt_data(new URLIndexPrivateData);

461 history::URLDatabase::URLEnumerator history_enum;	456 history::URLDatabase::URLEnumerator history_enum;

462 if (!history_db->InitURLEnumeratorForSignificant(&history_enum))	457 if (!history_db->InitURLEnumeratorForSignificant(&history_enum))

463 return NULL;	458 return nullptr;

464 rebuilt_data->last_time_rebuilt_from_history_ = base::Time::Now();	459 rebuilt_data->last_time_rebuilt_from_history_ = base::Time::Now();

465 for (history::URLRow row; history_enum.GetNextURL(&row);) {	460 for (history::URLRow row; history_enum.GetNextURL(&row);) {

466 rebuilt_data->IndexRow(	461 rebuilt_data->IndexRow(

467 history_db, NULL, row, languages, scheme_whitelist, NULL);	462 history_db, nullptr, row, scheme_whitelist, nullptr);

468 }	463 }

469	464

470 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime",	465 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime",

471 base::TimeTicks::Now() - beginning_time);	466 base::TimeTicks::Now() - beginning_time);

472 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",	467 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",

473 rebuilt_data->history_id_word_map_.size());	468 rebuilt_data->history_id_word_map_.size());

474 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",	469 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",

475 rebuilt_data->word_map_.size());	470 rebuilt_data->word_map_.size());

476 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",	471 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",

477 rebuilt_data->char_word_map_.size());	472 rebuilt_data->char_word_map_.size());

(...skipping 217 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
695 word_id_set.swap(new_word_id_set);	690 word_id_set.swap(new_word_id_set);

696 }	691 }

697 }	692 }

698 return word_id_set;	693 return word_id_set;

699 }	694 }

700	695

701 bool URLIndexPrivateData::IndexRow(	696 bool URLIndexPrivateData::IndexRow(

702 history::HistoryDatabase* history_db,	697 history::HistoryDatabase* history_db,

703 history::HistoryService* history_service,	698 history::HistoryService* history_service,

704 const history::URLRow& row,	699 const history::URLRow& row,

705 const std::string& languages,

706 const std::set<std::string>& scheme_whitelist,	700 const std::set<std::string>& scheme_whitelist,

707 base::CancelableTaskTracker* tracker) {	701 base::CancelableTaskTracker* tracker) {

708 const GURL& gurl(row.url());	702 const GURL& gurl(row.url());

709	703

710 // Index only URLs with a whitelisted scheme.	704 // Index only URLs with a whitelisted scheme.

711 if (!URLSchemeIsWhitelisted(gurl, scheme_whitelist))	705 if (!URLSchemeIsWhitelisted(gurl, scheme_whitelist))

712 return false;	706 return false;

713	707

714 history::URLID row_id = row.id();	708 history::URLID row_id = row.id();

715 // Strip out username and password before saving and indexing.	709 // Strip out username and password before saving and indexing.

716 base::string16 url(url_formatter::FormatUrl(	710 base::string16 url(url_formatter::FormatUrl(

717 gurl, languages, url_formatter::kFormatUrlOmitUsernamePassword,	711 gurl, url_formatter::kFormatUrlOmitUsernamePassword,

718 net::UnescapeRule::NONE, nullptr, nullptr, nullptr));	712 net::UnescapeRule::NONE, nullptr, nullptr, nullptr));

719	713

720 HistoryID history_id = static_cast<HistoryID>(row_id);	714 HistoryID history_id = static_cast<HistoryID>(row_id);

721 DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max());	715 DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max());

722	716

723 // Add the row for quick lookup in the history info store.	717 // Add the row for quick lookup in the history info store.

724 history::URLRow new_row(GURL(url), row_id);	718 history::URLRow new_row(GURL(url), row_id);

725 new_row.set_visit_count(row.visit_count());	719 new_row.set_visit_count(row.visit_count());

726 new_row.set_typed_count(row.typed_count());	720 new_row.set_typed_count(row.typed_count());

727 new_row.set_last_visit(row.last_visit());	721 new_row.set_last_visit(row.last_visit());

728 new_row.set_title(row.title());	722 new_row.set_title(row.title());

729 history_info_map_[history_id].url_row = new_row;	723 history_info_map_[history_id].url_row = new_row;

730	724

731 // Index the words contained in the URL and title of the row.	725 // Index the words contained in the URL and title of the row.

732 RowWordStarts word_starts;	726 RowWordStarts word_starts;

733 AddRowWordsToIndex(new_row, &word_starts, languages);	727 AddRowWordsToIndex(new_row, &word_starts);

734 word_starts_map_[history_id] = word_starts;	728 word_starts_map_[history_id] = word_starts;

735	729

736 // Update the recent visits information or schedule the update	730 // Update the recent visits information or schedule the update

737 // as appropriate.	731 // as appropriate.

738 if (history_db) {	732 if (history_db) {

739 // We'd like to check that we're on the history DB thread.	733 // We'd like to check that we're on the history DB thread.

740 // However, unittest code actually calls this on the UI thread.	734 // However, unittest code actually calls this on the UI thread.

741 // So we don't do any thread checks.	735 // So we don't do any thread checks.

742 history::VisitVector recent_visits;	736 history::VisitVector recent_visits;

743 // Make sure the private data is going to get as many recent visits as	737 // Make sure the private data is going to get as many recent visits as

744 // ScoredHistoryMatch::GetFrequency() hopes to use.	738 // ScoredHistoryMatch::GetFrequency() hopes to use.

745 DCHECK_GE(kMaxVisitsToStoreInCache, ScoredHistoryMatch::kMaxVisitsToScore);	739 DCHECK_GE(kMaxVisitsToStoreInCache, ScoredHistoryMatch::kMaxVisitsToScore);

746 if (history_db->GetMostRecentVisitsForURL(row_id,	740 if (history_db->GetMostRecentVisitsForURL(row_id,

747 kMaxVisitsToStoreInCache,	741 kMaxVisitsToStoreInCache,

748 &recent_visits))	742 &recent_visits))

749 UpdateRecentVisits(row_id, recent_visits);	743 UpdateRecentVisits(row_id, recent_visits);

750 } else {	744 } else {

751 DCHECK(tracker);	745 DCHECK(tracker);

752 DCHECK(history_service);	746 DCHECK(history_service);

753 ScheduleUpdateRecentVisits(history_service, row_id, tracker);	747 ScheduleUpdateRecentVisits(history_service, row_id, tracker);

754 }	748 }

755	749

756 return true;	750 return true;

757 }	751 }

758	752

759 void URLIndexPrivateData::AddRowWordsToIndex(const history::URLRow& row,	753 void URLIndexPrivateData::AddRowWordsToIndex(const history::URLRow& row,

760 RowWordStarts* word_starts,	754 RowWordStarts* word_starts) {

761 const std::string& languages) {

762 HistoryID history_id = static_cast<HistoryID>(row.id());	755 HistoryID history_id = static_cast<HistoryID>(row.id());

763 // Split URL into individual, unique words then add in the title words.	756 // Split URL into individual, unique words then add in the title words.

764 const GURL& gurl(row.url());	757 const GURL& gurl(row.url());

765 const base::string16& url =	758 const base::string16& url =

766 bookmarks::CleanUpUrlForMatching(gurl, languages, NULL);	759 bookmarks::CleanUpUrlForMatching(gurl, nullptr);

767 String16Set url_words = String16SetFromString16(url,	760 String16Set url_words = String16SetFromString16(url,

768 word_starts ? &word_starts->url_word_starts_ : NULL);	761 word_starts ? &word_starts->url_word_starts_ : nullptr);

769 const base::string16& title = bookmarks::CleanUpTitleForMatching(row.title());	762 const base::string16& title = bookmarks::CleanUpTitleForMatching(row.title());

770 String16Set title_words = String16SetFromString16(title,	763 String16Set title_words = String16SetFromString16(title,

771 word_starts ? &word_starts->title_word_starts_ : NULL);	764 word_starts ? &word_starts->title_word_starts_ : nullptr);

772 String16Set words = base::STLSetUnion<String16Set>(url_words, title_words);	765 String16Set words = base::STLSetUnion<String16Set>(url_words, title_words);

773 for (String16Set::iterator word_iter = words.begin();	766 for (String16Set::iterator word_iter = words.begin();

774 word_iter != words.end(); ++word_iter)	767 word_iter != words.end(); ++word_iter)

775 AddWordToIndex(*word_iter, history_id);	768 AddWordToIndex(*word_iter, history_id);

776	769

777 search_term_cache_.clear(); // Invalidate the term cache.	770 search_term_cache_.clear(); // Invalidate the term cache.

778 }	771 }

779	772

780 void URLIndexPrivateData::AddWordToIndex(const base::string16& term,	773 void URLIndexPrivateData::AddWordToIndex(const base::string16& term,

781 HistoryID history_id) {	774 HistoryID history_id) {

(...skipping 257 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1039 for (WordStarts::const_iterator i = word_starts.url_word_starts_.begin();	1032 for (WordStarts::const_iterator i = word_starts.url_word_starts_.begin();

1040 i != word_starts.url_word_starts_.end(); ++i)	1033 i != word_starts.url_word_starts_.end(); ++i)

1041 map_entry->add_url_word_starts(*i);	1034 map_entry->add_url_word_starts(*i);

1042 for (WordStarts::const_iterator i = word_starts.title_word_starts_.begin();	1035 for (WordStarts::const_iterator i = word_starts.title_word_starts_.begin();

1043 i != word_starts.title_word_starts_.end(); ++i)	1036 i != word_starts.title_word_starts_.end(); ++i)

1044 map_entry->add_title_word_starts(*i);	1037 map_entry->add_title_word_starts(*i);

1045 }	1038 }

1046 }	1039 }

1047	1040

1048 bool URLIndexPrivateData::RestorePrivateData(	1041 bool URLIndexPrivateData::RestorePrivateData(

1049 const InMemoryURLIndexCacheItem& cache,	1042 const InMemoryURLIndexCacheItem& cache) {

1050 const std::string& languages) {

1051 last_time_rebuilt_from_history_ =	1043 last_time_rebuilt_from_history_ =

1052 base::Time::FromInternalValue(cache.last_rebuild_timestamp());	1044 base::Time::FromInternalValue(cache.last_rebuild_timestamp());

1053 const base::TimeDelta rebuilt_ago =	1045 const base::TimeDelta rebuilt_ago =

1054 base::Time::Now() - last_time_rebuilt_from_history_;	1046 base::Time::Now() - last_time_rebuilt_from_history_;

1055 if ((rebuilt_ago > base::TimeDelta::FromDays(7)) ||	1047 if ((rebuilt_ago > base::TimeDelta::FromDays(7)) ||

1056 (rebuilt_ago < base::TimeDelta::FromDays(-1))) {	1048 (rebuilt_ago < base::TimeDelta::FromDays(-1))) {

1057 // Cache is more than a week old or, somehow, from some time in the future.	1049 // Cache is more than a week old or, somehow, from some time in the future.

1058 // It's probably a good time to rebuild the index from history to	1050 // It's probably a good time to rebuild the index from history to

1059 // allow synced entries to now appear, expired entries to disappear, etc.	1051 // allow synced entries to now appear, expired entries to disappear, etc.

1060 // Allow one day in the future to make the cache not rebuild on simple	1052 // Allow one day in the future to make the cache not rebuild on simple

1061 // system clock changes such as time zone changes.	1053 // system clock changes such as time zone changes.

1062 return false;	1054 return false;

1063 }	1055 }

1064 if (cache.has_version()) {	1056 if (cache.has_version()) {

1065 if (cache.version() < kCurrentCacheFileVersion) {	1057 if (cache.version() < kCurrentCacheFileVersion) {

1066 // Don't try to restore an old format cache file. (This will cause	1058 // Don't try to restore an old format cache file. (This will cause

1067 // the InMemoryURLIndex to schedule rebuilding the URLIndexPrivateData	1059 // the InMemoryURLIndex to schedule rebuilding the URLIndexPrivateData

1068 // from history.)	1060 // from history.)

1069 return false;	1061 return false;

1070 }	1062 }

1071 restored_cache_version_ = cache.version();	1063 restored_cache_version_ = cache.version();

1072 }	1064 }

1073 return RestoreWordList(cache) && RestoreWordMap(cache) &&	1065 return RestoreWordList(cache) && RestoreWordMap(cache) &&

1074 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) &&	1066 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) &&

1075 RestoreHistoryInfoMap(cache) && RestoreWordStartsMap(cache, languages);	1067 RestoreHistoryInfoMap(cache) && RestoreWordStartsMap(cache);

1076 }	1068 }

1077	1069

1078 bool URLIndexPrivateData::RestoreWordList(	1070 bool URLIndexPrivateData::RestoreWordList(

1079 const InMemoryURLIndexCacheItem& cache) {	1071 const InMemoryURLIndexCacheItem& cache) {

1080 if (!cache.has_word_list())	1072 if (!cache.has_word_list())

1081 return false;	1073 return false;

1082 const WordListItem& list_item(cache.word_list());	1074 const WordListItem& list_item(cache.word_list());

1083 uint32_t expected_item_count = list_item.word_count();	1075 uint32_t expected_item_count = list_item.word_count();

1084 uint32_t actual_item_count = list_item.word_size();	1076 uint32_t actual_item_count = list_item.word_size();

1085 if (actual_item_count == 0 || actual_item_count != expected_item_count)	1077 if (actual_item_count == 0 || actual_item_count != expected_item_count)

(...skipping 111 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1197 visits.push_back(std::make_pair(	1189 visits.push_back(std::make_pair(

1198 base::Time::FromInternalValue(iter->visits(i).visit_time()),	1190 base::Time::FromInternalValue(iter->visits(i).visit_time()),

1199 ui::PageTransitionFromInt(iter->visits(i).transition_type())));	1191 ui::PageTransitionFromInt(iter->visits(i).transition_type())));

1200 }	1192 }

1201 history_info_map_[history_id].visits = visits;	1193 history_info_map_[history_id].visits = visits;

1202 }	1194 }

1203 return true;	1195 return true;

1204 }	1196 }

1205	1197

1206 bool URLIndexPrivateData::RestoreWordStartsMap(	1198 bool URLIndexPrivateData::RestoreWordStartsMap(

1207 const InMemoryURLIndexCacheItem& cache,	1199 const InMemoryURLIndexCacheItem& cache) {

1208 const std::string& languages) {

1209 // Note that this function must be called after RestoreHistoryInfoMap() has	1200 // Note that this function must be called after RestoreHistoryInfoMap() has

1210 // been run as the word starts may have to be recalculated from the urls and	1201 // been run as the word starts may have to be recalculated from the urls and

1211 // page titles.	1202 // page titles.

1212 if (cache.has_word_starts_map()) {	1203 if (cache.has_word_starts_map()) {

1213 const WordStartsMapItem& list_item(cache.word_starts_map());	1204 const WordStartsMapItem& list_item(cache.word_starts_map());

1214 uint32_t expected_item_count = list_item.item_count();	1205 uint32_t expected_item_count = list_item.item_count();

1215 uint32_t actual_item_count = list_item.word_starts_map_entry_size();	1206 uint32_t actual_item_count = list_item.word_starts_map_entry_size();

1216 if (actual_item_count == 0 || actual_item_count != expected_item_count)	1207 if (actual_item_count == 0 || actual_item_count != expected_item_count)

1217 return false;	1208 return false;

1218 const RepeatedPtrField<WordStartsMapEntry>&	1209 const RepeatedPtrField<WordStartsMapEntry>&

(...skipping 15 matching lines...) Expand all Loading...
1234 word_starts_map_[history_id] = word_starts;	1225 word_starts_map_[history_id] = word_starts;

1235 }	1226 }

1236 } else {	1227 } else {

1237 // Since the cache did not contain any word starts we must rebuild them from	1228 // Since the cache did not contain any word starts we must rebuild them from

1238 // the URL and page titles.	1229 // the URL and page titles.

1239 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();	1230 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();

1240 iter != history_info_map_.end(); ++iter) {	1231 iter != history_info_map_.end(); ++iter) {

1241 RowWordStarts word_starts;	1232 RowWordStarts word_starts;

1242 const history::URLRow& row(iter->second.url_row);	1233 const history::URLRow& row(iter->second.url_row);

1243 const base::string16& url =	1234 const base::string16& url =

1244 bookmarks::CleanUpUrlForMatching(row.url(), languages, NULL);	1235 bookmarks::CleanUpUrlForMatching(row.url(), nullptr);

1245 String16VectorFromString16(url, false, &word_starts.url_word_starts_);	1236 String16VectorFromString16(url, false, &word_starts.url_word_starts_);

1246 const base::string16& title =	1237 const base::string16& title =

1247 bookmarks::CleanUpTitleForMatching(row.title());	1238 bookmarks::CleanUpTitleForMatching(row.title());

1248 String16VectorFromString16(title, false, &word_starts.title_word_starts_);	1239 String16VectorFromString16(title, false, &word_starts.title_word_starts_);

1249 word_starts_map_[iter->first] = word_starts;	1240 word_starts_map_[iter->first] = word_starts;

1250 }	1241 }

1251 }	1242 }

1252 return true;	1243 return true;

1253 }	1244 }

1254	1245

(...skipping 21 matching lines...) Expand all Loading...
1276	1267

1277 URLIndexPrivateData::SearchTermCacheItem::~SearchTermCacheItem() {	1268 URLIndexPrivateData::SearchTermCacheItem::~SearchTermCacheItem() {

1278 }	1269 }

1279	1270

1280 // URLIndexPrivateData::AddHistoryMatch ----------------------------------------	1271 // URLIndexPrivateData::AddHistoryMatch ----------------------------------------

1281	1272

1282 URLIndexPrivateData::AddHistoryMatch::AddHistoryMatch(	1273 URLIndexPrivateData::AddHistoryMatch::AddHistoryMatch(

1283 bookmarks::BookmarkModel* bookmark_model,	1274 bookmarks::BookmarkModel* bookmark_model,

1284 TemplateURLService* template_url_service,	1275 TemplateURLService* template_url_service,

1285 const URLIndexPrivateData& private_data,	1276 const URLIndexPrivateData& private_data,

1286 const std::string& languages,

1287 const base::string16& lower_string,	1277 const base::string16& lower_string,

1288 const String16Vector& lower_terms,	1278 const String16Vector& lower_terms,

1289 const base::Time now)	1279 const base::Time now)

1290 : bookmark_model_(bookmark_model),	1280 : bookmark_model_(bookmark_model),

1291 template_url_service_(template_url_service),	1281 template_url_service_(template_url_service),

1292 private_data_(private_data),	1282 private_data_(private_data),

1293 languages_(languages),

1294 lower_string_(lower_string),	1283 lower_string_(lower_string),

1295 lower_terms_(lower_terms),	1284 lower_terms_(lower_terms),

1296 now_(now) {	1285 now_(now) {

1297 // Calculate offsets for each term. For instance, the offset for	1286 // Calculate offsets for each term. For instance, the offset for

1298 // ".net" should be 1, indicating that the actual word-part of the term	1287 // ".net" should be 1, indicating that the actual word-part of the term

1299 // starts at offset 1.	1288 // starts at offset 1.

1300 lower_terms_to_word_starts_offsets_.resize(lower_terms_.size(), 0u);	1289 lower_terms_to_word_starts_offsets_.resize(lower_terms_.size(), 0u);

1301 for (size_t i = 0; i < lower_terms_.size(); ++i) {	1290 for (size_t i = 0; i < lower_terms_.size(); ++i) {

1302 base::i18n::BreakIterator iter(lower_terms_[i],	1291 base::i18n::BreakIterator iter(lower_terms_[i],

1303 base::i18n::BreakIterator::BREAK_WORD);	1292 base::i18n::BreakIterator::BREAK_WORD);

(...skipping 19 matching lines...) Expand all Loading...
1323 const HistoryID history_id) {	1312 const HistoryID history_id) {

1324 HistoryInfoMap::const_iterator hist_pos =	1313 HistoryInfoMap::const_iterator hist_pos =

1325 private_data_.history_info_map_.find(history_id);	1314 private_data_.history_info_map_.find(history_id);

1326 if (hist_pos != private_data_.history_info_map_.end()) {	1315 if (hist_pos != private_data_.history_info_map_.end()) {

1327 const history::URLRow& hist_item = hist_pos->second.url_row;	1316 const history::URLRow& hist_item = hist_pos->second.url_row;

1328 const VisitInfoVector& visits = hist_pos->second.visits;	1317 const VisitInfoVector& visits = hist_pos->second.visits;

1329 WordStartsMap::const_iterator starts_pos =	1318 WordStartsMap::const_iterator starts_pos =

1330 private_data_.word_starts_map_.find(history_id);	1319 private_data_.word_starts_map_.find(history_id);

1331 DCHECK(starts_pos != private_data_.word_starts_map_.end());	1320 DCHECK(starts_pos != private_data_.word_starts_map_.end());

1332 ScoredHistoryMatch match(	1321 ScoredHistoryMatch match(

1333 hist_item, visits, languages_, lower_string_, lower_terms_,	1322 hist_item, visits, lower_string_, lower_terms_,

1334 lower_terms_to_word_starts_offsets_, starts_pos->second,	1323 lower_terms_to_word_starts_offsets_, starts_pos->second,

1335 bookmark_model_ && bookmark_model_->IsBookmarked(hist_item.url()),	1324 bookmark_model_ && bookmark_model_->IsBookmarked(hist_item.url()),

1336 template_url_service_, now_);	1325 template_url_service_, now_);

1337 if (match.raw_score > 0)	1326 if (match.raw_score > 0)

1338 scored_matches_.push_back(match);	1327 scored_matches_.push_back(match);

1339 }	1328 }

1340 }	1329 }

1341	1330

1342	1331

1343 // URLIndexPrivateData::HistoryItemFactorGreater -------------------------------	1332 // URLIndexPrivateData::HistoryItemFactorGreater -------------------------------

(...skipping 20 matching lines...) Expand all Loading...
1364 // First cut: typed count, visit count, recency.	1353 // First cut: typed count, visit count, recency.

1365 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks	1354 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks

1366 // recently visited (within the last 12/24 hours) as highly important. Get	1355 // recently visited (within the last 12/24 hours) as highly important. Get

1367 // input from mpearson.	1356 // input from mpearson.

1368 if (r1.typed_count() != r2.typed_count())	1357 if (r1.typed_count() != r2.typed_count())

1369 return (r1.typed_count() > r2.typed_count());	1358 return (r1.typed_count() > r2.typed_count());

1370 if (r1.visit_count() != r2.visit_count())	1359 if (r1.visit_count() != r2.visit_count())

1371 return (r1.visit_count() > r2.visit_count());	1360 return (r1.visit_count() > r2.visit_count());

1372 return (r1.last_visit() > r2.last_visit());	1361 return (r1.last_visit() > r2.last_visit());

1373 }	1362 }

OLD	NEW

« no previous file with comments | « components/omnibox/browser/url_index_private_data.h ('k') | components/omnibox/browser/zero_suggest_provider.cc » ('j') | no next file with comments »