Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(109)

Side by Side Diff: components/omnibox/browser/url_index_private_data.cc

Issue 1841653003: Drop |languages| from {Format,Elide}Url* and IDNToUnicode (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: fix typo in elide_url.cc Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/omnibox/browser/url_index_private_data.h" 5 #include "components/omnibox/browser/url_index_private_data.h"
6 6
7 #include <stdint.h> 7 #include <stdint.h>
8 8
9 #include <functional> 9 #include <functional>
10 #include <iterator> 10 #include <iterator>
(...skipping 138 matching lines...) Expand 10 before | Expand all | Expand 10 after
149 saved_cache_version_(kCurrentCacheFileVersion), 149 saved_cache_version_(kCurrentCacheFileVersion),
150 pre_filter_item_count_(0), 150 pre_filter_item_count_(0),
151 post_filter_item_count_(0), 151 post_filter_item_count_(0),
152 post_scoring_item_count_(0) { 152 post_scoring_item_count_(0) {
153 } 153 }
154 154
155 ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms( 155 ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms(
156 base::string16 search_string, 156 base::string16 search_string,
157 size_t cursor_position, 157 size_t cursor_position,
158 size_t max_matches, 158 size_t max_matches,
159 const std::string& languages,
160 bookmarks::BookmarkModel* bookmark_model, 159 bookmarks::BookmarkModel* bookmark_model,
161 TemplateURLService* template_url_service) { 160 TemplateURLService* template_url_service) {
162 // If cursor position is set and useful (not at either end of the 161 // If cursor position is set and useful (not at either end of the
163 // string), allow the search string to be broken at cursor position. 162 // string), allow the search string to be broken at cursor position.
164 // We do this by pretending there's a space where the cursor is. 163 // We do this by pretending there's a space where the cursor is.
165 if ((cursor_position != base::string16::npos) && 164 if ((cursor_position != base::string16::npos) &&
166 (cursor_position < search_string.length()) && 165 (cursor_position < search_string.length()) &&
167 (cursor_position > 0)) { 166 (cursor_position > 0)) {
168 search_string.insert(cursor_position, base::ASCIIToUTF16(" ")); 167 search_string.insert(cursor_position, base::ASCIIToUTF16(" "));
169 } 168 }
170 pre_filter_item_count_ = 0; 169 pre_filter_item_count_ = 0;
171 post_filter_item_count_ = 0; 170 post_filter_item_count_ = 0;
172 post_scoring_item_count_ = 0; 171 post_scoring_item_count_ = 0;
173 // The search string we receive may contain escaped characters. For reducing 172 // The search string we receive may contain escaped characters. For reducing
174 // the index we need individual, lower-cased words, ignoring escapings. For 173 // the index we need individual, lower-cased words, ignoring escapings. For
175 // the final filtering we need whitespace separated substrings possibly 174 // the final filtering we need whitespace separated substrings possibly
176 // containing escaped characters. 175 // containing escaped characters.
177 base::string16 lower_raw_string(base::i18n::ToLower(search_string)); 176 base::string16 lower_raw_string(base::i18n::ToLower(search_string));
178 base::string16 lower_unescaped_string = 177 base::string16 lower_unescaped_string =
179 net::UnescapeURLComponent(lower_raw_string, 178 net::UnescapeURLComponent(lower_raw_string,
180 net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS | 179 net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS |
181 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS); 180 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS);
182 // Extract individual 'words' (as opposed to 'terms'; see below) from the 181 // Extract individual 'words' (as opposed to 'terms'; see below) from the
183 // search string. When the user types "colspec=ID%20Mstone Release" we get 182 // search string. When the user types "colspec=ID%20Mstone Release" we get
184 // four 'words': "colspec", "id", "mstone" and "release". 183 // four 'words': "colspec", "id", "mstone" and "release".
185 String16Vector lower_words( 184 String16Vector lower_words(
186 String16VectorFromString16(lower_unescaped_string, false, NULL)); 185 String16VectorFromString16(lower_unescaped_string, false, nullptr));
187 ScoredHistoryMatches scored_items; 186 ScoredHistoryMatches scored_items;
188 187
189 // Do nothing if we have indexed no words (probably because we've not been 188 // Do nothing if we have indexed no words (probably because we've not been
190 // initialized yet) or the search string has no words. 189 // initialized yet) or the search string has no words.
191 if (word_list_.empty() || lower_words.empty()) { 190 if (word_list_.empty() || lower_words.empty()) {
192 search_term_cache_.clear(); // Invalidate the term cache. 191 search_term_cache_.clear(); // Invalidate the term cache.
193 return scored_items; 192 return scored_items;
194 } 193 }
195 194
196 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep 195 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after
248 // excludes a long list of whitespace.) One could write a scoring 247 // excludes a long list of whitespace.) One could write a scoring
249 // function that gives a reasonable order to matches when there 248 // function that gives a reasonable order to matches when there
250 // are no terms (i.e., all the words are some form of whitespace), 249 // are no terms (i.e., all the words are some form of whitespace),
251 // but this is such a rare edge case that it's not worth the time. 250 // but this is such a rare edge case that it's not worth the time.
252 return scored_items; 251 return scored_items;
253 } 252 }
254 scored_items = 253 scored_items =
255 std::for_each( 254 std::for_each(
256 history_id_set.begin(), history_id_set.end(), 255 history_id_set.begin(), history_id_set.end(),
257 AddHistoryMatch(bookmark_model, template_url_service, *this, 256 AddHistoryMatch(bookmark_model, template_url_service, *this,
258 languages, lower_raw_string, lower_raw_terms, 257 lower_raw_string, lower_raw_terms,
259 base::Time::Now())).ScoredMatches(); 258 base::Time::Now())).ScoredMatches();
260 259
261 // Select and sort only the top |max_matches| results. 260 // Select and sort only the top |max_matches| results.
262 if (scored_items.size() > max_matches) { 261 if (scored_items.size() > max_matches) {
263 std::partial_sort(scored_items.begin(), 262 std::partial_sort(scored_items.begin(),
264 scored_items.begin() + 263 scored_items.begin() +
265 max_matches, 264 max_matches,
266 scored_items.end(), 265 scored_items.end(),
267 ScoredHistoryMatch::MatchScoreGreater); 266 ScoredHistoryMatch::MatchScoreGreater);
268 scored_items.resize(max_matches); 267 scored_items.resize(max_matches);
(...skipping 15 matching lines...) Expand all
284 ++cache_iter; 283 ++cache_iter;
285 } 284 }
286 } 285 }
287 286
288 return scored_items; 287 return scored_items;
289 } 288 }
290 289
291 bool URLIndexPrivateData::UpdateURL( 290 bool URLIndexPrivateData::UpdateURL(
292 history::HistoryService* history_service, 291 history::HistoryService* history_service,
293 const history::URLRow& row, 292 const history::URLRow& row,
294 const std::string& languages,
295 const std::set<std::string>& scheme_whitelist, 293 const std::set<std::string>& scheme_whitelist,
296 base::CancelableTaskTracker* tracker) { 294 base::CancelableTaskTracker* tracker) {
297 // The row may or may not already be in our index. If it is not already 295 // The row may or may not already be in our index. If it is not already
298 // indexed and it qualifies then it gets indexed. If it is already 296 // indexed and it qualifies then it gets indexed. If it is already
299 // indexed and still qualifies then it gets updated, otherwise it 297 // indexed and still qualifies then it gets updated, otherwise it
300 // is deleted from the index. 298 // is deleted from the index.
301 bool row_was_updated = false; 299 bool row_was_updated = false;
302 history::URLID row_id = row.id(); 300 history::URLID row_id = row.id();
303 HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id); 301 HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id);
304 if (row_pos == history_info_map_.end()) { 302 if (row_pos == history_info_map_.end()) {
305 // This new row should be indexed if it qualifies. 303 // This new row should be indexed if it qualifies.
306 history::URLRow new_row(row); 304 history::URLRow new_row(row);
307 new_row.set_id(row_id); 305 new_row.set_id(row_id);
308 row_was_updated = RowQualifiesAsSignificant(new_row, base::Time()) && 306 row_was_updated = RowQualifiesAsSignificant(new_row, base::Time()) &&
309 IndexRow(NULL, 307 IndexRow(nullptr,
310 history_service, 308 history_service,
311 new_row, 309 new_row,
312 languages,
313 scheme_whitelist, 310 scheme_whitelist,
314 tracker); 311 tracker);
315 } else if (RowQualifiesAsSignificant(row, base::Time())) { 312 } else if (RowQualifiesAsSignificant(row, base::Time())) {
316 // This indexed row still qualifies and will be re-indexed. 313 // This indexed row still qualifies and will be re-indexed.
317 // The url won't have changed but the title, visit count, etc. 314 // The url won't have changed but the title, visit count, etc.
318 // might have changed. 315 // might have changed.
319 history::URLRow& row_to_update = row_pos->second.url_row; 316 history::URLRow& row_to_update = row_pos->second.url_row;
320 bool title_updated = row_to_update.title() != row.title(); 317 bool title_updated = row_to_update.title() != row.title();
321 if (row_to_update.visit_count() != row.visit_count() || 318 if (row_to_update.visit_count() != row.visit_count() ||
322 row_to_update.typed_count() != row.typed_count() || 319 row_to_update.typed_count() != row.typed_count() ||
323 row_to_update.last_visit() != row.last_visit() || title_updated) { 320 row_to_update.last_visit() != row.last_visit() || title_updated) {
324 row_to_update.set_visit_count(row.visit_count()); 321 row_to_update.set_visit_count(row.visit_count());
325 row_to_update.set_typed_count(row.typed_count()); 322 row_to_update.set_typed_count(row.typed_count());
326 row_to_update.set_last_visit(row.last_visit()); 323 row_to_update.set_last_visit(row.last_visit());
327 // If something appears to have changed, update the recent visits 324 // If something appears to have changed, update the recent visits
328 // information. 325 // information.
329 ScheduleUpdateRecentVisits(history_service, row_id, tracker); 326 ScheduleUpdateRecentVisits(history_service, row_id, tracker);
330 // While the URL is guaranteed to remain stable, the title may have 327 // While the URL is guaranteed to remain stable, the title may have
331 // changed. If so, then update the index with the changed words. 328 // changed. If so, then update the index with the changed words.
332 if (title_updated) { 329 if (title_updated) {
333 // Clear all words associated with this row and re-index both the 330 // Clear all words associated with this row and re-index both the
334 // URL and title. 331 // URL and title.
335 RemoveRowWordsFromIndex(row_to_update); 332 RemoveRowWordsFromIndex(row_to_update);
336 row_to_update.set_title(row.title()); 333 row_to_update.set_title(row.title());
337 RowWordStarts word_starts; 334 RowWordStarts word_starts;
338 AddRowWordsToIndex(row_to_update, &word_starts, languages); 335 AddRowWordsToIndex(row_to_update, &word_starts);
339 word_starts_map_[row_id] = word_starts; 336 word_starts_map_[row_id] = word_starts;
340 } 337 }
341 row_was_updated = true; 338 row_was_updated = true;
342 } 339 }
343 } else { 340 } else {
344 // This indexed row no longer qualifies and will be de-indexed by 341 // This indexed row no longer qualifies and will be de-indexed by
345 // clearing all words associated with this row. 342 // clearing all words associated with this row.
346 RemoveRowFromIndex(row); 343 RemoveRowFromIndex(row);
347 row_was_updated = true; 344 row_was_updated = true;
348 } 345 }
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after
403 HistoryInfoMapItemHasURL(url)); 400 HistoryInfoMapItemHasURL(url));
404 if (pos == history_info_map_.end()) 401 if (pos == history_info_map_.end())
405 return false; 402 return false;
406 RemoveRowFromIndex(pos->second.url_row); 403 RemoveRowFromIndex(pos->second.url_row);
407 search_term_cache_.clear(); // This invalidates the cache. 404 search_term_cache_.clear(); // This invalidates the cache.
408 return true; 405 return true;
409 } 406 }
410 407
411 // static 408 // static
412 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RestoreFromFile( 409 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RestoreFromFile(
413 const base::FilePath& file_path, 410 const base::FilePath& file_path) {
414 const std::string& languages) {
415 base::TimeTicks beginning_time = base::TimeTicks::Now(); 411 base::TimeTicks beginning_time = base::TimeTicks::Now();
416 if (!base::PathExists(file_path)) 412 if (!base::PathExists(file_path))
417 return NULL; 413 return nullptr;
418 std::string data; 414 std::string data;
419 // If there is no cache file then simply give up. This will cause us to 415 // If there is no cache file then simply give up. This will cause us to
420 // attempt to rebuild from the history database. 416 // attempt to rebuild from the history database.
421 if (!base::ReadFileToString(file_path, &data)) 417 if (!base::ReadFileToString(file_path, &data))
422 return NULL; 418 return nullptr;
423 419
424 scoped_refptr<URLIndexPrivateData> restored_data(new URLIndexPrivateData); 420 scoped_refptr<URLIndexPrivateData> restored_data(new URLIndexPrivateData);
425 InMemoryURLIndexCacheItem index_cache; 421 InMemoryURLIndexCacheItem index_cache;
426 if (!index_cache.ParseFromArray(data.c_str(), data.size())) { 422 if (!index_cache.ParseFromArray(data.c_str(), data.size())) {
427 LOG(WARNING) << "Failed to parse URLIndexPrivateData cache data read from " 423 LOG(WARNING) << "Failed to parse URLIndexPrivateData cache data read from "
428 << file_path.value(); 424 << file_path.value();
429 return restored_data; 425 return restored_data;
430 } 426 }
431 427
432 if (!restored_data->RestorePrivateData(index_cache, languages)) 428 if (!restored_data->RestorePrivateData(index_cache))
433 return NULL; 429 return nullptr;
434 430
435 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime", 431 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime",
436 base::TimeTicks::Now() - beginning_time); 432 base::TimeTicks::Now() - beginning_time);
437 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", 433 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",
438 restored_data->history_id_word_map_.size()); 434 restored_data->history_id_word_map_.size());
439 UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size()); 435 UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size());
440 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", 436 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",
441 restored_data->word_map_.size()); 437 restored_data->word_map_.size());
442 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", 438 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",
443 restored_data->char_word_map_.size()); 439 restored_data->char_word_map_.size());
444 if (restored_data->Empty()) 440 if (restored_data->Empty())
445 return NULL; // 'No data' is the same as a failed reload. 441 return nullptr; // 'No data' is the same as a failed reload.
446 return restored_data; 442 return restored_data;
447 } 443 }
448 444
449 // static 445 // static
450 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RebuildFromHistory( 446 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RebuildFromHistory(
451 history::HistoryDatabase* history_db, 447 history::HistoryDatabase* history_db,
452 const std::string& languages,
453 const std::set<std::string>& scheme_whitelist) { 448 const std::set<std::string>& scheme_whitelist) {
454 if (!history_db) 449 if (!history_db)
455 return NULL; 450 return nullptr;
456 451
457 base::TimeTicks beginning_time = base::TimeTicks::Now(); 452 base::TimeTicks beginning_time = base::TimeTicks::Now();
458 453
459 scoped_refptr<URLIndexPrivateData> 454 scoped_refptr<URLIndexPrivateData>
460 rebuilt_data(new URLIndexPrivateData); 455 rebuilt_data(new URLIndexPrivateData);
461 history::URLDatabase::URLEnumerator history_enum; 456 history::URLDatabase::URLEnumerator history_enum;
462 if (!history_db->InitURLEnumeratorForSignificant(&history_enum)) 457 if (!history_db->InitURLEnumeratorForSignificant(&history_enum))
463 return NULL; 458 return nullptr;
464 rebuilt_data->last_time_rebuilt_from_history_ = base::Time::Now(); 459 rebuilt_data->last_time_rebuilt_from_history_ = base::Time::Now();
465 for (history::URLRow row; history_enum.GetNextURL(&row);) { 460 for (history::URLRow row; history_enum.GetNextURL(&row);) {
466 rebuilt_data->IndexRow( 461 rebuilt_data->IndexRow(
467 history_db, NULL, row, languages, scheme_whitelist, NULL); 462 history_db, nullptr, row, scheme_whitelist, nullptr);
468 } 463 }
469 464
470 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime", 465 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime",
471 base::TimeTicks::Now() - beginning_time); 466 base::TimeTicks::Now() - beginning_time);
472 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", 467 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",
473 rebuilt_data->history_id_word_map_.size()); 468 rebuilt_data->history_id_word_map_.size());
474 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", 469 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",
475 rebuilt_data->word_map_.size()); 470 rebuilt_data->word_map_.size());
476 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", 471 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",
477 rebuilt_data->char_word_map_.size()); 472 rebuilt_data->char_word_map_.size());
(...skipping 217 matching lines...) Expand 10 before | Expand all | Expand 10 after
695 word_id_set.swap(new_word_id_set); 690 word_id_set.swap(new_word_id_set);
696 } 691 }
697 } 692 }
698 return word_id_set; 693 return word_id_set;
699 } 694 }
700 695
701 bool URLIndexPrivateData::IndexRow( 696 bool URLIndexPrivateData::IndexRow(
702 history::HistoryDatabase* history_db, 697 history::HistoryDatabase* history_db,
703 history::HistoryService* history_service, 698 history::HistoryService* history_service,
704 const history::URLRow& row, 699 const history::URLRow& row,
705 const std::string& languages,
706 const std::set<std::string>& scheme_whitelist, 700 const std::set<std::string>& scheme_whitelist,
707 base::CancelableTaskTracker* tracker) { 701 base::CancelableTaskTracker* tracker) {
708 const GURL& gurl(row.url()); 702 const GURL& gurl(row.url());
709 703
710 // Index only URLs with a whitelisted scheme. 704 // Index only URLs with a whitelisted scheme.
711 if (!URLSchemeIsWhitelisted(gurl, scheme_whitelist)) 705 if (!URLSchemeIsWhitelisted(gurl, scheme_whitelist))
712 return false; 706 return false;
713 707
714 history::URLID row_id = row.id(); 708 history::URLID row_id = row.id();
715 // Strip out username and password before saving and indexing. 709 // Strip out username and password before saving and indexing.
716 base::string16 url(url_formatter::FormatUrl( 710 base::string16 url(url_formatter::FormatUrl(
717 gurl, languages, url_formatter::kFormatUrlOmitUsernamePassword, 711 gurl, url_formatter::kFormatUrlOmitUsernamePassword,
718 net::UnescapeRule::NONE, nullptr, nullptr, nullptr)); 712 net::UnescapeRule::NONE, nullptr, nullptr, nullptr));
719 713
720 HistoryID history_id = static_cast<HistoryID>(row_id); 714 HistoryID history_id = static_cast<HistoryID>(row_id);
721 DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max()); 715 DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max());
722 716
723 // Add the row for quick lookup in the history info store. 717 // Add the row for quick lookup in the history info store.
724 history::URLRow new_row(GURL(url), row_id); 718 history::URLRow new_row(GURL(url), row_id);
725 new_row.set_visit_count(row.visit_count()); 719 new_row.set_visit_count(row.visit_count());
726 new_row.set_typed_count(row.typed_count()); 720 new_row.set_typed_count(row.typed_count());
727 new_row.set_last_visit(row.last_visit()); 721 new_row.set_last_visit(row.last_visit());
728 new_row.set_title(row.title()); 722 new_row.set_title(row.title());
729 history_info_map_[history_id].url_row = new_row; 723 history_info_map_[history_id].url_row = new_row;
730 724
731 // Index the words contained in the URL and title of the row. 725 // Index the words contained in the URL and title of the row.
732 RowWordStarts word_starts; 726 RowWordStarts word_starts;
733 AddRowWordsToIndex(new_row, &word_starts, languages); 727 AddRowWordsToIndex(new_row, &word_starts);
734 word_starts_map_[history_id] = word_starts; 728 word_starts_map_[history_id] = word_starts;
735 729
736 // Update the recent visits information or schedule the update 730 // Update the recent visits information or schedule the update
737 // as appropriate. 731 // as appropriate.
738 if (history_db) { 732 if (history_db) {
739 // We'd like to check that we're on the history DB thread. 733 // We'd like to check that we're on the history DB thread.
740 // However, unittest code actually calls this on the UI thread. 734 // However, unittest code actually calls this on the UI thread.
741 // So we don't do any thread checks. 735 // So we don't do any thread checks.
742 history::VisitVector recent_visits; 736 history::VisitVector recent_visits;
743 // Make sure the private data is going to get as many recent visits as 737 // Make sure the private data is going to get as many recent visits as
744 // ScoredHistoryMatch::GetFrequency() hopes to use. 738 // ScoredHistoryMatch::GetFrequency() hopes to use.
745 DCHECK_GE(kMaxVisitsToStoreInCache, ScoredHistoryMatch::kMaxVisitsToScore); 739 DCHECK_GE(kMaxVisitsToStoreInCache, ScoredHistoryMatch::kMaxVisitsToScore);
746 if (history_db->GetMostRecentVisitsForURL(row_id, 740 if (history_db->GetMostRecentVisitsForURL(row_id,
747 kMaxVisitsToStoreInCache, 741 kMaxVisitsToStoreInCache,
748 &recent_visits)) 742 &recent_visits))
749 UpdateRecentVisits(row_id, recent_visits); 743 UpdateRecentVisits(row_id, recent_visits);
750 } else { 744 } else {
751 DCHECK(tracker); 745 DCHECK(tracker);
752 DCHECK(history_service); 746 DCHECK(history_service);
753 ScheduleUpdateRecentVisits(history_service, row_id, tracker); 747 ScheduleUpdateRecentVisits(history_service, row_id, tracker);
754 } 748 }
755 749
756 return true; 750 return true;
757 } 751 }
758 752
759 void URLIndexPrivateData::AddRowWordsToIndex(const history::URLRow& row, 753 void URLIndexPrivateData::AddRowWordsToIndex(const history::URLRow& row,
760 RowWordStarts* word_starts, 754 RowWordStarts* word_starts) {
761 const std::string& languages) {
762 HistoryID history_id = static_cast<HistoryID>(row.id()); 755 HistoryID history_id = static_cast<HistoryID>(row.id());
763 // Split URL into individual, unique words then add in the title words. 756 // Split URL into individual, unique words then add in the title words.
764 const GURL& gurl(row.url()); 757 const GURL& gurl(row.url());
765 const base::string16& url = 758 const base::string16& url =
766 bookmarks::CleanUpUrlForMatching(gurl, languages, NULL); 759 bookmarks::CleanUpUrlForMatching(gurl, nullptr);
767 String16Set url_words = String16SetFromString16(url, 760 String16Set url_words = String16SetFromString16(url,
768 word_starts ? &word_starts->url_word_starts_ : NULL); 761 word_starts ? &word_starts->url_word_starts_ : nullptr);
769 const base::string16& title = bookmarks::CleanUpTitleForMatching(row.title()); 762 const base::string16& title = bookmarks::CleanUpTitleForMatching(row.title());
770 String16Set title_words = String16SetFromString16(title, 763 String16Set title_words = String16SetFromString16(title,
771 word_starts ? &word_starts->title_word_starts_ : NULL); 764 word_starts ? &word_starts->title_word_starts_ : nullptr);
772 String16Set words = base::STLSetUnion<String16Set>(url_words, title_words); 765 String16Set words = base::STLSetUnion<String16Set>(url_words, title_words);
773 for (String16Set::iterator word_iter = words.begin(); 766 for (String16Set::iterator word_iter = words.begin();
774 word_iter != words.end(); ++word_iter) 767 word_iter != words.end(); ++word_iter)
775 AddWordToIndex(*word_iter, history_id); 768 AddWordToIndex(*word_iter, history_id);
776 769
777 search_term_cache_.clear(); // Invalidate the term cache. 770 search_term_cache_.clear(); // Invalidate the term cache.
778 } 771 }
779 772
780 void URLIndexPrivateData::AddWordToIndex(const base::string16& term, 773 void URLIndexPrivateData::AddWordToIndex(const base::string16& term,
781 HistoryID history_id) { 774 HistoryID history_id) {
(...skipping 257 matching lines...) Expand 10 before | Expand all | Expand 10 after
1039 for (WordStarts::const_iterator i = word_starts.url_word_starts_.begin(); 1032 for (WordStarts::const_iterator i = word_starts.url_word_starts_.begin();
1040 i != word_starts.url_word_starts_.end(); ++i) 1033 i != word_starts.url_word_starts_.end(); ++i)
1041 map_entry->add_url_word_starts(*i); 1034 map_entry->add_url_word_starts(*i);
1042 for (WordStarts::const_iterator i = word_starts.title_word_starts_.begin(); 1035 for (WordStarts::const_iterator i = word_starts.title_word_starts_.begin();
1043 i != word_starts.title_word_starts_.end(); ++i) 1036 i != word_starts.title_word_starts_.end(); ++i)
1044 map_entry->add_title_word_starts(*i); 1037 map_entry->add_title_word_starts(*i);
1045 } 1038 }
1046 } 1039 }
1047 1040
1048 bool URLIndexPrivateData::RestorePrivateData( 1041 bool URLIndexPrivateData::RestorePrivateData(
1049 const InMemoryURLIndexCacheItem& cache, 1042 const InMemoryURLIndexCacheItem& cache) {
1050 const std::string& languages) {
1051 last_time_rebuilt_from_history_ = 1043 last_time_rebuilt_from_history_ =
1052 base::Time::FromInternalValue(cache.last_rebuild_timestamp()); 1044 base::Time::FromInternalValue(cache.last_rebuild_timestamp());
1053 const base::TimeDelta rebuilt_ago = 1045 const base::TimeDelta rebuilt_ago =
1054 base::Time::Now() - last_time_rebuilt_from_history_; 1046 base::Time::Now() - last_time_rebuilt_from_history_;
1055 if ((rebuilt_ago > base::TimeDelta::FromDays(7)) || 1047 if ((rebuilt_ago > base::TimeDelta::FromDays(7)) ||
1056 (rebuilt_ago < base::TimeDelta::FromDays(-1))) { 1048 (rebuilt_ago < base::TimeDelta::FromDays(-1))) {
1057 // Cache is more than a week old or, somehow, from some time in the future. 1049 // Cache is more than a week old or, somehow, from some time in the future.
1058 // It's probably a good time to rebuild the index from history to 1050 // It's probably a good time to rebuild the index from history to
1059 // allow synced entries to now appear, expired entries to disappear, etc. 1051 // allow synced entries to now appear, expired entries to disappear, etc.
1060 // Allow one day in the future to make the cache not rebuild on simple 1052 // Allow one day in the future to make the cache not rebuild on simple
1061 // system clock changes such as time zone changes. 1053 // system clock changes such as time zone changes.
1062 return false; 1054 return false;
1063 } 1055 }
1064 if (cache.has_version()) { 1056 if (cache.has_version()) {
1065 if (cache.version() < kCurrentCacheFileVersion) { 1057 if (cache.version() < kCurrentCacheFileVersion) {
1066 // Don't try to restore an old format cache file. (This will cause 1058 // Don't try to restore an old format cache file. (This will cause
1067 // the InMemoryURLIndex to schedule rebuilding the URLIndexPrivateData 1059 // the InMemoryURLIndex to schedule rebuilding the URLIndexPrivateData
1068 // from history.) 1060 // from history.)
1069 return false; 1061 return false;
1070 } 1062 }
1071 restored_cache_version_ = cache.version(); 1063 restored_cache_version_ = cache.version();
1072 } 1064 }
1073 return RestoreWordList(cache) && RestoreWordMap(cache) && 1065 return RestoreWordList(cache) && RestoreWordMap(cache) &&
1074 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) && 1066 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) &&
1075 RestoreHistoryInfoMap(cache) && RestoreWordStartsMap(cache, languages); 1067 RestoreHistoryInfoMap(cache) && RestoreWordStartsMap(cache);
1076 } 1068 }
1077 1069
1078 bool URLIndexPrivateData::RestoreWordList( 1070 bool URLIndexPrivateData::RestoreWordList(
1079 const InMemoryURLIndexCacheItem& cache) { 1071 const InMemoryURLIndexCacheItem& cache) {
1080 if (!cache.has_word_list()) 1072 if (!cache.has_word_list())
1081 return false; 1073 return false;
1082 const WordListItem& list_item(cache.word_list()); 1074 const WordListItem& list_item(cache.word_list());
1083 uint32_t expected_item_count = list_item.word_count(); 1075 uint32_t expected_item_count = list_item.word_count();
1084 uint32_t actual_item_count = list_item.word_size(); 1076 uint32_t actual_item_count = list_item.word_size();
1085 if (actual_item_count == 0 || actual_item_count != expected_item_count) 1077 if (actual_item_count == 0 || actual_item_count != expected_item_count)
(...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after
1197 visits.push_back(std::make_pair( 1189 visits.push_back(std::make_pair(
1198 base::Time::FromInternalValue(iter->visits(i).visit_time()), 1190 base::Time::FromInternalValue(iter->visits(i).visit_time()),
1199 ui::PageTransitionFromInt(iter->visits(i).transition_type()))); 1191 ui::PageTransitionFromInt(iter->visits(i).transition_type())));
1200 } 1192 }
1201 history_info_map_[history_id].visits = visits; 1193 history_info_map_[history_id].visits = visits;
1202 } 1194 }
1203 return true; 1195 return true;
1204 } 1196 }
1205 1197
1206 bool URLIndexPrivateData::RestoreWordStartsMap( 1198 bool URLIndexPrivateData::RestoreWordStartsMap(
1207 const InMemoryURLIndexCacheItem& cache, 1199 const InMemoryURLIndexCacheItem& cache) {
1208 const std::string& languages) {
1209 // Note that this function must be called after RestoreHistoryInfoMap() has 1200 // Note that this function must be called after RestoreHistoryInfoMap() has
1210 // been run as the word starts may have to be recalculated from the urls and 1201 // been run as the word starts may have to be recalculated from the urls and
1211 // page titles. 1202 // page titles.
1212 if (cache.has_word_starts_map()) { 1203 if (cache.has_word_starts_map()) {
1213 const WordStartsMapItem& list_item(cache.word_starts_map()); 1204 const WordStartsMapItem& list_item(cache.word_starts_map());
1214 uint32_t expected_item_count = list_item.item_count(); 1205 uint32_t expected_item_count = list_item.item_count();
1215 uint32_t actual_item_count = list_item.word_starts_map_entry_size(); 1206 uint32_t actual_item_count = list_item.word_starts_map_entry_size();
1216 if (actual_item_count == 0 || actual_item_count != expected_item_count) 1207 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1217 return false; 1208 return false;
1218 const RepeatedPtrField<WordStartsMapEntry>& 1209 const RepeatedPtrField<WordStartsMapEntry>&
(...skipping 15 matching lines...) Expand all
1234 word_starts_map_[history_id] = word_starts; 1225 word_starts_map_[history_id] = word_starts;
1235 } 1226 }
1236 } else { 1227 } else {
1237 // Since the cache did not contain any word starts we must rebuild them from 1228 // Since the cache did not contain any word starts we must rebuild them from
1238 // the URL and page titles. 1229 // the URL and page titles.
1239 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); 1230 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();
1240 iter != history_info_map_.end(); ++iter) { 1231 iter != history_info_map_.end(); ++iter) {
1241 RowWordStarts word_starts; 1232 RowWordStarts word_starts;
1242 const history::URLRow& row(iter->second.url_row); 1233 const history::URLRow& row(iter->second.url_row);
1243 const base::string16& url = 1234 const base::string16& url =
1244 bookmarks::CleanUpUrlForMatching(row.url(), languages, NULL); 1235 bookmarks::CleanUpUrlForMatching(row.url(), nullptr);
1245 String16VectorFromString16(url, false, &word_starts.url_word_starts_); 1236 String16VectorFromString16(url, false, &word_starts.url_word_starts_);
1246 const base::string16& title = 1237 const base::string16& title =
1247 bookmarks::CleanUpTitleForMatching(row.title()); 1238 bookmarks::CleanUpTitleForMatching(row.title());
1248 String16VectorFromString16(title, false, &word_starts.title_word_starts_); 1239 String16VectorFromString16(title, false, &word_starts.title_word_starts_);
1249 word_starts_map_[iter->first] = word_starts; 1240 word_starts_map_[iter->first] = word_starts;
1250 } 1241 }
1251 } 1242 }
1252 return true; 1243 return true;
1253 } 1244 }
1254 1245
(...skipping 21 matching lines...) Expand all
1276 1267
1277 URLIndexPrivateData::SearchTermCacheItem::~SearchTermCacheItem() { 1268 URLIndexPrivateData::SearchTermCacheItem::~SearchTermCacheItem() {
1278 } 1269 }
1279 1270
1280 // URLIndexPrivateData::AddHistoryMatch ---------------------------------------- 1271 // URLIndexPrivateData::AddHistoryMatch ----------------------------------------
1281 1272
1282 URLIndexPrivateData::AddHistoryMatch::AddHistoryMatch( 1273 URLIndexPrivateData::AddHistoryMatch::AddHistoryMatch(
1283 bookmarks::BookmarkModel* bookmark_model, 1274 bookmarks::BookmarkModel* bookmark_model,
1284 TemplateURLService* template_url_service, 1275 TemplateURLService* template_url_service,
1285 const URLIndexPrivateData& private_data, 1276 const URLIndexPrivateData& private_data,
1286 const std::string& languages,
1287 const base::string16& lower_string, 1277 const base::string16& lower_string,
1288 const String16Vector& lower_terms, 1278 const String16Vector& lower_terms,
1289 const base::Time now) 1279 const base::Time now)
1290 : bookmark_model_(bookmark_model), 1280 : bookmark_model_(bookmark_model),
1291 template_url_service_(template_url_service), 1281 template_url_service_(template_url_service),
1292 private_data_(private_data), 1282 private_data_(private_data),
1293 languages_(languages),
1294 lower_string_(lower_string), 1283 lower_string_(lower_string),
1295 lower_terms_(lower_terms), 1284 lower_terms_(lower_terms),
1296 now_(now) { 1285 now_(now) {
1297 // Calculate offsets for each term. For instance, the offset for 1286 // Calculate offsets for each term. For instance, the offset for
1298 // ".net" should be 1, indicating that the actual word-part of the term 1287 // ".net" should be 1, indicating that the actual word-part of the term
1299 // starts at offset 1. 1288 // starts at offset 1.
1300 lower_terms_to_word_starts_offsets_.resize(lower_terms_.size(), 0u); 1289 lower_terms_to_word_starts_offsets_.resize(lower_terms_.size(), 0u);
1301 for (size_t i = 0; i < lower_terms_.size(); ++i) { 1290 for (size_t i = 0; i < lower_terms_.size(); ++i) {
1302 base::i18n::BreakIterator iter(lower_terms_[i], 1291 base::i18n::BreakIterator iter(lower_terms_[i],
1303 base::i18n::BreakIterator::BREAK_WORD); 1292 base::i18n::BreakIterator::BREAK_WORD);
(...skipping 19 matching lines...) Expand all
1323 const HistoryID history_id) { 1312 const HistoryID history_id) {
1324 HistoryInfoMap::const_iterator hist_pos = 1313 HistoryInfoMap::const_iterator hist_pos =
1325 private_data_.history_info_map_.find(history_id); 1314 private_data_.history_info_map_.find(history_id);
1326 if (hist_pos != private_data_.history_info_map_.end()) { 1315 if (hist_pos != private_data_.history_info_map_.end()) {
1327 const history::URLRow& hist_item = hist_pos->second.url_row; 1316 const history::URLRow& hist_item = hist_pos->second.url_row;
1328 const VisitInfoVector& visits = hist_pos->second.visits; 1317 const VisitInfoVector& visits = hist_pos->second.visits;
1329 WordStartsMap::const_iterator starts_pos = 1318 WordStartsMap::const_iterator starts_pos =
1330 private_data_.word_starts_map_.find(history_id); 1319 private_data_.word_starts_map_.find(history_id);
1331 DCHECK(starts_pos != private_data_.word_starts_map_.end()); 1320 DCHECK(starts_pos != private_data_.word_starts_map_.end());
1332 ScoredHistoryMatch match( 1321 ScoredHistoryMatch match(
1333 hist_item, visits, languages_, lower_string_, lower_terms_, 1322 hist_item, visits, lower_string_, lower_terms_,
1334 lower_terms_to_word_starts_offsets_, starts_pos->second, 1323 lower_terms_to_word_starts_offsets_, starts_pos->second,
1335 bookmark_model_ && bookmark_model_->IsBookmarked(hist_item.url()), 1324 bookmark_model_ && bookmark_model_->IsBookmarked(hist_item.url()),
1336 template_url_service_, now_); 1325 template_url_service_, now_);
1337 if (match.raw_score > 0) 1326 if (match.raw_score > 0)
1338 scored_matches_.push_back(match); 1327 scored_matches_.push_back(match);
1339 } 1328 }
1340 } 1329 }
1341 1330
1342 1331
1343 // URLIndexPrivateData::HistoryItemFactorGreater ------------------------------- 1332 // URLIndexPrivateData::HistoryItemFactorGreater -------------------------------
(...skipping 20 matching lines...) Expand all
1364 // First cut: typed count, visit count, recency. 1353 // First cut: typed count, visit count, recency.
1365 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks 1354 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks
1366 // recently visited (within the last 12/24 hours) as highly important. Get 1355 // recently visited (within the last 12/24 hours) as highly important. Get
1367 // input from mpearson. 1356 // input from mpearson.
1368 if (r1.typed_count() != r2.typed_count()) 1357 if (r1.typed_count() != r2.typed_count())
1369 return (r1.typed_count() > r2.typed_count()); 1358 return (r1.typed_count() > r2.typed_count());
1370 if (r1.visit_count() != r2.visit_count()) 1359 if (r1.visit_count() != r2.visit_count())
1371 return (r1.visit_count() > r2.visit_count()); 1360 return (r1.visit_count() > r2.visit_count());
1372 return (r1.last_visit() > r2.last_visit()); 1361 return (r1.last_visit() > r2.last_visit());
1373 } 1362 }
OLDNEW
« no previous file with comments | « components/omnibox/browser/url_index_private_data.h ('k') | components/omnibox/browser/zero_suggest_provider.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698