OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/omnibox/browser/url_index_private_data.h" | 5 #include "components/omnibox/browser/url_index_private_data.h" |
6 | 6 |
7 #include <stdint.h> | 7 #include <stdint.h> |
8 | 8 |
9 #include <functional> | 9 #include <functional> |
10 #include <iterator> | 10 #include <iterator> |
(...skipping 138 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
149 saved_cache_version_(kCurrentCacheFileVersion), | 149 saved_cache_version_(kCurrentCacheFileVersion), |
150 pre_filter_item_count_(0), | 150 pre_filter_item_count_(0), |
151 post_filter_item_count_(0), | 151 post_filter_item_count_(0), |
152 post_scoring_item_count_(0) { | 152 post_scoring_item_count_(0) { |
153 } | 153 } |
154 | 154 |
155 ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms( | 155 ScoredHistoryMatches URLIndexPrivateData::HistoryItemsForTerms( |
156 base::string16 search_string, | 156 base::string16 search_string, |
157 size_t cursor_position, | 157 size_t cursor_position, |
158 size_t max_matches, | 158 size_t max_matches, |
159 const std::string& languages, | |
160 bookmarks::BookmarkModel* bookmark_model, | 159 bookmarks::BookmarkModel* bookmark_model, |
161 TemplateURLService* template_url_service) { | 160 TemplateURLService* template_url_service) { |
162 // If cursor position is set and useful (not at either end of the | 161 // If cursor position is set and useful (not at either end of the |
163 // string), allow the search string to be broken at cursor position. | 162 // string), allow the search string to be broken at cursor position. |
164 // We do this by pretending there's a space where the cursor is. | 163 // We do this by pretending there's a space where the cursor is. |
165 if ((cursor_position != base::string16::npos) && | 164 if ((cursor_position != base::string16::npos) && |
166 (cursor_position < search_string.length()) && | 165 (cursor_position < search_string.length()) && |
167 (cursor_position > 0)) { | 166 (cursor_position > 0)) { |
168 search_string.insert(cursor_position, base::ASCIIToUTF16(" ")); | 167 search_string.insert(cursor_position, base::ASCIIToUTF16(" ")); |
169 } | 168 } |
170 pre_filter_item_count_ = 0; | 169 pre_filter_item_count_ = 0; |
171 post_filter_item_count_ = 0; | 170 post_filter_item_count_ = 0; |
172 post_scoring_item_count_ = 0; | 171 post_scoring_item_count_ = 0; |
173 // The search string we receive may contain escaped characters. For reducing | 172 // The search string we receive may contain escaped characters. For reducing |
174 // the index we need individual, lower-cased words, ignoring escapings. For | 173 // the index we need individual, lower-cased words, ignoring escapings. For |
175 // the final filtering we need whitespace separated substrings possibly | 174 // the final filtering we need whitespace separated substrings possibly |
176 // containing escaped characters. | 175 // containing escaped characters. |
177 base::string16 lower_raw_string(base::i18n::ToLower(search_string)); | 176 base::string16 lower_raw_string(base::i18n::ToLower(search_string)); |
178 base::string16 lower_unescaped_string = | 177 base::string16 lower_unescaped_string = |
179 net::UnescapeURLComponent(lower_raw_string, | 178 net::UnescapeURLComponent(lower_raw_string, |
180 net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS | | 179 net::UnescapeRule::SPACES | net::UnescapeRule::PATH_SEPARATORS | |
181 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS); | 180 net::UnescapeRule::URL_SPECIAL_CHARS_EXCEPT_PATH_SEPARATORS); |
182 // Extract individual 'words' (as opposed to 'terms'; see below) from the | 181 // Extract individual 'words' (as opposed to 'terms'; see below) from the |
183 // search string. When the user types "colspec=ID%20Mstone Release" we get | 182 // search string. When the user types "colspec=ID%20Mstone Release" we get |
184 // four 'words': "colspec", "id", "mstone" and "release". | 183 // four 'words': "colspec", "id", "mstone" and "release". |
185 String16Vector lower_words( | 184 String16Vector lower_words( |
186 String16VectorFromString16(lower_unescaped_string, false, NULL)); | 185 String16VectorFromString16(lower_unescaped_string, false, nullptr)); |
187 ScoredHistoryMatches scored_items; | 186 ScoredHistoryMatches scored_items; |
188 | 187 |
189 // Do nothing if we have indexed no words (probably because we've not been | 188 // Do nothing if we have indexed no words (probably because we've not been |
190 // initialized yet) or the search string has no words. | 189 // initialized yet) or the search string has no words. |
191 if (word_list_.empty() || lower_words.empty()) { | 190 if (word_list_.empty() || lower_words.empty()) { |
192 search_term_cache_.clear(); // Invalidate the term cache. | 191 search_term_cache_.clear(); // Invalidate the term cache. |
193 return scored_items; | 192 return scored_items; |
194 } | 193 } |
195 | 194 |
196 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep | 195 // Reset used_ flags for search_term_cache_. We use a basic mark-and-sweep |
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
248 // excludes a long list of whitespace.) One could write a scoring | 247 // excludes a long list of whitespace.) One could write a scoring |
249 // function that gives a reasonable order to matches when there | 248 // function that gives a reasonable order to matches when there |
250 // are no terms (i.e., all the words are some form of whitespace), | 249 // are no terms (i.e., all the words are some form of whitespace), |
251 // but this is such a rare edge case that it's not worth the time. | 250 // but this is such a rare edge case that it's not worth the time. |
252 return scored_items; | 251 return scored_items; |
253 } | 252 } |
254 scored_items = | 253 scored_items = |
255 std::for_each( | 254 std::for_each( |
256 history_id_set.begin(), history_id_set.end(), | 255 history_id_set.begin(), history_id_set.end(), |
257 AddHistoryMatch(bookmark_model, template_url_service, *this, | 256 AddHistoryMatch(bookmark_model, template_url_service, *this, |
258 languages, lower_raw_string, lower_raw_terms, | 257 lower_raw_string, lower_raw_terms, |
259 base::Time::Now())).ScoredMatches(); | 258 base::Time::Now())).ScoredMatches(); |
260 | 259 |
261 // Select and sort only the top |max_matches| results. | 260 // Select and sort only the top |max_matches| results. |
262 if (scored_items.size() > max_matches) { | 261 if (scored_items.size() > max_matches) { |
263 std::partial_sort(scored_items.begin(), | 262 std::partial_sort(scored_items.begin(), |
264 scored_items.begin() + | 263 scored_items.begin() + |
265 max_matches, | 264 max_matches, |
266 scored_items.end(), | 265 scored_items.end(), |
267 ScoredHistoryMatch::MatchScoreGreater); | 266 ScoredHistoryMatch::MatchScoreGreater); |
268 scored_items.resize(max_matches); | 267 scored_items.resize(max_matches); |
(...skipping 15 matching lines...) Expand all Loading... |
284 ++cache_iter; | 283 ++cache_iter; |
285 } | 284 } |
286 } | 285 } |
287 | 286 |
288 return scored_items; | 287 return scored_items; |
289 } | 288 } |
290 | 289 |
291 bool URLIndexPrivateData::UpdateURL( | 290 bool URLIndexPrivateData::UpdateURL( |
292 history::HistoryService* history_service, | 291 history::HistoryService* history_service, |
293 const history::URLRow& row, | 292 const history::URLRow& row, |
294 const std::string& languages, | |
295 const std::set<std::string>& scheme_whitelist, | 293 const std::set<std::string>& scheme_whitelist, |
296 base::CancelableTaskTracker* tracker) { | 294 base::CancelableTaskTracker* tracker) { |
297 // The row may or may not already be in our index. If it is not already | 295 // The row may or may not already be in our index. If it is not already |
298 // indexed and it qualifies then it gets indexed. If it is already | 296 // indexed and it qualifies then it gets indexed. If it is already |
299 // indexed and still qualifies then it gets updated, otherwise it | 297 // indexed and still qualifies then it gets updated, otherwise it |
300 // is deleted from the index. | 298 // is deleted from the index. |
301 bool row_was_updated = false; | 299 bool row_was_updated = false; |
302 history::URLID row_id = row.id(); | 300 history::URLID row_id = row.id(); |
303 HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id); | 301 HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id); |
304 if (row_pos == history_info_map_.end()) { | 302 if (row_pos == history_info_map_.end()) { |
305 // This new row should be indexed if it qualifies. | 303 // This new row should be indexed if it qualifies. |
306 history::URLRow new_row(row); | 304 history::URLRow new_row(row); |
307 new_row.set_id(row_id); | 305 new_row.set_id(row_id); |
308 row_was_updated = RowQualifiesAsSignificant(new_row, base::Time()) && | 306 row_was_updated = RowQualifiesAsSignificant(new_row, base::Time()) && |
309 IndexRow(NULL, | 307 IndexRow(nullptr, |
310 history_service, | 308 history_service, |
311 new_row, | 309 new_row, |
312 languages, | |
313 scheme_whitelist, | 310 scheme_whitelist, |
314 tracker); | 311 tracker); |
315 } else if (RowQualifiesAsSignificant(row, base::Time())) { | 312 } else if (RowQualifiesAsSignificant(row, base::Time())) { |
316 // This indexed row still qualifies and will be re-indexed. | 313 // This indexed row still qualifies and will be re-indexed. |
317 // The url won't have changed but the title, visit count, etc. | 314 // The url won't have changed but the title, visit count, etc. |
318 // might have changed. | 315 // might have changed. |
319 history::URLRow& row_to_update = row_pos->second.url_row; | 316 history::URLRow& row_to_update = row_pos->second.url_row; |
320 bool title_updated = row_to_update.title() != row.title(); | 317 bool title_updated = row_to_update.title() != row.title(); |
321 if (row_to_update.visit_count() != row.visit_count() || | 318 if (row_to_update.visit_count() != row.visit_count() || |
322 row_to_update.typed_count() != row.typed_count() || | 319 row_to_update.typed_count() != row.typed_count() || |
323 row_to_update.last_visit() != row.last_visit() || title_updated) { | 320 row_to_update.last_visit() != row.last_visit() || title_updated) { |
324 row_to_update.set_visit_count(row.visit_count()); | 321 row_to_update.set_visit_count(row.visit_count()); |
325 row_to_update.set_typed_count(row.typed_count()); | 322 row_to_update.set_typed_count(row.typed_count()); |
326 row_to_update.set_last_visit(row.last_visit()); | 323 row_to_update.set_last_visit(row.last_visit()); |
327 // If something appears to have changed, update the recent visits | 324 // If something appears to have changed, update the recent visits |
328 // information. | 325 // information. |
329 ScheduleUpdateRecentVisits(history_service, row_id, tracker); | 326 ScheduleUpdateRecentVisits(history_service, row_id, tracker); |
330 // While the URL is guaranteed to remain stable, the title may have | 327 // While the URL is guaranteed to remain stable, the title may have |
331 // changed. If so, then update the index with the changed words. | 328 // changed. If so, then update the index with the changed words. |
332 if (title_updated) { | 329 if (title_updated) { |
333 // Clear all words associated with this row and re-index both the | 330 // Clear all words associated with this row and re-index both the |
334 // URL and title. | 331 // URL and title. |
335 RemoveRowWordsFromIndex(row_to_update); | 332 RemoveRowWordsFromIndex(row_to_update); |
336 row_to_update.set_title(row.title()); | 333 row_to_update.set_title(row.title()); |
337 RowWordStarts word_starts; | 334 RowWordStarts word_starts; |
338 AddRowWordsToIndex(row_to_update, &word_starts, languages); | 335 AddRowWordsToIndex(row_to_update, &word_starts); |
339 word_starts_map_[row_id] = word_starts; | 336 word_starts_map_[row_id] = word_starts; |
340 } | 337 } |
341 row_was_updated = true; | 338 row_was_updated = true; |
342 } | 339 } |
343 } else { | 340 } else { |
344 // This indexed row no longer qualifies and will be de-indexed by | 341 // This indexed row no longer qualifies and will be de-indexed by |
345 // clearing all words associated with this row. | 342 // clearing all words associated with this row. |
346 RemoveRowFromIndex(row); | 343 RemoveRowFromIndex(row); |
347 row_was_updated = true; | 344 row_was_updated = true; |
348 } | 345 } |
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
403 HistoryInfoMapItemHasURL(url)); | 400 HistoryInfoMapItemHasURL(url)); |
404 if (pos == history_info_map_.end()) | 401 if (pos == history_info_map_.end()) |
405 return false; | 402 return false; |
406 RemoveRowFromIndex(pos->second.url_row); | 403 RemoveRowFromIndex(pos->second.url_row); |
407 search_term_cache_.clear(); // This invalidates the cache. | 404 search_term_cache_.clear(); // This invalidates the cache. |
408 return true; | 405 return true; |
409 } | 406 } |
410 | 407 |
411 // static | 408 // static |
412 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RestoreFromFile( | 409 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RestoreFromFile( |
413 const base::FilePath& file_path, | 410 const base::FilePath& file_path) { |
414 const std::string& languages) { | |
415 base::TimeTicks beginning_time = base::TimeTicks::Now(); | 411 base::TimeTicks beginning_time = base::TimeTicks::Now(); |
416 if (!base::PathExists(file_path)) | 412 if (!base::PathExists(file_path)) |
417 return NULL; | 413 return nullptr; |
418 std::string data; | 414 std::string data; |
419 // If there is no cache file then simply give up. This will cause us to | 415 // If there is no cache file then simply give up. This will cause us to |
420 // attempt to rebuild from the history database. | 416 // attempt to rebuild from the history database. |
421 if (!base::ReadFileToString(file_path, &data)) | 417 if (!base::ReadFileToString(file_path, &data)) |
422 return NULL; | 418 return nullptr; |
423 | 419 |
424 scoped_refptr<URLIndexPrivateData> restored_data(new URLIndexPrivateData); | 420 scoped_refptr<URLIndexPrivateData> restored_data(new URLIndexPrivateData); |
425 InMemoryURLIndexCacheItem index_cache; | 421 InMemoryURLIndexCacheItem index_cache; |
426 if (!index_cache.ParseFromArray(data.c_str(), data.size())) { | 422 if (!index_cache.ParseFromArray(data.c_str(), data.size())) { |
427 LOG(WARNING) << "Failed to parse URLIndexPrivateData cache data read from " | 423 LOG(WARNING) << "Failed to parse URLIndexPrivateData cache data read from " |
428 << file_path.value(); | 424 << file_path.value(); |
429 return restored_data; | 425 return restored_data; |
430 } | 426 } |
431 | 427 |
432 if (!restored_data->RestorePrivateData(index_cache, languages)) | 428 if (!restored_data->RestorePrivateData(index_cache)) |
433 return NULL; | 429 return nullptr; |
434 | 430 |
435 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime", | 431 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime", |
436 base::TimeTicks::Now() - beginning_time); | 432 base::TimeTicks::Now() - beginning_time); |
437 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", | 433 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", |
438 restored_data->history_id_word_map_.size()); | 434 restored_data->history_id_word_map_.size()); |
439 UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size()); | 435 UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size()); |
440 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", | 436 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", |
441 restored_data->word_map_.size()); | 437 restored_data->word_map_.size()); |
442 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", | 438 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", |
443 restored_data->char_word_map_.size()); | 439 restored_data->char_word_map_.size()); |
444 if (restored_data->Empty()) | 440 if (restored_data->Empty()) |
445 return NULL; // 'No data' is the same as a failed reload. | 441 return nullptr; // 'No data' is the same as a failed reload. |
446 return restored_data; | 442 return restored_data; |
447 } | 443 } |
448 | 444 |
449 // static | 445 // static |
450 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RebuildFromHistory( | 446 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RebuildFromHistory( |
451 history::HistoryDatabase* history_db, | 447 history::HistoryDatabase* history_db, |
452 const std::string& languages, | |
453 const std::set<std::string>& scheme_whitelist) { | 448 const std::set<std::string>& scheme_whitelist) { |
454 if (!history_db) | 449 if (!history_db) |
455 return NULL; | 450 return nullptr; |
456 | 451 |
457 base::TimeTicks beginning_time = base::TimeTicks::Now(); | 452 base::TimeTicks beginning_time = base::TimeTicks::Now(); |
458 | 453 |
459 scoped_refptr<URLIndexPrivateData> | 454 scoped_refptr<URLIndexPrivateData> |
460 rebuilt_data(new URLIndexPrivateData); | 455 rebuilt_data(new URLIndexPrivateData); |
461 history::URLDatabase::URLEnumerator history_enum; | 456 history::URLDatabase::URLEnumerator history_enum; |
462 if (!history_db->InitURLEnumeratorForSignificant(&history_enum)) | 457 if (!history_db->InitURLEnumeratorForSignificant(&history_enum)) |
463 return NULL; | 458 return nullptr; |
464 rebuilt_data->last_time_rebuilt_from_history_ = base::Time::Now(); | 459 rebuilt_data->last_time_rebuilt_from_history_ = base::Time::Now(); |
465 for (history::URLRow row; history_enum.GetNextURL(&row);) { | 460 for (history::URLRow row; history_enum.GetNextURL(&row);) { |
466 rebuilt_data->IndexRow( | 461 rebuilt_data->IndexRow( |
467 history_db, NULL, row, languages, scheme_whitelist, NULL); | 462 history_db, nullptr, row, scheme_whitelist, nullptr); |
468 } | 463 } |
469 | 464 |
470 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime", | 465 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime", |
471 base::TimeTicks::Now() - beginning_time); | 466 base::TimeTicks::Now() - beginning_time); |
472 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", | 467 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", |
473 rebuilt_data->history_id_word_map_.size()); | 468 rebuilt_data->history_id_word_map_.size()); |
474 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", | 469 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", |
475 rebuilt_data->word_map_.size()); | 470 rebuilt_data->word_map_.size()); |
476 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", | 471 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", |
477 rebuilt_data->char_word_map_.size()); | 472 rebuilt_data->char_word_map_.size()); |
(...skipping 217 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
695 word_id_set.swap(new_word_id_set); | 690 word_id_set.swap(new_word_id_set); |
696 } | 691 } |
697 } | 692 } |
698 return word_id_set; | 693 return word_id_set; |
699 } | 694 } |
700 | 695 |
701 bool URLIndexPrivateData::IndexRow( | 696 bool URLIndexPrivateData::IndexRow( |
702 history::HistoryDatabase* history_db, | 697 history::HistoryDatabase* history_db, |
703 history::HistoryService* history_service, | 698 history::HistoryService* history_service, |
704 const history::URLRow& row, | 699 const history::URLRow& row, |
705 const std::string& languages, | |
706 const std::set<std::string>& scheme_whitelist, | 700 const std::set<std::string>& scheme_whitelist, |
707 base::CancelableTaskTracker* tracker) { | 701 base::CancelableTaskTracker* tracker) { |
708 const GURL& gurl(row.url()); | 702 const GURL& gurl(row.url()); |
709 | 703 |
710 // Index only URLs with a whitelisted scheme. | 704 // Index only URLs with a whitelisted scheme. |
711 if (!URLSchemeIsWhitelisted(gurl, scheme_whitelist)) | 705 if (!URLSchemeIsWhitelisted(gurl, scheme_whitelist)) |
712 return false; | 706 return false; |
713 | 707 |
714 history::URLID row_id = row.id(); | 708 history::URLID row_id = row.id(); |
715 // Strip out username and password before saving and indexing. | 709 // Strip out username and password before saving and indexing. |
716 base::string16 url(url_formatter::FormatUrl( | 710 base::string16 url(url_formatter::FormatUrl( |
717 gurl, languages, url_formatter::kFormatUrlOmitUsernamePassword, | 711 gurl, url_formatter::kFormatUrlOmitUsernamePassword, |
718 net::UnescapeRule::NONE, nullptr, nullptr, nullptr)); | 712 net::UnescapeRule::NONE, nullptr, nullptr, nullptr)); |
719 | 713 |
720 HistoryID history_id = static_cast<HistoryID>(row_id); | 714 HistoryID history_id = static_cast<HistoryID>(row_id); |
721 DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max()); | 715 DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max()); |
722 | 716 |
723 // Add the row for quick lookup in the history info store. | 717 // Add the row for quick lookup in the history info store. |
724 history::URLRow new_row(GURL(url), row_id); | 718 history::URLRow new_row(GURL(url), row_id); |
725 new_row.set_visit_count(row.visit_count()); | 719 new_row.set_visit_count(row.visit_count()); |
726 new_row.set_typed_count(row.typed_count()); | 720 new_row.set_typed_count(row.typed_count()); |
727 new_row.set_last_visit(row.last_visit()); | 721 new_row.set_last_visit(row.last_visit()); |
728 new_row.set_title(row.title()); | 722 new_row.set_title(row.title()); |
729 history_info_map_[history_id].url_row = new_row; | 723 history_info_map_[history_id].url_row = new_row; |
730 | 724 |
731 // Index the words contained in the URL and title of the row. | 725 // Index the words contained in the URL and title of the row. |
732 RowWordStarts word_starts; | 726 RowWordStarts word_starts; |
733 AddRowWordsToIndex(new_row, &word_starts, languages); | 727 AddRowWordsToIndex(new_row, &word_starts); |
734 word_starts_map_[history_id] = word_starts; | 728 word_starts_map_[history_id] = word_starts; |
735 | 729 |
736 // Update the recent visits information or schedule the update | 730 // Update the recent visits information or schedule the update |
737 // as appropriate. | 731 // as appropriate. |
738 if (history_db) { | 732 if (history_db) { |
739 // We'd like to check that we're on the history DB thread. | 733 // We'd like to check that we're on the history DB thread. |
740 // However, unittest code actually calls this on the UI thread. | 734 // However, unittest code actually calls this on the UI thread. |
741 // So we don't do any thread checks. | 735 // So we don't do any thread checks. |
742 history::VisitVector recent_visits; | 736 history::VisitVector recent_visits; |
743 // Make sure the private data is going to get as many recent visits as | 737 // Make sure the private data is going to get as many recent visits as |
744 // ScoredHistoryMatch::GetFrequency() hopes to use. | 738 // ScoredHistoryMatch::GetFrequency() hopes to use. |
745 DCHECK_GE(kMaxVisitsToStoreInCache, ScoredHistoryMatch::kMaxVisitsToScore); | 739 DCHECK_GE(kMaxVisitsToStoreInCache, ScoredHistoryMatch::kMaxVisitsToScore); |
746 if (history_db->GetMostRecentVisitsForURL(row_id, | 740 if (history_db->GetMostRecentVisitsForURL(row_id, |
747 kMaxVisitsToStoreInCache, | 741 kMaxVisitsToStoreInCache, |
748 &recent_visits)) | 742 &recent_visits)) |
749 UpdateRecentVisits(row_id, recent_visits); | 743 UpdateRecentVisits(row_id, recent_visits); |
750 } else { | 744 } else { |
751 DCHECK(tracker); | 745 DCHECK(tracker); |
752 DCHECK(history_service); | 746 DCHECK(history_service); |
753 ScheduleUpdateRecentVisits(history_service, row_id, tracker); | 747 ScheduleUpdateRecentVisits(history_service, row_id, tracker); |
754 } | 748 } |
755 | 749 |
756 return true; | 750 return true; |
757 } | 751 } |
758 | 752 |
759 void URLIndexPrivateData::AddRowWordsToIndex(const history::URLRow& row, | 753 void URLIndexPrivateData::AddRowWordsToIndex(const history::URLRow& row, |
760 RowWordStarts* word_starts, | 754 RowWordStarts* word_starts) { |
761 const std::string& languages) { | |
762 HistoryID history_id = static_cast<HistoryID>(row.id()); | 755 HistoryID history_id = static_cast<HistoryID>(row.id()); |
763 // Split URL into individual, unique words then add in the title words. | 756 // Split URL into individual, unique words then add in the title words. |
764 const GURL& gurl(row.url()); | 757 const GURL& gurl(row.url()); |
765 const base::string16& url = | 758 const base::string16& url = |
766 bookmarks::CleanUpUrlForMatching(gurl, languages, NULL); | 759 bookmarks::CleanUpUrlForMatching(gurl, nullptr); |
767 String16Set url_words = String16SetFromString16(url, | 760 String16Set url_words = String16SetFromString16(url, |
768 word_starts ? &word_starts->url_word_starts_ : NULL); | 761 word_starts ? &word_starts->url_word_starts_ : nullptr); |
769 const base::string16& title = bookmarks::CleanUpTitleForMatching(row.title()); | 762 const base::string16& title = bookmarks::CleanUpTitleForMatching(row.title()); |
770 String16Set title_words = String16SetFromString16(title, | 763 String16Set title_words = String16SetFromString16(title, |
771 word_starts ? &word_starts->title_word_starts_ : NULL); | 764 word_starts ? &word_starts->title_word_starts_ : nullptr); |
772 String16Set words = base::STLSetUnion<String16Set>(url_words, title_words); | 765 String16Set words = base::STLSetUnion<String16Set>(url_words, title_words); |
773 for (String16Set::iterator word_iter = words.begin(); | 766 for (String16Set::iterator word_iter = words.begin(); |
774 word_iter != words.end(); ++word_iter) | 767 word_iter != words.end(); ++word_iter) |
775 AddWordToIndex(*word_iter, history_id); | 768 AddWordToIndex(*word_iter, history_id); |
776 | 769 |
777 search_term_cache_.clear(); // Invalidate the term cache. | 770 search_term_cache_.clear(); // Invalidate the term cache. |
778 } | 771 } |
779 | 772 |
780 void URLIndexPrivateData::AddWordToIndex(const base::string16& term, | 773 void URLIndexPrivateData::AddWordToIndex(const base::string16& term, |
781 HistoryID history_id) { | 774 HistoryID history_id) { |
(...skipping 257 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1039 for (WordStarts::const_iterator i = word_starts.url_word_starts_.begin(); | 1032 for (WordStarts::const_iterator i = word_starts.url_word_starts_.begin(); |
1040 i != word_starts.url_word_starts_.end(); ++i) | 1033 i != word_starts.url_word_starts_.end(); ++i) |
1041 map_entry->add_url_word_starts(*i); | 1034 map_entry->add_url_word_starts(*i); |
1042 for (WordStarts::const_iterator i = word_starts.title_word_starts_.begin(); | 1035 for (WordStarts::const_iterator i = word_starts.title_word_starts_.begin(); |
1043 i != word_starts.title_word_starts_.end(); ++i) | 1036 i != word_starts.title_word_starts_.end(); ++i) |
1044 map_entry->add_title_word_starts(*i); | 1037 map_entry->add_title_word_starts(*i); |
1045 } | 1038 } |
1046 } | 1039 } |
1047 | 1040 |
1048 bool URLIndexPrivateData::RestorePrivateData( | 1041 bool URLIndexPrivateData::RestorePrivateData( |
1049 const InMemoryURLIndexCacheItem& cache, | 1042 const InMemoryURLIndexCacheItem& cache) { |
1050 const std::string& languages) { | |
1051 last_time_rebuilt_from_history_ = | 1043 last_time_rebuilt_from_history_ = |
1052 base::Time::FromInternalValue(cache.last_rebuild_timestamp()); | 1044 base::Time::FromInternalValue(cache.last_rebuild_timestamp()); |
1053 const base::TimeDelta rebuilt_ago = | 1045 const base::TimeDelta rebuilt_ago = |
1054 base::Time::Now() - last_time_rebuilt_from_history_; | 1046 base::Time::Now() - last_time_rebuilt_from_history_; |
1055 if ((rebuilt_ago > base::TimeDelta::FromDays(7)) || | 1047 if ((rebuilt_ago > base::TimeDelta::FromDays(7)) || |
1056 (rebuilt_ago < base::TimeDelta::FromDays(-1))) { | 1048 (rebuilt_ago < base::TimeDelta::FromDays(-1))) { |
1057 // Cache is more than a week old or, somehow, from some time in the future. | 1049 // Cache is more than a week old or, somehow, from some time in the future. |
1058 // It's probably a good time to rebuild the index from history to | 1050 // It's probably a good time to rebuild the index from history to |
1059 // allow synced entries to now appear, expired entries to disappear, etc. | 1051 // allow synced entries to now appear, expired entries to disappear, etc. |
1060 // Allow one day in the future to make the cache not rebuild on simple | 1052 // Allow one day in the future to make the cache not rebuild on simple |
1061 // system clock changes such as time zone changes. | 1053 // system clock changes such as time zone changes. |
1062 return false; | 1054 return false; |
1063 } | 1055 } |
1064 if (cache.has_version()) { | 1056 if (cache.has_version()) { |
1065 if (cache.version() < kCurrentCacheFileVersion) { | 1057 if (cache.version() < kCurrentCacheFileVersion) { |
1066 // Don't try to restore an old format cache file. (This will cause | 1058 // Don't try to restore an old format cache file. (This will cause |
1067 // the InMemoryURLIndex to schedule rebuilding the URLIndexPrivateData | 1059 // the InMemoryURLIndex to schedule rebuilding the URLIndexPrivateData |
1068 // from history.) | 1060 // from history.) |
1069 return false; | 1061 return false; |
1070 } | 1062 } |
1071 restored_cache_version_ = cache.version(); | 1063 restored_cache_version_ = cache.version(); |
1072 } | 1064 } |
1073 return RestoreWordList(cache) && RestoreWordMap(cache) && | 1065 return RestoreWordList(cache) && RestoreWordMap(cache) && |
1074 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) && | 1066 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) && |
1075 RestoreHistoryInfoMap(cache) && RestoreWordStartsMap(cache, languages); | 1067 RestoreHistoryInfoMap(cache) && RestoreWordStartsMap(cache); |
1076 } | 1068 } |
1077 | 1069 |
1078 bool URLIndexPrivateData::RestoreWordList( | 1070 bool URLIndexPrivateData::RestoreWordList( |
1079 const InMemoryURLIndexCacheItem& cache) { | 1071 const InMemoryURLIndexCacheItem& cache) { |
1080 if (!cache.has_word_list()) | 1072 if (!cache.has_word_list()) |
1081 return false; | 1073 return false; |
1082 const WordListItem& list_item(cache.word_list()); | 1074 const WordListItem& list_item(cache.word_list()); |
1083 uint32_t expected_item_count = list_item.word_count(); | 1075 uint32_t expected_item_count = list_item.word_count(); |
1084 uint32_t actual_item_count = list_item.word_size(); | 1076 uint32_t actual_item_count = list_item.word_size(); |
1085 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1077 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
(...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1197 visits.push_back(std::make_pair( | 1189 visits.push_back(std::make_pair( |
1198 base::Time::FromInternalValue(iter->visits(i).visit_time()), | 1190 base::Time::FromInternalValue(iter->visits(i).visit_time()), |
1199 ui::PageTransitionFromInt(iter->visits(i).transition_type()))); | 1191 ui::PageTransitionFromInt(iter->visits(i).transition_type()))); |
1200 } | 1192 } |
1201 history_info_map_[history_id].visits = visits; | 1193 history_info_map_[history_id].visits = visits; |
1202 } | 1194 } |
1203 return true; | 1195 return true; |
1204 } | 1196 } |
1205 | 1197 |
1206 bool URLIndexPrivateData::RestoreWordStartsMap( | 1198 bool URLIndexPrivateData::RestoreWordStartsMap( |
1207 const InMemoryURLIndexCacheItem& cache, | 1199 const InMemoryURLIndexCacheItem& cache) { |
1208 const std::string& languages) { | |
1209 // Note that this function must be called after RestoreHistoryInfoMap() has | 1200 // Note that this function must be called after RestoreHistoryInfoMap() has |
1210 // been run as the word starts may have to be recalculated from the urls and | 1201 // been run as the word starts may have to be recalculated from the urls and |
1211 // page titles. | 1202 // page titles. |
1212 if (cache.has_word_starts_map()) { | 1203 if (cache.has_word_starts_map()) { |
1213 const WordStartsMapItem& list_item(cache.word_starts_map()); | 1204 const WordStartsMapItem& list_item(cache.word_starts_map()); |
1214 uint32_t expected_item_count = list_item.item_count(); | 1205 uint32_t expected_item_count = list_item.item_count(); |
1215 uint32_t actual_item_count = list_item.word_starts_map_entry_size(); | 1206 uint32_t actual_item_count = list_item.word_starts_map_entry_size(); |
1216 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1207 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
1217 return false; | 1208 return false; |
1218 const RepeatedPtrField<WordStartsMapEntry>& | 1209 const RepeatedPtrField<WordStartsMapEntry>& |
(...skipping 15 matching lines...) Expand all Loading... |
1234 word_starts_map_[history_id] = word_starts; | 1225 word_starts_map_[history_id] = word_starts; |
1235 } | 1226 } |
1236 } else { | 1227 } else { |
1237 // Since the cache did not contain any word starts we must rebuild them from | 1229 // Since the cache did not contain any word starts we must rebuild them from |
1238 // the URL and page titles. | 1229 // the URL and page titles. |
1239 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); | 1230 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); |
1240 iter != history_info_map_.end(); ++iter) { | 1231 iter != history_info_map_.end(); ++iter) { |
1241 RowWordStarts word_starts; | 1232 RowWordStarts word_starts; |
1242 const history::URLRow& row(iter->second.url_row); | 1233 const history::URLRow& row(iter->second.url_row); |
1243 const base::string16& url = | 1234 const base::string16& url = |
1244 bookmarks::CleanUpUrlForMatching(row.url(), languages, NULL); | 1235 bookmarks::CleanUpUrlForMatching(row.url(), nullptr); |
1245 String16VectorFromString16(url, false, &word_starts.url_word_starts_); | 1236 String16VectorFromString16(url, false, &word_starts.url_word_starts_); |
1246 const base::string16& title = | 1237 const base::string16& title = |
1247 bookmarks::CleanUpTitleForMatching(row.title()); | 1238 bookmarks::CleanUpTitleForMatching(row.title()); |
1248 String16VectorFromString16(title, false, &word_starts.title_word_starts_); | 1239 String16VectorFromString16(title, false, &word_starts.title_word_starts_); |
1249 word_starts_map_[iter->first] = word_starts; | 1240 word_starts_map_[iter->first] = word_starts; |
1250 } | 1241 } |
1251 } | 1242 } |
1252 return true; | 1243 return true; |
1253 } | 1244 } |
1254 | 1245 |
(...skipping 21 matching lines...) Expand all Loading... |
1276 | 1267 |
1277 URLIndexPrivateData::SearchTermCacheItem::~SearchTermCacheItem() { | 1268 URLIndexPrivateData::SearchTermCacheItem::~SearchTermCacheItem() { |
1278 } | 1269 } |
1279 | 1270 |
1280 // URLIndexPrivateData::AddHistoryMatch ---------------------------------------- | 1271 // URLIndexPrivateData::AddHistoryMatch ---------------------------------------- |
1281 | 1272 |
1282 URLIndexPrivateData::AddHistoryMatch::AddHistoryMatch( | 1273 URLIndexPrivateData::AddHistoryMatch::AddHistoryMatch( |
1283 bookmarks::BookmarkModel* bookmark_model, | 1274 bookmarks::BookmarkModel* bookmark_model, |
1284 TemplateURLService* template_url_service, | 1275 TemplateURLService* template_url_service, |
1285 const URLIndexPrivateData& private_data, | 1276 const URLIndexPrivateData& private_data, |
1286 const std::string& languages, | |
1287 const base::string16& lower_string, | 1277 const base::string16& lower_string, |
1288 const String16Vector& lower_terms, | 1278 const String16Vector& lower_terms, |
1289 const base::Time now) | 1279 const base::Time now) |
1290 : bookmark_model_(bookmark_model), | 1280 : bookmark_model_(bookmark_model), |
1291 template_url_service_(template_url_service), | 1281 template_url_service_(template_url_service), |
1292 private_data_(private_data), | 1282 private_data_(private_data), |
1293 languages_(languages), | |
1294 lower_string_(lower_string), | 1283 lower_string_(lower_string), |
1295 lower_terms_(lower_terms), | 1284 lower_terms_(lower_terms), |
1296 now_(now) { | 1285 now_(now) { |
1297 // Calculate offsets for each term. For instance, the offset for | 1286 // Calculate offsets for each term. For instance, the offset for |
1298 // ".net" should be 1, indicating that the actual word-part of the term | 1287 // ".net" should be 1, indicating that the actual word-part of the term |
1299 // starts at offset 1. | 1288 // starts at offset 1. |
1300 lower_terms_to_word_starts_offsets_.resize(lower_terms_.size(), 0u); | 1289 lower_terms_to_word_starts_offsets_.resize(lower_terms_.size(), 0u); |
1301 for (size_t i = 0; i < lower_terms_.size(); ++i) { | 1290 for (size_t i = 0; i < lower_terms_.size(); ++i) { |
1302 base::i18n::BreakIterator iter(lower_terms_[i], | 1291 base::i18n::BreakIterator iter(lower_terms_[i], |
1303 base::i18n::BreakIterator::BREAK_WORD); | 1292 base::i18n::BreakIterator::BREAK_WORD); |
(...skipping 19 matching lines...) Expand all Loading... |
1323 const HistoryID history_id) { | 1312 const HistoryID history_id) { |
1324 HistoryInfoMap::const_iterator hist_pos = | 1313 HistoryInfoMap::const_iterator hist_pos = |
1325 private_data_.history_info_map_.find(history_id); | 1314 private_data_.history_info_map_.find(history_id); |
1326 if (hist_pos != private_data_.history_info_map_.end()) { | 1315 if (hist_pos != private_data_.history_info_map_.end()) { |
1327 const history::URLRow& hist_item = hist_pos->second.url_row; | 1316 const history::URLRow& hist_item = hist_pos->second.url_row; |
1328 const VisitInfoVector& visits = hist_pos->second.visits; | 1317 const VisitInfoVector& visits = hist_pos->second.visits; |
1329 WordStartsMap::const_iterator starts_pos = | 1318 WordStartsMap::const_iterator starts_pos = |
1330 private_data_.word_starts_map_.find(history_id); | 1319 private_data_.word_starts_map_.find(history_id); |
1331 DCHECK(starts_pos != private_data_.word_starts_map_.end()); | 1320 DCHECK(starts_pos != private_data_.word_starts_map_.end()); |
1332 ScoredHistoryMatch match( | 1321 ScoredHistoryMatch match( |
1333 hist_item, visits, languages_, lower_string_, lower_terms_, | 1322 hist_item, visits, lower_string_, lower_terms_, |
1334 lower_terms_to_word_starts_offsets_, starts_pos->second, | 1323 lower_terms_to_word_starts_offsets_, starts_pos->second, |
1335 bookmark_model_ && bookmark_model_->IsBookmarked(hist_item.url()), | 1324 bookmark_model_ && bookmark_model_->IsBookmarked(hist_item.url()), |
1336 template_url_service_, now_); | 1325 template_url_service_, now_); |
1337 if (match.raw_score > 0) | 1326 if (match.raw_score > 0) |
1338 scored_matches_.push_back(match); | 1327 scored_matches_.push_back(match); |
1339 } | 1328 } |
1340 } | 1329 } |
1341 | 1330 |
1342 | 1331 |
1343 // URLIndexPrivateData::HistoryItemFactorGreater ------------------------------- | 1332 // URLIndexPrivateData::HistoryItemFactorGreater ------------------------------- |
(...skipping 20 matching lines...) Expand all Loading... |
1364 // First cut: typed count, visit count, recency. | 1353 // First cut: typed count, visit count, recency. |
1365 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks | 1354 // TODO(mrossetti): This is too simplistic. Consider an approach which ranks |
1366 // recently visited (within the last 12/24 hours) as highly important. Get | 1355 // recently visited (within the last 12/24 hours) as highly important. Get |
1367 // input from mpearson. | 1356 // input from mpearson. |
1368 if (r1.typed_count() != r2.typed_count()) | 1357 if (r1.typed_count() != r2.typed_count()) |
1369 return (r1.typed_count() > r2.typed_count()); | 1358 return (r1.typed_count() > r2.typed_count()); |
1370 if (r1.visit_count() != r2.visit_count()) | 1359 if (r1.visit_count() != r2.visit_count()) |
1371 return (r1.visit_count() > r2.visit_count()); | 1360 return (r1.visit_count() > r2.visit_count()); |
1372 return (r1.last_visit() > r2.last_visit()); | 1361 return (r1.last_visit() > r2.last_visit()); |
1373 } | 1362 } |
OLD | NEW |