| OLD | NEW |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/browser/history/in_memory_url_index.h" | 5 #include "chrome/browser/history/in_memory_url_index.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <functional> | 8 #include <functional> |
| 9 #include <iterator> | 9 #include <iterator> |
| 10 #include <limits> | 10 #include <limits> |
| 11 #include <numeric> | 11 #include <numeric> |
| 12 | 12 |
| 13 #include "base/file_util.h" | 13 #include "base/file_util.h" |
| 14 #include "base/i18n/case_conversion.h" | 14 #include "base/i18n/case_conversion.h" |
| 15 #include "base/metrics/histogram.h" | 15 #include "base/metrics/histogram.h" |
| 16 #include "base/string_util.h" | |
| 17 #include "base/threading/thread_restrictions.h" | 16 #include "base/threading/thread_restrictions.h" |
| 18 #include "base/time.h" | |
| 19 #include "base/utf_string_conversions.h" | 17 #include "base/utf_string_conversions.h" |
| 20 #include "chrome/browser/autocomplete/autocomplete.h" | 18 #include "chrome/browser/autocomplete/autocomplete.h" |
| 21 #include "chrome/browser/autocomplete/history_provider_util.h" | 19 #include "chrome/browser/history/history_notifications.h" |
| 22 #include "chrome/browser/history/url_database.h" | 20 #include "chrome/browser/history/url_database.h" |
| 23 #include "chrome/browser/profiles/profile.h" | 21 #include "chrome/common/chrome_notification_types.h" |
| 24 #include "chrome/common/url_constants.h" | 22 #include "chrome/common/url_constants.h" |
| 25 #include "googleurl/src/url_parse.h" | 23 #include "content/public/browser/notification_service.h" |
| 26 #include "googleurl/src/url_util.h" | |
| 27 #include "net/base/escape.h" | |
| 28 #include "net/base/net_util.h" | 24 #include "net/base/net_util.h" |
| 29 #include "third_party/protobuf/src/google/protobuf/repeated_field.h" | |
| 30 #include "ui/base/l10n/l10n_util.h" | 25 #include "ui/base/l10n/l10n_util.h" |
| 31 | 26 |
| 32 using google::protobuf::RepeatedField; | 27 using google::protobuf::RepeatedField; |
| 33 using google::protobuf::RepeatedPtrField; | 28 using google::protobuf::RepeatedPtrField; |
| 34 using in_memory_url_index::InMemoryURLIndexCacheItem; | 29 using in_memory_url_index::InMemoryURLIndexCacheItem; |
| 35 | 30 |
| 36 namespace history { | 31 namespace history { |
| 37 | 32 |
| 38 typedef imui::InMemoryURLIndexCacheItem_WordListItem WordListItem; | 33 typedef imui::InMemoryURLIndexCacheItem_WordListItem WordListItem; |
| 39 typedef imui::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry WordMapEntry; | 34 typedef imui::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry WordMapEntry; |
| (...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 105 return 0; | 100 return 0; |
| 106 int score = kScoreRank[i]; | 101 int score = kScoreRank[i]; |
| 107 if (i > 0) { | 102 if (i > 0) { |
| 108 score += (value - value_ranks[i]) * | 103 score += (value - value_ranks[i]) * |
| 109 (kScoreRank[i - 1] - kScoreRank[i]) / | 104 (kScoreRank[i - 1] - kScoreRank[i]) / |
| 110 (value_ranks[i - 1] - value_ranks[i]); | 105 (value_ranks[i - 1] - value_ranks[i]); |
| 111 } | 106 } |
| 112 return score; | 107 return score; |
| 113 } | 108 } |
| 114 | 109 |
| 115 InMemoryURLIndex::InMemoryURLIndex(const FilePath& history_dir) | 110 InMemoryURLIndex::InMemoryURLIndex(Profile* profile, |
| 111 const FilePath& history_dir) |
| 116 : history_dir_(history_dir), | 112 : history_dir_(history_dir), |
| 117 private_data_(new URLIndexPrivateData), | 113 private_data_(new URLIndexPrivateData), |
| 118 cached_at_shutdown_(false) { | 114 cached_at_shutdown_(false) { |
| 119 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_); | 115 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_); |
| 116 if (profile) { |
| 117 content::Source<Profile> source(profile); |
| 118 registrar_.Add(this, chrome::NOTIFICATION_HISTORY_URL_VISITED, source); |
| 119 registrar_.Add(this, chrome::NOTIFICATION_HISTORY_TYPED_URLS_MODIFIED, |
| 120 source); |
| 121 registrar_.Add(this, chrome::NOTIFICATION_HISTORY_URLS_DELETED, source); |
| 122 } |
| 120 } | 123 } |
| 121 | 124 |
| 122 // Called only by unit tests. | 125 // Called only by unit tests. |
| 123 InMemoryURLIndex::InMemoryURLIndex() | 126 InMemoryURLIndex::InMemoryURLIndex() |
| 124 : private_data_(new URLIndexPrivateData), | 127 : private_data_(new URLIndexPrivateData), |
| 125 cached_at_shutdown_(false) { | 128 cached_at_shutdown_(false) { |
| 126 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_); | 129 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_); |
| 127 } | 130 } |
| 128 | 131 |
| 129 InMemoryURLIndex::~InMemoryURLIndex() { | 132 InMemoryURLIndex::~InMemoryURLIndex() { |
| (...skipping 18 matching lines...) Expand all Loading... |
| 148 // Indexing | 151 // Indexing |
| 149 | 152 |
| 150 bool InMemoryURLIndex::Init(URLDatabase* history_db, | 153 bool InMemoryURLIndex::Init(URLDatabase* history_db, |
| 151 const std::string& languages) { | 154 const std::string& languages) { |
| 152 // TODO(mrossetti): Register for profile/language change notifications. | 155 // TODO(mrossetti): Register for profile/language change notifications. |
| 153 languages_ = languages; | 156 languages_ = languages; |
| 154 return ReloadFromHistory(history_db, false); | 157 return ReloadFromHistory(history_db, false); |
| 155 } | 158 } |
| 156 | 159 |
| 157 void InMemoryURLIndex::ShutDown() { | 160 void InMemoryURLIndex::ShutDown() { |
| 158 // Write our cache. | 161 registrar_.RemoveAll(); |
| 159 SaveToCacheFile(); | 162 SaveToCacheFile(); |
| 160 cached_at_shutdown_ = true; | 163 cached_at_shutdown_ = true; |
| 161 } | 164 } |
| 162 | 165 |
| 166 void InMemoryURLIndex::Observe(int type, |
| 167 const content::NotificationSource& source, |
| 168 const content::NotificationDetails& details) { |
| 169 switch (type) { |
| 170 case chrome::NOTIFICATION_HISTORY_URL_VISITED: |
| 171 OnURLVisited(content::Details<URLVisitedDetails>(details).ptr()); |
| 172 break; |
| 173 case chrome::NOTIFICATION_HISTORY_TYPED_URLS_MODIFIED: |
| 174 OnURLsModified( |
| 175 content::Details<history::URLsModifiedDetails>(details).ptr()); |
| 176 break; |
| 177 case chrome::NOTIFICATION_HISTORY_URLS_DELETED: |
| 178 OnURLsDeleted( |
| 179 content::Details<history::URLsDeletedDetails>(details).ptr()); |
| 180 break; |
| 181 default: |
| 182 // For simplicity, the unit tests send us all notifications, even when |
| 183 // we haven't registered for them, so don't assert here. |
| 184 break; |
| 185 } |
| 186 } |
| 187 |
| 188 void InMemoryURLIndex::OnURLVisited(const URLVisitedDetails* details) { |
| 189 UpdateURL(details->row); |
| 190 } |
| 191 |
| 192 void InMemoryURLIndex::OnURLsModified(const URLsModifiedDetails* details) { |
| 193 for (std::vector<history::URLRow>::const_iterator row = |
| 194 details->changed_urls.begin(); |
| 195 row != details->changed_urls.end(); ++row) |
| 196 UpdateURL(*row); |
| 197 } |
| 198 |
| 199 void InMemoryURLIndex::OnURLsDeleted(const URLsDeletedDetails* details) { |
| 200 if (details->all_history) { |
| 201 ClearPrivateData(); |
| 202 } else { |
| 203 for (std::vector<URLRow>::const_iterator row = details->rows.begin(); |
| 204 row != details->rows.end(); ++row) |
| 205 DeleteURL(*row); |
| 206 } |
| 207 } |
| 208 |
| 163 void InMemoryURLIndex::IndexRow(const URLRow& row) { | 209 void InMemoryURLIndex::IndexRow(const URLRow& row) { |
| 164 const GURL& gurl(row.url()); | 210 const GURL& gurl(row.url()); |
| 165 | 211 |
| 166 // Index only URLs with a whitelisted scheme. | 212 // Index only URLs with a whitelisted scheme. |
| 167 if (!InMemoryURLIndex::URLSchemeIsWhitelisted(gurl)) | 213 if (!InMemoryURLIndex::URLSchemeIsWhitelisted(gurl)) |
| 168 return; | 214 return; |
| 169 | 215 |
| 170 URLID row_id = row.id(); | 216 URLID row_id = row.id(); |
| 171 // Strip out username and password before saving and indexing. | 217 // Strip out username and password before saving and indexing. |
| 172 string16 url(net::FormatUrl(gurl, languages_, | 218 string16 url(net::FormatUrl(gurl, languages_, |
| (...skipping 169 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 342 int size = data.size(); | 388 int size = data.size(); |
| 343 if (file_util::WriteFile(file_path, data.c_str(), size) != size) { | 389 if (file_util::WriteFile(file_path, data.c_str(), size) != size) { |
| 344 LOG(WARNING) << "Failed to write " << file_path.value(); | 390 LOG(WARNING) << "Failed to write " << file_path.value(); |
| 345 return false; | 391 return false; |
| 346 } | 392 } |
| 347 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime", | 393 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime", |
| 348 base::TimeTicks::Now() - beginning_time); | 394 base::TimeTicks::Now() - beginning_time); |
| 349 return true; | 395 return true; |
| 350 } | 396 } |
| 351 | 397 |
| 352 void InMemoryURLIndex::UpdateURL(URLID row_id, const URLRow& row) { | 398 void InMemoryURLIndex::UpdateURL(const URLRow& row) { |
| 353 // The row may or may not already be in our index. If it is not already | 399 // The row may or may not already be in our index. If it is not already |
| 354 // indexed and it qualifies then it gets indexed. If it is already | 400 // indexed and it qualifies then it gets indexed. If it is already |
| 355 // indexed and still qualifies then it gets updated, otherwise it | 401 // indexed and still qualifies then it gets updated, otherwise it |
| 356 // is deleted from the index. | 402 // is deleted from the index. |
| 357 HistoryInfoMap::iterator row_pos = | 403 HistoryInfoMap::iterator row_pos = |
| 358 private_data_->history_info_map_.find(row_id); | 404 private_data_->history_info_map_.find(row.id()); |
| 359 if (row_pos == private_data_->history_info_map_.end()) { | 405 if (row_pos == private_data_->history_info_map_.end()) { |
| 360 // This new row should be indexed if it qualifies. | 406 // This new row should be indexed if it qualifies. |
| 361 URLRow new_row(row); | 407 if (RowQualifiesAsSignificant(row, base::Time())) |
| 362 new_row.set_id(row_id); | 408 IndexRow(row); |
| 363 if (RowQualifiesAsSignificant(new_row, base::Time())) | |
| 364 IndexRow(new_row); | |
| 365 } else if (RowQualifiesAsSignificant(row, base::Time())) { | 409 } else if (RowQualifiesAsSignificant(row, base::Time())) { |
| 366 // This indexed row still qualifies and will be re-indexed. | 410 // This indexed row still qualifies and will be re-indexed. |
| 367 // The url won't have changed but the title, visit count, etc. | 411 // The url won't have changed but the title, visit count, etc. |
| 368 // might have changed. | 412 // might have changed. |
| 369 URLRow& updated_row = row_pos->second; | 413 URLRow& old_row = row_pos->second; |
| 370 updated_row.set_visit_count(row.visit_count()); | 414 old_row.set_visit_count(row.visit_count()); |
| 371 updated_row.set_typed_count(row.typed_count()); | 415 old_row.set_typed_count(row.typed_count()); |
| 372 updated_row.set_last_visit(row.last_visit()); | 416 old_row.set_last_visit(row.last_visit()); |
| 373 // While the URL is guaranteed to remain stable, the title may have changed. | 417 // While the URL is guaranteed to remain stable, the title may have changed. |
| 374 // If so, then we need to update the index with the changed words. | 418 // If so, then we need to update the index with the changed words. |
| 375 if (updated_row.title() != row.title()) { | 419 if (old_row.title() != row.title()) { |
| 376 // Clear all words associated with this row and re-index both the | 420 // Clear all words associated with this row and re-index both the |
| 377 // URL and title. | 421 // URL and title. |
| 378 RemoveRowWordsFromIndex(updated_row); | 422 RemoveRowWordsFromIndex(row); |
| 379 updated_row.set_title(row.title()); | 423 old_row.set_title(row.title()); |
| 380 AddRowWordsToIndex(updated_row); | 424 AddRowWordsToIndex(old_row); |
| 381 } | 425 } |
| 382 } else { | 426 } else { |
| 383 // This indexed row no longer qualifies and will be de-indexed by | 427 // This indexed row no longer qualifies and will be de-indexed by |
| 384 // clearing all words associated with this row. | 428 // clearing all words associated with this row. |
| 385 URLRow& removed_row = row_pos->second; | 429 RemoveRowFromIndex(row); |
| 386 RemoveRowFromIndex(removed_row); | |
| 387 } | 430 } |
| 388 // This invalidates the cache. | 431 // This invalidates the cache. |
| 389 search_term_cache_.clear(); | 432 search_term_cache_.clear(); |
| 390 } | 433 } |
| 391 | 434 |
| 392 void InMemoryURLIndex::DeleteURL(URLID row_id) { | 435 void InMemoryURLIndex::DeleteURL(const URLRow& row) { |
| 393 // Note that this does not remove any reference to this row from the | 436 RemoveRowFromIndex(row); |
| 394 // word_id_history_map_. That map will continue to contain (and return) | 437 search_term_cache_.clear(); // Invalidate the word cache. |
| 395 // hits against this row until that map is rebuilt, but since the | |
| 396 // history_info_map_ no longer references the row no erroneous results | |
| 397 // will propagate to the user. | |
| 398 private_data_->history_info_map_.erase(row_id); | |
| 399 // This invalidates the word cache. | |
| 400 search_term_cache_.clear(); | |
| 401 } | 438 } |
| 402 | 439 |
| 403 // Searching | 440 // Searching |
| 404 | 441 |
| 405 ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms( | 442 ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms( |
| 406 const String16Vector& terms) { | 443 const String16Vector& terms) { |
| 407 ScoredHistoryMatches scored_items; | 444 ScoredHistoryMatches scored_items; |
| 408 | 445 |
| 409 // Do nothing if we have indexed no words (probably because we've not been | 446 // Do nothing if we have indexed no words (probably because we've not been |
| 410 // initialized yet). | 447 // initialized yet). |
| (...skipping 658 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1069 if (iter->has_title()) { | 1106 if (iter->has_title()) { |
| 1070 string16 title(UTF8ToUTF16(iter->title())); | 1107 string16 title(UTF8ToUTF16(iter->title())); |
| 1071 url_row.set_title(title); | 1108 url_row.set_title(title); |
| 1072 } | 1109 } |
| 1073 private_data_->history_info_map_[history_id] = url_row; | 1110 private_data_->history_info_map_[history_id] = url_row; |
| 1074 } | 1111 } |
| 1075 return true; | 1112 return true; |
| 1076 } | 1113 } |
| 1077 | 1114 |
| 1078 } // namespace history | 1115 } // namespace history |
| OLD | NEW |