| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/browser/history/url_index_private_data.h" | 5 #include "chrome/browser/history/url_index_private_data.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <functional> | 8 #include <functional> |
| 9 #include <iterator> | 9 #include <iterator> |
| 10 #include <limits> | 10 #include <limits> |
| 11 #include <numeric> | 11 #include <numeric> |
| 12 | 12 |
| 13 #include "base/file_util.h" | 13 #include "base/file_util.h" |
| 14 #include "base/i18n/case_conversion.h" | 14 #include "base/i18n/case_conversion.h" |
| 15 #include "base/metrics/histogram.h" | 15 #include "base/metrics/histogram.h" |
| 16 #include "base/string_util.h" | 16 #include "base/string_util.h" |
| 17 #include "base/threading/thread_restrictions.h" | 17 #include "base/time.h" |
| 18 #include "base/utf_string_conversions.h" | 18 #include "base/utf_string_conversions.h" |
| 19 #include "chrome/browser/autocomplete/autocomplete.h" | 19 #include "chrome/browser/autocomplete/autocomplete.h" |
| 20 #include "chrome/browser/history/history_database.h" | 20 #include "chrome/browser/history/history_database.h" |
| 21 #include "chrome/common/url_constants.h" | 21 #include "chrome/browser/history/in_memory_url_index.h" |
| 22 #include "content/public/browser/notification_details.h" |
| 23 #include "content/public/browser/notification_service.h" |
| 24 #include "content/public/browser/notification_source.h" |
| 22 #include "net/base/net_util.h" | 25 #include "net/base/net_util.h" |
| 23 #include "third_party/protobuf/src/google/protobuf/repeated_field.h" | 26 #include "third_party/protobuf/src/google/protobuf/repeated_field.h" |
| 24 | 27 |
| 25 using google::protobuf::RepeatedField; | 28 using google::protobuf::RepeatedField; |
| 26 using google::protobuf::RepeatedPtrField; | 29 using google::protobuf::RepeatedPtrField; |
| 27 using in_memory_url_index::InMemoryURLIndexCacheItem; | 30 using in_memory_url_index::InMemoryURLIndexCacheItem; |
| 28 | 31 |
| 29 namespace history { | 32 namespace history { |
| 30 | 33 |
| 31 typedef imui::InMemoryURLIndexCacheItem_WordListItem WordListItem; | 34 typedef imui::InMemoryURLIndexCacheItem_WordListItem WordListItem; |
| (...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 111 } | 114 } |
| 112 return score; | 115 return score; |
| 113 } | 116 } |
| 114 | 117 |
| 115 // InMemoryURLIndex's Private Data --------------------------------------------- | 118 // InMemoryURLIndex's Private Data --------------------------------------------- |
| 116 | 119 |
| 117 URLIndexPrivateData::URLIndexPrivateData() | 120 URLIndexPrivateData::URLIndexPrivateData() |
| 118 : pre_filter_item_count_(0), | 121 : pre_filter_item_count_(0), |
| 119 post_filter_item_count_(0), | 122 post_filter_item_count_(0), |
| 120 post_scoring_item_count_(0) { | 123 post_scoring_item_count_(0) { |
| 121 URLIndexPrivateData::InitializeSchemeWhitelist(&scheme_whitelist_); | |
| 122 } | 124 } |
| 123 | 125 |
| 124 URLIndexPrivateData::~URLIndexPrivateData() {} | 126 URLIndexPrivateData::~URLIndexPrivateData() {} |
| 125 | 127 |
| 126 void URLIndexPrivateData::Clear() { | 128 void URLIndexPrivateData::Clear() { |
| 127 word_list_.clear(); | 129 word_list_.clear(); |
| 128 available_words_.clear(); | 130 available_words_.clear(); |
| 129 word_map_.clear(); | 131 word_map_.clear(); |
| 130 char_word_map_.clear(); | 132 char_word_map_.clear(); |
| 131 word_id_history_map_.clear(); | 133 word_id_history_map_.clear(); |
| 132 history_id_word_map_.clear(); | 134 history_id_word_map_.clear(); |
| 133 history_info_map_.clear(); | 135 history_info_map_.clear(); |
| 134 } | 136 } |
| 135 | 137 |
| 138 bool URLIndexPrivateData::Empty() const { |
| 139 return history_info_map_.empty(); |
| 140 } |
| 141 |
| 142 URLIndexPrivateData* URLIndexPrivateData::Duplicate() const { |
| 143 scoped_refptr<URLIndexPrivateData> data_copy = new URLIndexPrivateData; |
| 144 data_copy->word_list_ = word_list_; |
| 145 data_copy->available_words_ = available_words_; |
| 146 data_copy->word_map_ = word_map_; |
| 147 data_copy->char_word_map_ = char_word_map_; |
| 148 data_copy->word_id_history_map_ = word_id_history_map_; |
| 149 data_copy->history_id_word_map_ = history_id_word_map_; |
| 150 data_copy->history_info_map_ = history_info_map_; |
| 151 return data_copy.release(); |
| 152 // Not copied: |
| 153 // search_term_cache_ |
| 154 // pre_filter_item_count_ |
| 155 // post_filter_item_count_ |
| 156 // post_scoring_item_count_ |
| 157 }; |
| 158 |
| 136 // Cache Updating -------------------------------------------------------------- | 159 // Cache Updating -------------------------------------------------------------- |
| 137 | 160 |
| 138 bool URLIndexPrivateData::IndexRow(const URLRow& row) { | 161 bool URLIndexPrivateData::IndexRow( |
| 162 const URLRow& row, |
| 163 const std::string& languages, |
| 164 const std::set<std::string>& scheme_whitelist) { |
| 139 const GURL& gurl(row.url()); | 165 const GURL& gurl(row.url()); |
| 140 | 166 |
| 141 // Index only URLs with a whitelisted scheme. | 167 // Index only URLs with a whitelisted scheme. |
| 142 if (!URLIndexPrivateData::URLSchemeIsWhitelisted(gurl)) | 168 if (!URLSchemeIsWhitelisted(gurl, scheme_whitelist)) |
| 143 return false; | 169 return false; |
| 144 | 170 |
| 145 URLID row_id = row.id(); | 171 URLID row_id = row.id(); |
| 146 // Strip out username and password before saving and indexing. | 172 // Strip out username and password before saving and indexing. |
| 147 string16 url(net::FormatUrl(gurl, languages_, | 173 string16 url(net::FormatUrl(gurl, languages, |
| 148 net::kFormatUrlOmitUsernamePassword, | 174 net::kFormatUrlOmitUsernamePassword, |
| 149 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS, | 175 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS, |
| 150 NULL, NULL, NULL)); | 176 NULL, NULL, NULL)); |
| 151 | 177 |
| 152 HistoryID history_id = static_cast<HistoryID>(row_id); | 178 HistoryID history_id = static_cast<HistoryID>(row_id); |
| 153 DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max()); | 179 DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max()); |
| 154 | 180 |
| 155 // Add the row for quick lookup in the history info store. | 181 // Add the row for quick lookup in the history info store. |
| 156 URLRow new_row(GURL(url), row_id); | 182 URLRow new_row(GURL(url), row_id); |
| 157 new_row.set_visit_count(row.visit_count()); | 183 new_row.set_visit_count(row.visit_count()); |
| 158 new_row.set_typed_count(row.typed_count()); | 184 new_row.set_typed_count(row.typed_count()); |
| 159 new_row.set_last_visit(row.last_visit()); | 185 new_row.set_last_visit(row.last_visit()); |
| 160 new_row.set_title(row.title()); | 186 new_row.set_title(row.title()); |
| 161 history_info_map_[history_id] = new_row; | 187 history_info_map_[history_id] = new_row; |
| 162 | 188 |
| 163 // Index the words contained in the URL and title of the row. | 189 // Index the words contained in the URL and title of the row. |
| 164 AddRowWordsToIndex(new_row); | 190 AddRowWordsToIndex(new_row, languages); |
| 165 return true; | 191 return true; |
| 166 } | 192 } |
| 167 | 193 |
| 168 void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row) { | 194 void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row, |
| 195 const std::string& languages) { |
| 169 HistoryID history_id = static_cast<HistoryID>(row.id()); | 196 HistoryID history_id = static_cast<HistoryID>(row.id()); |
| 170 // Split URL into individual, unique words then add in the title words. | 197 // Split URL into individual, unique words then add in the title words. |
| 171 const GURL& gurl(row.url()); | 198 const GURL& gurl(row.url()); |
| 172 string16 url(net::FormatUrl(gurl, languages_, | 199 string16 url(net::FormatUrl(gurl, languages, |
| 173 net::kFormatUrlOmitUsernamePassword, | 200 net::kFormatUrlOmitUsernamePassword, |
| 174 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS, | 201 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS, |
| 175 NULL, NULL, NULL)); | 202 NULL, NULL, NULL)); |
| 176 url = base::i18n::ToLower(url); | 203 url = base::i18n::ToLower(url); |
| 177 String16Set url_words = String16SetFromString16(url); | 204 String16Set url_words = String16SetFromString16(url); |
| 178 String16Set title_words = String16SetFromString16(row.title()); | 205 String16Set title_words = String16SetFromString16(row.title()); |
| 179 String16Set words; | 206 String16Set words; |
| 180 std::set_union(url_words.begin(), url_words.end(), | 207 std::set_union(url_words.begin(), url_words.end(), |
| 181 title_words.begin(), title_words.end(), | 208 title_words.begin(), title_words.end(), |
| 182 std::insert_iterator<String16Set>(words, words.begin())); | 209 std::insert_iterator<String16Set>(words, words.begin())); |
| (...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 288 if (iter != history_id_word_map_.end()) { | 315 if (iter != history_id_word_map_.end()) { |
| 289 WordIDSet& word_id_set(iter->second); | 316 WordIDSet& word_id_set(iter->second); |
| 290 word_id_set.insert(word_id); | 317 word_id_set.insert(word_id); |
| 291 } else { | 318 } else { |
| 292 WordIDSet word_id_set; | 319 WordIDSet word_id_set; |
| 293 word_id_set.insert(word_id); | 320 word_id_set.insert(word_id); |
| 294 history_id_word_map_[history_id] = word_id_set; | 321 history_id_word_map_[history_id] = word_id_set; |
| 295 } | 322 } |
| 296 } | 323 } |
| 297 | 324 |
| 298 bool URLIndexPrivateData::UpdateURL(const URLRow& row) { | 325 bool URLIndexPrivateData::UpdateURL( |
| 326 const URLRow& row, |
| 327 const std::string& languages, |
| 328 const std::set<std::string>& scheme_whitelist) { |
| 299 // The row may or may not already be in our index. If it is not already | 329 // The row may or may not already be in our index. If it is not already |
| 300 // indexed and it qualifies then it gets indexed. If it is already | 330 // indexed and it qualifies then it gets indexed. If it is already |
| 301 // indexed and still qualifies then it gets updated, otherwise it | 331 // indexed and still qualifies then it gets updated, otherwise it |
| 302 // is deleted from the index. | 332 // is deleted from the index. |
| 303 bool row_was_updated = false; | 333 bool row_was_updated = false; |
| 304 URLID row_id = row.id(); | 334 URLID row_id = row.id(); |
| 305 HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id); | 335 HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id); |
| 306 if (row_pos == history_info_map_.end()) { | 336 if (row_pos == history_info_map_.end()) { |
| 307 // This new row should be indexed if it qualifies. | 337 // This new row should be indexed if it qualifies. |
| 308 URLRow new_row(row); | 338 URLRow new_row(row); |
| 309 new_row.set_id(row_id); | 339 new_row.set_id(row_id); |
| 310 row_was_updated = | 340 row_was_updated = RowQualifiesAsSignificant(new_row, base::Time()) && |
| 311 RowQualifiesAsSignificant(new_row, base::Time()) && IndexRow(new_row); | 341 IndexRow(new_row, languages, scheme_whitelist); |
| 312 } else if (RowQualifiesAsSignificant(row, base::Time())) { | 342 } else if (RowQualifiesAsSignificant(row, base::Time())) { |
| 313 // This indexed row still qualifies and will be re-indexed. | 343 // This indexed row still qualifies and will be re-indexed. |
| 314 // The url won't have changed but the title, visit count, etc. | 344 // The url won't have changed but the title, visit count, etc. |
| 315 // might have changed. | 345 // might have changed. |
| 316 URLRow& row_to_update = row_pos->second; | 346 URLRow& row_to_update = row_pos->second; |
| 317 bool title_updated = row_to_update.title() != row.title(); | 347 bool title_updated = row_to_update.title() != row.title(); |
| 318 if (row_to_update.visit_count() != row.visit_count() || | 348 if (row_to_update.visit_count() != row.visit_count() || |
| 319 row_to_update.typed_count() != row.typed_count() || | 349 row_to_update.typed_count() != row.typed_count() || |
| 320 row_to_update.last_visit() != row.last_visit() || title_updated) { | 350 row_to_update.last_visit() != row.last_visit() || title_updated) { |
| 321 row_to_update.set_visit_count(row.visit_count()); | 351 row_to_update.set_visit_count(row.visit_count()); |
| 322 row_to_update.set_typed_count(row.typed_count()); | 352 row_to_update.set_typed_count(row.typed_count()); |
| 323 row_to_update.set_last_visit(row.last_visit()); | 353 row_to_update.set_last_visit(row.last_visit()); |
| 324 // While the URL is guaranteed to remain stable, the title may have | 354 // While the URL is guaranteed to remain stable, the title may have |
| 325 // changed. If so, then update the index with the changed words. | 355 // changed. If so, then update the index with the changed words. |
| 326 if (title_updated) { | 356 if (title_updated) { |
| 327 // Clear all words associated with this row and re-index both the | 357 // Clear all words associated with this row and re-index both the |
| 328 // URL and title. | 358 // URL and title. |
| 329 RemoveRowWordsFromIndex(row_to_update); | 359 RemoveRowWordsFromIndex(row_to_update); |
| 330 row_to_update.set_title(row.title()); | 360 row_to_update.set_title(row.title()); |
| 331 AddRowWordsToIndex(row_to_update); | 361 AddRowWordsToIndex(row_to_update, languages); |
| 332 } | 362 } |
| 333 row_was_updated = true; | 363 row_was_updated = true; |
| 334 } | 364 } |
| 335 } else { | 365 } else { |
| 336 // This indexed row no longer qualifies and will be de-indexed by | 366 // This indexed row no longer qualifies and will be de-indexed by |
| 337 // clearing all words associated with this row. | 367 // clearing all words associated with this row. |
| 338 RemoveRowFromIndex(row); | 368 RemoveRowFromIndex(row); |
| 339 row_was_updated = true; | 369 row_was_updated = true; |
| 340 } | 370 } |
| 341 if (row_was_updated) | 371 if (row_was_updated) |
| (...skipping 20 matching lines...) Expand all Loading... |
| 362 history_info_map_.begin(), | 392 history_info_map_.begin(), |
| 363 history_info_map_.end(), | 393 history_info_map_.end(), |
| 364 HistoryInfoMapItemHasURL(url)); | 394 HistoryInfoMapItemHasURL(url)); |
| 365 if (pos == history_info_map_.end()) | 395 if (pos == history_info_map_.end()) |
| 366 return false; | 396 return false; |
| 367 RemoveRowFromIndex(pos->second); | 397 RemoveRowFromIndex(pos->second); |
| 368 search_term_cache_.clear(); // This invalidates the cache. | 398 search_term_cache_.clear(); // This invalidates the cache. |
| 369 return true; | 399 return true; |
| 370 } | 400 } |
| 371 | 401 |
| 372 bool URLIndexPrivateData::URLSchemeIsWhitelisted(const GURL& gurl) const { | |
| 373 return scheme_whitelist_.find(gurl.scheme()) != scheme_whitelist_.end(); | |
| 374 } | |
| 375 | |
| 376 // URLIndexPrivateData::HistoryItemFactorGreater ------------------------------- | 402 // URLIndexPrivateData::HistoryItemFactorGreater ------------------------------- |
| 377 | 403 |
| 378 URLIndexPrivateData::HistoryItemFactorGreater::HistoryItemFactorGreater( | 404 URLIndexPrivateData::HistoryItemFactorGreater::HistoryItemFactorGreater( |
| 379 const HistoryInfoMap& history_info_map) | 405 const HistoryInfoMap& history_info_map) |
| 380 : history_info_map_(history_info_map) { | 406 : history_info_map_(history_info_map) { |
| 381 } | 407 } |
| 382 | 408 |
| 383 URLIndexPrivateData::HistoryItemFactorGreater::~HistoryItemFactorGreater() {} | 409 URLIndexPrivateData::HistoryItemFactorGreater::~HistoryItemFactorGreater() {} |
| 384 | 410 |
| 385 bool URLIndexPrivateData::HistoryItemFactorGreater::operator()( | 411 bool URLIndexPrivateData::HistoryItemFactorGreater::operator()( |
| (...skipping 494 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 880 std::set_intersection(word_id_set.begin(), word_id_set.end(), | 906 std::set_intersection(word_id_set.begin(), word_id_set.end(), |
| 881 char_word_id_set.begin(), char_word_id_set.end(), | 907 char_word_id_set.begin(), char_word_id_set.end(), |
| 882 std::inserter(new_word_id_set, | 908 std::inserter(new_word_id_set, |
| 883 new_word_id_set.begin())); | 909 new_word_id_set.begin())); |
| 884 word_id_set.swap(new_word_id_set); | 910 word_id_set.swap(new_word_id_set); |
| 885 } | 911 } |
| 886 } | 912 } |
| 887 return word_id_set; | 913 return word_id_set; |
| 888 } | 914 } |
| 889 | 915 |
| 890 // static | |
| 891 void URLIndexPrivateData::InitializeSchemeWhitelist( | |
| 892 std::set<std::string>* whitelist) { | |
| 893 DCHECK(whitelist); | |
| 894 whitelist->insert(std::string(chrome::kAboutScheme)); | |
| 895 whitelist->insert(std::string(chrome::kChromeUIScheme)); | |
| 896 whitelist->insert(std::string(chrome::kFileScheme)); | |
| 897 whitelist->insert(std::string(chrome::kFtpScheme)); | |
| 898 whitelist->insert(std::string(chrome::kHttpScheme)); | |
| 899 whitelist->insert(std::string(chrome::kHttpsScheme)); | |
| 900 whitelist->insert(std::string(chrome::kMailToScheme)); | |
| 901 } | |
| 902 | |
| 903 // Cache Saving ---------------------------------------------------------------- | 916 // Cache Saving ---------------------------------------------------------------- |
| 904 | 917 |
| 918 // static |
| 919 void URLIndexPrivateData::WritePrivateDataToCacheFileTask( |
| 920 scoped_refptr<URLIndexPrivateData> private_data, |
| 921 const FilePath& file_path, |
| 922 scoped_refptr<RefCountedBool> succeeded) { |
| 923 DCHECK(private_data.get()); |
| 924 DCHECK(!file_path.empty()); |
| 925 succeeded->set_value(private_data->SaveToFile(file_path)); |
| 926 } |
| 927 |
| 905 bool URLIndexPrivateData::SaveToFile(const FilePath& file_path) { | 928 bool URLIndexPrivateData::SaveToFile(const FilePath& file_path) { |
| 906 // TODO(mrossetti): Move File IO to another thread. | |
| 907 base::ThreadRestrictions::ScopedAllowIO allow_io; | |
| 908 base::TimeTicks beginning_time = base::TimeTicks::Now(); | 929 base::TimeTicks beginning_time = base::TimeTicks::Now(); |
| 909 InMemoryURLIndexCacheItem index_cache; | 930 InMemoryURLIndexCacheItem index_cache; |
| 910 SavePrivateData(&index_cache); | 931 SavePrivateData(&index_cache); |
| 911 std::string data; | 932 std::string data; |
| 912 if (!index_cache.SerializeToString(&data)) { | 933 if (!index_cache.SerializeToString(&data)) { |
| 913 LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache."; | 934 LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache."; |
| 914 return false; | 935 return false; |
| 915 } | 936 } |
| 916 | 937 |
| 917 int size = data.size(); | 938 int size = data.size(); |
| (...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1003 if (history_info_map_.empty()) | 1024 if (history_info_map_.empty()) |
| 1004 return; | 1025 return; |
| 1005 HistoryInfoMapItem* map_item = cache->mutable_history_info_map(); | 1026 HistoryInfoMapItem* map_item = cache->mutable_history_info_map(); |
| 1006 map_item->set_item_count(history_info_map_.size()); | 1027 map_item->set_item_count(history_info_map_.size()); |
| 1007 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); | 1028 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); |
| 1008 iter != history_info_map_.end(); ++iter) { | 1029 iter != history_info_map_.end(); ++iter) { |
| 1009 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry(); | 1030 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry(); |
| 1010 map_entry->set_history_id(iter->first); | 1031 map_entry->set_history_id(iter->first); |
| 1011 const URLRow& url_row(iter->second); | 1032 const URLRow& url_row(iter->second); |
| 1012 // Note: We only save information that contributes to the index so there | 1033 // Note: We only save information that contributes to the index so there |
| 1013 // is no need to save search_term_cache_ (not persistent), | 1034 // is no need to save search_term_cache_ (not persistent). |
| 1014 // languages_, etc. | |
| 1015 map_entry->set_visit_count(url_row.visit_count()); | 1035 map_entry->set_visit_count(url_row.visit_count()); |
| 1016 map_entry->set_typed_count(url_row.typed_count()); | 1036 map_entry->set_typed_count(url_row.typed_count()); |
| 1017 map_entry->set_last_visit(url_row.last_visit().ToInternalValue()); | 1037 map_entry->set_last_visit(url_row.last_visit().ToInternalValue()); |
| 1018 map_entry->set_url(url_row.url().spec()); | 1038 map_entry->set_url(url_row.url().spec()); |
| 1019 map_entry->set_title(UTF16ToUTF8(url_row.title())); | 1039 map_entry->set_title(UTF16ToUTF8(url_row.title())); |
| 1020 } | 1040 } |
| 1021 } | 1041 } |
| 1022 | 1042 |
| 1023 // Cache Restoring ------------------------------------------------------------- | 1043 // Cache Restoring ------------------------------------------------------------- |
| 1024 | 1044 |
| 1025 bool URLIndexPrivateData::RestoreFromFile(const FilePath& file_path) { | 1045 // static |
| 1026 // TODO(mrossetti): Figure out how to determine if the cache is up-to-date. | 1046 void URLIndexPrivateData::RestoreFromFileTask( |
| 1027 // That is: ensure that the database has not been modified since the cache | 1047 const FilePath& file_path, |
| 1028 // was last saved. DB file modification date is inadequate. There are no | 1048 scoped_refptr<URLIndexPrivateData> private_data) { |
| 1029 // SQLite table checksums automatically stored. | 1049 private_data = URLIndexPrivateData::RestoreFromFile(file_path); |
| 1030 Clear(); // Start with a clean slate. | 1050 } |
| 1031 | 1051 |
| 1032 // FIXME(mrossetti): Move File IO to another thread. | 1052 // static |
| 1033 base::ThreadRestrictions::ScopedAllowIO allow_io; | 1053 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RestoreFromFile( |
| 1054 const FilePath& file_path) { |
| 1034 base::TimeTicks beginning_time = base::TimeTicks::Now(); | 1055 base::TimeTicks beginning_time = base::TimeTicks::Now(); |
| 1035 if (!file_util::PathExists(file_path)) | 1056 if (!file_util::PathExists(file_path)) |
| 1036 return false; | 1057 return NULL; |
| 1037 std::string data; | 1058 std::string data; |
| 1038 // If there is no cache file then simply give up. This will cause us to | 1059 // If there is no cache file then simply give up. This will cause us to |
| 1039 // attempt to rebuild from the history database. | 1060 // attempt to rebuild from the history database. |
| 1040 if (!file_util::ReadFileToString(file_path, &data)) | 1061 if (!file_util::ReadFileToString(file_path, &data)) |
| 1041 return false; | 1062 return NULL; |
| 1042 | 1063 |
| 1064 scoped_refptr<URLIndexPrivateData> restored_data(new URLIndexPrivateData); |
| 1043 InMemoryURLIndexCacheItem index_cache; | 1065 InMemoryURLIndexCacheItem index_cache; |
| 1044 if (!index_cache.ParseFromArray(data.c_str(), data.size())) { | 1066 if (!index_cache.ParseFromArray(data.c_str(), data.size())) { |
| 1045 LOG(WARNING) << "Failed to parse InMemoryURLIndex cache data read from " | 1067 LOG(WARNING) << "Failed to parse URLIndexPrivateData cache data read from " |
| 1046 << file_path.value(); | 1068 << file_path.value(); |
| 1047 return false; | 1069 return restored_data; |
| 1048 } | 1070 } |
| 1049 | 1071 |
| 1050 if (!RestorePrivateData(index_cache)) { | 1072 if (!restored_data->RestorePrivateData(index_cache)) |
| 1051 Clear(); // Back to square one -- must build from scratch. | 1073 return NULL; |
| 1052 return false; | |
| 1053 } | |
| 1054 | 1074 |
| 1055 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime", | 1075 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime", |
| 1056 base::TimeTicks::Now() - beginning_time); | 1076 base::TimeTicks::Now() - beginning_time); |
| 1057 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", | 1077 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", |
| 1058 history_id_word_map_.size()); | 1078 restored_data->history_id_word_map_.size()); |
| 1059 UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size()); | 1079 UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size()); |
| 1060 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", word_map_.size()); | 1080 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", |
| 1061 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", char_word_map_.size()); | 1081 restored_data->word_map_.size()); |
| 1062 return true; | 1082 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", |
| 1083 restored_data->char_word_map_.size()); |
| 1084 if (restored_data->Empty()) |
| 1085 return NULL; // 'No data' is the same as a failed reload. |
| 1086 return restored_data; |
| 1063 } | 1087 } |
| 1064 | 1088 |
| 1065 // static | 1089 // static |
| 1066 URLIndexPrivateData* URLIndexPrivateData::RebuildFromHistory( | 1090 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RebuildFromHistory( |
| 1067 HistoryDatabase* history_db) { | 1091 HistoryDatabase* history_db, |
| 1092 const std::string& languages, |
| 1093 const std::set<std::string>& scheme_whitelist) { |
| 1068 if (!history_db) | 1094 if (!history_db) |
| 1069 return NULL; | 1095 return NULL; |
| 1070 | 1096 |
| 1071 base::TimeTicks beginning_time = base::TimeTicks::Now(); | 1097 base::TimeTicks beginning_time = base::TimeTicks::Now(); |
| 1072 | 1098 |
| 1073 scoped_ptr<URLIndexPrivateData> rebuilt_data(new URLIndexPrivateData); | 1099 scoped_refptr<URLIndexPrivateData> rebuilt_data(new URLIndexPrivateData); |
| 1074 URLDatabase::URLEnumerator history_enum; | 1100 URLDatabase::URLEnumerator history_enum; |
| 1075 if (!history_db->InitURLEnumeratorForSignificant(&history_enum)) | 1101 if (!history_db->InitURLEnumeratorForSignificant(&history_enum)) |
| 1076 return NULL; | 1102 return NULL; |
| 1077 for (URLRow row; history_enum.GetNextURL(&row); ) | 1103 for (URLRow row; history_enum.GetNextURL(&row); ) |
| 1078 rebuilt_data->IndexRow(row); | 1104 rebuilt_data->IndexRow(row, languages, scheme_whitelist); |
| 1079 | 1105 |
| 1080 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime", | 1106 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime", |
| 1081 base::TimeTicks::Now() - beginning_time); | 1107 base::TimeTicks::Now() - beginning_time); |
| 1082 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", | 1108 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", |
| 1083 rebuilt_data->history_id_word_map_.size()); | 1109 rebuilt_data->history_id_word_map_.size()); |
| 1084 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", | 1110 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", |
| 1085 rebuilt_data->word_map_.size()); | 1111 rebuilt_data->word_map_.size()); |
| 1086 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", | 1112 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", |
| 1087 rebuilt_data->char_word_map_.size()); | 1113 rebuilt_data->char_word_map_.size()); |
| 1088 return rebuilt_data.release(); | 1114 return rebuilt_data; |
| 1089 } | 1115 } |
| 1090 | 1116 |
| 1091 bool URLIndexPrivateData::RestorePrivateData( | 1117 bool URLIndexPrivateData::RestorePrivateData( |
| 1092 const InMemoryURLIndexCacheItem& cache) { | 1118 const InMemoryURLIndexCacheItem& cache) { |
| 1093 return RestoreWordList(cache) && RestoreWordMap(cache) && | 1119 return RestoreWordList(cache) && RestoreWordMap(cache) && |
| 1094 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) && | 1120 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) && |
| 1095 RestoreHistoryInfoMap(cache); | 1121 RestoreHistoryInfoMap(cache); |
| 1096 } | 1122 } |
| 1097 | 1123 |
| 1098 bool URLIndexPrivateData::RestoreWordList( | 1124 bool URLIndexPrivateData::RestoreWordList( |
| (...skipping 107 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1206 url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit())); | 1232 url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit())); |
| 1207 if (iter->has_title()) { | 1233 if (iter->has_title()) { |
| 1208 string16 title(UTF8ToUTF16(iter->title())); | 1234 string16 title(UTF8ToUTF16(iter->title())); |
| 1209 url_row.set_title(title); | 1235 url_row.set_title(title); |
| 1210 } | 1236 } |
| 1211 history_info_map_[history_id] = url_row; | 1237 history_info_map_[history_id] = url_row; |
| 1212 } | 1238 } |
| 1213 return true; | 1239 return true; |
| 1214 } | 1240 } |
| 1215 | 1241 |
| 1242 // static |
| 1243 bool URLIndexPrivateData::URLSchemeIsWhitelisted( |
| 1244 const GURL& gurl, |
| 1245 const std::set<std::string>& whitelist) { |
| 1246 return whitelist.find(gurl.scheme()) != whitelist.end(); |
| 1247 } |
| 1248 |
| 1216 } // namespace history | 1249 } // namespace history |
| OLD | NEW |