OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/history/url_index_private_data.h" | 5 #include "chrome/browser/history/url_index_private_data.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <functional> | 8 #include <functional> |
9 #include <iterator> | 9 #include <iterator> |
10 #include <limits> | 10 #include <limits> |
11 #include <numeric> | 11 #include <numeric> |
12 | 12 |
13 #include "base/file_util.h" | 13 #include "base/file_util.h" |
14 #include "base/i18n/case_conversion.h" | 14 #include "base/i18n/case_conversion.h" |
15 #include "base/metrics/histogram.h" | 15 #include "base/metrics/histogram.h" |
16 #include "base/string_util.h" | 16 #include "base/string_util.h" |
17 #include "base/threading/thread_restrictions.h" | 17 #include "base/time.h" |
18 #include "base/utf_string_conversions.h" | 18 #include "base/utf_string_conversions.h" |
19 #include "chrome/browser/autocomplete/autocomplete.h" | 19 #include "chrome/browser/autocomplete/autocomplete.h" |
20 #include "chrome/browser/history/history_database.h" | 20 #include "chrome/browser/history/history_database.h" |
21 #include "chrome/common/url_constants.h" | 21 #include "chrome/browser/history/in_memory_url_index.h" |
| 22 #include "content/public/browser/notification_details.h" |
| 23 #include "content/public/browser/notification_service.h" |
| 24 #include "content/public/browser/notification_source.h" |
22 #include "net/base/net_util.h" | 25 #include "net/base/net_util.h" |
23 #include "third_party/protobuf/src/google/protobuf/repeated_field.h" | 26 #include "third_party/protobuf/src/google/protobuf/repeated_field.h" |
24 | 27 |
25 using google::protobuf::RepeatedField; | 28 using google::protobuf::RepeatedField; |
26 using google::protobuf::RepeatedPtrField; | 29 using google::protobuf::RepeatedPtrField; |
27 using in_memory_url_index::InMemoryURLIndexCacheItem; | 30 using in_memory_url_index::InMemoryURLIndexCacheItem; |
28 | 31 |
29 namespace history { | 32 namespace history { |
30 | 33 |
31 typedef imui::InMemoryURLIndexCacheItem_WordListItem WordListItem; | 34 typedef imui::InMemoryURLIndexCacheItem_WordListItem WordListItem; |
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
111 } | 114 } |
112 return score; | 115 return score; |
113 } | 116 } |
114 | 117 |
115 // InMemoryURLIndex's Private Data --------------------------------------------- | 118 // InMemoryURLIndex's Private Data --------------------------------------------- |
116 | 119 |
117 URLIndexPrivateData::URLIndexPrivateData() | 120 URLIndexPrivateData::URLIndexPrivateData() |
118 : pre_filter_item_count_(0), | 121 : pre_filter_item_count_(0), |
119 post_filter_item_count_(0), | 122 post_filter_item_count_(0), |
120 post_scoring_item_count_(0) { | 123 post_scoring_item_count_(0) { |
121 URLIndexPrivateData::InitializeSchemeWhitelist(&scheme_whitelist_); | |
122 } | 124 } |
123 | 125 |
124 URLIndexPrivateData::~URLIndexPrivateData() {} | 126 URLIndexPrivateData::~URLIndexPrivateData() {} |
125 | 127 |
126 void URLIndexPrivateData::Clear() { | 128 void URLIndexPrivateData::Clear() { |
127 word_list_.clear(); | 129 word_list_.clear(); |
128 available_words_.clear(); | 130 available_words_.clear(); |
129 word_map_.clear(); | 131 word_map_.clear(); |
130 char_word_map_.clear(); | 132 char_word_map_.clear(); |
131 word_id_history_map_.clear(); | 133 word_id_history_map_.clear(); |
132 history_id_word_map_.clear(); | 134 history_id_word_map_.clear(); |
133 history_info_map_.clear(); | 135 history_info_map_.clear(); |
134 } | 136 } |
135 | 137 |
| 138 bool URLIndexPrivateData::Empty() const { |
| 139 return history_info_map_.empty(); |
| 140 } |
| 141 |
| 142 URLIndexPrivateData* URLIndexPrivateData::Duplicate() const { |
| 143 scoped_refptr<URLIndexPrivateData> data_copy = new URLIndexPrivateData; |
| 144 data_copy->word_list_ = word_list_; |
| 145 data_copy->available_words_ = available_words_; |
| 146 data_copy->word_map_ = word_map_; |
| 147 data_copy->char_word_map_ = char_word_map_; |
| 148 data_copy->word_id_history_map_ = word_id_history_map_; |
| 149 data_copy->history_id_word_map_ = history_id_word_map_; |
| 150 data_copy->history_info_map_ = history_info_map_; |
| 151 return data_copy.release(); |
| 152 // Not copied: |
| 153 // search_term_cache_ |
| 154 // pre_filter_item_count_ |
| 155 // post_filter_item_count_ |
| 156 // post_scoring_item_count_ |
| 157 }; |
| 158 |
136 // Cache Updating -------------------------------------------------------------- | 159 // Cache Updating -------------------------------------------------------------- |
137 | 160 |
138 bool URLIndexPrivateData::IndexRow(const URLRow& row) { | 161 bool URLIndexPrivateData::IndexRow( |
| 162 const URLRow& row, |
| 163 const std::string& languages, |
| 164 const std::set<std::string>& scheme_whitelist) { |
139 const GURL& gurl(row.url()); | 165 const GURL& gurl(row.url()); |
140 | 166 |
141 // Index only URLs with a whitelisted scheme. | 167 // Index only URLs with a whitelisted scheme. |
142 if (!URLIndexPrivateData::URLSchemeIsWhitelisted(gurl)) | 168 if (!URLSchemeIsWhitelisted(gurl, scheme_whitelist)) |
143 return false; | 169 return false; |
144 | 170 |
145 URLID row_id = row.id(); | 171 URLID row_id = row.id(); |
146 // Strip out username and password before saving and indexing. | 172 // Strip out username and password before saving and indexing. |
147 string16 url(net::FormatUrl(gurl, languages_, | 173 string16 url(net::FormatUrl(gurl, languages, |
148 net::kFormatUrlOmitUsernamePassword, | 174 net::kFormatUrlOmitUsernamePassword, |
149 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS, | 175 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS, |
150 NULL, NULL, NULL)); | 176 NULL, NULL, NULL)); |
151 | 177 |
152 HistoryID history_id = static_cast<HistoryID>(row_id); | 178 HistoryID history_id = static_cast<HistoryID>(row_id); |
153 DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max()); | 179 DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max()); |
154 | 180 |
155 // Add the row for quick lookup in the history info store. | 181 // Add the row for quick lookup in the history info store. |
156 URLRow new_row(GURL(url), row_id); | 182 URLRow new_row(GURL(url), row_id); |
157 new_row.set_visit_count(row.visit_count()); | 183 new_row.set_visit_count(row.visit_count()); |
158 new_row.set_typed_count(row.typed_count()); | 184 new_row.set_typed_count(row.typed_count()); |
159 new_row.set_last_visit(row.last_visit()); | 185 new_row.set_last_visit(row.last_visit()); |
160 new_row.set_title(row.title()); | 186 new_row.set_title(row.title()); |
161 history_info_map_[history_id] = new_row; | 187 history_info_map_[history_id] = new_row; |
162 | 188 |
163 // Index the words contained in the URL and title of the row. | 189 // Index the words contained in the URL and title of the row. |
164 AddRowWordsToIndex(new_row); | 190 AddRowWordsToIndex(new_row, languages); |
165 return true; | 191 return true; |
166 } | 192 } |
167 | 193 |
168 void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row) { | 194 void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row, |
| 195 const std::string& languages) { |
169 HistoryID history_id = static_cast<HistoryID>(row.id()); | 196 HistoryID history_id = static_cast<HistoryID>(row.id()); |
170 // Split URL into individual, unique words then add in the title words. | 197 // Split URL into individual, unique words then add in the title words. |
171 const GURL& gurl(row.url()); | 198 const GURL& gurl(row.url()); |
172 string16 url(net::FormatUrl(gurl, languages_, | 199 string16 url(net::FormatUrl(gurl, languages, |
173 net::kFormatUrlOmitUsernamePassword, | 200 net::kFormatUrlOmitUsernamePassword, |
174 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS, | 201 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS, |
175 NULL, NULL, NULL)); | 202 NULL, NULL, NULL)); |
176 url = base::i18n::ToLower(url); | 203 url = base::i18n::ToLower(url); |
177 String16Set url_words = String16SetFromString16(url); | 204 String16Set url_words = String16SetFromString16(url); |
178 String16Set title_words = String16SetFromString16(row.title()); | 205 String16Set title_words = String16SetFromString16(row.title()); |
179 String16Set words; | 206 String16Set words; |
180 std::set_union(url_words.begin(), url_words.end(), | 207 std::set_union(url_words.begin(), url_words.end(), |
181 title_words.begin(), title_words.end(), | 208 title_words.begin(), title_words.end(), |
182 std::insert_iterator<String16Set>(words, words.begin())); | 209 std::insert_iterator<String16Set>(words, words.begin())); |
(...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
288 if (iter != history_id_word_map_.end()) { | 315 if (iter != history_id_word_map_.end()) { |
289 WordIDSet& word_id_set(iter->second); | 316 WordIDSet& word_id_set(iter->second); |
290 word_id_set.insert(word_id); | 317 word_id_set.insert(word_id); |
291 } else { | 318 } else { |
292 WordIDSet word_id_set; | 319 WordIDSet word_id_set; |
293 word_id_set.insert(word_id); | 320 word_id_set.insert(word_id); |
294 history_id_word_map_[history_id] = word_id_set; | 321 history_id_word_map_[history_id] = word_id_set; |
295 } | 322 } |
296 } | 323 } |
297 | 324 |
298 bool URLIndexPrivateData::UpdateURL(const URLRow& row) { | 325 bool URLIndexPrivateData::UpdateURL( |
| 326 const URLRow& row, |
| 327 const std::string& languages, |
| 328 const std::set<std::string>& scheme_whitelist) { |
299 // The row may or may not already be in our index. If it is not already | 329 // The row may or may not already be in our index. If it is not already |
300 // indexed and it qualifies then it gets indexed. If it is already | 330 // indexed and it qualifies then it gets indexed. If it is already |
301 // indexed and still qualifies then it gets updated, otherwise it | 331 // indexed and still qualifies then it gets updated, otherwise it |
302 // is deleted from the index. | 332 // is deleted from the index. |
303 bool row_was_updated = false; | 333 bool row_was_updated = false; |
304 URLID row_id = row.id(); | 334 URLID row_id = row.id(); |
305 HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id); | 335 HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id); |
306 if (row_pos == history_info_map_.end()) { | 336 if (row_pos == history_info_map_.end()) { |
307 // This new row should be indexed if it qualifies. | 337 // This new row should be indexed if it qualifies. |
308 URLRow new_row(row); | 338 URLRow new_row(row); |
309 new_row.set_id(row_id); | 339 new_row.set_id(row_id); |
310 row_was_updated = | 340 row_was_updated = RowQualifiesAsSignificant(new_row, base::Time()) && |
311 RowQualifiesAsSignificant(new_row, base::Time()) && IndexRow(new_row); | 341 IndexRow(new_row, languages, scheme_whitelist); |
312 } else if (RowQualifiesAsSignificant(row, base::Time())) { | 342 } else if (RowQualifiesAsSignificant(row, base::Time())) { |
313 // This indexed row still qualifies and will be re-indexed. | 343 // This indexed row still qualifies and will be re-indexed. |
314 // The url won't have changed but the title, visit count, etc. | 344 // The url won't have changed but the title, visit count, etc. |
315 // might have changed. | 345 // might have changed. |
316 URLRow& row_to_update = row_pos->second; | 346 URLRow& row_to_update = row_pos->second; |
317 bool title_updated = row_to_update.title() != row.title(); | 347 bool title_updated = row_to_update.title() != row.title(); |
318 if (row_to_update.visit_count() != row.visit_count() || | 348 if (row_to_update.visit_count() != row.visit_count() || |
319 row_to_update.typed_count() != row.typed_count() || | 349 row_to_update.typed_count() != row.typed_count() || |
320 row_to_update.last_visit() != row.last_visit() || title_updated) { | 350 row_to_update.last_visit() != row.last_visit() || title_updated) { |
321 row_to_update.set_visit_count(row.visit_count()); | 351 row_to_update.set_visit_count(row.visit_count()); |
322 row_to_update.set_typed_count(row.typed_count()); | 352 row_to_update.set_typed_count(row.typed_count()); |
323 row_to_update.set_last_visit(row.last_visit()); | 353 row_to_update.set_last_visit(row.last_visit()); |
324 // While the URL is guaranteed to remain stable, the title may have | 354 // While the URL is guaranteed to remain stable, the title may have |
325 // changed. If so, then update the index with the changed words. | 355 // changed. If so, then update the index with the changed words. |
326 if (title_updated) { | 356 if (title_updated) { |
327 // Clear all words associated with this row and re-index both the | 357 // Clear all words associated with this row and re-index both the |
328 // URL and title. | 358 // URL and title. |
329 RemoveRowWordsFromIndex(row_to_update); | 359 RemoveRowWordsFromIndex(row_to_update); |
330 row_to_update.set_title(row.title()); | 360 row_to_update.set_title(row.title()); |
331 AddRowWordsToIndex(row_to_update); | 361 AddRowWordsToIndex(row_to_update, languages); |
332 } | 362 } |
333 row_was_updated = true; | 363 row_was_updated = true; |
334 } | 364 } |
335 } else { | 365 } else { |
336 // This indexed row no longer qualifies and will be de-indexed by | 366 // This indexed row no longer qualifies and will be de-indexed by |
337 // clearing all words associated with this row. | 367 // clearing all words associated with this row. |
338 RemoveRowFromIndex(row); | 368 RemoveRowFromIndex(row); |
339 row_was_updated = true; | 369 row_was_updated = true; |
340 } | 370 } |
341 if (row_was_updated) | 371 if (row_was_updated) |
(...skipping 20 matching lines...) Expand all Loading... |
362 history_info_map_.begin(), | 392 history_info_map_.begin(), |
363 history_info_map_.end(), | 393 history_info_map_.end(), |
364 HistoryInfoMapItemHasURL(url)); | 394 HistoryInfoMapItemHasURL(url)); |
365 if (pos == history_info_map_.end()) | 395 if (pos == history_info_map_.end()) |
366 return false; | 396 return false; |
367 RemoveRowFromIndex(pos->second); | 397 RemoveRowFromIndex(pos->second); |
368 search_term_cache_.clear(); // This invalidates the cache. | 398 search_term_cache_.clear(); // This invalidates the cache. |
369 return true; | 399 return true; |
370 } | 400 } |
371 | 401 |
372 bool URLIndexPrivateData::URLSchemeIsWhitelisted(const GURL& gurl) const { | |
373 return scheme_whitelist_.find(gurl.scheme()) != scheme_whitelist_.end(); | |
374 } | |
375 | |
376 // URLIndexPrivateData::HistoryItemFactorGreater ------------------------------- | 402 // URLIndexPrivateData::HistoryItemFactorGreater ------------------------------- |
377 | 403 |
378 URLIndexPrivateData::HistoryItemFactorGreater::HistoryItemFactorGreater( | 404 URLIndexPrivateData::HistoryItemFactorGreater::HistoryItemFactorGreater( |
379 const HistoryInfoMap& history_info_map) | 405 const HistoryInfoMap& history_info_map) |
380 : history_info_map_(history_info_map) { | 406 : history_info_map_(history_info_map) { |
381 } | 407 } |
382 | 408 |
383 URLIndexPrivateData::HistoryItemFactorGreater::~HistoryItemFactorGreater() {} | 409 URLIndexPrivateData::HistoryItemFactorGreater::~HistoryItemFactorGreater() {} |
384 | 410 |
385 bool URLIndexPrivateData::HistoryItemFactorGreater::operator()( | 411 bool URLIndexPrivateData::HistoryItemFactorGreater::operator()( |
(...skipping 494 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
880 std::set_intersection(word_id_set.begin(), word_id_set.end(), | 906 std::set_intersection(word_id_set.begin(), word_id_set.end(), |
881 char_word_id_set.begin(), char_word_id_set.end(), | 907 char_word_id_set.begin(), char_word_id_set.end(), |
882 std::inserter(new_word_id_set, | 908 std::inserter(new_word_id_set, |
883 new_word_id_set.begin())); | 909 new_word_id_set.begin())); |
884 word_id_set.swap(new_word_id_set); | 910 word_id_set.swap(new_word_id_set); |
885 } | 911 } |
886 } | 912 } |
887 return word_id_set; | 913 return word_id_set; |
888 } | 914 } |
889 | 915 |
890 // static | |
891 void URLIndexPrivateData::InitializeSchemeWhitelist( | |
892 std::set<std::string>* whitelist) { | |
893 DCHECK(whitelist); | |
894 whitelist->insert(std::string(chrome::kAboutScheme)); | |
895 whitelist->insert(std::string(chrome::kChromeUIScheme)); | |
896 whitelist->insert(std::string(chrome::kFileScheme)); | |
897 whitelist->insert(std::string(chrome::kFtpScheme)); | |
898 whitelist->insert(std::string(chrome::kHttpScheme)); | |
899 whitelist->insert(std::string(chrome::kHttpsScheme)); | |
900 whitelist->insert(std::string(chrome::kMailToScheme)); | |
901 } | |
902 | |
903 // Cache Saving ---------------------------------------------------------------- | 916 // Cache Saving ---------------------------------------------------------------- |
904 | 917 |
| 918 // static |
| 919 void URLIndexPrivateData::WritePrivateDataToCacheFileTask( |
| 920 scoped_refptr<URLIndexPrivateData> private_data, |
| 921 const FilePath& file_path, |
| 922 scoped_refptr<RefCountedBool> succeeded) { |
| 923 DCHECK(private_data.get()); |
| 924 DCHECK(!file_path.empty()); |
| 925 succeeded->set_value(private_data->SaveToFile(file_path)); |
| 926 } |
| 927 |
905 bool URLIndexPrivateData::SaveToFile(const FilePath& file_path) { | 928 bool URLIndexPrivateData::SaveToFile(const FilePath& file_path) { |
906 // TODO(mrossetti): Move File IO to another thread. | |
907 base::ThreadRestrictions::ScopedAllowIO allow_io; | |
908 base::TimeTicks beginning_time = base::TimeTicks::Now(); | 929 base::TimeTicks beginning_time = base::TimeTicks::Now(); |
909 InMemoryURLIndexCacheItem index_cache; | 930 InMemoryURLIndexCacheItem index_cache; |
910 SavePrivateData(&index_cache); | 931 SavePrivateData(&index_cache); |
911 std::string data; | 932 std::string data; |
912 if (!index_cache.SerializeToString(&data)) { | 933 if (!index_cache.SerializeToString(&data)) { |
913 LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache."; | 934 LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache."; |
914 return false; | 935 return false; |
915 } | 936 } |
916 | 937 |
917 int size = data.size(); | 938 int size = data.size(); |
(...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1003 if (history_info_map_.empty()) | 1024 if (history_info_map_.empty()) |
1004 return; | 1025 return; |
1005 HistoryInfoMapItem* map_item = cache->mutable_history_info_map(); | 1026 HistoryInfoMapItem* map_item = cache->mutable_history_info_map(); |
1006 map_item->set_item_count(history_info_map_.size()); | 1027 map_item->set_item_count(history_info_map_.size()); |
1007 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); | 1028 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); |
1008 iter != history_info_map_.end(); ++iter) { | 1029 iter != history_info_map_.end(); ++iter) { |
1009 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry(); | 1030 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry(); |
1010 map_entry->set_history_id(iter->first); | 1031 map_entry->set_history_id(iter->first); |
1011 const URLRow& url_row(iter->second); | 1032 const URLRow& url_row(iter->second); |
1012 // Note: We only save information that contributes to the index so there | 1033 // Note: We only save information that contributes to the index so there |
1013 // is no need to save search_term_cache_ (not persistent), | 1034 // is no need to save search_term_cache_ (not persistent). |
1014 // languages_, etc. | |
1015 map_entry->set_visit_count(url_row.visit_count()); | 1035 map_entry->set_visit_count(url_row.visit_count()); |
1016 map_entry->set_typed_count(url_row.typed_count()); | 1036 map_entry->set_typed_count(url_row.typed_count()); |
1017 map_entry->set_last_visit(url_row.last_visit().ToInternalValue()); | 1037 map_entry->set_last_visit(url_row.last_visit().ToInternalValue()); |
1018 map_entry->set_url(url_row.url().spec()); | 1038 map_entry->set_url(url_row.url().spec()); |
1019 map_entry->set_title(UTF16ToUTF8(url_row.title())); | 1039 map_entry->set_title(UTF16ToUTF8(url_row.title())); |
1020 } | 1040 } |
1021 } | 1041 } |
1022 | 1042 |
1023 // Cache Restoring ------------------------------------------------------------- | 1043 // Cache Restoring ------------------------------------------------------------- |
1024 | 1044 |
1025 bool URLIndexPrivateData::RestoreFromFile(const FilePath& file_path) { | 1045 // static |
1026 // TODO(mrossetti): Figure out how to determine if the cache is up-to-date. | 1046 void URLIndexPrivateData::RestoreFromFileTask( |
1027 // That is: ensure that the database has not been modified since the cache | 1047 const FilePath& file_path, |
1028 // was last saved. DB file modification date is inadequate. There are no | 1048 scoped_refptr<URLIndexPrivateData> private_data) { |
1029 // SQLite table checksums automatically stored. | 1049 private_data = URLIndexPrivateData::RestoreFromFile(file_path); |
1030 Clear(); // Start with a clean slate. | 1050 } |
1031 | 1051 |
1032 // FIXME(mrossetti): Move File IO to another thread. | 1052 // static |
1033 base::ThreadRestrictions::ScopedAllowIO allow_io; | 1053 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RestoreFromFile( |
| 1054 const FilePath& file_path) { |
1034 base::TimeTicks beginning_time = base::TimeTicks::Now(); | 1055 base::TimeTicks beginning_time = base::TimeTicks::Now(); |
1035 if (!file_util::PathExists(file_path)) | 1056 if (!file_util::PathExists(file_path)) |
1036 return false; | 1057 return NULL; |
1037 std::string data; | 1058 std::string data; |
1038 // If there is no cache file then simply give up. This will cause us to | 1059 // If there is no cache file then simply give up. This will cause us to |
1039 // attempt to rebuild from the history database. | 1060 // attempt to rebuild from the history database. |
1040 if (!file_util::ReadFileToString(file_path, &data)) | 1061 if (!file_util::ReadFileToString(file_path, &data)) |
1041 return false; | 1062 return NULL; |
1042 | 1063 |
| 1064 scoped_refptr<URLIndexPrivateData> restored_data(new URLIndexPrivateData); |
1043 InMemoryURLIndexCacheItem index_cache; | 1065 InMemoryURLIndexCacheItem index_cache; |
1044 if (!index_cache.ParseFromArray(data.c_str(), data.size())) { | 1066 if (!index_cache.ParseFromArray(data.c_str(), data.size())) { |
1045 LOG(WARNING) << "Failed to parse InMemoryURLIndex cache data read from " | 1067 LOG(WARNING) << "Failed to parse URLIndexPrivateData cache data read from " |
1046 << file_path.value(); | 1068 << file_path.value(); |
1047 return false; | 1069 return restored_data; |
1048 } | 1070 } |
1049 | 1071 |
1050 if (!RestorePrivateData(index_cache)) { | 1072 if (!restored_data->RestorePrivateData(index_cache)) |
1051 Clear(); // Back to square one -- must build from scratch. | 1073 return NULL; |
1052 return false; | |
1053 } | |
1054 | 1074 |
1055 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime", | 1075 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime", |
1056 base::TimeTicks::Now() - beginning_time); | 1076 base::TimeTicks::Now() - beginning_time); |
1057 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", | 1077 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", |
1058 history_id_word_map_.size()); | 1078 restored_data->history_id_word_map_.size()); |
1059 UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size()); | 1079 UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size()); |
1060 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", word_map_.size()); | 1080 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", |
1061 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", char_word_map_.size()); | 1081 restored_data->word_map_.size()); |
1062 return true; | 1082 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", |
| 1083 restored_data->char_word_map_.size()); |
| 1084 if (restored_data->Empty()) |
| 1085 return NULL; // 'No data' is the same as a failed reload. |
| 1086 return restored_data; |
1063 } | 1087 } |
1064 | 1088 |
1065 // static | 1089 // static |
1066 URLIndexPrivateData* URLIndexPrivateData::RebuildFromHistory( | 1090 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RebuildFromHistory( |
1067 HistoryDatabase* history_db) { | 1091 HistoryDatabase* history_db, |
| 1092 const std::string& languages, |
| 1093 const std::set<std::string>& scheme_whitelist) { |
1068 if (!history_db) | 1094 if (!history_db) |
1069 return NULL; | 1095 return NULL; |
1070 | 1096 |
1071 base::TimeTicks beginning_time = base::TimeTicks::Now(); | 1097 base::TimeTicks beginning_time = base::TimeTicks::Now(); |
1072 | 1098 |
1073 scoped_ptr<URLIndexPrivateData> rebuilt_data(new URLIndexPrivateData); | 1099 scoped_refptr<URLIndexPrivateData> rebuilt_data(new URLIndexPrivateData); |
1074 URLDatabase::URLEnumerator history_enum; | 1100 URLDatabase::URLEnumerator history_enum; |
1075 if (!history_db->InitURLEnumeratorForSignificant(&history_enum)) | 1101 if (!history_db->InitURLEnumeratorForSignificant(&history_enum)) |
1076 return NULL; | 1102 return NULL; |
1077 for (URLRow row; history_enum.GetNextURL(&row); ) | 1103 for (URLRow row; history_enum.GetNextURL(&row); ) |
1078 rebuilt_data->IndexRow(row); | 1104 rebuilt_data->IndexRow(row, languages, scheme_whitelist); |
1079 | 1105 |
1080 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime", | 1106 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime", |
1081 base::TimeTicks::Now() - beginning_time); | 1107 base::TimeTicks::Now() - beginning_time); |
1082 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", | 1108 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", |
1083 rebuilt_data->history_id_word_map_.size()); | 1109 rebuilt_data->history_id_word_map_.size()); |
1084 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", | 1110 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", |
1085 rebuilt_data->word_map_.size()); | 1111 rebuilt_data->word_map_.size()); |
1086 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", | 1112 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", |
1087 rebuilt_data->char_word_map_.size()); | 1113 rebuilt_data->char_word_map_.size()); |
1088 return rebuilt_data.release(); | 1114 return rebuilt_data; |
1089 } | 1115 } |
1090 | 1116 |
1091 bool URLIndexPrivateData::RestorePrivateData( | 1117 bool URLIndexPrivateData::RestorePrivateData( |
1092 const InMemoryURLIndexCacheItem& cache) { | 1118 const InMemoryURLIndexCacheItem& cache) { |
1093 return RestoreWordList(cache) && RestoreWordMap(cache) && | 1119 return RestoreWordList(cache) && RestoreWordMap(cache) && |
1094 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) && | 1120 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) && |
1095 RestoreHistoryInfoMap(cache); | 1121 RestoreHistoryInfoMap(cache); |
1096 } | 1122 } |
1097 | 1123 |
1098 bool URLIndexPrivateData::RestoreWordList( | 1124 bool URLIndexPrivateData::RestoreWordList( |
(...skipping 107 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1206 url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit())); | 1232 url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit())); |
1207 if (iter->has_title()) { | 1233 if (iter->has_title()) { |
1208 string16 title(UTF8ToUTF16(iter->title())); | 1234 string16 title(UTF8ToUTF16(iter->title())); |
1209 url_row.set_title(title); | 1235 url_row.set_title(title); |
1210 } | 1236 } |
1211 history_info_map_[history_id] = url_row; | 1237 history_info_map_[history_id] = url_row; |
1212 } | 1238 } |
1213 return true; | 1239 return true; |
1214 } | 1240 } |
1215 | 1241 |
| 1242 // static |
| 1243 bool URLIndexPrivateData::URLSchemeIsWhitelisted( |
| 1244 const GURL& gurl, |
| 1245 const std::set<std::string>& whitelist) { |
| 1246 return whitelist.find(gurl.scheme()) != whitelist.end(); |
| 1247 } |
| 1248 |
1216 } // namespace history | 1249 } // namespace history |
OLD | NEW |