OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/history/url_index_private_data.h" | 5 #include "chrome/browser/history/url_index_private_data.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <functional> | 8 #include <functional> |
9 #include <iterator> | 9 #include <iterator> |
10 #include <limits> | 10 #include <limits> |
11 #include <numeric> | 11 #include <numeric> |
12 | 12 |
13 #include "base/file_util.h" | 13 #include "base/file_util.h" |
14 #include "base/i18n/case_conversion.h" | 14 #include "base/i18n/case_conversion.h" |
15 #include "base/metrics/histogram.h" | 15 #include "base/metrics/histogram.h" |
16 #include "base/string_util.h" | 16 #include "base/string_util.h" |
17 #include "base/threading/thread_restrictions.h" | 17 #include "base/time.h" |
18 #include "base/utf_string_conversions.h" | 18 #include "base/utf_string_conversions.h" |
19 #include "chrome/browser/autocomplete/autocomplete.h" | 19 #include "chrome/browser/autocomplete/autocomplete.h" |
20 #include "chrome/browser/history/history_database.h" | 20 #include "chrome/browser/history/history_database.h" |
21 #include "chrome/common/url_constants.h" | 21 #include "chrome/browser/history/in_memory_url_index.h" |
| 22 #include "content/public/browser/notification_details.h" |
| 23 #include "content/public/browser/notification_service.h" |
| 24 #include "content/public/browser/notification_source.h" |
22 #include "net/base/net_util.h" | 25 #include "net/base/net_util.h" |
23 #include "third_party/protobuf/src/google/protobuf/repeated_field.h" | 26 #include "third_party/protobuf/src/google/protobuf/repeated_field.h" |
24 | 27 |
25 using google::protobuf::RepeatedField; | 28 using google::protobuf::RepeatedField; |
26 using google::protobuf::RepeatedPtrField; | 29 using google::protobuf::RepeatedPtrField; |
27 using in_memory_url_index::InMemoryURLIndexCacheItem; | 30 using in_memory_url_index::InMemoryURLIndexCacheItem; |
28 | 31 |
29 namespace history { | 32 namespace history { |
30 | 33 |
31 typedef imui::InMemoryURLIndexCacheItem_WordListItem WordListItem; | 34 typedef imui::InMemoryURLIndexCacheItem_WordListItem WordListItem; |
(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
116 } | 119 } |
117 | 120 |
118 // InMemoryURLIndex's Private Data --------------------------------------------- | 121 // InMemoryURLIndex's Private Data --------------------------------------------- |
119 | 122 |
120 URLIndexPrivateData::URLIndexPrivateData() | 123 URLIndexPrivateData::URLIndexPrivateData() |
121 : restored_cache_version_(0), | 124 : restored_cache_version_(0), |
122 saved_cache_version_(kCurrentCacheFileVersion), | 125 saved_cache_version_(kCurrentCacheFileVersion), |
123 pre_filter_item_count_(0), | 126 pre_filter_item_count_(0), |
124 post_filter_item_count_(0), | 127 post_filter_item_count_(0), |
125 post_scoring_item_count_(0) { | 128 post_scoring_item_count_(0) { |
126 URLIndexPrivateData::InitializeSchemeWhitelist(&scheme_whitelist_); | |
127 } | 129 } |
128 | 130 |
129 URLIndexPrivateData::~URLIndexPrivateData() {} | 131 URLIndexPrivateData::~URLIndexPrivateData() {} |
130 | 132 |
131 void URLIndexPrivateData::Clear() { | 133 void URLIndexPrivateData::Clear() { |
132 word_list_.clear(); | 134 word_list_.clear(); |
133 available_words_.clear(); | 135 available_words_.clear(); |
134 word_map_.clear(); | 136 word_map_.clear(); |
135 char_word_map_.clear(); | 137 char_word_map_.clear(); |
136 word_id_history_map_.clear(); | 138 word_id_history_map_.clear(); |
137 history_id_word_map_.clear(); | 139 history_id_word_map_.clear(); |
138 history_info_map_.clear(); | 140 history_info_map_.clear(); |
139 word_starts_map_.clear(); | 141 word_starts_map_.clear(); |
140 } | 142 } |
141 | 143 |
| 144 bool URLIndexPrivateData::Empty() const { |
| 145 return history_info_map_.empty(); |
| 146 } |
| 147 |
| 148 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::Duplicate() const { |
| 149 scoped_refptr<URLIndexPrivateData> data_copy = new URLIndexPrivateData; |
| 150 data_copy->word_list_ = word_list_; |
| 151 data_copy->available_words_ = available_words_; |
| 152 data_copy->word_map_ = word_map_; |
| 153 data_copy->char_word_map_ = char_word_map_; |
| 154 data_copy->word_id_history_map_ = word_id_history_map_; |
| 155 data_copy->history_id_word_map_ = history_id_word_map_; |
| 156 data_copy->history_info_map_ = history_info_map_; |
| 157 return data_copy; |
| 158 // Not copied: |
| 159 // search_term_cache_ |
| 160 // pre_filter_item_count_ |
| 161 // post_filter_item_count_ |
| 162 // post_scoring_item_count_ |
| 163 }; |
| 164 |
142 // Cache Updating -------------------------------------------------------------- | 165 // Cache Updating -------------------------------------------------------------- |
143 | 166 |
144 bool URLIndexPrivateData::IndexRow(const URLRow& row) { | 167 bool URLIndexPrivateData::IndexRow( |
| 168 const URLRow& row, |
| 169 const std::string& languages, |
| 170 const std::set<std::string>& scheme_whitelist) { |
145 const GURL& gurl(row.url()); | 171 const GURL& gurl(row.url()); |
146 | 172 |
147 // Index only URLs with a whitelisted scheme. | 173 // Index only URLs with a whitelisted scheme. |
148 if (!URLIndexPrivateData::URLSchemeIsWhitelisted(gurl)) | 174 if (!URLSchemeIsWhitelisted(gurl, scheme_whitelist)) |
149 return false; | 175 return false; |
150 | 176 |
151 URLID row_id = row.id(); | 177 URLID row_id = row.id(); |
152 // Strip out username and password before saving and indexing. | 178 // Strip out username and password before saving and indexing. |
153 string16 url(net::FormatUrl(gurl, languages_, | 179 string16 url(net::FormatUrl(gurl, languages, |
154 net::kFormatUrlOmitUsernamePassword, | 180 net::kFormatUrlOmitUsernamePassword, |
155 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS, | 181 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS, |
156 NULL, NULL, NULL)); | 182 NULL, NULL, NULL)); |
157 | 183 |
158 HistoryID history_id = static_cast<HistoryID>(row_id); | 184 HistoryID history_id = static_cast<HistoryID>(row_id); |
159 DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max()); | 185 DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max()); |
160 | 186 |
161 // Add the row for quick lookup in the history info store. | 187 // Add the row for quick lookup in the history info store. |
162 URLRow new_row(GURL(url), row_id); | 188 URLRow new_row(GURL(url), row_id); |
163 new_row.set_visit_count(row.visit_count()); | 189 new_row.set_visit_count(row.visit_count()); |
164 new_row.set_typed_count(row.typed_count()); | 190 new_row.set_typed_count(row.typed_count()); |
165 new_row.set_last_visit(row.last_visit()); | 191 new_row.set_last_visit(row.last_visit()); |
166 new_row.set_title(row.title()); | 192 new_row.set_title(row.title()); |
167 history_info_map_[history_id] = new_row; | 193 history_info_map_[history_id] = new_row; |
168 | 194 |
169 // Index the words contained in the URL and title of the row. | 195 // Index the words contained in the URL and title of the row. |
170 RowWordStarts word_starts; | 196 RowWordStarts word_starts; |
171 AddRowWordsToIndex(new_row, &word_starts); | 197 AddRowWordsToIndex(new_row, &word_starts, languages); |
172 word_starts_map_[history_id] = word_starts; | 198 word_starts_map_[history_id] = word_starts; |
173 return true; | 199 return true; |
174 } | 200 } |
175 | 201 |
176 void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row, | 202 void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row, |
177 RowWordStarts* word_starts) { | 203 RowWordStarts* word_starts, |
| 204 const std::string& languages) { |
178 HistoryID history_id = static_cast<HistoryID>(row.id()); | 205 HistoryID history_id = static_cast<HistoryID>(row.id()); |
179 // Split URL into individual, unique words then add in the title words. | 206 // Split URL into individual, unique words then add in the title words. |
180 const GURL& gurl(row.url()); | 207 const GURL& gurl(row.url()); |
181 string16 url(net::FormatUrl(gurl, languages_, | 208 string16 url(net::FormatUrl(gurl, languages, |
182 net::kFormatUrlOmitUsernamePassword, | 209 net::kFormatUrlOmitUsernamePassword, |
183 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS, | 210 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS, |
184 NULL, NULL, NULL)); | 211 NULL, NULL, NULL)); |
185 url = base::i18n::ToLower(url); | 212 url = base::i18n::ToLower(url); |
186 String16Set url_words = String16SetFromString16(url, | 213 String16Set url_words = String16SetFromString16(url, |
187 word_starts ? &word_starts->url_word_starts_ : NULL); | 214 word_starts ? &word_starts->url_word_starts_ : NULL); |
188 String16Set title_words = String16SetFromString16(row.title(), | 215 String16Set title_words = String16SetFromString16(row.title(), |
189 word_starts ? &word_starts->title_word_starts_ : NULL); | 216 word_starts ? &word_starts->title_word_starts_ : NULL); |
190 String16Set words; | 217 String16Set words; |
191 std::set_union(url_words.begin(), url_words.end(), | 218 std::set_union(url_words.begin(), url_words.end(), |
(...skipping 108 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
300 if (iter != history_id_word_map_.end()) { | 327 if (iter != history_id_word_map_.end()) { |
301 WordIDSet& word_id_set(iter->second); | 328 WordIDSet& word_id_set(iter->second); |
302 word_id_set.insert(word_id); | 329 word_id_set.insert(word_id); |
303 } else { | 330 } else { |
304 WordIDSet word_id_set; | 331 WordIDSet word_id_set; |
305 word_id_set.insert(word_id); | 332 word_id_set.insert(word_id); |
306 history_id_word_map_[history_id] = word_id_set; | 333 history_id_word_map_[history_id] = word_id_set; |
307 } | 334 } |
308 } | 335 } |
309 | 336 |
310 bool URLIndexPrivateData::UpdateURL(const URLRow& row) { | 337 bool URLIndexPrivateData::UpdateURL( |
| 338 const URLRow& row, |
| 339 const std::string& languages, |
| 340 const std::set<std::string>& scheme_whitelist) { |
311 // The row may or may not already be in our index. If it is not already | 341 // The row may or may not already be in our index. If it is not already |
312 // indexed and it qualifies then it gets indexed. If it is already | 342 // indexed and it qualifies then it gets indexed. If it is already |
313 // indexed and still qualifies then it gets updated, otherwise it | 343 // indexed and still qualifies then it gets updated, otherwise it |
314 // is deleted from the index. | 344 // is deleted from the index. |
315 bool row_was_updated = false; | 345 bool row_was_updated = false; |
316 URLID row_id = row.id(); | 346 URLID row_id = row.id(); |
317 HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id); | 347 HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id); |
318 if (row_pos == history_info_map_.end()) { | 348 if (row_pos == history_info_map_.end()) { |
319 // This new row should be indexed if it qualifies. | 349 // This new row should be indexed if it qualifies. |
320 URLRow new_row(row); | 350 URLRow new_row(row); |
321 new_row.set_id(row_id); | 351 new_row.set_id(row_id); |
322 row_was_updated = | 352 row_was_updated = RowQualifiesAsSignificant(new_row, base::Time()) && |
323 RowQualifiesAsSignificant(new_row, base::Time()) && IndexRow(new_row); | 353 IndexRow(new_row, languages, scheme_whitelist); |
324 } else if (RowQualifiesAsSignificant(row, base::Time())) { | 354 } else if (RowQualifiesAsSignificant(row, base::Time())) { |
325 // This indexed row still qualifies and will be re-indexed. | 355 // This indexed row still qualifies and will be re-indexed. |
326 // The url won't have changed but the title, visit count, etc. | 356 // The url won't have changed but the title, visit count, etc. |
327 // might have changed. | 357 // might have changed. |
328 URLRow& row_to_update = row_pos->second; | 358 URLRow& row_to_update = row_pos->second; |
329 bool title_updated = row_to_update.title() != row.title(); | 359 bool title_updated = row_to_update.title() != row.title(); |
330 if (row_to_update.visit_count() != row.visit_count() || | 360 if (row_to_update.visit_count() != row.visit_count() || |
331 row_to_update.typed_count() != row.typed_count() || | 361 row_to_update.typed_count() != row.typed_count() || |
332 row_to_update.last_visit() != row.last_visit() || title_updated) { | 362 row_to_update.last_visit() != row.last_visit() || title_updated) { |
333 row_to_update.set_visit_count(row.visit_count()); | 363 row_to_update.set_visit_count(row.visit_count()); |
334 row_to_update.set_typed_count(row.typed_count()); | 364 row_to_update.set_typed_count(row.typed_count()); |
335 row_to_update.set_last_visit(row.last_visit()); | 365 row_to_update.set_last_visit(row.last_visit()); |
336 // While the URL is guaranteed to remain stable, the title may have | 366 // While the URL is guaranteed to remain stable, the title may have |
337 // changed. If so, then update the index with the changed words. | 367 // changed. If so, then update the index with the changed words. |
338 if (title_updated) { | 368 if (title_updated) { |
339 // Clear all words associated with this row and re-index both the | 369 // Clear all words associated with this row and re-index both the |
340 // URL and title. | 370 // URL and title. |
341 RemoveRowWordsFromIndex(row_to_update); | 371 RemoveRowWordsFromIndex(row_to_update); |
342 row_to_update.set_title(row.title()); | 372 row_to_update.set_title(row.title()); |
343 RowWordStarts word_starts; | 373 RowWordStarts word_starts; |
344 AddRowWordsToIndex(row_to_update, &word_starts); | 374 AddRowWordsToIndex(row_to_update, &word_starts, languages); |
345 word_starts_map_[row_id] = word_starts; | 375 word_starts_map_[row_id] = word_starts; |
346 } | 376 } |
347 row_was_updated = true; | 377 row_was_updated = true; |
348 } | 378 } |
349 } else { | 379 } else { |
350 // This indexed row no longer qualifies and will be de-indexed by | 380 // This indexed row no longer qualifies and will be de-indexed by |
351 // clearing all words associated with this row. | 381 // clearing all words associated with this row. |
352 RemoveRowFromIndex(row); | 382 RemoveRowFromIndex(row); |
353 row_was_updated = true; | 383 row_was_updated = true; |
354 } | 384 } |
(...skipping 21 matching lines...) Expand all Loading... |
376 history_info_map_.begin(), | 406 history_info_map_.begin(), |
377 history_info_map_.end(), | 407 history_info_map_.end(), |
378 HistoryInfoMapItemHasURL(url)); | 408 HistoryInfoMapItemHasURL(url)); |
379 if (pos == history_info_map_.end()) | 409 if (pos == history_info_map_.end()) |
380 return false; | 410 return false; |
381 RemoveRowFromIndex(pos->second); | 411 RemoveRowFromIndex(pos->second); |
382 search_term_cache_.clear(); // This invalidates the cache. | 412 search_term_cache_.clear(); // This invalidates the cache. |
383 return true; | 413 return true; |
384 } | 414 } |
385 | 415 |
386 bool URLIndexPrivateData::URLSchemeIsWhitelisted(const GURL& gurl) const { | |
387 return scheme_whitelist_.find(gurl.scheme()) != scheme_whitelist_.end(); | |
388 } | |
389 | |
390 // URLIndexPrivateData::HistoryItemFactorGreater ------------------------------- | 416 // URLIndexPrivateData::HistoryItemFactorGreater ------------------------------- |
391 | 417 |
392 URLIndexPrivateData::HistoryItemFactorGreater::HistoryItemFactorGreater( | 418 URLIndexPrivateData::HistoryItemFactorGreater::HistoryItemFactorGreater( |
393 const HistoryInfoMap& history_info_map) | 419 const HistoryInfoMap& history_info_map) |
394 : history_info_map_(history_info_map) { | 420 : history_info_map_(history_info_map) { |
395 } | 421 } |
396 | 422 |
397 URLIndexPrivateData::HistoryItemFactorGreater::~HistoryItemFactorGreater() {} | 423 URLIndexPrivateData::HistoryItemFactorGreater::~HistoryItemFactorGreater() {} |
398 | 424 |
399 bool URLIndexPrivateData::HistoryItemFactorGreater::operator()( | 425 bool URLIndexPrivateData::HistoryItemFactorGreater::operator()( |
(...skipping 498 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
898 std::set_intersection(word_id_set.begin(), word_id_set.end(), | 924 std::set_intersection(word_id_set.begin(), word_id_set.end(), |
899 char_word_id_set.begin(), char_word_id_set.end(), | 925 char_word_id_set.begin(), char_word_id_set.end(), |
900 std::inserter(new_word_id_set, | 926 std::inserter(new_word_id_set, |
901 new_word_id_set.begin())); | 927 new_word_id_set.begin())); |
902 word_id_set.swap(new_word_id_set); | 928 word_id_set.swap(new_word_id_set); |
903 } | 929 } |
904 } | 930 } |
905 return word_id_set; | 931 return word_id_set; |
906 } | 932 } |
907 | 933 |
908 // static | |
909 void URLIndexPrivateData::InitializeSchemeWhitelist( | |
910 std::set<std::string>* whitelist) { | |
911 DCHECK(whitelist); | |
912 whitelist->insert(std::string(chrome::kAboutScheme)); | |
913 whitelist->insert(std::string(chrome::kChromeUIScheme)); | |
914 whitelist->insert(std::string(chrome::kFileScheme)); | |
915 whitelist->insert(std::string(chrome::kFtpScheme)); | |
916 whitelist->insert(std::string(chrome::kHttpScheme)); | |
917 whitelist->insert(std::string(chrome::kHttpsScheme)); | |
918 whitelist->insert(std::string(chrome::kMailToScheme)); | |
919 } | |
920 | |
921 // Cache Saving ---------------------------------------------------------------- | 934 // Cache Saving ---------------------------------------------------------------- |
922 | 935 |
| 936 // static |
| 937 void URLIndexPrivateData::WritePrivateDataToCacheFileTask( |
| 938 scoped_refptr<URLIndexPrivateData> private_data, |
| 939 const FilePath& file_path, |
| 940 scoped_refptr<RefCountedBool> succeeded) { |
| 941 DCHECK(private_data.get()); |
| 942 DCHECK(!file_path.empty()); |
| 943 succeeded->set_value(private_data->SaveToFile(file_path)); |
| 944 } |
| 945 |
923 bool URLIndexPrivateData::SaveToFile(const FilePath& file_path) { | 946 bool URLIndexPrivateData::SaveToFile(const FilePath& file_path) { |
924 // TODO(mrossetti): Move File IO to another thread. | |
925 base::ThreadRestrictions::ScopedAllowIO allow_io; | |
926 base::TimeTicks beginning_time = base::TimeTicks::Now(); | 947 base::TimeTicks beginning_time = base::TimeTicks::Now(); |
927 InMemoryURLIndexCacheItem index_cache; | 948 InMemoryURLIndexCacheItem index_cache; |
928 SavePrivateData(&index_cache); | 949 SavePrivateData(&index_cache); |
929 std::string data; | 950 std::string data; |
930 if (!index_cache.SerializeToString(&data)) { | 951 if (!index_cache.SerializeToString(&data)) { |
931 LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache."; | 952 LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache."; |
932 return false; | 953 return false; |
933 } | 954 } |
934 | 955 |
935 int size = data.size(); | 956 int size = data.size(); |
(...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1023 if (history_info_map_.empty()) | 1044 if (history_info_map_.empty()) |
1024 return; | 1045 return; |
1025 HistoryInfoMapItem* map_item = cache->mutable_history_info_map(); | 1046 HistoryInfoMapItem* map_item = cache->mutable_history_info_map(); |
1026 map_item->set_item_count(history_info_map_.size()); | 1047 map_item->set_item_count(history_info_map_.size()); |
1027 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); | 1048 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); |
1028 iter != history_info_map_.end(); ++iter) { | 1049 iter != history_info_map_.end(); ++iter) { |
1029 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry(); | 1050 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry(); |
1030 map_entry->set_history_id(iter->first); | 1051 map_entry->set_history_id(iter->first); |
1031 const URLRow& url_row(iter->second); | 1052 const URLRow& url_row(iter->second); |
1032 // Note: We only save information that contributes to the index so there | 1053 // Note: We only save information that contributes to the index so there |
1033 // is no need to save search_term_cache_ (not persistent), | 1054 // is no need to save search_term_cache_ (not persistent). |
1034 // languages_, etc. | |
1035 map_entry->set_visit_count(url_row.visit_count()); | 1055 map_entry->set_visit_count(url_row.visit_count()); |
1036 map_entry->set_typed_count(url_row.typed_count()); | 1056 map_entry->set_typed_count(url_row.typed_count()); |
1037 map_entry->set_last_visit(url_row.last_visit().ToInternalValue()); | 1057 map_entry->set_last_visit(url_row.last_visit().ToInternalValue()); |
1038 map_entry->set_url(url_row.url().spec()); | 1058 map_entry->set_url(url_row.url().spec()); |
1039 map_entry->set_title(UTF16ToUTF8(url_row.title())); | 1059 map_entry->set_title(UTF16ToUTF8(url_row.title())); |
1040 } | 1060 } |
1041 } | 1061 } |
1042 | 1062 |
1043 void URLIndexPrivateData::SaveWordStartsMap( | 1063 void URLIndexPrivateData::SaveWordStartsMap( |
1044 InMemoryURLIndexCacheItem* cache) const { | 1064 InMemoryURLIndexCacheItem* cache) const { |
(...skipping 18 matching lines...) Expand all Loading... |
1063 i != word_starts.url_word_starts_.end(); ++i) | 1083 i != word_starts.url_word_starts_.end(); ++i) |
1064 map_entry->add_url_word_starts(*i); | 1084 map_entry->add_url_word_starts(*i); |
1065 for (WordStarts::const_iterator i = word_starts.title_word_starts_.begin(); | 1085 for (WordStarts::const_iterator i = word_starts.title_word_starts_.begin(); |
1066 i != word_starts.title_word_starts_.end(); ++i) | 1086 i != word_starts.title_word_starts_.end(); ++i) |
1067 map_entry->add_title_word_starts(*i); | 1087 map_entry->add_title_word_starts(*i); |
1068 } | 1088 } |
1069 } | 1089 } |
1070 | 1090 |
1071 // Cache Restoring ------------------------------------------------------------- | 1091 // Cache Restoring ------------------------------------------------------------- |
1072 | 1092 |
1073 bool URLIndexPrivateData::RestoreFromFile(const FilePath& file_path) { | 1093 // static |
1074 // TODO(mrossetti): Figure out how to determine if the cache is up-to-date. | 1094 void URLIndexPrivateData::RestoreFromFileTask( |
1075 // That is: ensure that the database has not been modified since the cache | 1095 const FilePath& file_path, |
1076 // was last saved. DB file modification date is inadequate. There are no | 1096 scoped_refptr<URLIndexPrivateData> private_data, |
1077 // SQLite table checksums automatically stored. | 1097 std::string languages) { |
1078 Clear(); // Start with a clean slate. | 1098 private_data = URLIndexPrivateData::RestoreFromFile(file_path, languages); |
| 1099 } |
1079 | 1100 |
1080 // FIXME(mrossetti): Move File IO to another thread. | 1101 // static |
1081 base::ThreadRestrictions::ScopedAllowIO allow_io; | 1102 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RestoreFromFile( |
| 1103 const FilePath& file_path, |
| 1104 const std::string& languages) { |
1082 base::TimeTicks beginning_time = base::TimeTicks::Now(); | 1105 base::TimeTicks beginning_time = base::TimeTicks::Now(); |
1083 if (!file_util::PathExists(file_path)) | 1106 if (!file_util::PathExists(file_path)) |
1084 return false; | 1107 return NULL; |
1085 std::string data; | 1108 std::string data; |
1086 // If there is no cache file then simply give up. This will cause us to | 1109 // If there is no cache file then simply give up. This will cause us to |
1087 // attempt to rebuild from the history database. | 1110 // attempt to rebuild from the history database. |
1088 if (!file_util::ReadFileToString(file_path, &data)) | 1111 if (!file_util::ReadFileToString(file_path, &data)) |
1089 return false; | 1112 return NULL; |
1090 | 1113 |
| 1114 scoped_refptr<URLIndexPrivateData> restored_data(new URLIndexPrivateData); |
1091 InMemoryURLIndexCacheItem index_cache; | 1115 InMemoryURLIndexCacheItem index_cache; |
1092 if (!index_cache.ParseFromArray(data.c_str(), data.size())) { | 1116 if (!index_cache.ParseFromArray(data.c_str(), data.size())) { |
1093 LOG(WARNING) << "Failed to parse InMemoryURLIndex cache data read from " | 1117 LOG(WARNING) << "Failed to parse URLIndexPrivateData cache data read from " |
1094 << file_path.value(); | 1118 << file_path.value(); |
1095 return false; | 1119 return restored_data; |
1096 } | 1120 } |
1097 | 1121 |
1098 if (!RestorePrivateData(index_cache)) { | 1122 if (!restored_data->RestorePrivateData(index_cache, languages)) |
1099 Clear(); // Back to square one -- must build from scratch. | 1123 return NULL; |
1100 return false; | |
1101 } | |
1102 | 1124 |
1103 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime", | 1125 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime", |
1104 base::TimeTicks::Now() - beginning_time); | 1126 base::TimeTicks::Now() - beginning_time); |
1105 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", | 1127 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", |
1106 history_id_word_map_.size()); | 1128 restored_data->history_id_word_map_.size()); |
1107 UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size()); | 1129 UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size()); |
1108 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", word_map_.size()); | 1130 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", |
1109 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", char_word_map_.size()); | 1131 restored_data->word_map_.size()); |
1110 return true; | 1132 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", |
| 1133 restored_data->char_word_map_.size()); |
| 1134 if (restored_data->Empty()) |
| 1135 return NULL; // 'No data' is the same as a failed reload. |
| 1136 return restored_data; |
1111 } | 1137 } |
1112 | 1138 |
1113 // static | 1139 // static |
1114 URLIndexPrivateData* URLIndexPrivateData::RebuildFromHistory( | 1140 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RebuildFromHistory( |
1115 HistoryDatabase* history_db) { | 1141 HistoryDatabase* history_db, |
| 1142 const std::string& languages, |
| 1143 const std::set<std::string>& scheme_whitelist) { |
1116 if (!history_db) | 1144 if (!history_db) |
1117 return NULL; | 1145 return NULL; |
1118 | 1146 |
1119 base::TimeTicks beginning_time = base::TimeTicks::Now(); | 1147 base::TimeTicks beginning_time = base::TimeTicks::Now(); |
1120 | 1148 |
1121 scoped_ptr<URLIndexPrivateData> rebuilt_data(new URLIndexPrivateData); | 1149 scoped_refptr<URLIndexPrivateData> rebuilt_data(new URLIndexPrivateData); |
1122 URLDatabase::URLEnumerator history_enum; | 1150 URLDatabase::URLEnumerator history_enum; |
1123 if (!history_db->InitURLEnumeratorForSignificant(&history_enum)) | 1151 if (!history_db->InitURLEnumeratorForSignificant(&history_enum)) |
1124 return NULL; | 1152 return NULL; |
1125 for (URLRow row; history_enum.GetNextURL(&row); ) | 1153 for (URLRow row; history_enum.GetNextURL(&row); ) |
1126 rebuilt_data->IndexRow(row); | 1154 rebuilt_data->IndexRow(row, languages, scheme_whitelist); |
1127 | 1155 |
1128 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime", | 1156 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime", |
1129 base::TimeTicks::Now() - beginning_time); | 1157 base::TimeTicks::Now() - beginning_time); |
1130 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", | 1158 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", |
1131 rebuilt_data->history_id_word_map_.size()); | 1159 rebuilt_data->history_id_word_map_.size()); |
1132 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", | 1160 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", |
1133 rebuilt_data->word_map_.size()); | 1161 rebuilt_data->word_map_.size()); |
1134 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", | 1162 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", |
1135 rebuilt_data->char_word_map_.size()); | 1163 rebuilt_data->char_word_map_.size()); |
1136 return rebuilt_data.release(); | 1164 return rebuilt_data; |
1137 } | 1165 } |
1138 | 1166 |
1139 bool URLIndexPrivateData::RestorePrivateData( | 1167 bool URLIndexPrivateData::RestorePrivateData( |
1140 const InMemoryURLIndexCacheItem& cache) { | 1168 const InMemoryURLIndexCacheItem& cache, |
| 1169 const std::string& languages) { |
1141 if (cache.has_version()) | 1170 if (cache.has_version()) |
1142 restored_cache_version_ = cache.version(); | 1171 restored_cache_version_ = cache.version(); |
1143 return RestoreWordList(cache) && RestoreWordMap(cache) && | 1172 return RestoreWordList(cache) && RestoreWordMap(cache) && |
1144 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) && | 1173 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) && |
1145 RestoreHistoryInfoMap(cache) && RestoreWordStartsMap(cache); | 1174 RestoreHistoryInfoMap(cache) && RestoreWordStartsMap(cache, languages); |
1146 } | 1175 } |
1147 | 1176 |
1148 bool URLIndexPrivateData::RestoreWordList( | 1177 bool URLIndexPrivateData::RestoreWordList( |
1149 const InMemoryURLIndexCacheItem& cache) { | 1178 const InMemoryURLIndexCacheItem& cache) { |
1150 if (!cache.has_word_list()) | 1179 if (!cache.has_word_list()) |
1151 return false; | 1180 return false; |
1152 const WordListItem& list_item(cache.word_list()); | 1181 const WordListItem& list_item(cache.word_list()); |
1153 uint32 expected_item_count = list_item.word_count(); | 1182 uint32 expected_item_count = list_item.word_count(); |
1154 uint32 actual_item_count = list_item.word_size(); | 1183 uint32 actual_item_count = list_item.word_size(); |
1155 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1184 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
(...skipping 101 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1257 if (iter->has_title()) { | 1286 if (iter->has_title()) { |
1258 string16 title(UTF8ToUTF16(iter->title())); | 1287 string16 title(UTF8ToUTF16(iter->title())); |
1259 url_row.set_title(title); | 1288 url_row.set_title(title); |
1260 } | 1289 } |
1261 history_info_map_[history_id] = url_row; | 1290 history_info_map_[history_id] = url_row; |
1262 } | 1291 } |
1263 return true; | 1292 return true; |
1264 } | 1293 } |
1265 | 1294 |
1266 bool URLIndexPrivateData::RestoreWordStartsMap( | 1295 bool URLIndexPrivateData::RestoreWordStartsMap( |
1267 const InMemoryURLIndexCacheItem& cache) { | 1296 const InMemoryURLIndexCacheItem& cache, |
| 1297 const std::string& languages) { |
1268 // Note that this function must be called after RestoreHistoryInfoMap() has | 1298 // Note that this function must be called after RestoreHistoryInfoMap() has |
1269 // been run as the word starts may have to be recalculated from the urls and | 1299 // been run as the word starts may have to be recalculated from the urls and |
1270 // page titles. | 1300 // page titles. |
1271 if (cache.has_word_starts_map()) { | 1301 if (cache.has_word_starts_map()) { |
1272 const WordStartsMapItem& list_item(cache.word_starts_map()); | 1302 const WordStartsMapItem& list_item(cache.word_starts_map()); |
1273 uint32 expected_item_count = list_item.item_count(); | 1303 uint32 expected_item_count = list_item.item_count(); |
1274 uint32 actual_item_count = list_item.word_starts_map_entry_size(); | 1304 uint32 actual_item_count = list_item.word_starts_map_entry_size(); |
1275 if (actual_item_count == 0 || actual_item_count != expected_item_count) | 1305 if (actual_item_count == 0 || actual_item_count != expected_item_count) |
1276 return false; | 1306 return false; |
1277 const RepeatedPtrField<WordStartsMapEntry>& | 1307 const RepeatedPtrField<WordStartsMapEntry>& |
(...skipping 14 matching lines...) Expand all Loading... |
1292 word_starts.title_word_starts_.push_back(*jiter); | 1322 word_starts.title_word_starts_.push_back(*jiter); |
1293 word_starts_map_[history_id] = word_starts; | 1323 word_starts_map_[history_id] = word_starts; |
1294 } | 1324 } |
1295 } else { | 1325 } else { |
1296 // Since the cache did not contain any word starts we must rebuild them from | 1326 // Since the cache did not contain any word starts we must rebuild them from |
1297 // the URL and page titles. | 1327 // the URL and page titles. |
1298 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); | 1328 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); |
1299 iter != history_info_map_.end(); ++iter) { | 1329 iter != history_info_map_.end(); ++iter) { |
1300 RowWordStarts word_starts; | 1330 RowWordStarts word_starts; |
1301 const URLRow& row(iter->second); | 1331 const URLRow& row(iter->second); |
1302 string16 url(net::FormatUrl(row.url(), languages_, | 1332 string16 url(net::FormatUrl(row.url(), languages, |
1303 net::kFormatUrlOmitUsernamePassword, | 1333 net::kFormatUrlOmitUsernamePassword, |
1304 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS, | 1334 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS, |
1305 NULL, NULL, NULL)); | 1335 NULL, NULL, NULL)); |
1306 url = base::i18n::ToLower(url); | 1336 url = base::i18n::ToLower(url); |
1307 String16VectorFromString16(url, false, &word_starts.url_word_starts_); | 1337 String16VectorFromString16(url, false, &word_starts.url_word_starts_); |
1308 String16VectorFromString16( | 1338 String16VectorFromString16( |
1309 row.title(), false, &word_starts.title_word_starts_); | 1339 row.title(), false, &word_starts.title_word_starts_); |
1310 word_starts_map_[iter->first] = word_starts; | 1340 word_starts_map_[iter->first] = word_starts; |
1311 } | 1341 } |
1312 } | 1342 } |
1313 return true; | 1343 return true; |
1314 } | 1344 } |
1315 | 1345 |
| 1346 // static |
| 1347 bool URLIndexPrivateData::URLSchemeIsWhitelisted( |
| 1348 const GURL& gurl, |
| 1349 const std::set<std::string>& whitelist) { |
| 1350 return whitelist.find(gurl.scheme()) != whitelist.end(); |
| 1351 } |
| 1352 |
1316 } // namespace history | 1353 } // namespace history |
OLD | NEW |