Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(98)

Side by Side Diff: chrome/browser/history/url_index_private_data.cc

Issue 9030031: Move InMemoryURLIndex Caching Operations to FILE Thread (Closed) | Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: Syncing with hopes of pleasing trybot update | Created 8 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « chrome/browser/history/url_index_private_data.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/history/url_index_private_data.h" 5 #include "chrome/browser/history/url_index_private_data.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <functional> 8 #include <functional>
9 #include <iterator> 9 #include <iterator>
10 #include <limits> 10 #include <limits>
11 #include <numeric> 11 #include <numeric>
12 12
13 #include "base/file_util.h" 13 #include "base/file_util.h"
14 #include "base/i18n/case_conversion.h" 14 #include "base/i18n/case_conversion.h"
15 #include "base/metrics/histogram.h" 15 #include "base/metrics/histogram.h"
16 #include "base/string_util.h" 16 #include "base/string_util.h"
17 #include "base/threading/thread_restrictions.h" 17 #include "base/time.h"
18 #include "base/utf_string_conversions.h" 18 #include "base/utf_string_conversions.h"
19 #include "chrome/browser/autocomplete/autocomplete.h" 19 #include "chrome/browser/autocomplete/autocomplete.h"
20 #include "chrome/browser/history/history_database.h" 20 #include "chrome/browser/history/history_database.h"
21 #include "chrome/common/url_constants.h" 21 #include "chrome/browser/history/in_memory_url_index.h"
22 #include "content/public/browser/notification_details.h"
23 #include "content/public/browser/notification_service.h"
24 #include "content/public/browser/notification_source.h"
22 #include "net/base/net_util.h" 25 #include "net/base/net_util.h"
23 #include "third_party/protobuf/src/google/protobuf/repeated_field.h" 26 #include "third_party/protobuf/src/google/protobuf/repeated_field.h"
24 27
25 using google::protobuf::RepeatedField; 28 using google::protobuf::RepeatedField;
26 using google::protobuf::RepeatedPtrField; 29 using google::protobuf::RepeatedPtrField;
27 using in_memory_url_index::InMemoryURLIndexCacheItem; 30 using in_memory_url_index::InMemoryURLIndexCacheItem;
28 31
29 namespace history { 32 namespace history {
30 33
31 typedef imui::InMemoryURLIndexCacheItem_WordListItem WordListItem; 34 typedef imui::InMemoryURLIndexCacheItem_WordListItem WordListItem;
(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after
116 } 119 }
117 120
118 // InMemoryURLIndex's Private Data --------------------------------------------- 121 // InMemoryURLIndex's Private Data ---------------------------------------------
119 122
120 URLIndexPrivateData::URLIndexPrivateData() 123 URLIndexPrivateData::URLIndexPrivateData()
121 : restored_cache_version_(0), 124 : restored_cache_version_(0),
122 saved_cache_version_(kCurrentCacheFileVersion), 125 saved_cache_version_(kCurrentCacheFileVersion),
123 pre_filter_item_count_(0), 126 pre_filter_item_count_(0),
124 post_filter_item_count_(0), 127 post_filter_item_count_(0),
125 post_scoring_item_count_(0) { 128 post_scoring_item_count_(0) {
126 URLIndexPrivateData::InitializeSchemeWhitelist(&scheme_whitelist_);
127 } 129 }
128 130
129 URLIndexPrivateData::~URLIndexPrivateData() {} 131 URLIndexPrivateData::~URLIndexPrivateData() {}
130 132
131 void URLIndexPrivateData::Clear() { 133 void URLIndexPrivateData::Clear() {
132 word_list_.clear(); 134 word_list_.clear();
133 available_words_.clear(); 135 available_words_.clear();
134 word_map_.clear(); 136 word_map_.clear();
135 char_word_map_.clear(); 137 char_word_map_.clear();
136 word_id_history_map_.clear(); 138 word_id_history_map_.clear();
137 history_id_word_map_.clear(); 139 history_id_word_map_.clear();
138 history_info_map_.clear(); 140 history_info_map_.clear();
139 word_starts_map_.clear(); 141 word_starts_map_.clear();
140 } 142 }
141 143
144 bool URLIndexPrivateData::Empty() const {
145 return history_info_map_.empty();
146 }
147
148 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::Duplicate() const {
149 scoped_refptr<URLIndexPrivateData> data_copy = new URLIndexPrivateData;
150 data_copy->word_list_ = word_list_;
151 data_copy->available_words_ = available_words_;
152 data_copy->word_map_ = word_map_;
153 data_copy->char_word_map_ = char_word_map_;
154 data_copy->word_id_history_map_ = word_id_history_map_;
155 data_copy->history_id_word_map_ = history_id_word_map_;
156 data_copy->history_info_map_ = history_info_map_;
157 return data_copy;
158 // Not copied:
159 // search_term_cache_
160 // pre_filter_item_count_
161 // post_filter_item_count_
162 // post_scoring_item_count_
163 };
164
142 // Cache Updating -------------------------------------------------------------- 165 // Cache Updating --------------------------------------------------------------
143 166
144 bool URLIndexPrivateData::IndexRow(const URLRow& row) { 167 bool URLIndexPrivateData::IndexRow(
168 const URLRow& row,
169 const std::string& languages,
170 const std::set<std::string>& scheme_whitelist) {
145 const GURL& gurl(row.url()); 171 const GURL& gurl(row.url());
146 172
147 // Index only URLs with a whitelisted scheme. 173 // Index only URLs with a whitelisted scheme.
148 if (!URLIndexPrivateData::URLSchemeIsWhitelisted(gurl)) 174 if (!URLSchemeIsWhitelisted(gurl, scheme_whitelist))
149 return false; 175 return false;
150 176
151 URLID row_id = row.id(); 177 URLID row_id = row.id();
152 // Strip out username and password before saving and indexing. 178 // Strip out username and password before saving and indexing.
153 string16 url(net::FormatUrl(gurl, languages_, 179 string16 url(net::FormatUrl(gurl, languages,
154 net::kFormatUrlOmitUsernamePassword, 180 net::kFormatUrlOmitUsernamePassword,
155 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS, 181 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS,
156 NULL, NULL, NULL)); 182 NULL, NULL, NULL));
157 183
158 HistoryID history_id = static_cast<HistoryID>(row_id); 184 HistoryID history_id = static_cast<HistoryID>(row_id);
159 DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max()); 185 DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max());
160 186
161 // Add the row for quick lookup in the history info store. 187 // Add the row for quick lookup in the history info store.
162 URLRow new_row(GURL(url), row_id); 188 URLRow new_row(GURL(url), row_id);
163 new_row.set_visit_count(row.visit_count()); 189 new_row.set_visit_count(row.visit_count());
164 new_row.set_typed_count(row.typed_count()); 190 new_row.set_typed_count(row.typed_count());
165 new_row.set_last_visit(row.last_visit()); 191 new_row.set_last_visit(row.last_visit());
166 new_row.set_title(row.title()); 192 new_row.set_title(row.title());
167 history_info_map_[history_id] = new_row; 193 history_info_map_[history_id] = new_row;
168 194
169 // Index the words contained in the URL and title of the row. 195 // Index the words contained in the URL and title of the row.
170 RowWordStarts word_starts; 196 RowWordStarts word_starts;
171 AddRowWordsToIndex(new_row, &word_starts); 197 AddRowWordsToIndex(new_row, &word_starts, languages);
172 word_starts_map_[history_id] = word_starts; 198 word_starts_map_[history_id] = word_starts;
173 return true; 199 return true;
174 } 200 }
175 201
176 void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row, 202 void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row,
177 RowWordStarts* word_starts) { 203 RowWordStarts* word_starts,
204 const std::string& languages) {
178 HistoryID history_id = static_cast<HistoryID>(row.id()); 205 HistoryID history_id = static_cast<HistoryID>(row.id());
179 // Split URL into individual, unique words then add in the title words. 206 // Split URL into individual, unique words then add in the title words.
180 const GURL& gurl(row.url()); 207 const GURL& gurl(row.url());
181 string16 url(net::FormatUrl(gurl, languages_, 208 string16 url(net::FormatUrl(gurl, languages,
182 net::kFormatUrlOmitUsernamePassword, 209 net::kFormatUrlOmitUsernamePassword,
183 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS, 210 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS,
184 NULL, NULL, NULL)); 211 NULL, NULL, NULL));
185 url = base::i18n::ToLower(url); 212 url = base::i18n::ToLower(url);
186 String16Set url_words = String16SetFromString16(url, 213 String16Set url_words = String16SetFromString16(url,
187 word_starts ? &word_starts->url_word_starts_ : NULL); 214 word_starts ? &word_starts->url_word_starts_ : NULL);
188 String16Set title_words = String16SetFromString16(row.title(), 215 String16Set title_words = String16SetFromString16(row.title(),
189 word_starts ? &word_starts->title_word_starts_ : NULL); 216 word_starts ? &word_starts->title_word_starts_ : NULL);
190 String16Set words; 217 String16Set words;
191 std::set_union(url_words.begin(), url_words.end(), 218 std::set_union(url_words.begin(), url_words.end(),
(...skipping 108 matching lines...) Expand 10 before | Expand all | Expand 10 after
300 if (iter != history_id_word_map_.end()) { 327 if (iter != history_id_word_map_.end()) {
301 WordIDSet& word_id_set(iter->second); 328 WordIDSet& word_id_set(iter->second);
302 word_id_set.insert(word_id); 329 word_id_set.insert(word_id);
303 } else { 330 } else {
304 WordIDSet word_id_set; 331 WordIDSet word_id_set;
305 word_id_set.insert(word_id); 332 word_id_set.insert(word_id);
306 history_id_word_map_[history_id] = word_id_set; 333 history_id_word_map_[history_id] = word_id_set;
307 } 334 }
308 } 335 }
309 336
310 bool URLIndexPrivateData::UpdateURL(const URLRow& row) { 337 bool URLIndexPrivateData::UpdateURL(
338 const URLRow& row,
339 const std::string& languages,
340 const std::set<std::string>& scheme_whitelist) {
311 // The row may or may not already be in our index. If it is not already 341 // The row may or may not already be in our index. If it is not already
312 // indexed and it qualifies then it gets indexed. If it is already 342 // indexed and it qualifies then it gets indexed. If it is already
313 // indexed and still qualifies then it gets updated, otherwise it 343 // indexed and still qualifies then it gets updated, otherwise it
314 // is deleted from the index. 344 // is deleted from the index.
315 bool row_was_updated = false; 345 bool row_was_updated = false;
316 URLID row_id = row.id(); 346 URLID row_id = row.id();
317 HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id); 347 HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id);
318 if (row_pos == history_info_map_.end()) { 348 if (row_pos == history_info_map_.end()) {
319 // This new row should be indexed if it qualifies. 349 // This new row should be indexed if it qualifies.
320 URLRow new_row(row); 350 URLRow new_row(row);
321 new_row.set_id(row_id); 351 new_row.set_id(row_id);
322 row_was_updated = 352 row_was_updated = RowQualifiesAsSignificant(new_row, base::Time()) &&
323 RowQualifiesAsSignificant(new_row, base::Time()) && IndexRow(new_row); 353 IndexRow(new_row, languages, scheme_whitelist);
324 } else if (RowQualifiesAsSignificant(row, base::Time())) { 354 } else if (RowQualifiesAsSignificant(row, base::Time())) {
325 // This indexed row still qualifies and will be re-indexed. 355 // This indexed row still qualifies and will be re-indexed.
326 // The url won't have changed but the title, visit count, etc. 356 // The url won't have changed but the title, visit count, etc.
327 // might have changed. 357 // might have changed.
328 URLRow& row_to_update = row_pos->second; 358 URLRow& row_to_update = row_pos->second;
329 bool title_updated = row_to_update.title() != row.title(); 359 bool title_updated = row_to_update.title() != row.title();
330 if (row_to_update.visit_count() != row.visit_count() || 360 if (row_to_update.visit_count() != row.visit_count() ||
331 row_to_update.typed_count() != row.typed_count() || 361 row_to_update.typed_count() != row.typed_count() ||
332 row_to_update.last_visit() != row.last_visit() || title_updated) { 362 row_to_update.last_visit() != row.last_visit() || title_updated) {
333 row_to_update.set_visit_count(row.visit_count()); 363 row_to_update.set_visit_count(row.visit_count());
334 row_to_update.set_typed_count(row.typed_count()); 364 row_to_update.set_typed_count(row.typed_count());
335 row_to_update.set_last_visit(row.last_visit()); 365 row_to_update.set_last_visit(row.last_visit());
336 // While the URL is guaranteed to remain stable, the title may have 366 // While the URL is guaranteed to remain stable, the title may have
337 // changed. If so, then update the index with the changed words. 367 // changed. If so, then update the index with the changed words.
338 if (title_updated) { 368 if (title_updated) {
339 // Clear all words associated with this row and re-index both the 369 // Clear all words associated with this row and re-index both the
340 // URL and title. 370 // URL and title.
341 RemoveRowWordsFromIndex(row_to_update); 371 RemoveRowWordsFromIndex(row_to_update);
342 row_to_update.set_title(row.title()); 372 row_to_update.set_title(row.title());
343 RowWordStarts word_starts; 373 RowWordStarts word_starts;
344 AddRowWordsToIndex(row_to_update, &word_starts); 374 AddRowWordsToIndex(row_to_update, &word_starts, languages);
345 word_starts_map_[row_id] = word_starts; 375 word_starts_map_[row_id] = word_starts;
346 } 376 }
347 row_was_updated = true; 377 row_was_updated = true;
348 } 378 }
349 } else { 379 } else {
350 // This indexed row no longer qualifies and will be de-indexed by 380 // This indexed row no longer qualifies and will be de-indexed by
351 // clearing all words associated with this row. 381 // clearing all words associated with this row.
352 RemoveRowFromIndex(row); 382 RemoveRowFromIndex(row);
353 row_was_updated = true; 383 row_was_updated = true;
354 } 384 }
(...skipping 21 matching lines...) Expand all
376 history_info_map_.begin(), 406 history_info_map_.begin(),
377 history_info_map_.end(), 407 history_info_map_.end(),
378 HistoryInfoMapItemHasURL(url)); 408 HistoryInfoMapItemHasURL(url));
379 if (pos == history_info_map_.end()) 409 if (pos == history_info_map_.end())
380 return false; 410 return false;
381 RemoveRowFromIndex(pos->second); 411 RemoveRowFromIndex(pos->second);
382 search_term_cache_.clear(); // This invalidates the cache. 412 search_term_cache_.clear(); // This invalidates the cache.
383 return true; 413 return true;
384 } 414 }
385 415
386 bool URLIndexPrivateData::URLSchemeIsWhitelisted(const GURL& gurl) const {
387 return scheme_whitelist_.find(gurl.scheme()) != scheme_whitelist_.end();
388 }
389
390 // URLIndexPrivateData::HistoryItemFactorGreater ------------------------------- 416 // URLIndexPrivateData::HistoryItemFactorGreater -------------------------------
391 417
392 URLIndexPrivateData::HistoryItemFactorGreater::HistoryItemFactorGreater( 418 URLIndexPrivateData::HistoryItemFactorGreater::HistoryItemFactorGreater(
393 const HistoryInfoMap& history_info_map) 419 const HistoryInfoMap& history_info_map)
394 : history_info_map_(history_info_map) { 420 : history_info_map_(history_info_map) {
395 } 421 }
396 422
397 URLIndexPrivateData::HistoryItemFactorGreater::~HistoryItemFactorGreater() {} 423 URLIndexPrivateData::HistoryItemFactorGreater::~HistoryItemFactorGreater() {}
398 424
399 bool URLIndexPrivateData::HistoryItemFactorGreater::operator()( 425 bool URLIndexPrivateData::HistoryItemFactorGreater::operator()(
(...skipping 498 matching lines...) Expand 10 before | Expand all | Expand 10 after
898 std::set_intersection(word_id_set.begin(), word_id_set.end(), 924 std::set_intersection(word_id_set.begin(), word_id_set.end(),
899 char_word_id_set.begin(), char_word_id_set.end(), 925 char_word_id_set.begin(), char_word_id_set.end(),
900 std::inserter(new_word_id_set, 926 std::inserter(new_word_id_set,
901 new_word_id_set.begin())); 927 new_word_id_set.begin()));
902 word_id_set.swap(new_word_id_set); 928 word_id_set.swap(new_word_id_set);
903 } 929 }
904 } 930 }
905 return word_id_set; 931 return word_id_set;
906 } 932 }
907 933
908 // static
909 void URLIndexPrivateData::InitializeSchemeWhitelist(
910 std::set<std::string>* whitelist) {
911 DCHECK(whitelist);
912 whitelist->insert(std::string(chrome::kAboutScheme));
913 whitelist->insert(std::string(chrome::kChromeUIScheme));
914 whitelist->insert(std::string(chrome::kFileScheme));
915 whitelist->insert(std::string(chrome::kFtpScheme));
916 whitelist->insert(std::string(chrome::kHttpScheme));
917 whitelist->insert(std::string(chrome::kHttpsScheme));
918 whitelist->insert(std::string(chrome::kMailToScheme));
919 }
920
921 // Cache Saving ---------------------------------------------------------------- 934 // Cache Saving ----------------------------------------------------------------
922 935
936 // static
937 void URLIndexPrivateData::WritePrivateDataToCacheFileTask(
938 scoped_refptr<URLIndexPrivateData> private_data,
939 const FilePath& file_path,
940 scoped_refptr<RefCountedBool> succeeded) {
941 DCHECK(private_data.get());
942 DCHECK(!file_path.empty());
943 succeeded->set_value(private_data->SaveToFile(file_path));
944 }
945
923 bool URLIndexPrivateData::SaveToFile(const FilePath& file_path) { 946 bool URLIndexPrivateData::SaveToFile(const FilePath& file_path) {
924 // TODO(mrossetti): Move File IO to another thread.
925 base::ThreadRestrictions::ScopedAllowIO allow_io;
926 base::TimeTicks beginning_time = base::TimeTicks::Now(); 947 base::TimeTicks beginning_time = base::TimeTicks::Now();
927 InMemoryURLIndexCacheItem index_cache; 948 InMemoryURLIndexCacheItem index_cache;
928 SavePrivateData(&index_cache); 949 SavePrivateData(&index_cache);
929 std::string data; 950 std::string data;
930 if (!index_cache.SerializeToString(&data)) { 951 if (!index_cache.SerializeToString(&data)) {
931 LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache."; 952 LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache.";
932 return false; 953 return false;
933 } 954 }
934 955
935 int size = data.size(); 956 int size = data.size();
(...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after
1023 if (history_info_map_.empty()) 1044 if (history_info_map_.empty())
1024 return; 1045 return;
1025 HistoryInfoMapItem* map_item = cache->mutable_history_info_map(); 1046 HistoryInfoMapItem* map_item = cache->mutable_history_info_map();
1026 map_item->set_item_count(history_info_map_.size()); 1047 map_item->set_item_count(history_info_map_.size());
1027 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); 1048 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();
1028 iter != history_info_map_.end(); ++iter) { 1049 iter != history_info_map_.end(); ++iter) {
1029 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry(); 1050 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry();
1030 map_entry->set_history_id(iter->first); 1051 map_entry->set_history_id(iter->first);
1031 const URLRow& url_row(iter->second); 1052 const URLRow& url_row(iter->second);
1032 // Note: We only save information that contributes to the index so there 1053 // Note: We only save information that contributes to the index so there
1033 // is no need to save search_term_cache_ (not persistent), 1054 // is no need to save search_term_cache_ (not persistent).
1034 // languages_, etc.
1035 map_entry->set_visit_count(url_row.visit_count()); 1055 map_entry->set_visit_count(url_row.visit_count());
1036 map_entry->set_typed_count(url_row.typed_count()); 1056 map_entry->set_typed_count(url_row.typed_count());
1037 map_entry->set_last_visit(url_row.last_visit().ToInternalValue()); 1057 map_entry->set_last_visit(url_row.last_visit().ToInternalValue());
1038 map_entry->set_url(url_row.url().spec()); 1058 map_entry->set_url(url_row.url().spec());
1039 map_entry->set_title(UTF16ToUTF8(url_row.title())); 1059 map_entry->set_title(UTF16ToUTF8(url_row.title()));
1040 } 1060 }
1041 } 1061 }
1042 1062
1043 void URLIndexPrivateData::SaveWordStartsMap( 1063 void URLIndexPrivateData::SaveWordStartsMap(
1044 InMemoryURLIndexCacheItem* cache) const { 1064 InMemoryURLIndexCacheItem* cache) const {
(...skipping 18 matching lines...) Expand all
1063 i != word_starts.url_word_starts_.end(); ++i) 1083 i != word_starts.url_word_starts_.end(); ++i)
1064 map_entry->add_url_word_starts(*i); 1084 map_entry->add_url_word_starts(*i);
1065 for (WordStarts::const_iterator i = word_starts.title_word_starts_.begin(); 1085 for (WordStarts::const_iterator i = word_starts.title_word_starts_.begin();
1066 i != word_starts.title_word_starts_.end(); ++i) 1086 i != word_starts.title_word_starts_.end(); ++i)
1067 map_entry->add_title_word_starts(*i); 1087 map_entry->add_title_word_starts(*i);
1068 } 1088 }
1069 } 1089 }
1070 1090
1071 // Cache Restoring ------------------------------------------------------------- 1091 // Cache Restoring -------------------------------------------------------------
1072 1092
1073 bool URLIndexPrivateData::RestoreFromFile(const FilePath& file_path) { 1093 // static
1074 // TODO(mrossetti): Figure out how to determine if the cache is up-to-date. 1094 void URLIndexPrivateData::RestoreFromFileTask(
1075 // That is: ensure that the database has not been modified since the cache 1095 const FilePath& file_path,
1076 // was last saved. DB file modification date is inadequate. There are no 1096 scoped_refptr<URLIndexPrivateData> private_data,
1077 // SQLite table checksums automatically stored. 1097 std::string languages) {
1078 Clear(); // Start with a clean slate. 1098 private_data = URLIndexPrivateData::RestoreFromFile(file_path, languages);
1099 }
1079 1100
1080 // FIXME(mrossetti): Move File IO to another thread. 1101 // static
1081 base::ThreadRestrictions::ScopedAllowIO allow_io; 1102 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RestoreFromFile(
1103 const FilePath& file_path,
1104 const std::string& languages) {
1082 base::TimeTicks beginning_time = base::TimeTicks::Now(); 1105 base::TimeTicks beginning_time = base::TimeTicks::Now();
1083 if (!file_util::PathExists(file_path)) 1106 if (!file_util::PathExists(file_path))
1084 return false; 1107 return NULL;
1085 std::string data; 1108 std::string data;
1086 // If there is no cache file then simply give up. This will cause us to 1109 // If there is no cache file then simply give up. This will cause us to
1087 // attempt to rebuild from the history database. 1110 // attempt to rebuild from the history database.
1088 if (!file_util::ReadFileToString(file_path, &data)) 1111 if (!file_util::ReadFileToString(file_path, &data))
1089 return false; 1112 return NULL;
1090 1113
1114 scoped_refptr<URLIndexPrivateData> restored_data(new URLIndexPrivateData);
1091 InMemoryURLIndexCacheItem index_cache; 1115 InMemoryURLIndexCacheItem index_cache;
1092 if (!index_cache.ParseFromArray(data.c_str(), data.size())) { 1116 if (!index_cache.ParseFromArray(data.c_str(), data.size())) {
1093 LOG(WARNING) << "Failed to parse InMemoryURLIndex cache data read from " 1117 LOG(WARNING) << "Failed to parse URLIndexPrivateData cache data read from "
1094 << file_path.value(); 1118 << file_path.value();
1095 return false; 1119 return restored_data;
1096 } 1120 }
1097 1121
1098 if (!RestorePrivateData(index_cache)) { 1122 if (!restored_data->RestorePrivateData(index_cache, languages))
1099 Clear(); // Back to square one -- must build from scratch. 1123 return NULL;
1100 return false;
1101 }
1102 1124
1103 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime", 1125 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime",
1104 base::TimeTicks::Now() - beginning_time); 1126 base::TimeTicks::Now() - beginning_time);
1105 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", 1127 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",
1106 history_id_word_map_.size()); 1128 restored_data->history_id_word_map_.size());
1107 UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size()); 1129 UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size());
1108 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", word_map_.size()); 1130 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",
1109 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", char_word_map_.size()); 1131 restored_data->word_map_.size());
1110 return true; 1132 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",
1133 restored_data->char_word_map_.size());
1134 if (restored_data->Empty())
1135 return NULL; // 'No data' is the same as a failed reload.
1136 return restored_data;
1111 } 1137 }
1112 1138
1113 // static 1139 // static
1114 URLIndexPrivateData* URLIndexPrivateData::RebuildFromHistory( 1140 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RebuildFromHistory(
1115 HistoryDatabase* history_db) { 1141 HistoryDatabase* history_db,
1142 const std::string& languages,
1143 const std::set<std::string>& scheme_whitelist) {
1116 if (!history_db) 1144 if (!history_db)
1117 return NULL; 1145 return NULL;
1118 1146
1119 base::TimeTicks beginning_time = base::TimeTicks::Now(); 1147 base::TimeTicks beginning_time = base::TimeTicks::Now();
1120 1148
1121 scoped_ptr<URLIndexPrivateData> rebuilt_data(new URLIndexPrivateData); 1149 scoped_refptr<URLIndexPrivateData> rebuilt_data(new URLIndexPrivateData);
1122 URLDatabase::URLEnumerator history_enum; 1150 URLDatabase::URLEnumerator history_enum;
1123 if (!history_db->InitURLEnumeratorForSignificant(&history_enum)) 1151 if (!history_db->InitURLEnumeratorForSignificant(&history_enum))
1124 return NULL; 1152 return NULL;
1125 for (URLRow row; history_enum.GetNextURL(&row); ) 1153 for (URLRow row; history_enum.GetNextURL(&row); )
1126 rebuilt_data->IndexRow(row); 1154 rebuilt_data->IndexRow(row, languages, scheme_whitelist);
1127 1155
1128 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime", 1156 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime",
1129 base::TimeTicks::Now() - beginning_time); 1157 base::TimeTicks::Now() - beginning_time);
1130 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", 1158 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",
1131 rebuilt_data->history_id_word_map_.size()); 1159 rebuilt_data->history_id_word_map_.size());
1132 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", 1160 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",
1133 rebuilt_data->word_map_.size()); 1161 rebuilt_data->word_map_.size());
1134 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", 1162 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",
1135 rebuilt_data->char_word_map_.size()); 1163 rebuilt_data->char_word_map_.size());
1136 return rebuilt_data.release(); 1164 return rebuilt_data;
1137 } 1165 }
1138 1166
1139 bool URLIndexPrivateData::RestorePrivateData( 1167 bool URLIndexPrivateData::RestorePrivateData(
1140 const InMemoryURLIndexCacheItem& cache) { 1168 const InMemoryURLIndexCacheItem& cache,
1169 const std::string& languages) {
1141 if (cache.has_version()) 1170 if (cache.has_version())
1142 restored_cache_version_ = cache.version(); 1171 restored_cache_version_ = cache.version();
1143 return RestoreWordList(cache) && RestoreWordMap(cache) && 1172 return RestoreWordList(cache) && RestoreWordMap(cache) &&
1144 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) && 1173 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) &&
1145 RestoreHistoryInfoMap(cache) && RestoreWordStartsMap(cache); 1174 RestoreHistoryInfoMap(cache) && RestoreWordStartsMap(cache, languages);
1146 } 1175 }
1147 1176
1148 bool URLIndexPrivateData::RestoreWordList( 1177 bool URLIndexPrivateData::RestoreWordList(
1149 const InMemoryURLIndexCacheItem& cache) { 1178 const InMemoryURLIndexCacheItem& cache) {
1150 if (!cache.has_word_list()) 1179 if (!cache.has_word_list())
1151 return false; 1180 return false;
1152 const WordListItem& list_item(cache.word_list()); 1181 const WordListItem& list_item(cache.word_list());
1153 uint32 expected_item_count = list_item.word_count(); 1182 uint32 expected_item_count = list_item.word_count();
1154 uint32 actual_item_count = list_item.word_size(); 1183 uint32 actual_item_count = list_item.word_size();
1155 if (actual_item_count == 0 || actual_item_count != expected_item_count) 1184 if (actual_item_count == 0 || actual_item_count != expected_item_count)
(...skipping 101 matching lines...) Expand 10 before | Expand all | Expand 10 after
1257 if (iter->has_title()) { 1286 if (iter->has_title()) {
1258 string16 title(UTF8ToUTF16(iter->title())); 1287 string16 title(UTF8ToUTF16(iter->title()));
1259 url_row.set_title(title); 1288 url_row.set_title(title);
1260 } 1289 }
1261 history_info_map_[history_id] = url_row; 1290 history_info_map_[history_id] = url_row;
1262 } 1291 }
1263 return true; 1292 return true;
1264 } 1293 }
1265 1294
1266 bool URLIndexPrivateData::RestoreWordStartsMap( 1295 bool URLIndexPrivateData::RestoreWordStartsMap(
1267 const InMemoryURLIndexCacheItem& cache) { 1296 const InMemoryURLIndexCacheItem& cache,
1297 const std::string& languages) {
1268 // Note that this function must be called after RestoreHistoryInfoMap() has 1298 // Note that this function must be called after RestoreHistoryInfoMap() has
1269 // been run as the word starts may have to be recalculated from the urls and 1299 // been run as the word starts may have to be recalculated from the urls and
1270 // page titles. 1300 // page titles.
1271 if (cache.has_word_starts_map()) { 1301 if (cache.has_word_starts_map()) {
1272 const WordStartsMapItem& list_item(cache.word_starts_map()); 1302 const WordStartsMapItem& list_item(cache.word_starts_map());
1273 uint32 expected_item_count = list_item.item_count(); 1303 uint32 expected_item_count = list_item.item_count();
1274 uint32 actual_item_count = list_item.word_starts_map_entry_size(); 1304 uint32 actual_item_count = list_item.word_starts_map_entry_size();
1275 if (actual_item_count == 0 || actual_item_count != expected_item_count) 1305 if (actual_item_count == 0 || actual_item_count != expected_item_count)
1276 return false; 1306 return false;
1277 const RepeatedPtrField<WordStartsMapEntry>& 1307 const RepeatedPtrField<WordStartsMapEntry>&
(...skipping 14 matching lines...) Expand all
1292 word_starts.title_word_starts_.push_back(*jiter); 1322 word_starts.title_word_starts_.push_back(*jiter);
1293 word_starts_map_[history_id] = word_starts; 1323 word_starts_map_[history_id] = word_starts;
1294 } 1324 }
1295 } else { 1325 } else {
1296 // Since the cache did not contain any word starts we must rebuild them from 1326 // Since the cache did not contain any word starts we must rebuild them from
1297 // the URL and page titles. 1327 // the URL and page titles.
1298 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); 1328 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();
1299 iter != history_info_map_.end(); ++iter) { 1329 iter != history_info_map_.end(); ++iter) {
1300 RowWordStarts word_starts; 1330 RowWordStarts word_starts;
1301 const URLRow& row(iter->second); 1331 const URLRow& row(iter->second);
1302 string16 url(net::FormatUrl(row.url(), languages_, 1332 string16 url(net::FormatUrl(row.url(), languages,
1303 net::kFormatUrlOmitUsernamePassword, 1333 net::kFormatUrlOmitUsernamePassword,
1304 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS, 1334 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS,
1305 NULL, NULL, NULL)); 1335 NULL, NULL, NULL));
1306 url = base::i18n::ToLower(url); 1336 url = base::i18n::ToLower(url);
1307 String16VectorFromString16(url, false, &word_starts.url_word_starts_); 1337 String16VectorFromString16(url, false, &word_starts.url_word_starts_);
1308 String16VectorFromString16( 1338 String16VectorFromString16(
1309 row.title(), false, &word_starts.title_word_starts_); 1339 row.title(), false, &word_starts.title_word_starts_);
1310 word_starts_map_[iter->first] = word_starts; 1340 word_starts_map_[iter->first] = word_starts;
1311 } 1341 }
1312 } 1342 }
1313 return true; 1343 return true;
1314 } 1344 }
1315 1345
1346 // static
1347 bool URLIndexPrivateData::URLSchemeIsWhitelisted(
1348 const GURL& gurl,
1349 const std::set<std::string>& whitelist) {
1350 return whitelist.find(gurl.scheme()) != whitelist.end();
1351 }
1352
1316 } // namespace history 1353 } // namespace history
OLDNEW
« no previous file with comments | « chrome/browser/history/url_index_private_data.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698