Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1409)

Side by Side Diff: chrome/browser/history/url_index_private_data.cc

Issue 9030031: Move InMemoryURLIndex Caching Operations to FILE Thread (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: Missed a private data swap. Try to fix update phase failure. Created 8 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/history/url_index_private_data.h" 5 #include "chrome/browser/history/url_index_private_data.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <functional> 8 #include <functional>
9 #include <iterator> 9 #include <iterator>
10 #include <limits> 10 #include <limits>
11 #include <numeric> 11 #include <numeric>
12 12
13 #include "base/bind.h"
13 #include "base/file_util.h" 14 #include "base/file_util.h"
15 #include "base/i18n/break_iterator.h"
14 #include "base/i18n/case_conversion.h" 16 #include "base/i18n/case_conversion.h"
17 #include "base/memory/scoped_ptr.h"
15 #include "base/metrics/histogram.h" 18 #include "base/metrics/histogram.h"
16 #include "base/string_util.h" 19 #include "base/string_util.h"
17 #include "base/threading/thread_restrictions.h" 20 #include "base/time.h"
18 #include "base/utf_string_conversions.h" 21 #include "base/utf_string_conversions.h"
19 #include "chrome/browser/autocomplete/autocomplete.h" 22 #include "chrome/browser/autocomplete/autocomplete.h"
20 #include "chrome/browser/history/url_database.h" 23 #include "chrome/browser/history/url_database.h"
21 #include "chrome/common/url_constants.h" 24 #include "chrome/common/url_constants.h"
22 #include "net/base/net_util.h" 25 #include "net/base/net_util.h"
23 #include "third_party/protobuf/src/google/protobuf/repeated_field.h" 26 #include "third_party/protobuf/src/google/protobuf/repeated_field.h"
27 #include "ui/base/l10n/l10n_util.h"
24 28
25 using google::protobuf::RepeatedField; 29 using google::protobuf::RepeatedField;
26 using google::protobuf::RepeatedPtrField; 30 using google::protobuf::RepeatedPtrField;
27 using in_memory_url_index::InMemoryURLIndexCacheItem; 31 using in_memory_url_index::InMemoryURLIndexCacheItem;
28 32
29 namespace history { 33 namespace history {
30 34
31 typedef imui::InMemoryURLIndexCacheItem_WordListItem WordListItem; 35 typedef imui::InMemoryURLIndexCacheItem_WordListItem WordListItem;
32 typedef imui::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry WordMapEntry; 36 typedef imui::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry WordMapEntry;
33 typedef imui::InMemoryURLIndexCacheItem_WordMapItem WordMapItem; 37 typedef imui::InMemoryURLIndexCacheItem_WordMapItem WordMapItem;
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
69 bool LengthGreater(const string16& string_a, const string16& string_b) { 73 bool LengthGreater(const string16& string_a, const string16& string_b) {
70 return string_a.length() > string_b.length(); 74 return string_a.length() > string_b.length();
71 } 75 }
72 76
73 // std::accumulate helper function to add up TermMatches' lengths. 77 // std::accumulate helper function to add up TermMatches' lengths.
74 int AccumulateMatchLength(int total, const TermMatch& match) { 78 int AccumulateMatchLength(int total, const TermMatch& match) {
75 return total + match.length; 79 return total + match.length;
76 } 80 }
77 81
78 // Converts a raw value for some particular scoring factor into a score 82 // Converts a raw value for some particular scoring factor into a score
79 // component for that factor. The conversion function is piecewise linear, with 83 // component for that factor. The conversion function is linear within the
80 // input values provided in |value_ranks| and resulting output scores from 84 // range of the |value_ranks| array. |value| is mapped to a range within
81 // |kScoreRank| (mathematically, f(value_rank[i]) = kScoreRank[i]). A score 85 // |value_ranks| with the resulting range projecting into the |kScoreRank|
82 // cannot be higher than kScoreRank[0], and drops directly to 0 if lower than 86 // array. A score cannot be higher than kScoreRank[0], and drops directly to 0
83 // kScoreRank[3]. 87 // if lower than kScoreRank[3] (eliminating the item being scored from further
88 // consideration since such a score is insignificant and unlikely to be
89 // presented to the user).
Peter Kasting 2012/01/14 00:12:49 Nit: Honestly, the old comment seemed a lot cleare
mrossetti 2012/03/03 05:05:56 Done.
84 // 90 //
85 // For example, take |value| == 70 and |value_ranks| == { 100, 50, 30, 10 }. 91 // For example, take |value| == 70 and |value_ranks| == { 100, 50, 30, 10 }.
86 // Because 70 falls between ranks 0 (100) and 1 (50), the score is given by the 92 // Because 70 falls between ranks 0 (100) and 1 (50), the score is given by the
87 // linear function: 93 // linear function:
88 // score = m * value + b, where 94 // score = m * value + b, where
89 // m = (kScoreRank[0] - kScoreRank[1]) / (value_ranks[0] - value_ranks[1]) 95 // m = (kScoreRank[0] - kScoreRank[1]) / (value_ranks[0] - value_ranks[1])
90 // b = value_ranks[1] 96 // b = value_ranks[1]
91 // Any value higher than 100 would be scored as if it were 100, and any value 97 // Any value higher than 100 would be scored as if it were 100, and any value
92 // lower than 10 scored 0. 98 // lower than 10 scored 0.
93 int ScoreForValue(int value, const int* value_ranks) { 99 int ScoreForValue(int value, const int* value_ranks) {
(...skipping 189 matching lines...) Expand 10 before | Expand all | Expand 10 after
283 if (iter != history_id_word_map_.end()) { 289 if (iter != history_id_word_map_.end()) {
284 WordIDSet& word_id_set(iter->second); 290 WordIDSet& word_id_set(iter->second);
285 word_id_set.insert(word_id); 291 word_id_set.insert(word_id);
286 } else { 292 } else {
287 WordIDSet word_id_set; 293 WordIDSet word_id_set;
288 word_id_set.insert(word_id); 294 word_id_set.insert(word_id);
289 history_id_word_map_[history_id] = word_id_set; 295 history_id_word_map_[history_id] = word_id_set;
290 } 296 }
291 } 297 }
292 298
293 void URLIndexPrivateData::UpdateURL(URLID row_id, const URLRow& row) { 299 void URLIndexPrivateData::UpdateURL(const URLRow& row) {
294 // The row may or may not already be in our index. If it is not already 300 // The row may or may not already be in our index. If it is not already
295 // indexed and it qualifies then it gets indexed. If it is already 301 // indexed and it qualifies then it gets indexed. If it is already
296 // indexed and still qualifies then it gets updated, otherwise it 302 // indexed and still qualifies then it gets updated, otherwise it
297 // is deleted from the index. 303 // is deleted from the index.
304 URLID row_id = row.id();
298 HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id); 305 HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id);
299 if (row_pos == history_info_map_.end()) { 306 if (row_pos == history_info_map_.end()) {
300 // This new row should be indexed if it qualifies. 307 // This new row should be indexed if it qualifies.
301 URLRow new_row(row); 308 URLRow new_row(row);
302 new_row.set_id(row_id); 309 new_row.set_id(row_id);
303 if (RowQualifiesAsSignificant(new_row, base::Time())) 310 if (RowQualifiesAsSignificant(new_row, base::Time()))
304 IndexRow(new_row); 311 IndexRow(new_row);
305 } else if (RowQualifiesAsSignificant(row, base::Time())) { 312 } else if (RowQualifiesAsSignificant(row, base::Time())) {
306 // This indexed row still qualifies and will be re-indexed. 313 // This indexed row still qualifies and will be re-indexed.
307 // The url won't have changed but the title, visit count, etc. 314 // The url won't have changed but the title, visit count, etc.
308 // might have changed. 315 // might have changed.
309 URLRow& updated_row = row_pos->second; 316 URLRow& updated_row = row_pos->second;
310 updated_row.set_visit_count(row.visit_count()); 317 updated_row.set_visit_count(row.visit_count());
311 updated_row.set_typed_count(row.typed_count()); 318 updated_row.set_typed_count(row.typed_count());
312 updated_row.set_last_visit(row.last_visit()); 319 updated_row.set_last_visit(row.last_visit());
313 // While the URL is guaranteed to remain stable, the title may have changed. 320 // While the URL is guaranteed to remain stable, the title may have changed.
314 // If so, then we need to update the index with the changed words. 321 // If so, then we need to update the index with the changed words.
315 if (updated_row.title() != row.title()) { 322 if (updated_row.title() != row.title()) {
316 // Clear all words associated with this row and re-index both the 323 // Clear all words associated with this row and re-index both the
317 // URL and title. 324 // URL and title.
318 RemoveRowWordsFromIndex(updated_row); 325 RemoveRowWordsFromIndex(updated_row);
319 updated_row.set_title(row.title()); 326 updated_row.set_title(row.title());
320 AddRowWordsToIndex(updated_row); 327 AddRowWordsToIndex(updated_row);
321 } 328 }
322 } else { 329 } else {
323 // This indexed row no longer qualifies and will be de-indexed by 330 // This indexed row no longer qualifies and will be de-indexed by
324 // clearing all words associated with this row. 331 // clearing all words associated with this row.
325 URLRow& removed_row = row_pos->second; 332 RemoveRowFromIndex(row);
326 RemoveRowFromIndex(removed_row);
327 } 333 }
328 // This invalidates the cache. 334 search_term_cache_.clear(); // This invalidates the cache.
329 search_term_cache_.clear();
330 } 335 }
331 336
332 void URLIndexPrivateData::DeleteURL(URLID row_id) { 337 // Helper functor for DeleteURL.
333 // Note that this does not remove any reference to this row from the 338 class HistoryInfoMapItemHasURL {
334 // word_id_history_map_. That map will continue to contain (and return) 339 public:
335 // hits against this row until that map is rebuilt, but since the 340 explicit HistoryInfoMapItemHasURL(const GURL& url): url_(url) {}
336 // history_info_map_ no longer references the row no erroneous results 341
337 // will propagate to the user. 342 bool operator()(const std::pair<const HistoryID, URLRow> item) {
Peter Kasting 2012/01/14 00:12:49 Nit: Missing &?
mrossetti 2012/03/03 05:05:56 Done.
338 history_info_map_.erase(row_id); 343 return item.second.url() == url_;
339 search_term_cache_.clear(); // This invalidates the word cache. 344 }
345
346 private:
347 const GURL& url_;
348 };
349
350 void URLIndexPrivateData::DeleteURL(const GURL& url) {
351 // Find the matching entry in the history_info_map_.
352 HistoryInfoMap::iterator pos = std::find_if(
353 history_info_map_.begin(),
354 history_info_map_.end(),
355 HistoryInfoMapItemHasURL(url));
356 if (pos != history_info_map_.end()) {
357 RemoveRowFromIndex(pos->second);
358 search_term_cache_.clear(); // This invalidates the cache.
359 }
340 } 360 }
341 361
342 bool URLIndexPrivateData::URLSchemeIsWhitelisted(const GURL& gurl) const { 362 bool URLIndexPrivateData::URLSchemeIsWhitelisted(const GURL& gurl) const {
343 return scheme_whitelist_.find(gurl.scheme()) != scheme_whitelist_.end(); 363 return scheme_whitelist_.find(gurl.scheme()) != scheme_whitelist_.end();
344 } 364 }
345 365
346 // URLIndexPrivateData::HistoryItemFactorGreater ------------------------------- 366 // URLIndexPrivateData::HistoryItemFactorGreater -------------------------------
347 367
348 URLIndexPrivateData::HistoryItemFactorGreater::HistoryItemFactorGreater( 368 URLIndexPrivateData::HistoryItemFactorGreater::HistoryItemFactorGreater(
349 const HistoryInfoMap& history_info_map) 369 const HistoryInfoMap& history_info_map)
(...skipping 480 matching lines...) Expand 10 before | Expand all | Expand 10 after
830 whitelist->insert(std::string(chrome::kFileScheme)); 850 whitelist->insert(std::string(chrome::kFileScheme));
831 whitelist->insert(std::string(chrome::kFtpScheme)); 851 whitelist->insert(std::string(chrome::kFtpScheme));
832 whitelist->insert(std::string(chrome::kHttpScheme)); 852 whitelist->insert(std::string(chrome::kHttpScheme));
833 whitelist->insert(std::string(chrome::kHttpsScheme)); 853 whitelist->insert(std::string(chrome::kHttpsScheme));
834 whitelist->insert(std::string(chrome::kMailToScheme)); 854 whitelist->insert(std::string(chrome::kMailToScheme));
835 } 855 }
836 856
837 // Cache Saving ---------------------------------------------------------------- 857 // Cache Saving ----------------------------------------------------------------
838 858
839 bool URLIndexPrivateData::SaveToFile(const FilePath& file_path) { 859 bool URLIndexPrivateData::SaveToFile(const FilePath& file_path) {
840 // TODO(mrossetti): Move File IO to another thread.
841 base::ThreadRestrictions::ScopedAllowIO allow_io;
842 base::TimeTicks beginning_time = base::TimeTicks::Now(); 860 base::TimeTicks beginning_time = base::TimeTicks::Now();
843 InMemoryURLIndexCacheItem index_cache; 861 InMemoryURLIndexCacheItem index_cache;
844 SavePrivateData(&index_cache); 862 SavePrivateData(&index_cache);
845 std::string data; 863 std::string data;
846 if (!index_cache.SerializeToString(&data)) { 864 if (!index_cache.SerializeToString(&data)) {
847 LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache."; 865 LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache.";
848 return false; 866 return false;
849 } 867 }
850 868
851 int size = data.size(); 869 int size = data.size();
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after
949 map_entry->set_visit_count(url_row.visit_count()); 967 map_entry->set_visit_count(url_row.visit_count());
950 map_entry->set_typed_count(url_row.typed_count()); 968 map_entry->set_typed_count(url_row.typed_count());
951 map_entry->set_last_visit(url_row.last_visit().ToInternalValue()); 969 map_entry->set_last_visit(url_row.last_visit().ToInternalValue());
952 map_entry->set_url(url_row.url().spec()); 970 map_entry->set_url(url_row.url().spec());
953 map_entry->set_title(UTF16ToUTF8(url_row.title())); 971 map_entry->set_title(UTF16ToUTF8(url_row.title()));
954 } 972 }
955 } 973 }
956 974
957 // Cache Restoring ------------------------------------------------------------- 975 // Cache Restoring -------------------------------------------------------------
958 976
959 bool URLIndexPrivateData::ReloadFromHistory(history::URLDatabase* history_db) { 977 // static
960 Clear(); 978 URLIndexPrivateData* URLIndexPrivateData::RestoreFromFile(
961 979 const FilePath& file_path) {
962 if (!history_db)
963 return false;
964
965 base::TimeTicks beginning_time = base::TimeTicks::Now(); 980 base::TimeTicks beginning_time = base::TimeTicks::Now();
966 URLDatabase::URLEnumerator history_enum; 981 if (!file_util::PathExists(file_path))
967 if (!history_db->InitURLEnumeratorForSignificant(&history_enum)) 982 return NULL;
968 return false;
969 URLRow row;
970 while (history_enum.GetNextURL(&row))
971 IndexRow(row);
972 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime",
973 base::TimeTicks::Now() - beginning_time);
974 return true;
975 }
976
977 bool URLIndexPrivateData::RestoreFromFile(const FilePath& file_path) {
978 // TODO(mrossetti): Figure out how to determine if the cache is up-to-date.
979 // That is: ensure that the database has not been modified since the cache
980 // was last saved. DB file modification date is inadequate. There are no
981 // SQLite table checksums automatically stored.
982 // FIXME(mrossetti): Move File IO to another thread.
983 base::ThreadRestrictions::ScopedAllowIO allow_io;
984 base::TimeTicks beginning_time = base::TimeTicks::Now();
985 std::string data; 983 std::string data;
986 // If there is no cache file then simply give up. This will cause us to 984 // If there is no cache file then simply give up. This will cause us to
987 // attempt to rebuild from the history database. 985 // attempt to rebuild from the history database.
988 if (!file_util::ReadFileToString(file_path, &data)) 986 if (!file_util::ReadFileToString(file_path, &data))
989 return false; 987 return NULL;
990 988
989 scoped_ptr<URLIndexPrivateData> restored_data(new URLIndexPrivateData);
991 InMemoryURLIndexCacheItem index_cache; 990 InMemoryURLIndexCacheItem index_cache;
992 if (!index_cache.ParseFromArray(data.c_str(), data.size())) { 991 if (!index_cache.ParseFromArray(data.c_str(), data.size())) {
993 LOG(WARNING) << "Failed to parse InMemoryURLIndex cache data read from " 992 LOG(WARNING) << "Failed to parse URLIndexPrivateData cache data read from "
994 << file_path.value(); 993 << file_path.value();
995 return false; 994 return restored_data.release();
996 } 995 }
997 996
998 if (!RestorePrivateData(index_cache)) { 997 if (!restored_data->RestorePrivateData(index_cache)) {
999 Clear(); // Back to square one -- must build from scratch. 998 restored_data.reset(); // Back to square one -- must build from history DB.
1000 return false; 999 return NULL;
1001 } 1000 }
1002 1001
1003 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime", 1002 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime",
1004 base::TimeTicks::Now() - beginning_time); 1003 base::TimeTicks::Now() - beginning_time);
1005 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", 1004 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",
1006 history_id_word_map_.size()); 1005 restored_data->history_id_word_map_.size());
1007 UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size()); 1006 UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size());
1008 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", word_map_.size()); 1007 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",
1009 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", char_word_map_.size()); 1008 restored_data->word_map_.size());
1010 return true; 1009 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",
1010 restored_data->char_word_map_.size());
1011 return restored_data.release();
1012 }
1013
1014 // static
1015 URLIndexPrivateData* URLIndexPrivateData::RebuildFromHistory(
1016 URLDatabase* history_db) {
1017 if (!history_db)
1018 return NULL;
1019
1020 base::TimeTicks beginning_time = base::TimeTicks::Now();
1021
1022 scoped_ptr<URLIndexPrivateData> rebuilt_data(new URLIndexPrivateData);
1023 URLDatabase::URLEnumerator history_enum;
1024 if (!history_db->InitURLEnumeratorForSignificant(&history_enum))
1025 return NULL;
1026 URLRow row;
Peter Kasting 2012/01/14 00:12:49 Nit: You can condense these three lines to two and
mrossetti 2012/03/03 05:05:56 w00t! My ancient mind just doesn't work that way!
1027 while (history_enum.GetNextURL(&row))
1028 rebuilt_data->IndexRow(row);
1029
1030 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime",
1031 base::TimeTicks::Now() - beginning_time);
1032 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",
1033 rebuilt_data->history_id_word_map_.size());
1034 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",
1035 rebuilt_data->word_map_.size());
1036 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",
1037 rebuilt_data->char_word_map_.size());
1038 return rebuilt_data.release();
1011 } 1039 }
1012 1040
1013 bool URLIndexPrivateData::RestorePrivateData( 1041 bool URLIndexPrivateData::RestorePrivateData(
1014 const InMemoryURLIndexCacheItem& cache) { 1042 const InMemoryURLIndexCacheItem& cache) {
1015 return RestoreWordList(cache) && RestoreWordMap(cache) && 1043 return RestoreWordList(cache) && RestoreWordMap(cache) &&
1016 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) && 1044 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) &&
1017 RestoreHistoryInfoMap(cache); 1045 RestoreHistoryInfoMap(cache);
1018 } 1046 }
1019 1047
1020 bool URLIndexPrivateData::RestoreWordList( 1048 bool URLIndexPrivateData::RestoreWordList(
(...skipping 108 matching lines...) Expand 10 before | Expand all | Expand 10 after
1129 if (iter->has_title()) { 1157 if (iter->has_title()) {
1130 string16 title(UTF8ToUTF16(iter->title())); 1158 string16 title(UTF8ToUTF16(iter->title()));
1131 url_row.set_title(title); 1159 url_row.set_title(title);
1132 } 1160 }
1133 history_info_map_[history_id] = url_row; 1161 history_info_map_[history_id] = url_row;
1134 } 1162 }
1135 return true; 1163 return true;
1136 } 1164 }
1137 1165
1138 } // namespace history 1166 } // namespace history
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698