chrome/browser/history/url_index_private_data.cc - Issue 9030031: Move InMemoryURLIndex Caching Operations to FILE Thread

Side by Side Diff: chrome/browser/history/url_index_private_data.cc

Issue 9030031: Move InMemoryURLIndex Caching Operations to FILE Thread (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/

Patch Set: Missed a private data swap. Try to fix update phase failure. Created 8 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« chrome/browser/history/url_index_private_data.h ('K') | « chrome/browser/history/url_index_private_data.h ('k') | chrome/browser/sync/profile_sync_service_typed_url_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "chrome/browser/history/url_index_private_data.h"	5 #include "chrome/browser/history/url_index_private_data.h"

6	6

7 #include <algorithm>	7 #include <algorithm>

8 #include <functional>	8 #include <functional>

9 #include <iterator>	9 #include <iterator>

10 #include <limits>	10 #include <limits>

11 #include <numeric>	11 #include <numeric>

12	12

	13 #include "base/bind.h"

13 #include "base/file_util.h"	14 #include "base/file_util.h"

	15 #include "base/i18n/break_iterator.h"

14 #include "base/i18n/case_conversion.h"	16 #include "base/i18n/case_conversion.h"

	17 #include "base/memory/scoped_ptr.h"

15 #include "base/metrics/histogram.h"	18 #include "base/metrics/histogram.h"

16 #include "base/string_util.h"	19 #include "base/string_util.h"

17 #include "base/threading/thread_restrictions.h"	20 #include "base/time.h"

18 #include "base/utf_string_conversions.h"	21 #include "base/utf_string_conversions.h"

19 #include "chrome/browser/autocomplete/autocomplete.h"	22 #include "chrome/browser/autocomplete/autocomplete.h"

20 #include "chrome/browser/history/url_database.h"	23 #include "chrome/browser/history/url_database.h"

21 #include "chrome/common/url_constants.h"	24 #include "chrome/common/url_constants.h"

22 #include "net/base/net_util.h"	25 #include "net/base/net_util.h"

23 #include "third_party/protobuf/src/google/protobuf/repeated_field.h"	26 #include "third_party/protobuf/src/google/protobuf/repeated_field.h"

	27 #include "ui/base/l10n/l10n_util.h"

24	28

25 using google::protobuf::RepeatedField;	29 using google::protobuf::RepeatedField;

26 using google::protobuf::RepeatedPtrField;	30 using google::protobuf::RepeatedPtrField;

27 using in_memory_url_index::InMemoryURLIndexCacheItem;	31 using in_memory_url_index::InMemoryURLIndexCacheItem;

28	32

29 namespace history {	33 namespace history {

30	34

31 typedef imui::InMemoryURLIndexCacheItem_WordListItem WordListItem;	35 typedef imui::InMemoryURLIndexCacheItem_WordListItem WordListItem;

32 typedef imui::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry WordMapEntry;	36 typedef imui::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry WordMapEntry;

33 typedef imui::InMemoryURLIndexCacheItem_WordMapItem WordMapItem;	37 typedef imui::InMemoryURLIndexCacheItem_WordMapItem WordMapItem;

(...skipping 35 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
69 bool LengthGreater(const string16& string_a, const string16& string_b) {	73 bool LengthGreater(const string16& string_a, const string16& string_b) {

70 return string_a.length() > string_b.length();	74 return string_a.length() > string_b.length();

71 }	75 }

72	76

73 // std::accumulate helper function to add up TermMatches' lengths.	77 // std::accumulate helper function to add up TermMatches' lengths.

74 int AccumulateMatchLength(int total, const TermMatch& match) {	78 int AccumulateMatchLength(int total, const TermMatch& match) {

75 return total + match.length;	79 return total + match.length;

76 }	80 }

77	81

78 // Converts a raw value for some particular scoring factor into a score	82 // Converts a raw value for some particular scoring factor into a score

79 // component for that factor. The conversion function is piecewise linear, with	83 // component for that factor. The conversion function is linear within the

80 // input values provided in \|value_ranks\| and resulting output scores from	84 // range of the \|value_ranks\| array. \|value\| is mapped to a range within

81 // \|kScoreRank\| (mathematically, f(value_rank[i]) = kScoreRank[i]). A score	85 // \|value_ranks\| with the resulting range projecting into the \|kScoreRank\|

82 // cannot be higher than kScoreRank[0], and drops directly to 0 if lower than	86 // array. A score cannot be higher than kScoreRank[0], and drops directly to 0

83 // kScoreRank[3].	87 // if lower than kScoreRank[3] (eliminating the item being scored from further

	88 // consideration since such a score is insignificant and unlikely to be

	89 // presented to the user).
	Peter Kasting 2012/01/14 00:12:49 Nit: Honestly, the old comment seemed a lot cleare Nit: Honestly, the old comment seemed a lot clearer to me... mrossetti 2012/03/03 05:05:56 Done. Show quoted text On 2012/01/14 00:12:49, Peter Kasting wrote: > Nit: Honestly, the old comment seemed a lot clearer to me... Done.
84 //	90 //

85 // For example, take \|value\| == 70 and \|value_ranks\| == { 100, 50, 30, 10 }.	91 // For example, take \|value\| == 70 and \|value_ranks\| == { 100, 50, 30, 10 }.

86 // Because 70 falls between ranks 0 (100) and 1 (50), the score is given by the	92 // Because 70 falls between ranks 0 (100) and 1 (50), the score is given by the

87 // linear function:	93 // linear function:

88 // score = m * value + b, where	94 // score = m * value + b, where

89 // m = (kScoreRank[0] - kScoreRank[1]) / (value_ranks[0] - value_ranks[1])	95 // m = (kScoreRank[0] - kScoreRank[1]) / (value_ranks[0] - value_ranks[1])

90 // b = value_ranks[1]	96 // b = value_ranks[1]

91 // Any value higher than 100 would be scored as if it were 100, and any value	97 // Any value higher than 100 would be scored as if it were 100, and any value

92 // lower than 10 scored 0.	98 // lower than 10 scored 0.

93 int ScoreForValue(int value, const int* value_ranks) {	99 int ScoreForValue(int value, const int* value_ranks) {

(...skipping 189 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
283 if (iter != history_id_word_map_.end()) {	289 if (iter != history_id_word_map_.end()) {

284 WordIDSet& word_id_set(iter->second);	290 WordIDSet& word_id_set(iter->second);

285 word_id_set.insert(word_id);	291 word_id_set.insert(word_id);

286 } else {	292 } else {

287 WordIDSet word_id_set;	293 WordIDSet word_id_set;

288 word_id_set.insert(word_id);	294 word_id_set.insert(word_id);

289 history_id_word_map_[history_id] = word_id_set;	295 history_id_word_map_[history_id] = word_id_set;

290 }	296 }

291 }	297 }

292	298

293 void URLIndexPrivateData::UpdateURL(URLID row_id, const URLRow& row) {	299 void URLIndexPrivateData::UpdateURL(const URLRow& row) {

294 // The row may or may not already be in our index. If it is not already	300 // The row may or may not already be in our index. If it is not already

295 // indexed and it qualifies then it gets indexed. If it is already	301 // indexed and it qualifies then it gets indexed. If it is already

296 // indexed and still qualifies then it gets updated, otherwise it	302 // indexed and still qualifies then it gets updated, otherwise it

297 // is deleted from the index.	303 // is deleted from the index.

	304 URLID row_id = row.id();

298 HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id);	305 HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id);

299 if (row_pos == history_info_map_.end()) {	306 if (row_pos == history_info_map_.end()) {

300 // This new row should be indexed if it qualifies.	307 // This new row should be indexed if it qualifies.

301 URLRow new_row(row);	308 URLRow new_row(row);

302 new_row.set_id(row_id);	309 new_row.set_id(row_id);

303 if (RowQualifiesAsSignificant(new_row, base::Time()))	310 if (RowQualifiesAsSignificant(new_row, base::Time()))

304 IndexRow(new_row);	311 IndexRow(new_row);

305 } else if (RowQualifiesAsSignificant(row, base::Time())) {	312 } else if (RowQualifiesAsSignificant(row, base::Time())) {

306 // This indexed row still qualifies and will be re-indexed.	313 // This indexed row still qualifies and will be re-indexed.

307 // The url won't have changed but the title, visit count, etc.	314 // The url won't have changed but the title, visit count, etc.

308 // might have changed.	315 // might have changed.

309 URLRow& updated_row = row_pos->second;	316 URLRow& updated_row = row_pos->second;

310 updated_row.set_visit_count(row.visit_count());	317 updated_row.set_visit_count(row.visit_count());

311 updated_row.set_typed_count(row.typed_count());	318 updated_row.set_typed_count(row.typed_count());

312 updated_row.set_last_visit(row.last_visit());	319 updated_row.set_last_visit(row.last_visit());

313 // While the URL is guaranteed to remain stable, the title may have changed.	320 // While the URL is guaranteed to remain stable, the title may have changed.

314 // If so, then we need to update the index with the changed words.	321 // If so, then we need to update the index with the changed words.

315 if (updated_row.title() != row.title()) {	322 if (updated_row.title() != row.title()) {

316 // Clear all words associated with this row and re-index both the	323 // Clear all words associated with this row and re-index both the

317 // URL and title.	324 // URL and title.

318 RemoveRowWordsFromIndex(updated_row);	325 RemoveRowWordsFromIndex(updated_row);

319 updated_row.set_title(row.title());	326 updated_row.set_title(row.title());

320 AddRowWordsToIndex(updated_row);	327 AddRowWordsToIndex(updated_row);

321 }	328 }

322 } else {	329 } else {

323 // This indexed row no longer qualifies and will be de-indexed by	330 // This indexed row no longer qualifies and will be de-indexed by

324 // clearing all words associated with this row.	331 // clearing all words associated with this row.

325 URLRow& removed_row = row_pos->second;	332 RemoveRowFromIndex(row);

326 RemoveRowFromIndex(removed_row);

327 }	333 }

328 // This invalidates the cache.	334 search_term_cache_.clear(); // This invalidates the cache.

329 search_term_cache_.clear();

330 }	335 }

331	336

332 void URLIndexPrivateData::DeleteURL(URLID row_id) {	337 // Helper functor for DeleteURL.

333 // Note that this does not remove any reference to this row from the	338 class HistoryInfoMapItemHasURL {

334 // word_id_history_map_. That map will continue to contain (and return)	339 public:

335 // hits against this row until that map is rebuilt, but since the	340 explicit HistoryInfoMapItemHasURL(const GURL& url): url_(url) {}

336 // history_info_map_ no longer references the row no erroneous results	341

337 // will propagate to the user.	342 bool operator()(const std::pair<const HistoryID, URLRow> item) {
	Peter Kasting 2012/01/14 00:12:49 Nit: Missing &? Nit: Missing &? mrossetti 2012/03/03 05:05:56 Done. Show quoted text On 2012/01/14 00:12:49, Peter Kasting wrote: > Nit: Missing &? Done.
338 history_info_map_.erase(row_id);	343 return item.second.url() == url_;

339 search_term_cache_.clear(); // This invalidates the word cache.	344 }

	345

	346 private:

	347 const GURL& url_;

	348 };

	349

	350 void URLIndexPrivateData::DeleteURL(const GURL& url) {

	351 // Find the matching entry in the history_info_map_.

	352 HistoryInfoMap::iterator pos = std::find_if(

	353 history_info_map_.begin(),

	354 history_info_map_.end(),

	355 HistoryInfoMapItemHasURL(url));

	356 if (pos != history_info_map_.end()) {

	357 RemoveRowFromIndex(pos->second);

	358 search_term_cache_.clear(); // This invalidates the cache.

	359 }

340 }	360 }

341	361

342 bool URLIndexPrivateData::URLSchemeIsWhitelisted(const GURL& gurl) const {	362 bool URLIndexPrivateData::URLSchemeIsWhitelisted(const GURL& gurl) const {

343 return scheme_whitelist_.find(gurl.scheme()) != scheme_whitelist_.end();	363 return scheme_whitelist_.find(gurl.scheme()) != scheme_whitelist_.end();

344 }	364 }

345	365

346 // URLIndexPrivateData::HistoryItemFactorGreater -------------------------------	366 // URLIndexPrivateData::HistoryItemFactorGreater -------------------------------

347	367

348 URLIndexPrivateData::HistoryItemFactorGreater::HistoryItemFactorGreater(	368 URLIndexPrivateData::HistoryItemFactorGreater::HistoryItemFactorGreater(

349 const HistoryInfoMap& history_info_map)	369 const HistoryInfoMap& history_info_map)

(...skipping 480 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
830 whitelist->insert(std::string(chrome::kFileScheme));	850 whitelist->insert(std::string(chrome::kFileScheme));

831 whitelist->insert(std::string(chrome::kFtpScheme));	851 whitelist->insert(std::string(chrome::kFtpScheme));

832 whitelist->insert(std::string(chrome::kHttpScheme));	852 whitelist->insert(std::string(chrome::kHttpScheme));

833 whitelist->insert(std::string(chrome::kHttpsScheme));	853 whitelist->insert(std::string(chrome::kHttpsScheme));

834 whitelist->insert(std::string(chrome::kMailToScheme));	854 whitelist->insert(std::string(chrome::kMailToScheme));

835 }	855 }

836	856

837 // Cache Saving ----------------------------------------------------------------	857 // Cache Saving ----------------------------------------------------------------

838	858

839 bool URLIndexPrivateData::SaveToFile(const FilePath& file_path) {	859 bool URLIndexPrivateData::SaveToFile(const FilePath& file_path) {

840 // TODO(mrossetti): Move File IO to another thread.

841 base::ThreadRestrictions::ScopedAllowIO allow_io;

842 base::TimeTicks beginning_time = base::TimeTicks::Now();	860 base::TimeTicks beginning_time = base::TimeTicks::Now();

843 InMemoryURLIndexCacheItem index_cache;	861 InMemoryURLIndexCacheItem index_cache;

844 SavePrivateData(&index_cache);	862 SavePrivateData(&index_cache);

845 std::string data;	863 std::string data;

846 if (!index_cache.SerializeToString(&data)) {	864 if (!index_cache.SerializeToString(&data)) {

847 LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache.";	865 LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache.";

848 return false;	866 return false;

849 }	867 }

850	868

851 int size = data.size();	869 int size = data.size();

(...skipping 97 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
949 map_entry->set_visit_count(url_row.visit_count());	967 map_entry->set_visit_count(url_row.visit_count());

950 map_entry->set_typed_count(url_row.typed_count());	968 map_entry->set_typed_count(url_row.typed_count());

951 map_entry->set_last_visit(url_row.last_visit().ToInternalValue());	969 map_entry->set_last_visit(url_row.last_visit().ToInternalValue());

952 map_entry->set_url(url_row.url().spec());	970 map_entry->set_url(url_row.url().spec());

953 map_entry->set_title(UTF16ToUTF8(url_row.title()));	971 map_entry->set_title(UTF16ToUTF8(url_row.title()));

954 }	972 }

955 }	973 }

956	974

957 // Cache Restoring -------------------------------------------------------------	975 // Cache Restoring -------------------------------------------------------------

958	976

959 bool URLIndexPrivateData::ReloadFromHistory(history::URLDatabase* history_db) {	977 // static

960 Clear();	978 URLIndexPrivateData* URLIndexPrivateData::RestoreFromFile(

961	979 const FilePath& file_path) {

962 if (!history_db)

963 return false;

964

965 base::TimeTicks beginning_time = base::TimeTicks::Now();	980 base::TimeTicks beginning_time = base::TimeTicks::Now();

966 URLDatabase::URLEnumerator history_enum;	981 if (!file_util::PathExists(file_path))

967 if (!history_db->InitURLEnumeratorForSignificant(&history_enum))	982 return NULL;

968 return false;

969 URLRow row;

970 while (history_enum.GetNextURL(&row))

971 IndexRow(row);

972 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime",

973 base::TimeTicks::Now() - beginning_time);

974 return true;

975 }

976

977 bool URLIndexPrivateData::RestoreFromFile(const FilePath& file_path) {

978 // TODO(mrossetti): Figure out how to determine if the cache is up-to-date.

979 // That is: ensure that the database has not been modified since the cache

980 // was last saved. DB file modification date is inadequate. There are no

981 // SQLite table checksums automatically stored.

982 // FIXME(mrossetti): Move File IO to another thread.

983 base::ThreadRestrictions::ScopedAllowIO allow_io;

984 base::TimeTicks beginning_time = base::TimeTicks::Now();

985 std::string data;	983 std::string data;

986 // If there is no cache file then simply give up. This will cause us to	984 // If there is no cache file then simply give up. This will cause us to

987 // attempt to rebuild from the history database.	985 // attempt to rebuild from the history database.

988 if (!file_util::ReadFileToString(file_path, &data))	986 if (!file_util::ReadFileToString(file_path, &data))

989 return false;	987 return NULL;

990	988

	989 scoped_ptr<URLIndexPrivateData> restored_data(new URLIndexPrivateData);

991 InMemoryURLIndexCacheItem index_cache;	990 InMemoryURLIndexCacheItem index_cache;

992 if (!index_cache.ParseFromArray(data.c_str(), data.size())) {	991 if (!index_cache.ParseFromArray(data.c_str(), data.size())) {

993 LOG(WARNING) << "Failed to parse InMemoryURLIndex cache data read from "	992 LOG(WARNING) << "Failed to parse URLIndexPrivateData cache data read from "

994 << file_path.value();	993 << file_path.value();

995 return false;	994 return restored_data.release();

996 }	995 }

997	996

998 if (!RestorePrivateData(index_cache)) {	997 if (!restored_data->RestorePrivateData(index_cache)) {

999 Clear(); // Back to square one -- must build from scratch.	998 restored_data.reset(); // Back to square one -- must build from history DB.

1000 return false;	999 return NULL;

1001 }	1000 }

1002	1001

1003 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime",	1002 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime",

1004 base::TimeTicks::Now() - beginning_time);	1003 base::TimeTicks::Now() - beginning_time);

1005 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",	1004 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",

1006 history_id_word_map_.size());	1005 restored_data->history_id_word_map_.size());

1007 UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size());	1006 UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size());

1008 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", word_map_.size());	1007 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",

1009 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", char_word_map_.size());	1008 restored_data->word_map_.size());

1010 return true;	1009 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",

	1010 restored_data->char_word_map_.size());

	1011 return restored_data.release();

	1012 }

	1013

	1014 // static

	1015 URLIndexPrivateData* URLIndexPrivateData::RebuildFromHistory(

	1016 URLDatabase* history_db) {

	1017 if (!history_db)

	1018 return NULL;

	1019

	1020 base::TimeTicks beginning_time = base::TimeTicks::Now();

	1021

	1022 scoped_ptr<URLIndexPrivateData> rebuilt_data(new URLIndexPrivateData);

	1023 URLDatabase::URLEnumerator history_enum;

	1024 if (!history_db->InitURLEnumeratorForSignificant(&history_enum))

	1025 return NULL;

	1026 URLRow row;
	Peter Kasting 2012/01/14 00:12:49 Nit: You can condense these three lines to two and Nit: You can condense these three lines to two and get \|row\| scoped to the loop as a bonus: for (URLRow row; history_enum.GetNextURL(&row); ) rebuilt_data->IndexRow(row); mrossetti 2012/03/03 05:05:56 w00t! My ancient mind just doesn't work that way! Show quoted text On 2012/01/14 00:12:49, Peter Kasting wrote: > Nit: You can condense these three lines to two and get \|row\| scoped to the loop > as a bonus: > > for (URLRow row; history_enum.GetNextURL(&row); ) > rebuilt_data->IndexRow(row); w00t! My ancient mind just doesn't work that way! ;^)
	1027 while (history_enum.GetNextURL(&row))

	1028 rebuilt_data->IndexRow(row);

	1029

	1030 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime",

	1031 base::TimeTicks::Now() - beginning_time);

	1032 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",

	1033 rebuilt_data->history_id_word_map_.size());

	1034 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",

	1035 rebuilt_data->word_map_.size());

	1036 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",

	1037 rebuilt_data->char_word_map_.size());

	1038 return rebuilt_data.release();

1011 }	1039 }

1012	1040

1013 bool URLIndexPrivateData::RestorePrivateData(	1041 bool URLIndexPrivateData::RestorePrivateData(

1014 const InMemoryURLIndexCacheItem& cache) {	1042 const InMemoryURLIndexCacheItem& cache) {

1015 return RestoreWordList(cache) && RestoreWordMap(cache) &&	1043 return RestoreWordList(cache) && RestoreWordMap(cache) &&

1016 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) &&	1044 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) &&

1017 RestoreHistoryInfoMap(cache);	1045 RestoreHistoryInfoMap(cache);

1018 }	1046 }

1019	1047

1020 bool URLIndexPrivateData::RestoreWordList(	1048 bool URLIndexPrivateData::RestoreWordList(

(...skipping 108 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1129 if (iter->has_title()) {	1157 if (iter->has_title()) {

1130 string16 title(UTF8ToUTF16(iter->title()));	1158 string16 title(UTF8ToUTF16(iter->title()));

1131 url_row.set_title(title);	1159 url_row.set_title(title);

1132 }	1160 }

1133 history_info_map_[history_id] = url_row;	1161 history_info_map_[history_id] = url_row;

1134 }	1162 }

1135 return true;	1163 return true;

1136 }	1164 }

1137	1165

1138 } // namespace history	1166 } // namespace history

OLD	NEW