chrome/browser/history/url_index_private_data.cc - Issue 9030031: Move InMemoryURLIndex Caching Operations to FILE Thread

Side by Side Diff: chrome/browser/history/url_index_private_data.cc

Issue 9030031: Move InMemoryURLIndex Caching Operations to FILE Thread (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/

Patch Set: Syncing with hopes of pleasing trybot update Created 8 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "chrome/browser/history/url_index_private_data.h"	5 #include "chrome/browser/history/url_index_private_data.h"

6	6

7 #include <algorithm>	7 #include <algorithm>

8 #include <functional>	8 #include <functional>

9 #include <iterator>	9 #include <iterator>

10 #include <limits>	10 #include <limits>

11 #include <numeric>	11 #include <numeric>

12	12

13 #include "base/file_util.h"	13 #include "base/file_util.h"

14 #include "base/i18n/case_conversion.h"	14 #include "base/i18n/case_conversion.h"

15 #include "base/metrics/histogram.h"	15 #include "base/metrics/histogram.h"

16 #include "base/string_util.h"	16 #include "base/string_util.h"

17 #include "base/threading/thread_restrictions.h"	17 #include "base/time.h"

18 #include "base/utf_string_conversions.h"	18 #include "base/utf_string_conversions.h"

19 #include "chrome/browser/autocomplete/autocomplete.h"	19 #include "chrome/browser/autocomplete/autocomplete.h"

20 #include "chrome/browser/history/history_database.h"	20 #include "chrome/browser/history/history_database.h"

21 #include "chrome/common/url_constants.h"	21 #include "chrome/browser/history/in_memory_url_index.h"

	22 #include "content/public/browser/notification_details.h"

	23 #include "content/public/browser/notification_service.h"

	24 #include "content/public/browser/notification_source.h"

22 #include "net/base/net_util.h"	25 #include "net/base/net_util.h"

23 #include "third_party/protobuf/src/google/protobuf/repeated_field.h"	26 #include "third_party/protobuf/src/google/protobuf/repeated_field.h"

24	27

25 using google::protobuf::RepeatedField;	28 using google::protobuf::RepeatedField;

26 using google::protobuf::RepeatedPtrField;	29 using google::protobuf::RepeatedPtrField;

27 using in_memory_url_index::InMemoryURLIndexCacheItem;	30 using in_memory_url_index::InMemoryURLIndexCacheItem;

28	31

29 namespace history {	32 namespace history {

30	33

31 typedef imui::InMemoryURLIndexCacheItem_WordListItem WordListItem;	34 typedef imui::InMemoryURLIndexCacheItem_WordListItem WordListItem;

(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
116 }	119 }

117	120

118 // InMemoryURLIndex's Private Data ---------------------------------------------	121 // InMemoryURLIndex's Private Data ---------------------------------------------

119	122

120 URLIndexPrivateData::URLIndexPrivateData()	123 URLIndexPrivateData::URLIndexPrivateData()

121 : restored_cache_version_(0),	124 : restored_cache_version_(0),

122 saved_cache_version_(kCurrentCacheFileVersion),	125 saved_cache_version_(kCurrentCacheFileVersion),

123 pre_filter_item_count_(0),	126 pre_filter_item_count_(0),

124 post_filter_item_count_(0),	127 post_filter_item_count_(0),

125 post_scoring_item_count_(0) {	128 post_scoring_item_count_(0) {

126 URLIndexPrivateData::InitializeSchemeWhitelist(&scheme_whitelist_);

127 }	129 }

128	130

129 URLIndexPrivateData::~URLIndexPrivateData() {}	131 URLIndexPrivateData::~URLIndexPrivateData() {}

130	132

131 void URLIndexPrivateData::Clear() {	133 void URLIndexPrivateData::Clear() {

132 word_list_.clear();	134 word_list_.clear();

133 available_words_.clear();	135 available_words_.clear();

134 word_map_.clear();	136 word_map_.clear();

135 char_word_map_.clear();	137 char_word_map_.clear();

136 word_id_history_map_.clear();	138 word_id_history_map_.clear();

137 history_id_word_map_.clear();	139 history_id_word_map_.clear();

138 history_info_map_.clear();	140 history_info_map_.clear();

139 word_starts_map_.clear();	141 word_starts_map_.clear();

140 }	142 }

141	143

	144 bool URLIndexPrivateData::Empty() const {

	145 return history_info_map_.empty();

	146 }

	147

	148 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::Duplicate() const {

	149 scoped_refptr<URLIndexPrivateData> data_copy = new URLIndexPrivateData;

	150 data_copy->word_list_ = word_list_;

	151 data_copy->available_words_ = available_words_;

	152 data_copy->word_map_ = word_map_;

	153 data_copy->char_word_map_ = char_word_map_;

	154 data_copy->word_id_history_map_ = word_id_history_map_;

	155 data_copy->history_id_word_map_ = history_id_word_map_;

	156 data_copy->history_info_map_ = history_info_map_;

	157 return data_copy;

	158 // Not copied:

	159 // search_term_cache_

	160 // pre_filter_item_count_

	161 // post_filter_item_count_

	162 // post_scoring_item_count_

	163 };

	164

142 // Cache Updating --------------------------------------------------------------	165 // Cache Updating --------------------------------------------------------------

143	166

144 bool URLIndexPrivateData::IndexRow(const URLRow& row) {	167 bool URLIndexPrivateData::IndexRow(

	168 const URLRow& row,

	169 const std::string& languages,

	170 const std::set<std::string>& scheme_whitelist) {

145 const GURL& gurl(row.url());	171 const GURL& gurl(row.url());

146	172

147 // Index only URLs with a whitelisted scheme.	173 // Index only URLs with a whitelisted scheme.

148 if (!URLIndexPrivateData::URLSchemeIsWhitelisted(gurl))	174 if (!URLSchemeIsWhitelisted(gurl, scheme_whitelist))

149 return false;	175 return false;

150	176

151 URLID row_id = row.id();	177 URLID row_id = row.id();

152 // Strip out username and password before saving and indexing.	178 // Strip out username and password before saving and indexing.

153 string16 url(net::FormatUrl(gurl, languages_,	179 string16 url(net::FormatUrl(gurl, languages,

154 net::kFormatUrlOmitUsernamePassword,	180 net::kFormatUrlOmitUsernamePassword,

155 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS,	181 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS,

156 NULL, NULL, NULL));	182 NULL, NULL, NULL));

157	183

158 HistoryID history_id = static_cast<HistoryID>(row_id);	184 HistoryID history_id = static_cast<HistoryID>(row_id);

159 DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max());	185 DCHECK_LT(history_id, std::numeric_limits<HistoryID>::max());

160	186

161 // Add the row for quick lookup in the history info store.	187 // Add the row for quick lookup in the history info store.

162 URLRow new_row(GURL(url), row_id);	188 URLRow new_row(GURL(url), row_id);

163 new_row.set_visit_count(row.visit_count());	189 new_row.set_visit_count(row.visit_count());

164 new_row.set_typed_count(row.typed_count());	190 new_row.set_typed_count(row.typed_count());

165 new_row.set_last_visit(row.last_visit());	191 new_row.set_last_visit(row.last_visit());

166 new_row.set_title(row.title());	192 new_row.set_title(row.title());

167 history_info_map_[history_id] = new_row;	193 history_info_map_[history_id] = new_row;

168	194

169 // Index the words contained in the URL and title of the row.	195 // Index the words contained in the URL and title of the row.

170 RowWordStarts word_starts;	196 RowWordStarts word_starts;

171 AddRowWordsToIndex(new_row, &word_starts);	197 AddRowWordsToIndex(new_row, &word_starts, languages);

172 word_starts_map_[history_id] = word_starts;	198 word_starts_map_[history_id] = word_starts;

173 return true;	199 return true;

174 }	200 }

175	201

176 void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row,	202 void URLIndexPrivateData::AddRowWordsToIndex(const URLRow& row,

177 RowWordStarts* word_starts) {	203 RowWordStarts* word_starts,

	204 const std::string& languages) {

178 HistoryID history_id = static_cast<HistoryID>(row.id());	205 HistoryID history_id = static_cast<HistoryID>(row.id());

179 // Split URL into individual, unique words then add in the title words.	206 // Split URL into individual, unique words then add in the title words.

180 const GURL& gurl(row.url());	207 const GURL& gurl(row.url());

181 string16 url(net::FormatUrl(gurl, languages_,	208 string16 url(net::FormatUrl(gurl, languages,

182 net::kFormatUrlOmitUsernamePassword,	209 net::kFormatUrlOmitUsernamePassword,

183 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS,	210 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS,

184 NULL, NULL, NULL));	211 NULL, NULL, NULL));

185 url = base::i18n::ToLower(url);	212 url = base::i18n::ToLower(url);

186 String16Set url_words = String16SetFromString16(url,	213 String16Set url_words = String16SetFromString16(url,

187 word_starts ? &word_starts->url_word_starts_ : NULL);	214 word_starts ? &word_starts->url_word_starts_ : NULL);

188 String16Set title_words = String16SetFromString16(row.title(),	215 String16Set title_words = String16SetFromString16(row.title(),

189 word_starts ? &word_starts->title_word_starts_ : NULL);	216 word_starts ? &word_starts->title_word_starts_ : NULL);

190 String16Set words;	217 String16Set words;

191 std::set_union(url_words.begin(), url_words.end(),	218 std::set_union(url_words.begin(), url_words.end(),

(...skipping 108 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
300 if (iter != history_id_word_map_.end()) {	327 if (iter != history_id_word_map_.end()) {

301 WordIDSet& word_id_set(iter->second);	328 WordIDSet& word_id_set(iter->second);

302 word_id_set.insert(word_id);	329 word_id_set.insert(word_id);

303 } else {	330 } else {

304 WordIDSet word_id_set;	331 WordIDSet word_id_set;

305 word_id_set.insert(word_id);	332 word_id_set.insert(word_id);

306 history_id_word_map_[history_id] = word_id_set;	333 history_id_word_map_[history_id] = word_id_set;

307 }	334 }

308 }	335 }

309	336

310 bool URLIndexPrivateData::UpdateURL(const URLRow& row) {	337 bool URLIndexPrivateData::UpdateURL(

	338 const URLRow& row,

	339 const std::string& languages,

	340 const std::set<std::string>& scheme_whitelist) {

311 // The row may or may not already be in our index. If it is not already	341 // The row may or may not already be in our index. If it is not already

312 // indexed and it qualifies then it gets indexed. If it is already	342 // indexed and it qualifies then it gets indexed. If it is already

313 // indexed and still qualifies then it gets updated, otherwise it	343 // indexed and still qualifies then it gets updated, otherwise it

314 // is deleted from the index.	344 // is deleted from the index.

315 bool row_was_updated = false;	345 bool row_was_updated = false;

316 URLID row_id = row.id();	346 URLID row_id = row.id();

317 HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id);	347 HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id);

318 if (row_pos == history_info_map_.end()) {	348 if (row_pos == history_info_map_.end()) {

319 // This new row should be indexed if it qualifies.	349 // This new row should be indexed if it qualifies.

320 URLRow new_row(row);	350 URLRow new_row(row);

321 new_row.set_id(row_id);	351 new_row.set_id(row_id);

322 row_was_updated =	352 row_was_updated = RowQualifiesAsSignificant(new_row, base::Time()) &&

323 RowQualifiesAsSignificant(new_row, base::Time()) && IndexRow(new_row);	353 IndexRow(new_row, languages, scheme_whitelist);

324 } else if (RowQualifiesAsSignificant(row, base::Time())) {	354 } else if (RowQualifiesAsSignificant(row, base::Time())) {

325 // This indexed row still qualifies and will be re-indexed.	355 // This indexed row still qualifies and will be re-indexed.

326 // The url won't have changed but the title, visit count, etc.	356 // The url won't have changed but the title, visit count, etc.

327 // might have changed.	357 // might have changed.

328 URLRow& row_to_update = row_pos->second;	358 URLRow& row_to_update = row_pos->second;

329 bool title_updated = row_to_update.title() != row.title();	359 bool title_updated = row_to_update.title() != row.title();

330 if (row_to_update.visit_count() != row.visit_count() ||	360 if (row_to_update.visit_count() != row.visit_count() ||

331 row_to_update.typed_count() != row.typed_count() ||	361 row_to_update.typed_count() != row.typed_count() ||

332 row_to_update.last_visit() != row.last_visit() || title_updated) {	362 row_to_update.last_visit() != row.last_visit() || title_updated) {

333 row_to_update.set_visit_count(row.visit_count());	363 row_to_update.set_visit_count(row.visit_count());

334 row_to_update.set_typed_count(row.typed_count());	364 row_to_update.set_typed_count(row.typed_count());

335 row_to_update.set_last_visit(row.last_visit());	365 row_to_update.set_last_visit(row.last_visit());

336 // While the URL is guaranteed to remain stable, the title may have	366 // While the URL is guaranteed to remain stable, the title may have

337 // changed. If so, then update the index with the changed words.	367 // changed. If so, then update the index with the changed words.

338 if (title_updated) {	368 if (title_updated) {

339 // Clear all words associated with this row and re-index both the	369 // Clear all words associated with this row and re-index both the

340 // URL and title.	370 // URL and title.

341 RemoveRowWordsFromIndex(row_to_update);	371 RemoveRowWordsFromIndex(row_to_update);

342 row_to_update.set_title(row.title());	372 row_to_update.set_title(row.title());

343 RowWordStarts word_starts;	373 RowWordStarts word_starts;

344 AddRowWordsToIndex(row_to_update, &word_starts);	374 AddRowWordsToIndex(row_to_update, &word_starts, languages);

345 word_starts_map_[row_id] = word_starts;	375 word_starts_map_[row_id] = word_starts;

346 }	376 }

347 row_was_updated = true;	377 row_was_updated = true;

348 }	378 }

349 } else {	379 } else {

350 // This indexed row no longer qualifies and will be de-indexed by	380 // This indexed row no longer qualifies and will be de-indexed by

351 // clearing all words associated with this row.	381 // clearing all words associated with this row.

352 RemoveRowFromIndex(row);	382 RemoveRowFromIndex(row);

353 row_was_updated = true;	383 row_was_updated = true;

354 }	384 }

(...skipping 21 matching lines...) Expand all Loading...
376 history_info_map_.begin(),	406 history_info_map_.begin(),

377 history_info_map_.end(),	407 history_info_map_.end(),

378 HistoryInfoMapItemHasURL(url));	408 HistoryInfoMapItemHasURL(url));

379 if (pos == history_info_map_.end())	409 if (pos == history_info_map_.end())

380 return false;	410 return false;

381 RemoveRowFromIndex(pos->second);	411 RemoveRowFromIndex(pos->second);

382 search_term_cache_.clear(); // This invalidates the cache.	412 search_term_cache_.clear(); // This invalidates the cache.

383 return true;	413 return true;

384 }	414 }

385	415

386 bool URLIndexPrivateData::URLSchemeIsWhitelisted(const GURL& gurl) const {

387 return scheme_whitelist_.find(gurl.scheme()) != scheme_whitelist_.end();

388 }

389

390 // URLIndexPrivateData::HistoryItemFactorGreater -------------------------------	416 // URLIndexPrivateData::HistoryItemFactorGreater -------------------------------

391	417

392 URLIndexPrivateData::HistoryItemFactorGreater::HistoryItemFactorGreater(	418 URLIndexPrivateData::HistoryItemFactorGreater::HistoryItemFactorGreater(

393 const HistoryInfoMap& history_info_map)	419 const HistoryInfoMap& history_info_map)

394 : history_info_map_(history_info_map) {	420 : history_info_map_(history_info_map) {

395 }	421 }

396	422

397 URLIndexPrivateData::HistoryItemFactorGreater::~HistoryItemFactorGreater() {}	423 URLIndexPrivateData::HistoryItemFactorGreater::~HistoryItemFactorGreater() {}

398	424

399 bool URLIndexPrivateData::HistoryItemFactorGreater::operator()(	425 bool URLIndexPrivateData::HistoryItemFactorGreater::operator()(

(...skipping 498 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
898 std::set_intersection(word_id_set.begin(), word_id_set.end(),	924 std::set_intersection(word_id_set.begin(), word_id_set.end(),

899 char_word_id_set.begin(), char_word_id_set.end(),	925 char_word_id_set.begin(), char_word_id_set.end(),

900 std::inserter(new_word_id_set,	926 std::inserter(new_word_id_set,

901 new_word_id_set.begin()));	927 new_word_id_set.begin()));

902 word_id_set.swap(new_word_id_set);	928 word_id_set.swap(new_word_id_set);

903 }	929 }

904 }	930 }

905 return word_id_set;	931 return word_id_set;

906 }	932 }

907	933

908 // static

909 void URLIndexPrivateData::InitializeSchemeWhitelist(

910 std::set<std::string>* whitelist) {

911 DCHECK(whitelist);

912 whitelist->insert(std::string(chrome::kAboutScheme));

913 whitelist->insert(std::string(chrome::kChromeUIScheme));

914 whitelist->insert(std::string(chrome::kFileScheme));

915 whitelist->insert(std::string(chrome::kFtpScheme));

916 whitelist->insert(std::string(chrome::kHttpScheme));

917 whitelist->insert(std::string(chrome::kHttpsScheme));

918 whitelist->insert(std::string(chrome::kMailToScheme));

919 }

920

921 // Cache Saving ----------------------------------------------------------------	934 // Cache Saving ----------------------------------------------------------------

922	935

	936 // static

	937 void URLIndexPrivateData::WritePrivateDataToCacheFileTask(

	938 scoped_refptr<URLIndexPrivateData> private_data,

	939 const FilePath& file_path,

	940 scoped_refptr<RefCountedBool> succeeded) {

	941 DCHECK(private_data.get());

	942 DCHECK(!file_path.empty());

	943 succeeded->set_value(private_data->SaveToFile(file_path));

	944 }

	945

923 bool URLIndexPrivateData::SaveToFile(const FilePath& file_path) {	946 bool URLIndexPrivateData::SaveToFile(const FilePath& file_path) {

924 // TODO(mrossetti): Move File IO to another thread.

925 base::ThreadRestrictions::ScopedAllowIO allow_io;

926 base::TimeTicks beginning_time = base::TimeTicks::Now();	947 base::TimeTicks beginning_time = base::TimeTicks::Now();

927 InMemoryURLIndexCacheItem index_cache;	948 InMemoryURLIndexCacheItem index_cache;

928 SavePrivateData(&index_cache);	949 SavePrivateData(&index_cache);

929 std::string data;	950 std::string data;

930 if (!index_cache.SerializeToString(&data)) {	951 if (!index_cache.SerializeToString(&data)) {

931 LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache.";	952 LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache.";

932 return false;	953 return false;

933 }	954 }

934	955

935 int size = data.size();	956 int size = data.size();

(...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1023 if (history_info_map_.empty())	1044 if (history_info_map_.empty())

1024 return;	1045 return;

1025 HistoryInfoMapItem* map_item = cache->mutable_history_info_map();	1046 HistoryInfoMapItem* map_item = cache->mutable_history_info_map();

1026 map_item->set_item_count(history_info_map_.size());	1047 map_item->set_item_count(history_info_map_.size());

1027 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();	1048 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();

1028 iter != history_info_map_.end(); ++iter) {	1049 iter != history_info_map_.end(); ++iter) {

1029 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry();	1050 HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry();

1030 map_entry->set_history_id(iter->first);	1051 map_entry->set_history_id(iter->first);

1031 const URLRow& url_row(iter->second);	1052 const URLRow& url_row(iter->second);

1032 // Note: We only save information that contributes to the index so there	1053 // Note: We only save information that contributes to the index so there

1033 // is no need to save search_term_cache_ (not persistent),	1054 // is no need to save search_term_cache_ (not persistent).

1034 // languages_, etc.

1035 map_entry->set_visit_count(url_row.visit_count());	1055 map_entry->set_visit_count(url_row.visit_count());

1036 map_entry->set_typed_count(url_row.typed_count());	1056 map_entry->set_typed_count(url_row.typed_count());

1037 map_entry->set_last_visit(url_row.last_visit().ToInternalValue());	1057 map_entry->set_last_visit(url_row.last_visit().ToInternalValue());

1038 map_entry->set_url(url_row.url().spec());	1058 map_entry->set_url(url_row.url().spec());

1039 map_entry->set_title(UTF16ToUTF8(url_row.title()));	1059 map_entry->set_title(UTF16ToUTF8(url_row.title()));

1040 }	1060 }

1041 }	1061 }

1042	1062

1043 void URLIndexPrivateData::SaveWordStartsMap(	1063 void URLIndexPrivateData::SaveWordStartsMap(

1044 InMemoryURLIndexCacheItem* cache) const {	1064 InMemoryURLIndexCacheItem* cache) const {

(...skipping 18 matching lines...) Expand all Loading...
1063 i != word_starts.url_word_starts_.end(); ++i)	1083 i != word_starts.url_word_starts_.end(); ++i)

1064 map_entry->add_url_word_starts(*i);	1084 map_entry->add_url_word_starts(*i);

1065 for (WordStarts::const_iterator i = word_starts.title_word_starts_.begin();	1085 for (WordStarts::const_iterator i = word_starts.title_word_starts_.begin();

1066 i != word_starts.title_word_starts_.end(); ++i)	1086 i != word_starts.title_word_starts_.end(); ++i)

1067 map_entry->add_title_word_starts(*i);	1087 map_entry->add_title_word_starts(*i);

1068 }	1088 }

1069 }	1089 }

1070	1090

1071 // Cache Restoring -------------------------------------------------------------	1091 // Cache Restoring -------------------------------------------------------------

1072	1092

1073 bool URLIndexPrivateData::RestoreFromFile(const FilePath& file_path) {	1093 // static

1074 // TODO(mrossetti): Figure out how to determine if the cache is up-to-date.	1094 void URLIndexPrivateData::RestoreFromFileTask(

1075 // That is: ensure that the database has not been modified since the cache	1095 const FilePath& file_path,

1076 // was last saved. DB file modification date is inadequate. There are no	1096 scoped_refptr<URLIndexPrivateData> private_data,

1077 // SQLite table checksums automatically stored.	1097 std::string languages) {

1078 Clear(); // Start with a clean slate.	1098 private_data = URLIndexPrivateData::RestoreFromFile(file_path, languages);

	1099 }

1079	1100

1080 // FIXME(mrossetti): Move File IO to another thread.	1101 // static

1081 base::ThreadRestrictions::ScopedAllowIO allow_io;	1102 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RestoreFromFile(

	1103 const FilePath& file_path,

	1104 const std::string& languages) {

1082 base::TimeTicks beginning_time = base::TimeTicks::Now();	1105 base::TimeTicks beginning_time = base::TimeTicks::Now();

1083 if (!file_util::PathExists(file_path))	1106 if (!file_util::PathExists(file_path))

1084 return false;	1107 return NULL;

1085 std::string data;	1108 std::string data;

1086 // If there is no cache file then simply give up. This will cause us to	1109 // If there is no cache file then simply give up. This will cause us to

1087 // attempt to rebuild from the history database.	1110 // attempt to rebuild from the history database.

1088 if (!file_util::ReadFileToString(file_path, &data))	1111 if (!file_util::ReadFileToString(file_path, &data))

1089 return false;	1112 return NULL;

1090	1113

	1114 scoped_refptr<URLIndexPrivateData> restored_data(new URLIndexPrivateData);

1091 InMemoryURLIndexCacheItem index_cache;	1115 InMemoryURLIndexCacheItem index_cache;

1092 if (!index_cache.ParseFromArray(data.c_str(), data.size())) {	1116 if (!index_cache.ParseFromArray(data.c_str(), data.size())) {

1093 LOG(WARNING) << "Failed to parse InMemoryURLIndex cache data read from "	1117 LOG(WARNING) << "Failed to parse URLIndexPrivateData cache data read from "

1094 << file_path.value();	1118 << file_path.value();

1095 return false;	1119 return restored_data;

1096 }	1120 }

1097	1121

1098 if (!RestorePrivateData(index_cache)) {	1122 if (!restored_data->RestorePrivateData(index_cache, languages))

1099 Clear(); // Back to square one -- must build from scratch.	1123 return NULL;

1100 return false;

1101 }

1102	1124

1103 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime",	1125 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime",

1104 base::TimeTicks::Now() - beginning_time);	1126 base::TimeTicks::Now() - beginning_time);

1105 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",	1127 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",

1106 history_id_word_map_.size());	1128 restored_data->history_id_word_map_.size());

1107 UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size());	1129 UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size());

1108 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", word_map_.size());	1130 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",

1109 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", char_word_map_.size());	1131 restored_data->word_map_.size());

1110 return true;	1132 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",

	1133 restored_data->char_word_map_.size());

	1134 if (restored_data->Empty())

	1135 return NULL; // 'No data' is the same as a failed reload.

	1136 return restored_data;

1111 }	1137 }

1112	1138

1113 // static	1139 // static

1114 URLIndexPrivateData* URLIndexPrivateData::RebuildFromHistory(	1140 scoped_refptr<URLIndexPrivateData> URLIndexPrivateData::RebuildFromHistory(

1115 HistoryDatabase* history_db) {	1141 HistoryDatabase* history_db,

	1142 const std::string& languages,

	1143 const std::set<std::string>& scheme_whitelist) {

1116 if (!history_db)	1144 if (!history_db)

1117 return NULL;	1145 return NULL;

1118	1146

1119 base::TimeTicks beginning_time = base::TimeTicks::Now();	1147 base::TimeTicks beginning_time = base::TimeTicks::Now();

1120	1148

1121 scoped_ptr<URLIndexPrivateData> rebuilt_data(new URLIndexPrivateData);	1149 scoped_refptr<URLIndexPrivateData> rebuilt_data(new URLIndexPrivateData);

1122 URLDatabase::URLEnumerator history_enum;	1150 URLDatabase::URLEnumerator history_enum;

1123 if (!history_db->InitURLEnumeratorForSignificant(&history_enum))	1151 if (!history_db->InitURLEnumeratorForSignificant(&history_enum))

1124 return NULL;	1152 return NULL;

1125 for (URLRow row; history_enum.GetNextURL(&row); )	1153 for (URLRow row; history_enum.GetNextURL(&row); )

1126 rebuilt_data->IndexRow(row);	1154 rebuilt_data->IndexRow(row, languages, scheme_whitelist);

1127	1155

1128 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime",	1156 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime",

1129 base::TimeTicks::Now() - beginning_time);	1157 base::TimeTicks::Now() - beginning_time);

1130 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",	1158 UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems",

1131 rebuilt_data->history_id_word_map_.size());	1159 rebuilt_data->history_id_word_map_.size());

1132 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",	1160 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords",

1133 rebuilt_data->word_map_.size());	1161 rebuilt_data->word_map_.size());

1134 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",	1162 UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars",

1135 rebuilt_data->char_word_map_.size());	1163 rebuilt_data->char_word_map_.size());

1136 return rebuilt_data.release();	1164 return rebuilt_data;

1137 }	1165 }

1138	1166

1139 bool URLIndexPrivateData::RestorePrivateData(	1167 bool URLIndexPrivateData::RestorePrivateData(

1140 const InMemoryURLIndexCacheItem& cache) {	1168 const InMemoryURLIndexCacheItem& cache,

	1169 const std::string& languages) {

1141 if (cache.has_version())	1170 if (cache.has_version())

1142 restored_cache_version_ = cache.version();	1171 restored_cache_version_ = cache.version();

1143 return RestoreWordList(cache) && RestoreWordMap(cache) &&	1172 return RestoreWordList(cache) && RestoreWordMap(cache) &&

1144 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) &&	1173 RestoreCharWordMap(cache) && RestoreWordIDHistoryMap(cache) &&

1145 RestoreHistoryInfoMap(cache) && RestoreWordStartsMap(cache);	1174 RestoreHistoryInfoMap(cache) && RestoreWordStartsMap(cache, languages);

1146 }	1175 }

1147	1176

1148 bool URLIndexPrivateData::RestoreWordList(	1177 bool URLIndexPrivateData::RestoreWordList(

1149 const InMemoryURLIndexCacheItem& cache) {	1178 const InMemoryURLIndexCacheItem& cache) {

1150 if (!cache.has_word_list())	1179 if (!cache.has_word_list())

1151 return false;	1180 return false;

1152 const WordListItem& list_item(cache.word_list());	1181 const WordListItem& list_item(cache.word_list());

1153 uint32 expected_item_count = list_item.word_count();	1182 uint32 expected_item_count = list_item.word_count();

1154 uint32 actual_item_count = list_item.word_size();	1183 uint32 actual_item_count = list_item.word_size();

1155 if (actual_item_count == 0 || actual_item_count != expected_item_count)	1184 if (actual_item_count == 0 || actual_item_count != expected_item_count)

(...skipping 101 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1257 if (iter->has_title()) {	1286 if (iter->has_title()) {

1258 string16 title(UTF8ToUTF16(iter->title()));	1287 string16 title(UTF8ToUTF16(iter->title()));

1259 url_row.set_title(title);	1288 url_row.set_title(title);

1260 }	1289 }

1261 history_info_map_[history_id] = url_row;	1290 history_info_map_[history_id] = url_row;

1262 }	1291 }

1263 return true;	1292 return true;

1264 }	1293 }

1265	1294

1266 bool URLIndexPrivateData::RestoreWordStartsMap(	1295 bool URLIndexPrivateData::RestoreWordStartsMap(

1267 const InMemoryURLIndexCacheItem& cache) {	1296 const InMemoryURLIndexCacheItem& cache,

	1297 const std::string& languages) {

1268 // Note that this function must be called after RestoreHistoryInfoMap() has	1298 // Note that this function must be called after RestoreHistoryInfoMap() has

1269 // been run as the word starts may have to be recalculated from the urls and	1299 // been run as the word starts may have to be recalculated from the urls and

1270 // page titles.	1300 // page titles.

1271 if (cache.has_word_starts_map()) {	1301 if (cache.has_word_starts_map()) {

1272 const WordStartsMapItem& list_item(cache.word_starts_map());	1302 const WordStartsMapItem& list_item(cache.word_starts_map());

1273 uint32 expected_item_count = list_item.item_count();	1303 uint32 expected_item_count = list_item.item_count();

1274 uint32 actual_item_count = list_item.word_starts_map_entry_size();	1304 uint32 actual_item_count = list_item.word_starts_map_entry_size();

1275 if (actual_item_count == 0 || actual_item_count != expected_item_count)	1305 if (actual_item_count == 0 || actual_item_count != expected_item_count)

1276 return false;	1306 return false;

1277 const RepeatedPtrField<WordStartsMapEntry>&	1307 const RepeatedPtrField<WordStartsMapEntry>&

(...skipping 14 matching lines...) Expand all Loading...
1292 word_starts.title_word_starts_.push_back(*jiter);	1322 word_starts.title_word_starts_.push_back(*jiter);

1293 word_starts_map_[history_id] = word_starts;	1323 word_starts_map_[history_id] = word_starts;

1294 }	1324 }

1295 } else {	1325 } else {

1296 // Since the cache did not contain any word starts we must rebuild them from	1326 // Since the cache did not contain any word starts we must rebuild them from

1297 // the URL and page titles.	1327 // the URL and page titles.

1298 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();	1328 for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();

1299 iter != history_info_map_.end(); ++iter) {	1329 iter != history_info_map_.end(); ++iter) {

1300 RowWordStarts word_starts;	1330 RowWordStarts word_starts;

1301 const URLRow& row(iter->second);	1331 const URLRow& row(iter->second);

1302 string16 url(net::FormatUrl(row.url(), languages_,	1332 string16 url(net::FormatUrl(row.url(), languages,

1303 net::kFormatUrlOmitUsernamePassword,	1333 net::kFormatUrlOmitUsernamePassword,

1304 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS,	1334 net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS,

1305 NULL, NULL, NULL));	1335 NULL, NULL, NULL));

1306 url = base::i18n::ToLower(url);	1336 url = base::i18n::ToLower(url);

1307 String16VectorFromString16(url, false, &word_starts.url_word_starts_);	1337 String16VectorFromString16(url, false, &word_starts.url_word_starts_);

1308 String16VectorFromString16(	1338 String16VectorFromString16(

1309 row.title(), false, &word_starts.title_word_starts_);	1339 row.title(), false, &word_starts.title_word_starts_);

1310 word_starts_map_[iter->first] = word_starts;	1340 word_starts_map_[iter->first] = word_starts;

1311 }	1341 }

1312 }	1342 }

1313 return true;	1343 return true;

1314 }	1344 }

1315	1345

	1346 // static

	1347 bool URLIndexPrivateData::URLSchemeIsWhitelisted(

	1348 const GURL& gurl,

	1349 const std::set<std::string>& whitelist) {

	1350 return whitelist.find(gurl.scheme()) != whitelist.end();

	1351 }

	1352

1316 } // namespace history	1353 } // namespace history

OLD	NEW

« no previous file with comments | « chrome/browser/history/url_index_private_data.h ('k') | no next file » | no next file with comments »