Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(19)

Side by Side Diff: chrome/browser/history/in_memory_url_index.cc

Issue 8451009: HQP Refactoring (in Preparation for SQLite Cache) (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: '' Created 9 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/history/in_memory_url_index.h" 5 #include "chrome/browser/history/in_memory_url_index.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <functional> 8 #include <functional>
9 #include <iterator> 9 #include <iterator>
10 #include <limits> 10 #include <limits>
11 #include <numeric> 11 #include <numeric>
12 12
13 #include "base/file_util.h" 13 #include "base/file_util.h"
14 #include "base/i18n/case_conversion.h" 14 #include "base/i18n/case_conversion.h"
15 #include "base/metrics/histogram.h" 15 #include "base/metrics/histogram.h"
16 #include "base/string_util.h"
17 #include "base/threading/thread_restrictions.h" 16 #include "base/threading/thread_restrictions.h"
18 #include "base/time.h"
19 #include "base/utf_string_conversions.h" 17 #include "base/utf_string_conversions.h"
20 #include "chrome/browser/autocomplete/autocomplete.h" 18 #include "chrome/browser/autocomplete/autocomplete.h"
21 #include "chrome/browser/autocomplete/history_provider_util.h" 19 #include "chrome/browser/history/history.h"
20 #include "chrome/browser/history/history_notifications.h"
22 #include "chrome/browser/history/url_database.h" 21 #include "chrome/browser/history/url_database.h"
23 #include "chrome/browser/profiles/profile.h" 22 #include "chrome/browser/profiles/profile.h"
23 #include "chrome/common/chrome_notification_types.h"
24 #include "chrome/common/url_constants.h" 24 #include "chrome/common/url_constants.h"
25 #include "googleurl/src/url_parse.h" 25 #include "content/public/browser/notification_service.h"
26 #include "googleurl/src/url_util.h"
27 #include "net/base/escape.h"
28 #include "net/base/net_util.h" 26 #include "net/base/net_util.h"
29 #include "third_party/protobuf/src/google/protobuf/repeated_field.h"
30 #include "ui/base/l10n/l10n_util.h" 27 #include "ui/base/l10n/l10n_util.h"
31 28
32 using google::protobuf::RepeatedField; 29 using google::protobuf::RepeatedField;
33 using google::protobuf::RepeatedPtrField; 30 using google::protobuf::RepeatedPtrField;
34 using in_memory_url_index::InMemoryURLIndexCacheItem; 31 using in_memory_url_index::InMemoryURLIndexCacheItem;
35 32
36 namespace history { 33 namespace history {
37 34
38 typedef imui::InMemoryURLIndexCacheItem_WordListItem WordListItem; 35 typedef imui::InMemoryURLIndexCacheItem_WordListItem WordListItem;
39 typedef imui::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry WordMapEntry; 36 typedef imui::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry WordMapEntry;
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after
105 return 0; 102 return 0;
106 int score = kScoreRank[i]; 103 int score = kScoreRank[i];
107 if (i > 0) { 104 if (i > 0) {
108 score += (value - value_ranks[i]) * 105 score += (value - value_ranks[i]) *
109 (kScoreRank[i - 1] - kScoreRank[i]) / 106 (kScoreRank[i - 1] - kScoreRank[i]) /
110 (value_ranks[i - 1] - value_ranks[i]); 107 (value_ranks[i - 1] - value_ranks[i]);
111 } 108 }
112 return score; 109 return score;
113 } 110 }
114 111
115 InMemoryURLIndex::InMemoryURLIndex(const FilePath& history_dir) 112 InMemoryURLIndex::InMemoryURLIndex(Profile* profile,
116 : history_dir_(history_dir), 113 const FilePath& history_dir)
114 : profile_(profile),
115 history_dir_(history_dir),
117 private_data_(new URLIndexPrivateData), 116 private_data_(new URLIndexPrivateData),
118 cached_at_shutdown_(false) { 117 cached_at_shutdown_(false) {
119 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_); 118 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_);
119 if (profile) {
120 content::Source<Profile> source(profile);
121 registrar_.Add(this, chrome::NOTIFICATION_HISTORY_URL_VISITED, source);
122 registrar_.Add(this, chrome::NOTIFICATION_HISTORY_TYPED_URLS_MODIFIED,
123 source);
124 registrar_.Add(this, chrome::NOTIFICATION_HISTORY_URLS_DELETED, source);
125 }
120 } 126 }
121 127
122 // Called only by unit tests. 128 // Called only by unit tests.
123 InMemoryURLIndex::InMemoryURLIndex() 129 InMemoryURLIndex::InMemoryURLIndex()
124 : private_data_(new URLIndexPrivateData), 130 : private_data_(new URLIndexPrivateData),
125 cached_at_shutdown_(false) { 131 cached_at_shutdown_(false) {
126 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_); 132 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_);
127 } 133 }
128 134
129 InMemoryURLIndex::~InMemoryURLIndex() { 135 InMemoryURLIndex::~InMemoryURLIndex() {
130 // If there was a history directory (which there won't be for some unit tests) 136 // If there was a history directory (which there won't be for some unit tests)
131 // then ensure that the cache has already been saved. 137 // then ensure that the cache has already been saved.
138 registrar_.RemoveAll();
132 DCHECK(history_dir_.empty() || cached_at_shutdown_); 139 DCHECK(history_dir_.empty() || cached_at_shutdown_);
133 } 140 }
134 141
135 // static 142 // static
136 void InMemoryURLIndex::InitializeSchemeWhitelist( 143 void InMemoryURLIndex::InitializeSchemeWhitelist(
137 std::set<std::string>* whitelist) { 144 std::set<std::string>* whitelist) {
138 DCHECK(whitelist); 145 DCHECK(whitelist);
139 whitelist->insert(std::string(chrome::kAboutScheme)); 146 whitelist->insert(std::string(chrome::kAboutScheme));
140 whitelist->insert(std::string(chrome::kChromeUIScheme)); 147 whitelist->insert(std::string(chrome::kChromeUIScheme));
141 whitelist->insert(std::string(chrome::kFileScheme)); 148 whitelist->insert(std::string(chrome::kFileScheme));
142 whitelist->insert(std::string(chrome::kFtpScheme)); 149 whitelist->insert(std::string(chrome::kFtpScheme));
143 whitelist->insert(std::string(chrome::kHttpScheme)); 150 whitelist->insert(std::string(chrome::kHttpScheme));
144 whitelist->insert(std::string(chrome::kHttpsScheme)); 151 whitelist->insert(std::string(chrome::kHttpsScheme));
145 whitelist->insert(std::string(chrome::kMailToScheme)); 152 whitelist->insert(std::string(chrome::kMailToScheme));
146 } 153 }
147 154
148 // Indexing 155 // Indexing
149 156
150 bool InMemoryURLIndex::Init(URLDatabase* history_db, 157 void InMemoryURLIndex::Init(const std::string& languages) {
151 const std::string& languages) {
152 // TODO(mrossetti): Register for profile/language change notifications. 158 // TODO(mrossetti): Register for profile/language change notifications.
153 languages_ = languages; 159 languages_ = languages;
154 return ReloadFromHistory(history_db, false); 160 RestoreFromCache();
155 } 161 }
156 162
157 void InMemoryURLIndex::ShutDown() { 163 void InMemoryURLIndex::ShutDown() {
158 // Write our cache.
159 SaveToCacheFile(); 164 SaveToCacheFile();
160 cached_at_shutdown_ = true; 165 cached_at_shutdown_ = true;
161 } 166 }
162 167
168 void InMemoryURLIndex::Observe(int type,
169 const content::NotificationSource& source,
170 const content::NotificationDetails& details) {
171 switch (type) {
172 case chrome::NOTIFICATION_HISTORY_URL_VISITED:
173 OnURLVisited(content::Details<URLVisitedDetails>(details).ptr());
174 break;
175 case chrome::NOTIFICATION_HISTORY_TYPED_URLS_MODIFIED:
176 OnURLsModified(
177 content::Details<history::URLsModifiedDetails>(details).ptr());
178 break;
179 case chrome::NOTIFICATION_HISTORY_URLS_DELETED:
180 OnURLsDeleted(
181 content::Details<history::URLsDeletedDetails>(details).ptr());
182 break;
183 case chrome::NOTIFICATION_HISTORY_LOADED: {
184 HistoryService* history_service =
185 profile_->GetHistoryService(Profile::EXPLICIT_ACCESS);
186 URLDatabase* history_db = history_service->HistoryDatabase();
187 ReloadFromHistory(history_db);
188 }
189 break;
190 default:
191 // For simplicity, the unit tests send us all notifications, even when
192 // we haven't registered for them, so don't assert here.
193 break;
194 }
195 }
196
197 void InMemoryURLIndex::OnURLVisited(const URLVisitedDetails* details) {
198 UpdateURL(details->row);
199 }
200
201 void InMemoryURLIndex::OnURLsModified(const URLsModifiedDetails* details) {
202 for (std::vector<history::URLRow>::const_iterator row =
203 details->changed_urls.begin();
204 row != details->changed_urls.end(); ++row)
205 UpdateURL(*row);
206 }
207
208 void InMemoryURLIndex::OnURLsDeleted(const URLsDeletedDetails* details) {
209 if (details->all_history) {
210 ClearPrivateData();
211 } else {
212 for (std::vector<URLRow>::const_iterator row = details->rows.begin();
213 row != details->rows.end(); ++row)
214 DeleteURL(*row);
215 }
216 }
217
163 void InMemoryURLIndex::IndexRow(const URLRow& row) { 218 void InMemoryURLIndex::IndexRow(const URLRow& row) {
164 const GURL& gurl(row.url()); 219 const GURL& gurl(row.url());
165 220
166 // Index only URLs with a whitelisted scheme. 221 // Index only URLs with a whitelisted scheme.
167 if (!InMemoryURLIndex::URLSchemeIsWhitelisted(gurl)) 222 if (!InMemoryURLIndex::URLSchemeIsWhitelisted(gurl))
168 return; 223 return;
169 224
170 URLID row_id = row.id(); 225 URLID row_id = row.id();
171 // Strip out username and password before saving and indexing. 226 // Strip out username and password before saving and indexing.
172 string16 url(net::FormatUrl(gurl, languages_, 227 string16 url(net::FormatUrl(gurl, languages_,
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after
246 } 301 }
247 302
248 // Complete the removal of references to the word. 303 // Complete the removal of references to the word.
249 private_data.word_id_history_map_.erase(word_id); 304 private_data.word_id_history_map_.erase(word_id);
250 private_data.word_map_.erase(word); 305 private_data.word_map_.erase(word);
251 private_data.word_list_[word_id] = string16(); 306 private_data.word_list_[word_id] = string16();
252 private_data.available_words_.insert(word_id); 307 private_data.available_words_.insert(word_id);
253 } 308 }
254 } 309 }
255 310
256 bool InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db, 311 void InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db) {
257 bool clear_cache) {
258 ClearPrivateData(); 312 ClearPrivateData();
259 313
260 if (!history_db) 314 if (!history_db)
261 return false; 315 return;
262 316
263 if (clear_cache || !RestoreFromCacheFile()) { 317 base::TimeTicks beginning_time = base::TimeTicks::Now();
264 base::TimeTicks beginning_time = base::TimeTicks::Now(); 318 // The index has to be built from scratch.
265 // The index has to be built from scratch. 319 URLDatabase::URLEnumerator history_enum;
266 URLDatabase::URLEnumerator history_enum; 320 if (!history_db->InitURLEnumeratorForSignificant(&history_enum))
267 if (!history_db->InitURLEnumeratorForSignificant(&history_enum)) 321 return;
268 return false; 322
269 URLRow row; 323 URLRow row;
270 while (history_enum.GetNextURL(&row)) 324 while (history_enum.GetNextURL(&row))
271 IndexRow(row); 325 IndexRow(row);
272 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime", 326 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime",
273 base::TimeTicks::Now() - beginning_time); 327 base::TimeTicks::Now() - beginning_time);
274 SaveToCacheFile(); 328 SaveToCacheFile();
275 }
276 return true;
277 } 329 }
278 330
279 void InMemoryURLIndex::ClearPrivateData() { 331 void InMemoryURLIndex::ClearPrivateData() {
280 private_data_->Clear(); 332 private_data_->Clear();
281 search_term_cache_.clear(); 333 search_term_cache_.clear();
282 } 334 }
283 335
336 void InMemoryURLIndex::RestoreFromCache() {
337 ClearPrivateData();
338 if (!RestoreFromCacheFile() && profile_) {
339 content::Source<Profile> source(profile_);
340 registrar_.Add(this, chrome::NOTIFICATION_HISTORY_LOADED, source);
341 }
342 }
343
284 bool InMemoryURLIndex::RestoreFromCacheFile() { 344 bool InMemoryURLIndex::RestoreFromCacheFile() {
285 // TODO(mrossetti): Figure out how to determine if the cache is up-to-date. 345 // TODO(mrossetti): Figure out how to determine if the cache is up-to-date.
286 // That is: ensure that the database has not been modified since the cache 346 // That is: ensure that the database has not been modified since the cache
287 // was last saved. DB file modification date is inadequate. There are no 347 // was last saved. DB file modification date is inadequate. There are no
288 // SQLite table checksums automatically stored. 348 // SQLite table checksums automatically stored.
289 // FIXME(mrossetti): Move File IO to another thread. 349 // FIXME(mrossetti): Move File IO to another thread.
290 base::ThreadRestrictions::ScopedAllowIO allow_io; 350 base::ThreadRestrictions::ScopedAllowIO allow_io;
291 base::TimeTicks beginning_time = base::TimeTicks::Now(); 351 base::TimeTicks beginning_time = base::TimeTicks::Now();
292 FilePath file_path; 352 FilePath file_path;
293 if (!GetCacheFilePath(&file_path) || !file_util::PathExists(file_path)) 353 if (!GetCacheFilePath(&file_path) || !file_util::PathExists(file_path))
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
342 int size = data.size(); 402 int size = data.size();
343 if (file_util::WriteFile(file_path, data.c_str(), size) != size) { 403 if (file_util::WriteFile(file_path, data.c_str(), size) != size) {
344 LOG(WARNING) << "Failed to write " << file_path.value(); 404 LOG(WARNING) << "Failed to write " << file_path.value();
345 return false; 405 return false;
346 } 406 }
347 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime", 407 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime",
348 base::TimeTicks::Now() - beginning_time); 408 base::TimeTicks::Now() - beginning_time);
349 return true; 409 return true;
350 } 410 }
351 411
352 void InMemoryURLIndex::UpdateURL(URLID row_id, const URLRow& row) { 412 void InMemoryURLIndex::UpdateURL(const URLRow& row) {
353 // The row may or may not already be in our index. If it is not already 413 // The row may or may not already be in our index. If it is not already
354 // indexed and it qualifies then it gets indexed. If it is already 414 // indexed and it qualifies then it gets indexed. If it is already
355 // indexed and still qualifies then it gets updated, otherwise it 415 // indexed and still qualifies then it gets updated, otherwise it
356 // is deleted from the index. 416 // is deleted from the index.
357 HistoryInfoMap::iterator row_pos = 417 HistoryInfoMap::iterator row_pos =
358 private_data_->history_info_map_.find(row_id); 418 private_data_->history_info_map_.find(row.id());
359 if (row_pos == private_data_->history_info_map_.end()) { 419 if (row_pos == private_data_->history_info_map_.end()) {
360 // This new row should be indexed if it qualifies. 420 // This new row should be indexed if it qualifies.
361 URLRow new_row(row); 421 if (RowQualifiesAsSignificant(row, base::Time()))
362 new_row.set_id(row_id); 422 IndexRow(row);
363 if (RowQualifiesAsSignificant(new_row, base::Time()))
364 IndexRow(new_row);
365 } else if (RowQualifiesAsSignificant(row, base::Time())) { 423 } else if (RowQualifiesAsSignificant(row, base::Time())) {
366 // This indexed row still qualifies and will be re-indexed. 424 // This indexed row still qualifies and will be re-indexed.
367 // The url won't have changed but the title, visit count, etc. 425 // The url won't have changed but the title, visit count, etc.
368 // might have changed. 426 // might have changed.
369 URLRow& updated_row = row_pos->second; 427 URLRow& old_row = row_pos->second;
370 updated_row.set_visit_count(row.visit_count()); 428 old_row.set_visit_count(row.visit_count());
371 updated_row.set_typed_count(row.typed_count()); 429 old_row.set_typed_count(row.typed_count());
372 updated_row.set_last_visit(row.last_visit()); 430 old_row.set_last_visit(row.last_visit());
373 // While the URL is guaranteed to remain stable, the title may have changed. 431 // While the URL is guaranteed to remain stable, the title may have changed.
374 // If so, then we need to update the index with the changed words. 432 // If so, then we need to update the index with the changed words.
375 if (updated_row.title() != row.title()) { 433 if (old_row.title() != row.title()) {
376 // Clear all words associated with this row and re-index both the 434 // Clear all words associated with this row and re-index both the
377 // URL and title. 435 // URL and title.
378 RemoveRowWordsFromIndex(updated_row); 436 RemoveRowWordsFromIndex(row);
379 updated_row.set_title(row.title()); 437 old_row.set_title(row.title());
380 AddRowWordsToIndex(updated_row); 438 AddRowWordsToIndex(old_row);
381 } 439 }
382 } else { 440 } else {
383 // This indexed row no longer qualifies and will be de-indexed by 441 // This indexed row no longer qualifies and will be de-indexed by
384 // clearing all words associated with this row. 442 // clearing all words associated with this row.
385 URLRow& removed_row = row_pos->second; 443 RemoveRowFromIndex(row);
386 RemoveRowFromIndex(removed_row);
387 } 444 }
388 // This invalidates the cache. 445 // This invalidates the cache.
389 search_term_cache_.clear(); 446 search_term_cache_.clear();
390 } 447 }
391 448
392 void InMemoryURLIndex::DeleteURL(URLID row_id) { 449 void InMemoryURLIndex::DeleteURL(const URLRow& row) {
393 // Note that this does not remove any reference to this row from the 450 RemoveRowFromIndex(row);
394 // word_id_history_map_. That map will continue to contain (and return) 451 search_term_cache_.clear(); // Invalidate the word cache.
395 // hits against this row until that map is rebuilt, but since the
396 // history_info_map_ no longer references the row no erroneous results
397 // will propagate to the user.
398 private_data_->history_info_map_.erase(row_id);
399 // This invalidates the word cache.
400 search_term_cache_.clear();
401 } 452 }
402 453
403 // Searching 454 // Searching
404 455
405 ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms( 456 ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms(
406 const String16Vector& terms) { 457 const String16Vector& terms) {
407 ScoredHistoryMatches scored_items; 458 ScoredHistoryMatches scored_items;
408 459
409 // Do nothing if we have indexed no words (probably because we've not been 460 // Do nothing if we have indexed no words (probably because we've not been
410 // initialized yet). 461 // initialized yet).
(...skipping 658 matching lines...) Expand 10 before | Expand all | Expand 10 after
1069 if (iter->has_title()) { 1120 if (iter->has_title()) {
1070 string16 title(UTF8ToUTF16(iter->title())); 1121 string16 title(UTF8ToUTF16(iter->title()));
1071 url_row.set_title(title); 1122 url_row.set_title(title);
1072 } 1123 }
1073 private_data_->history_info_map_[history_id] = url_row; 1124 private_data_->history_info_map_[history_id] = url_row;
1074 } 1125 }
1075 return true; 1126 return true;
1076 } 1127 }
1077 1128
1078 } // namespace history 1129 } // namespace history
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698