Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(44)

Side by Side Diff: chrome/browser/history/in_memory_url_index.cc

Issue 8662035: Revert 111378 - HQP Refactoring (in Preparation for SQLite Cache) (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: Created 9 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD NEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/history/in_memory_url_index.h" 5 #include "chrome/browser/history/in_memory_url_index.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <functional> 8 #include <functional>
9 #include <iterator> 9 #include <iterator>
10 #include <limits> 10 #include <limits>
11 #include <numeric> 11 #include <numeric>
12 12
13 #include "base/file_util.h" 13 #include "base/file_util.h"
14 #include "base/i18n/case_conversion.h" 14 #include "base/i18n/case_conversion.h"
15 #include "base/metrics/histogram.h" 15 #include "base/metrics/histogram.h"
16 #include "base/string_util.h"
16 #include "base/threading/thread_restrictions.h" 17 #include "base/threading/thread_restrictions.h"
18 #include "base/time.h"
17 #include "base/utf_string_conversions.h" 19 #include "base/utf_string_conversions.h"
18 #include "chrome/browser/autocomplete/autocomplete.h" 20 #include "chrome/browser/autocomplete/autocomplete.h"
19 #include "chrome/browser/history/history.h" 21 #include "chrome/browser/autocomplete/history_provider_util.h"
20 #include "chrome/browser/history/history_notifications.h"
21 #include "chrome/browser/history/url_database.h" 22 #include "chrome/browser/history/url_database.h"
22 #include "chrome/browser/profiles/profile.h" 23 #include "chrome/browser/profiles/profile.h"
23 #include "chrome/common/chrome_notification_types.h"
24 #include "chrome/common/url_constants.h" 24 #include "chrome/common/url_constants.h"
25 #include "content/public/browser/notification_service.h" 25 #include "googleurl/src/url_parse.h"
26 #include "googleurl/src/url_util.h"
27 #include "net/base/escape.h"
26 #include "net/base/net_util.h" 28 #include "net/base/net_util.h"
29 #include "third_party/protobuf/src/google/protobuf/repeated_field.h"
27 #include "ui/base/l10n/l10n_util.h" 30 #include "ui/base/l10n/l10n_util.h"
28 31
29 using google::protobuf::RepeatedField; 32 using google::protobuf::RepeatedField;
30 using google::protobuf::RepeatedPtrField; 33 using google::protobuf::RepeatedPtrField;
31 using in_memory_url_index::InMemoryURLIndexCacheItem; 34 using in_memory_url_index::InMemoryURLIndexCacheItem;
32 35
33 namespace history { 36 namespace history {
34 37
35 typedef imui::InMemoryURLIndexCacheItem_WordListItem WordListItem; 38 typedef imui::InMemoryURLIndexCacheItem_WordListItem WordListItem;
36 typedef imui::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry WordMapEntry; 39 typedef imui::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry WordMapEntry;
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after
102 return 0; 105 return 0;
103 int score = kScoreRank[i]; 106 int score = kScoreRank[i];
104 if (i > 0) { 107 if (i > 0) {
105 score += (value - value_ranks[i]) * 108 score += (value - value_ranks[i]) *
106 (kScoreRank[i - 1] - kScoreRank[i]) / 109 (kScoreRank[i - 1] - kScoreRank[i]) /
107 (value_ranks[i - 1] - value_ranks[i]); 110 (value_ranks[i - 1] - value_ranks[i]);
108 } 111 }
109 return score; 112 return score;
110 } 113 }
111 114
112 InMemoryURLIndex::InMemoryURLIndex(Profile* profile, 115 InMemoryURLIndex::InMemoryURLIndex(const FilePath& history_dir)
113 const FilePath& history_dir) 116 : history_dir_(history_dir),
114 : profile_(profile),
115 history_dir_(history_dir),
116 private_data_(new URLIndexPrivateData), 117 private_data_(new URLIndexPrivateData),
117 cached_at_shutdown_(false) { 118 cached_at_shutdown_(false) {
118 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_); 119 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_);
119 if (profile) {
120 content::Source<Profile> source(profile);
121 registrar_.Add(this, chrome::NOTIFICATION_HISTORY_URL_VISITED, source);
122 registrar_.Add(this, chrome::NOTIFICATION_HISTORY_TYPED_URLS_MODIFIED,
123 source);
124 registrar_.Add(this, chrome::NOTIFICATION_HISTORY_URLS_DELETED, source);
125 }
126 } 120 }
127 121
128 // Called only by unit tests. 122 // Called only by unit tests.
129 InMemoryURLIndex::InMemoryURLIndex() 123 InMemoryURLIndex::InMemoryURLIndex()
130 : private_data_(new URLIndexPrivateData), 124 : private_data_(new URLIndexPrivateData),
131 cached_at_shutdown_(false) { 125 cached_at_shutdown_(false) {
132 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_); 126 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_);
133 } 127 }
134 128
135 InMemoryURLIndex::~InMemoryURLIndex() { 129 InMemoryURLIndex::~InMemoryURLIndex() {
136 // If there was a history directory (which there won't be for some unit tests) 130 // If there was a history directory (which there won't be for some unit tests)
137 // then ensure that the cache has already been saved. 131 // then ensure that the cache has already been saved.
138 registrar_.RemoveAll();
139 DCHECK(history_dir_.empty() || cached_at_shutdown_); 132 DCHECK(history_dir_.empty() || cached_at_shutdown_);
140 } 133 }
141 134
142 // static 135 // static
143 void InMemoryURLIndex::InitializeSchemeWhitelist( 136 void InMemoryURLIndex::InitializeSchemeWhitelist(
144 std::set<std::string>* whitelist) { 137 std::set<std::string>* whitelist) {
145 DCHECK(whitelist); 138 DCHECK(whitelist);
146 whitelist->insert(std::string(chrome::kAboutScheme)); 139 whitelist->insert(std::string(chrome::kAboutScheme));
147 whitelist->insert(std::string(chrome::kChromeUIScheme)); 140 whitelist->insert(std::string(chrome::kChromeUIScheme));
148 whitelist->insert(std::string(chrome::kFileScheme)); 141 whitelist->insert(std::string(chrome::kFileScheme));
149 whitelist->insert(std::string(chrome::kFtpScheme)); 142 whitelist->insert(std::string(chrome::kFtpScheme));
150 whitelist->insert(std::string(chrome::kHttpScheme)); 143 whitelist->insert(std::string(chrome::kHttpScheme));
151 whitelist->insert(std::string(chrome::kHttpsScheme)); 144 whitelist->insert(std::string(chrome::kHttpsScheme));
152 whitelist->insert(std::string(chrome::kMailToScheme)); 145 whitelist->insert(std::string(chrome::kMailToScheme));
153 } 146 }
154 147
155 // Indexing 148 // Indexing
156 149
157 void InMemoryURLIndex::Init(const std::string& languages) { 150 bool InMemoryURLIndex::Init(URLDatabase* history_db,
151 const std::string& languages) {
158 // TODO(mrossetti): Register for profile/language change notifications. 152 // TODO(mrossetti): Register for profile/language change notifications.
159 languages_ = languages; 153 languages_ = languages;
160 RestoreFromCache(); 154 return ReloadFromHistory(history_db, false);
161 } 155 }
162 156
163 void InMemoryURLIndex::ShutDown() { 157 void InMemoryURLIndex::ShutDown() {
158 // Write our cache.
164 SaveToCacheFile(); 159 SaveToCacheFile();
165 cached_at_shutdown_ = true; 160 cached_at_shutdown_ = true;
166 } 161 }
167 162
168 void InMemoryURLIndex::Observe(int type,
169 const content::NotificationSource& source,
170 const content::NotificationDetails& details) {
171 switch (type) {
172 case chrome::NOTIFICATION_HISTORY_URL_VISITED:
173 OnURLVisited(content::Details<URLVisitedDetails>(details).ptr());
174 break;
175 case chrome::NOTIFICATION_HISTORY_TYPED_URLS_MODIFIED:
176 OnURLsModified(
177 content::Details<history::URLsModifiedDetails>(details).ptr());
178 break;
179 case chrome::NOTIFICATION_HISTORY_URLS_DELETED:
180 OnURLsDeleted(
181 content::Details<history::URLsDeletedDetails>(details).ptr());
182 break;
183 case chrome::NOTIFICATION_HISTORY_LOADED: {
184 HistoryService* history_service =
185 profile_->GetHistoryService(Profile::EXPLICIT_ACCESS);
186 URLDatabase* history_db = history_service->HistoryDatabase();
187 ReloadFromHistory(history_db);
188 }
189 break;
190 default:
191 // For simplicity, the unit tests send us all notifications, even when
192 // we haven't registered for them, so don't assert here.
193 break;
194 }
195 }
196
197 void InMemoryURLIndex::OnURLVisited(const URLVisitedDetails* details) {
198 UpdateURL(details->row);
199 }
200
201 void InMemoryURLIndex::OnURLsModified(const URLsModifiedDetails* details) {
202 for (std::vector<history::URLRow>::const_iterator row =
203 details->changed_urls.begin();
204 row != details->changed_urls.end(); ++row)
205 UpdateURL(*row);
206 }
207
208 void InMemoryURLIndex::OnURLsDeleted(const URLsDeletedDetails* details) {
209 if (details->all_history) {
210 ClearPrivateData();
211 } else {
212 for (std::vector<URLRow>::const_iterator row = details->rows.begin();
213 row != details->rows.end(); ++row)
214 DeleteURL(*row);
215 }
216 }
217
218 void InMemoryURLIndex::IndexRow(const URLRow& row) { 163 void InMemoryURLIndex::IndexRow(const URLRow& row) {
219 const GURL& gurl(row.url()); 164 const GURL& gurl(row.url());
220 165
221 // Index only URLs with a whitelisted scheme. 166 // Index only URLs with a whitelisted scheme.
222 if (!InMemoryURLIndex::URLSchemeIsWhitelisted(gurl)) 167 if (!InMemoryURLIndex::URLSchemeIsWhitelisted(gurl))
223 return; 168 return;
224 169
225 URLID row_id = row.id(); 170 URLID row_id = row.id();
226 // Strip out username and password before saving and indexing. 171 // Strip out username and password before saving and indexing.
227 string16 url(net::FormatUrl(gurl, languages_, 172 string16 url(net::FormatUrl(gurl, languages_,
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after
301 } 246 }
302 247
303 // Complete the removal of references to the word. 248 // Complete the removal of references to the word.
304 private_data.word_id_history_map_.erase(word_id); 249 private_data.word_id_history_map_.erase(word_id);
305 private_data.word_map_.erase(word); 250 private_data.word_map_.erase(word);
306 private_data.word_list_[word_id] = string16(); 251 private_data.word_list_[word_id] = string16();
307 private_data.available_words_.insert(word_id); 252 private_data.available_words_.insert(word_id);
308 } 253 }
309 } 254 }
310 255
311 void InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db) { 256 bool InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db,
257 bool clear_cache) {
312 ClearPrivateData(); 258 ClearPrivateData();
313 259
314 if (!history_db) 260 if (!history_db)
315 return; 261 return false;
316 262
317 base::TimeTicks beginning_time = base::TimeTicks::Now(); 263 if (clear_cache || !RestoreFromCacheFile()) {
318 // The index has to be built from scratch. 264 base::TimeTicks beginning_time = base::TimeTicks::Now();
319 URLDatabase::URLEnumerator history_enum; 265 // The index has to be built from scratch.
320 if (!history_db->InitURLEnumeratorForSignificant(&history_enum)) 266 URLDatabase::URLEnumerator history_enum;
321 return; 267 if (!history_db->InitURLEnumeratorForSignificant(&history_enum))
322 268 return false;
323 URLRow row; 269 URLRow row;
324 while (history_enum.GetNextURL(&row)) 270 while (history_enum.GetNextURL(&row))
325 IndexRow(row); 271 IndexRow(row);
326 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime", 272 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime",
327 base::TimeTicks::Now() - beginning_time); 273 base::TimeTicks::Now() - beginning_time);
328 SaveToCacheFile(); 274 SaveToCacheFile();
275 }
276 return true;
329 } 277 }
330 278
331 void InMemoryURLIndex::ClearPrivateData() { 279 void InMemoryURLIndex::ClearPrivateData() {
332 private_data_->Clear(); 280 private_data_->Clear();
333 search_term_cache_.clear(); 281 search_term_cache_.clear();
334 } 282 }
335 283
336 void InMemoryURLIndex::RestoreFromCache() {
337 ClearPrivateData();
338 if (!RestoreFromCacheFile() && profile_) {
339 content::Source<Profile> source(profile_);
340 registrar_.Add(this, chrome::NOTIFICATION_HISTORY_LOADED, source);
341 }
342 }
343
344 bool InMemoryURLIndex::RestoreFromCacheFile() { 284 bool InMemoryURLIndex::RestoreFromCacheFile() {
345 // TODO(mrossetti): Figure out how to determine if the cache is up-to-date. 285 // TODO(mrossetti): Figure out how to determine if the cache is up-to-date.
346 // That is: ensure that the database has not been modified since the cache 286 // That is: ensure that the database has not been modified since the cache
347 // was last saved. DB file modification date is inadequate. There are no 287 // was last saved. DB file modification date is inadequate. There are no
348 // SQLite table checksums automatically stored. 288 // SQLite table checksums automatically stored.
349 // FIXME(mrossetti): Move File IO to another thread. 289 // FIXME(mrossetti): Move File IO to another thread.
350 base::ThreadRestrictions::ScopedAllowIO allow_io; 290 base::ThreadRestrictions::ScopedAllowIO allow_io;
351 base::TimeTicks beginning_time = base::TimeTicks::Now(); 291 base::TimeTicks beginning_time = base::TimeTicks::Now();
352 FilePath file_path; 292 FilePath file_path;
353 if (!GetCacheFilePath(&file_path) || !file_util::PathExists(file_path)) 293 if (!GetCacheFilePath(&file_path) || !file_util::PathExists(file_path))
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
402 int size = data.size(); 342 int size = data.size();
403 if (file_util::WriteFile(file_path, data.c_str(), size) != size) { 343 if (file_util::WriteFile(file_path, data.c_str(), size) != size) {
404 LOG(WARNING) << "Failed to write " << file_path.value(); 344 LOG(WARNING) << "Failed to write " << file_path.value();
405 return false; 345 return false;
406 } 346 }
407 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime", 347 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime",
408 base::TimeTicks::Now() - beginning_time); 348 base::TimeTicks::Now() - beginning_time);
409 return true; 349 return true;
410 } 350 }
411 351
412 void InMemoryURLIndex::UpdateURL(const URLRow& row) { 352 void InMemoryURLIndex::UpdateURL(URLID row_id, const URLRow& row) {
413 // The row may or may not already be in our index. If it is not already 353 // The row may or may not already be in our index. If it is not already
414 // indexed and it qualifies then it gets indexed. If it is already 354 // indexed and it qualifies then it gets indexed. If it is already
415 // indexed and still qualifies then it gets updated, otherwise it 355 // indexed and still qualifies then it gets updated, otherwise it
416 // is deleted from the index. 356 // is deleted from the index.
417 HistoryInfoMap::iterator row_pos = 357 HistoryInfoMap::iterator row_pos =
418 private_data_->history_info_map_.find(row.id()); 358 private_data_->history_info_map_.find(row_id);
419 if (row_pos == private_data_->history_info_map_.end()) { 359 if (row_pos == private_data_->history_info_map_.end()) {
420 // This new row should be indexed if it qualifies. 360 // This new row should be indexed if it qualifies.
421 if (RowQualifiesAsSignificant(row, base::Time())) 361 URLRow new_row(row);
422 IndexRow(row); 362 new_row.set_id(row_id);
363 if (RowQualifiesAsSignificant(new_row, base::Time()))
364 IndexRow(new_row);
423 } else if (RowQualifiesAsSignificant(row, base::Time())) { 365 } else if (RowQualifiesAsSignificant(row, base::Time())) {
424 // This indexed row still qualifies and will be re-indexed. 366 // This indexed row still qualifies and will be re-indexed.
425 // The url won't have changed but the title, visit count, etc. 367 // The url won't have changed but the title, visit count, etc.
426 // might have changed. 368 // might have changed.
427 URLRow& old_row = row_pos->second; 369 URLRow& updated_row = row_pos->second;
428 old_row.set_visit_count(row.visit_count()); 370 updated_row.set_visit_count(row.visit_count());
429 old_row.set_typed_count(row.typed_count()); 371 updated_row.set_typed_count(row.typed_count());
430 old_row.set_last_visit(row.last_visit()); 372 updated_row.set_last_visit(row.last_visit());
431 // While the URL is guaranteed to remain stable, the title may have changed. 373 // While the URL is guaranteed to remain stable, the title may have changed.
432 // If so, then we need to update the index with the changed words. 374 // If so, then we need to update the index with the changed words.
433 if (old_row.title() != row.title()) { 375 if (updated_row.title() != row.title()) {
434 // Clear all words associated with this row and re-index both the 376 // Clear all words associated with this row and re-index both the
435 // URL and title. 377 // URL and title.
436 RemoveRowWordsFromIndex(row); 378 RemoveRowWordsFromIndex(updated_row);
437 old_row.set_title(row.title()); 379 updated_row.set_title(row.title());
438 AddRowWordsToIndex(old_row); 380 AddRowWordsToIndex(updated_row);
439 } 381 }
440 } else { 382 } else {
441 // This indexed row no longer qualifies and will be de-indexed by 383 // This indexed row no longer qualifies and will be de-indexed by
442 // clearing all words associated with this row. 384 // clearing all words associated with this row.
443 RemoveRowFromIndex(row); 385 URLRow& removed_row = row_pos->second;
386 RemoveRowFromIndex(removed_row);
444 } 387 }
445 // This invalidates the cache. 388 // This invalidates the cache.
446 search_term_cache_.clear(); 389 search_term_cache_.clear();
447 } 390 }
448 391
449 void InMemoryURLIndex::DeleteURL(const URLRow& row) { 392 void InMemoryURLIndex::DeleteURL(URLID row_id) {
450 RemoveRowFromIndex(row); 393 // Note that this does not remove any reference to this row from the
451 search_term_cache_.clear(); // Invalidate the word cache. 394 // word_id_history_map_. That map will continue to contain (and return)
395 // hits against this row until that map is rebuilt, but since the
396 // history_info_map_ no longer references the row, no erroneous results
397 // will propagate to the user.
398 private_data_->history_info_map_.erase(row_id);
399 // This invalidates the word cache.
400 search_term_cache_.clear();
452 } 401 }
453 402
454 // Searching 403 // Searching
455 404
456 ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms( 405 ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms(
457 const String16Vector& terms) { 406 const String16Vector& terms) {
458 ScoredHistoryMatches scored_items; 407 ScoredHistoryMatches scored_items;
459 408
460 // Do nothing if we have indexed no words (probably because we've not been 409 // Do nothing if we have indexed no words (probably because we've not been
461 // initialized yet). 410 // initialized yet).
(...skipping 658 matching lines...) Expand 10 before | Expand all | Expand 10 after
1120 if (iter->has_title()) { 1069 if (iter->has_title()) {
1121 string16 title(UTF8ToUTF16(iter->title())); 1070 string16 title(UTF8ToUTF16(iter->title()));
1122 url_row.set_title(title); 1071 url_row.set_title(title);
1123 } 1072 }
1124 private_data_->history_info_map_[history_id] = url_row; 1073 private_data_->history_info_map_[history_id] = url_row;
1125 } 1074 }
1126 return true; 1075 return true;
1127 } 1076 }
1128 1077
1129 } // namespace history 1078 } // namespace history
OLD NEW
« no previous file with comments | « chrome/browser/history/in_memory_url_index.h ('k') | chrome/browser/history/in_memory_url_index_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698