OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/history/in_memory_url_index.h" | 5 #include "chrome/browser/history/in_memory_url_index.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <functional> | 8 #include <functional> |
9 #include <iterator> | 9 #include <iterator> |
10 #include <limits> | 10 #include <limits> |
11 #include <numeric> | 11 #include <numeric> |
12 | 12 |
13 #include "base/file_util.h" | 13 #include "base/file_util.h" |
14 #include "base/i18n/case_conversion.h" | 14 #include "base/i18n/case_conversion.h" |
15 #include "base/metrics/histogram.h" | 15 #include "base/metrics/histogram.h" |
16 #include "base/string_util.h" | |
17 #include "base/threading/thread_restrictions.h" | 16 #include "base/threading/thread_restrictions.h" |
18 #include "base/time.h" | |
19 #include "base/utf_string_conversions.h" | 17 #include "base/utf_string_conversions.h" |
20 #include "chrome/browser/autocomplete/autocomplete.h" | 18 #include "chrome/browser/autocomplete/autocomplete.h" |
21 #include "chrome/browser/autocomplete/history_provider_util.h" | 19 #include "chrome/browser/history/history.h" |
| 20 #include "chrome/browser/history/history_notifications.h" |
22 #include "chrome/browser/history/url_database.h" | 21 #include "chrome/browser/history/url_database.h" |
23 #include "chrome/browser/profiles/profile.h" | 22 #include "chrome/browser/profiles/profile.h" |
| 23 #include "chrome/common/chrome_notification_types.h" |
24 #include "chrome/common/url_constants.h" | 24 #include "chrome/common/url_constants.h" |
25 #include "googleurl/src/url_parse.h" | 25 #include "content/public/browser/notification_service.h" |
26 #include "googleurl/src/url_util.h" | |
27 #include "net/base/escape.h" | |
28 #include "net/base/net_util.h" | 26 #include "net/base/net_util.h" |
29 #include "third_party/protobuf/src/google/protobuf/repeated_field.h" | |
30 #include "ui/base/l10n/l10n_util.h" | 27 #include "ui/base/l10n/l10n_util.h" |
31 | 28 |
32 using google::protobuf::RepeatedField; | 29 using google::protobuf::RepeatedField; |
33 using google::protobuf::RepeatedPtrField; | 30 using google::protobuf::RepeatedPtrField; |
34 using in_memory_url_index::InMemoryURLIndexCacheItem; | 31 using in_memory_url_index::InMemoryURLIndexCacheItem; |
35 | 32 |
36 namespace history { | 33 namespace history { |
37 | 34 |
38 typedef imui::InMemoryURLIndexCacheItem_WordListItem WordListItem; | 35 typedef imui::InMemoryURLIndexCacheItem_WordListItem WordListItem; |
39 typedef imui::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry WordMapEntry; | 36 typedef imui::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry WordMapEntry; |
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
105 return 0; | 102 return 0; |
106 int score = kScoreRank[i]; | 103 int score = kScoreRank[i]; |
107 if (i > 0) { | 104 if (i > 0) { |
108 score += (value - value_ranks[i]) * | 105 score += (value - value_ranks[i]) * |
109 (kScoreRank[i - 1] - kScoreRank[i]) / | 106 (kScoreRank[i - 1] - kScoreRank[i]) / |
110 (value_ranks[i - 1] - value_ranks[i]); | 107 (value_ranks[i - 1] - value_ranks[i]); |
111 } | 108 } |
112 return score; | 109 return score; |
113 } | 110 } |
114 | 111 |
// Production constructor.
// OLD side: takes only the history directory.
// NEW side: additionally takes a Profile*, stores it in profile_, and -- only
// when that pointer is non-null -- registers this object with registrar_ for
// the HISTORY_URL_VISITED, HISTORY_TYPED_URLS_MODIFIED and
// HISTORY_URLS_DELETED notifications whose source is that profile.
// Both sides allocate a fresh URLIndexPrivateData, start with
// cached_at_shutdown_ == false, and fill scheme_whitelist_.
115 InMemoryURLIndex::InMemoryURLIndex(const FilePath& history_dir) | 112 InMemoryURLIndex::InMemoryURLIndex(Profile* profile, |
116 : history_dir_(history_dir), | 113 const FilePath& history_dir) |
| 114 : profile_(profile), |
| 115 history_dir_(history_dir), |
117 private_data_(new URLIndexPrivateData), | 116 private_data_(new URLIndexPrivateData), |
118 cached_at_shutdown_(false) { | 117 cached_at_shutdown_(false) { |
119 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_); | 118 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_); |
// A null profile skips registration entirely, so no history notifications are
// requested in that case.
| 119 if (profile) { |
| 120 content::Source<Profile> source(profile); |
| 121 registrar_.Add(this, chrome::NOTIFICATION_HISTORY_URL_VISITED, source); |
| 122 registrar_.Add(this, chrome::NOTIFICATION_HISTORY_TYPED_URLS_MODIFIED, |
| 123 source); |
| 124 registrar_.Add(this, chrome::NOTIFICATION_HISTORY_URLS_DELETED, source); |
| 125 } |
120 } | 126 } |
121 | 127 |
// Default constructor: allocates the private data, clears the shutdown flag and
// fills the scheme whitelist. It registers for no notifications.
// NOTE(review): on the NEW side the initializer list does not mention
// profile_, yet RestoreFromCache() tests it and Observe() dereferences it for
// NOTIFICATION_HISTORY_LOADED. Unless the header gives the member a default
// (not visible here), it is left indeterminate; adding `profile_(NULL)` as the
// first initializer would make the test-only path well defined.
122 // Called only by unit tests. | 128 // Called only by unit tests. |
123 InMemoryURLIndex::InMemoryURLIndex() | 129 InMemoryURLIndex::InMemoryURLIndex() |
124 : private_data_(new URLIndexPrivateData), | 130 : private_data_(new URLIndexPrivateData), |
125 cached_at_shutdown_(false) { | 131 cached_at_shutdown_(false) { |
126 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_); | 132 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_); |
127 } | 133 } |
128 | 134 |
// Destructor. The NEW side first drops every notification registration held by
// registrar_; both sides then assert (debug builds only, via DCHECK) that
// either no history directory was configured or ShutDown() already ran.
129 InMemoryURLIndex::~InMemoryURLIndex() { | 135 InMemoryURLIndex::~InMemoryURLIndex() { |
// NOTE(review): the two comment rows below describe the DCHECK, not the
// RemoveAll() call that the NEW side inserted between them; the new statement
// would read better placed above this comment.
130 // If there was a history directory (which there won't be for some unit tests) | 136 // If there was a history directory (which there won't be for some unit tests) |
131 // then ensure that the cache has already been saved. | 137 // then ensure that the cache has already been saved. |
| 138 registrar_.RemoveAll(); |
132 DCHECK(history_dir_.empty() || cached_at_shutdown_); | 139 DCHECK(history_dir_.empty() || cached_at_shutdown_); |
133 } | 140 } |
134 | 141 |
// Populates |whitelist| with the URL schemes that are eligible for indexing:
// about, chrome UI, file, ftp, http, https and mailto. Existing entries in the
// set are left in place (std::set::insert ignores duplicates). |whitelist| must
// be non-null; this is checked with DCHECK only. Unchanged between OLD and NEW.
135 // static | 142 // static |
136 void InMemoryURLIndex::InitializeSchemeWhitelist( | 143 void InMemoryURLIndex::InitializeSchemeWhitelist( |
137 std::set<std::string>* whitelist) { | 144 std::set<std::string>* whitelist) { |
138 DCHECK(whitelist); | 145 DCHECK(whitelist); |
139 whitelist->insert(std::string(chrome::kAboutScheme)); | 146 whitelist->insert(std::string(chrome::kAboutScheme)); |
140 whitelist->insert(std::string(chrome::kChromeUIScheme)); | 147 whitelist->insert(std::string(chrome::kChromeUIScheme)); |
141 whitelist->insert(std::string(chrome::kFileScheme)); | 148 whitelist->insert(std::string(chrome::kFileScheme)); |
142 whitelist->insert(std::string(chrome::kFtpScheme)); | 149 whitelist->insert(std::string(chrome::kFtpScheme)); |
143 whitelist->insert(std::string(chrome::kHttpScheme)); | 150 whitelist->insert(std::string(chrome::kHttpScheme)); |
144 whitelist->insert(std::string(chrome::kHttpsScheme)); | 151 whitelist->insert(std::string(chrome::kHttpsScheme)); |
145 whitelist->insert(std::string(chrome::kMailToScheme)); | 152 whitelist->insert(std::string(chrome::kMailToScheme)); |
146 } | 153 } |
147 | 154 |
148 // Indexing | 155 // Indexing |
149 | 156 |
// Records |languages| in languages_ and populates the index.
// OLD side: also takes the history database and returns the result of
// ReloadFromHistory(history_db, false).
// NEW side: takes only |languages|, returns nothing, and delegates to
// RestoreFromCache() instead of reading the database directly.
150 bool InMemoryURLIndex::Init(URLDatabase* history_db, | 157 void InMemoryURLIndex::Init(const std::string& languages) { |
151 const std::string& languages) { | |
152 // TODO(mrossetti): Register for profile/language change notifications. | 158 // TODO(mrossetti): Register for profile/language change notifications. |
153 languages_ = languages; | 159 languages_ = languages; |
154 return ReloadFromHistory(history_db, false); | 160 RestoreFromCache(); |
155 } | 161 } |
156 | 162 |
// Writes the index cache and then sets cached_at_shutdown_, the flag the
// destructor's DCHECK looks at. The NEW side only drops the "Write our cache."
// comment row; the statements are the same.
// NOTE(review): any result from SaveToCacheFile() is discarded here, so the
// flag is set to true whether or not the write succeeded.
157 void InMemoryURLIndex::ShutDown() { | 163 void InMemoryURLIndex::ShutDown() { |
158 // Write our cache. | |
159 SaveToCacheFile(); | 164 SaveToCacheFile(); |
160 cached_at_shutdown_ = true; | 165 cached_at_shutdown_ = true; |
161 } | 166 } |
162 | 167 |
// (NEW side only.) Notification entry point. Dispatches on |type|:
//   HISTORY_URL_VISITED          -> OnURLVisited(details as URLVisitedDetails)
//   HISTORY_TYPED_URLS_MODIFIED  -> OnURLsModified(details as URLsModifiedDetails)
//   HISTORY_URLS_DELETED         -> OnURLsDeleted(details as URLsDeletedDetails)
//   HISTORY_LOADED               -> rebuild the index from the profile's
//                                   history database via ReloadFromHistory()
// Any other type is ignored. |source| is not examined.
| 168 void InMemoryURLIndex::Observe(int type, |
| 169 const content::NotificationSource& source, |
| 170 const content::NotificationDetails& details) { |
| 171 switch (type) { |
| 172 case chrome::NOTIFICATION_HISTORY_URL_VISITED: |
| 173 OnURLVisited(content::Details<URLVisitedDetails>(details).ptr()); |
| 174 break; |
| 175 case chrome::NOTIFICATION_HISTORY_TYPED_URLS_MODIFIED: |
| 176 OnURLsModified( |
| 177 content::Details<history::URLsModifiedDetails>(details).ptr()); |
| 178 break; |
| 179 case chrome::NOTIFICATION_HISTORY_URLS_DELETED: |
| 180 OnURLsDeleted( |
| 181 content::Details<history::URLsDeletedDetails>(details).ptr()); |
| 182 break; |
// NOTE(review): this case dereferences profile_ and the returned
// history_service without a null check. The default-case comment below says
// unit tests deliver notifications that were never registered for, so this
// branch may run on an instance built without a profile. Guarding with
// `if (!profile_) break;` and `if (!history_service) break;` would make it
// safe; a null history_db is already tolerated by ReloadFromHistory().
// NOTE(review): the HISTORY_LOADED registration added by RestoreFromCache() is
// not removed here once handled -- confirm whether a later HISTORY_LOADED
// should trigger another full rebuild.
| 183 case chrome::NOTIFICATION_HISTORY_LOADED: { |
| 184 HistoryService* history_service = |
| 185 profile_->GetHistoryService(Profile::EXPLICIT_ACCESS); |
| 186 URLDatabase* history_db = history_service->HistoryDatabase(); |
| 187 ReloadFromHistory(history_db); |
| 188 } |
| 189 break; |
| 190 default: |
| 191 // For simplicity, the unit tests send us all notifications, even when |
| 192 // we haven't registered for them, so don't assert here. |
| 193 break; |
| 194 } |
| 195 } |
| 196 |
// (NEW side only.) Handles a URL-visited notification by passing the visited
// row carried in |details| to UpdateURL(). |details| is dereferenced without a
// null check, so callers must supply a valid pointer.
| 197 void InMemoryURLIndex::OnURLVisited(const URLVisitedDetails* details) { |
| 198 UpdateURL(details->row); |
| 199 } |
| 200 |
// (NEW side only.) Handles a typed-URLs-modified notification: every row in
// |details|->changed_urls is passed, in order, to UpdateURL(). |details| is
// dereferenced without a null check.
| 201 void InMemoryURLIndex::OnURLsModified(const URLsModifiedDetails* details) { |
| 202 for (std::vector<history::URLRow>::const_iterator row = |
| 203 details->changed_urls.begin(); |
| 204 row != details->changed_urls.end(); ++row) |
| 205 UpdateURL(*row); |
| 206 } |
| 207 |
// (NEW side only.) Handles a URLs-deleted notification. When the whole history
// was deleted (|details|->all_history) the index and search-term cache are
// wiped via ClearPrivateData(); otherwise each row in |details|->rows is
// removed individually with DeleteURL(). |details| is dereferenced without a
// null check.
| 208 void InMemoryURLIndex::OnURLsDeleted(const URLsDeletedDetails* details) { |
| 209 if (details->all_history) { |
| 210 ClearPrivateData(); |
| 211 } else { |
| 212 for (std::vector<URLRow>::const_iterator row = details->rows.begin(); |
| 213 row != details->rows.end(); ++row) |
| 214 DeleteURL(*row); |
| 215 } |
| 216 } |
| 217 |
163 void InMemoryURLIndex::IndexRow(const URLRow& row) { | 218 void InMemoryURLIndex::IndexRow(const URLRow& row) { |
164 const GURL& gurl(row.url()); | 219 const GURL& gurl(row.url()); |
165 | 220 |
166 // Index only URLs with a whitelisted scheme. | 221 // Index only URLs with a whitelisted scheme. |
167 if (!InMemoryURLIndex::URLSchemeIsWhitelisted(gurl)) | 222 if (!InMemoryURLIndex::URLSchemeIsWhitelisted(gurl)) |
168 return; | 223 return; |
169 | 224 |
170 URLID row_id = row.id(); | 225 URLID row_id = row.id(); |
171 // Strip out username and password before saving and indexing. | 226 // Strip out username and password before saving and indexing. |
172 string16 url(net::FormatUrl(gurl, languages_, | 227 string16 url(net::FormatUrl(gurl, languages_, |
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
246 } | 301 } |
247 | 302 |
248 // Complete the removal of references to the word. | 303 // Complete the removal of references to the word. |
249 private_data.word_id_history_map_.erase(word_id); | 304 private_data.word_id_history_map_.erase(word_id); |
250 private_data.word_map_.erase(word); | 305 private_data.word_map_.erase(word); |
251 private_data.word_list_[word_id] = string16(); | 306 private_data.word_list_[word_id] = string16(); |
252 private_data.available_words_.insert(word_id); | 307 private_data.available_words_.insert(word_id); |
253 } | 308 } |
254 } | 309 } |
255 | 310 |
// Rebuilds the index from |history_db|.
// OLD side: returns bool, and only rebuilds when |clear_cache| is set or
// RestoreFromCacheFile() fails.
// NEW side: returns nothing, drops |clear_cache|, and always rebuilds from the
// database; consulting the cache file is now done by RestoreFromCache().
// On both sides the existing index is cleared first, so a null |history_db| or
// a failed enumerator initialization leaves the index empty. After a
// successful rebuild the elapsed time is reported to the
// "History.InMemoryURLIndexingTime" histogram and the cache file is rewritten.
256 bool InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db, | 311 void InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db) { |
257 bool clear_cache) { | |
258 ClearPrivateData(); | 312 ClearPrivateData(); |
259 | 313 |
260 if (!history_db) | 314 if (!history_db) |
261 return false; | 315 return; |
262 | 316 |
263 if (clear_cache || !RestoreFromCacheFile()) { | 317 base::TimeTicks beginning_time = base::TimeTicks::Now(); |
264 base::TimeTicks beginning_time = base::TimeTicks::Now(); | 318 // The index has to be built from scratch. |
265 // The index has to be built from scratch. | 319 URLDatabase::URLEnumerator history_enum; |
266 URLDatabase::URLEnumerator history_enum; | 320 if (!history_db->InitURLEnumeratorForSignificant(&history_enum)) |
267 if (!history_db->InitURLEnumeratorForSignificant(&history_enum)) | 321 return; |
268 return false; | 322 |
269 URLRow row; | 323 URLRow row; |
270 while (history_enum.GetNextURL(&row)) | 324 while (history_enum.GetNextURL(&row)) |
271 IndexRow(row); | 325 IndexRow(row); |
272 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime", | 326 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime", |
273 base::TimeTicks::Now() - beginning_time); | 327 base::TimeTicks::Now() - beginning_time); |
274 SaveToCacheFile(); | 328 SaveToCacheFile(); |
275 } | |
276 return true; | |
277 } | 329 } |
278 | 330 |
// Resets the index to empty: clears the URLIndexPrivateData held by
// private_data_ and discards every entry in search_term_cache_. Unchanged
// between OLD and NEW.
279 void InMemoryURLIndex::ClearPrivateData() { | 331 void InMemoryURLIndex::ClearPrivateData() { |
280 private_data_->Clear(); | 332 private_data_->Clear(); |
281 search_term_cache_.clear(); | 333 search_term_cache_.clear(); |
282 } | 334 } |
283 | 335 |
// (NEW side only.) Clears the index and tries to reload it from the cache
// file. If that fails and a profile is set, registers for
// NOTIFICATION_HISTORY_LOADED so that Observe() can rebuild the index from the
// history database later. With no profile, a failed restore simply leaves the
// index empty.
// NOTE(review): each failing call adds the HISTORY_LOADED registration again;
// this presumably relies on Init() being called once per instance -- confirm
// against callers.
| 336 void InMemoryURLIndex::RestoreFromCache() { |
| 337 ClearPrivateData(); |
| 338 if (!RestoreFromCacheFile() && profile_) { |
| 339 content::Source<Profile> source(profile_); |
| 340 registrar_.Add(this, chrome::NOTIFICATION_HISTORY_LOADED, source); |
| 341 } |
| 342 } |
| 343 |
284 bool InMemoryURLIndex::RestoreFromCacheFile() { | 344 bool InMemoryURLIndex::RestoreFromCacheFile() { |
285 // TODO(mrossetti): Figure out how to determine if the cache is up-to-date. | 345 // TODO(mrossetti): Figure out how to determine if the cache is up-to-date. |
286 // That is: ensure that the database has not been modified since the cache | 346 // That is: ensure that the database has not been modified since the cache |
287 // was last saved. DB file modification date is inadequate. There are no | 347 // was last saved. DB file modification date is inadequate. There are no |
288 // SQLite table checksums automatically stored. | 348 // SQLite table checksums automatically stored. |
289 // FIXME(mrossetti): Move File IO to another thread. | 349 // FIXME(mrossetti): Move File IO to another thread. |
290 base::ThreadRestrictions::ScopedAllowIO allow_io; | 350 base::ThreadRestrictions::ScopedAllowIO allow_io; |
291 base::TimeTicks beginning_time = base::TimeTicks::Now(); | 351 base::TimeTicks beginning_time = base::TimeTicks::Now(); |
292 FilePath file_path; | 352 FilePath file_path; |
293 if (!GetCacheFilePath(&file_path) || !file_util::PathExists(file_path)) | 353 if (!GetCacheFilePath(&file_path) || !file_util::PathExists(file_path)) |
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
342 int size = data.size(); | 402 int size = data.size(); |
343 if (file_util::WriteFile(file_path, data.c_str(), size) != size) { | 403 if (file_util::WriteFile(file_path, data.c_str(), size) != size) { |
344 LOG(WARNING) << "Failed to write " << file_path.value(); | 404 LOG(WARNING) << "Failed to write " << file_path.value(); |
345 return false; | 405 return false; |
346 } | 406 } |
347 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime", | 407 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime", |
348 base::TimeTicks::Now() - beginning_time); | 408 base::TimeTicks::Now() - beginning_time); |
349 return true; | 409 return true; |
350 } | 410 } |
351 | 411 |
// Brings the index in line with the current state of |row|.
// OLD side: the row id is a separate |row_id| argument and is copied onto a
// temporary URLRow before indexing.
// NEW side: the id is read from |row| itself (row.id()).
// Three outcomes, decided by whether the id is already in history_info_map_
// and whether RowQualifiesAsSignificant(row, base::Time()) holds:
//   not indexed, qualifies      -> IndexRow()
//   indexed, still qualifies    -> refresh counts/last-visit; re-index words
//                                  only if the title changed
//   indexed, no longer qualifies -> remove the row from the index
// A non-indexed row that does not qualify is left alone. The search-term cache
// is cleared in every case.
352 void InMemoryURLIndex::UpdateURL(URLID row_id, const URLRow& row) { | 412 void InMemoryURLIndex::UpdateURL(const URLRow& row) { |
353 // The row may or may not already be in our index. If it is not already | 413 // The row may or may not already be in our index. If it is not already |
354 // indexed and it qualifies then it gets indexed. If it is already | 414 // indexed and it qualifies then it gets indexed. If it is already |
355 // indexed and still qualifies then it gets updated, otherwise it | 415 // indexed and still qualifies then it gets updated, otherwise it |
356 // is deleted from the index. | 416 // is deleted from the index. |
357 HistoryInfoMap::iterator row_pos = | 417 HistoryInfoMap::iterator row_pos = |
358 private_data_->history_info_map_.find(row_id); | 418 private_data_->history_info_map_.find(row.id()); |
359 if (row_pos == private_data_->history_info_map_.end()) { | 419 if (row_pos == private_data_->history_info_map_.end()) { |
360 // This new row should be indexed if it qualifies. | 420 // This new row should be indexed if it qualifies. |
361 URLRow new_row(row); | 421 if (RowQualifiesAsSignificant(row, base::Time())) |
362 new_row.set_id(row_id); | 422 IndexRow(row); |
363 if (RowQualifiesAsSignificant(new_row, base::Time())) | |
364 IndexRow(new_row); | |
365 } else if (RowQualifiesAsSignificant(row, base::Time())) { | 423 } else if (RowQualifiesAsSignificant(row, base::Time())) { |
366 // This indexed row still qualifies and will be re-indexed. | 424 // This indexed row still qualifies and will be re-indexed. |
367 // The url won't have changed but the title, visit count, etc. | 425 // The url won't have changed but the title, visit count, etc. |
368 // might have changed. | 426 // might have changed. |
369 URLRow& updated_row = row_pos->second; | 427 URLRow& old_row = row_pos->second; |
370 updated_row.set_visit_count(row.visit_count()); | 428 old_row.set_visit_count(row.visit_count()); |
371 updated_row.set_typed_count(row.typed_count()); | 429 old_row.set_typed_count(row.typed_count()); |
372 updated_row.set_last_visit(row.last_visit()); | 430 old_row.set_last_visit(row.last_visit()); |
373 // While the URL is guaranteed to remain stable, the title may have changed. | 431 // While the URL is guaranteed to remain stable, the title may have changed. |
374 // If so, then we need to update the index with the changed words. | 432 // If so, then we need to update the index with the changed words. |
375 if (updated_row.title() != row.title()) { | 433 if (old_row.title() != row.title()) { |
376 // Clear all words associated with this row and re-index both the | 434 // Clear all words associated with this row and re-index both the |
377 // URL and title. | 435 // URL and title. |
// NOTE(review): the NEW side passes the incoming |row| (new title) to
// RemoveRowWordsFromIndex() where the OLD side passed the stored row (old
// title). This is only equivalent if that helper finds the words by row id
// rather than by re-tokenizing the row's text -- its body is not fully visible
// here, so confirm.
378 RemoveRowWordsFromIndex(updated_row); | 436 RemoveRowWordsFromIndex(row); |
379 updated_row.set_title(row.title()); | 437 old_row.set_title(row.title()); |
380 AddRowWordsToIndex(updated_row); | 438 AddRowWordsToIndex(old_row); |
381 } | 439 } |
382 } else { | 440 } else { |
383 // This indexed row no longer qualifies and will be de-indexed by | 441 // This indexed row no longer qualifies and will be de-indexed by |
384 // clearing all words associated with this row. | 442 // clearing all words associated with this row. |
// NOTE(review): likewise the NEW side hands RemoveRowFromIndex() the incoming
// row instead of the stored one; presumably keyed by id -- verify.
385 URLRow& removed_row = row_pos->second; | 443 RemoveRowFromIndex(row); |
386 RemoveRowFromIndex(removed_row); | |
387 } | 444 } |
388 // This invalidates the cache. | 445 // This invalidates the cache. |
389 search_term_cache_.clear(); | 446 search_term_cache_.clear(); |
390 } | 447 } |
391 | 448 |
// Removes a single URL from the index and clears the search-term cache.
// OLD side: takes a URLID and erases only the history_info_map_ entry,
// deliberately leaving stale references in word_id_history_map_ (see the OLD
// comment rows below).
// NEW side: takes the URLRow and delegates to RemoveRowFromIndex(), replacing
// that lazy approach.
392 void InMemoryURLIndex::DeleteURL(URLID row_id) { | 449 void InMemoryURLIndex::DeleteURL(const URLRow& row) { |
393 // Note that this does not remove any reference to this row from the | 450 RemoveRowFromIndex(row); |
394 // word_id_history_map_. That map will continue to contain (and return) | 451 search_term_cache_.clear(); // Invalidate the word cache. |
395 // hits against this row until that map is rebuilt, but since the | |
396 // history_info_map_ no longer references the row no erroneous results | |
397 // will propagate to the user. | |
398 private_data_->history_info_map_.erase(row_id); | |
399 // This invalidates the word cache. | |
400 search_term_cache_.clear(); | |
401 } | 452 } |
402 | 453 |
403 // Searching | 454 // Searching |
404 | 455 |
405 ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms( | 456 ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms( |
406 const String16Vector& terms) { | 457 const String16Vector& terms) { |
407 ScoredHistoryMatches scored_items; | 458 ScoredHistoryMatches scored_items; |
408 | 459 |
409 // Do nothing if we have indexed no words (probably because we've not been | 460 // Do nothing if we have indexed no words (probably because we've not been |
410 // initialized yet). | 461 // initialized yet). |
(...skipping 658 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1069 if (iter->has_title()) { | 1120 if (iter->has_title()) { |
1070 string16 title(UTF8ToUTF16(iter->title())); | 1121 string16 title(UTF8ToUTF16(iter->title())); |
1071 url_row.set_title(title); | 1122 url_row.set_title(title); |
1072 } | 1123 } |
1073 private_data_->history_info_map_[history_id] = url_row; | 1124 private_data_->history_info_map_[history_id] = url_row; |
1074 } | 1125 } |
1075 return true; | 1126 return true; |
1076 } | 1127 } |
1077 | 1128 |
1078 } // namespace history | 1129 } // namespace history |
OLD | NEW |