OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/history/in_memory_url_index.h" | 5 #include "chrome/browser/history/in_memory_url_index.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <functional> | 8 #include <functional> |
9 #include <iterator> | 9 #include <iterator> |
10 #include <limits> | 10 #include <limits> |
11 #include <numeric> | 11 #include <numeric> |
12 | 12 |
13 #include "base/file_util.h" | 13 #include "base/file_util.h" |
14 #include "base/i18n/case_conversion.h" | 14 #include "base/i18n/case_conversion.h" |
15 #include "base/metrics/histogram.h" | 15 #include "base/metrics/histogram.h" |
| 16 #include "base/string_util.h" |
16 #include "base/threading/thread_restrictions.h" | 17 #include "base/threading/thread_restrictions.h" |
| 18 #include "base/time.h" |
17 #include "base/utf_string_conversions.h" | 19 #include "base/utf_string_conversions.h" |
18 #include "chrome/browser/autocomplete/autocomplete.h" | 20 #include "chrome/browser/autocomplete/autocomplete.h" |
19 #include "chrome/browser/history/history.h" | 21 #include "chrome/browser/autocomplete/history_provider_util.h" |
20 #include "chrome/browser/history/history_notifications.h" | |
21 #include "chrome/browser/history/url_database.h" | 22 #include "chrome/browser/history/url_database.h" |
22 #include "chrome/browser/profiles/profile.h" | 23 #include "chrome/browser/profiles/profile.h" |
23 #include "chrome/common/chrome_notification_types.h" | |
24 #include "chrome/common/url_constants.h" | 24 #include "chrome/common/url_constants.h" |
25 #include "content/public/browser/notification_service.h" | 25 #include "googleurl/src/url_parse.h" |
| 26 #include "googleurl/src/url_util.h" |
| 27 #include "net/base/escape.h" |
26 #include "net/base/net_util.h" | 28 #include "net/base/net_util.h" |
| 29 #include "third_party/protobuf/src/google/protobuf/repeated_field.h" |
27 #include "ui/base/l10n/l10n_util.h" | 30 #include "ui/base/l10n/l10n_util.h" |
28 | 31 |
29 using google::protobuf::RepeatedField; | 32 using google::protobuf::RepeatedField; |
30 using google::protobuf::RepeatedPtrField; | 33 using google::protobuf::RepeatedPtrField; |
31 using in_memory_url_index::InMemoryURLIndexCacheItem; | 34 using in_memory_url_index::InMemoryURLIndexCacheItem; |
32 | 35 |
33 namespace history { | 36 namespace history { |
34 | 37 |
35 typedef imui::InMemoryURLIndexCacheItem_WordListItem WordListItem; | 38 typedef imui::InMemoryURLIndexCacheItem_WordListItem WordListItem; |
36 typedef imui::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry WordMapEntry; | 39 typedef imui::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry WordMapEntry; |
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
102 return 0; | 105 return 0; |
103 int score = kScoreRank[i]; | 106 int score = kScoreRank[i]; |
104 if (i > 0) { | 107 if (i > 0) { |
105 score += (value - value_ranks[i]) * | 108 score += (value - value_ranks[i]) * |
106 (kScoreRank[i - 1] - kScoreRank[i]) / | 109 (kScoreRank[i - 1] - kScoreRank[i]) / |
107 (value_ranks[i - 1] - value_ranks[i]); | 110 (value_ranks[i - 1] - value_ranks[i]); |
108 } | 111 } |
109 return score; | 112 return score; |
110 } | 113 } |
111 | 114 |
112 InMemoryURLIndex::InMemoryURLIndex(Profile* profile, | 115 InMemoryURLIndex::InMemoryURLIndex(const FilePath& history_dir) |
113 const FilePath& history_dir) | 116 : history_dir_(history_dir), |
114 : profile_(profile), | |
115 history_dir_(history_dir), | |
116 private_data_(new URLIndexPrivateData), | 117 private_data_(new URLIndexPrivateData), |
117 cached_at_shutdown_(false) { | 118 cached_at_shutdown_(false) { |
118 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_); | 119 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_); |
119 if (profile) { | |
120 content::Source<Profile> source(profile); | |
121 registrar_.Add(this, chrome::NOTIFICATION_HISTORY_URL_VISITED, source); | |
122 registrar_.Add(this, chrome::NOTIFICATION_HISTORY_TYPED_URLS_MODIFIED, | |
123 source); | |
124 registrar_.Add(this, chrome::NOTIFICATION_HISTORY_URLS_DELETED, source); | |
125 } | |
126 } | 120 } |
127 | 121 |
128 // Called only by unit tests. | 122 // Called only by unit tests. |
129 InMemoryURLIndex::InMemoryURLIndex() | 123 InMemoryURLIndex::InMemoryURLIndex() |
130 : private_data_(new URLIndexPrivateData), | 124 : private_data_(new URLIndexPrivateData), |
131 cached_at_shutdown_(false) { | 125 cached_at_shutdown_(false) { |
132 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_); | 126 InMemoryURLIndex::InitializeSchemeWhitelist(&scheme_whitelist_); |
133 } | 127 } |
134 | 128 |
135 InMemoryURLIndex::~InMemoryURLIndex() { | 129 InMemoryURLIndex::~InMemoryURLIndex() { |
136 // If there was a history directory (which there won't be for some unit tests) | 130 // If there was a history directory (which there won't be for some unit tests) |
137 // then insure that the cache has already been saved. | 131 // then insure that the cache has already been saved. |
138 registrar_.RemoveAll(); | |
139 DCHECK(history_dir_.empty() || cached_at_shutdown_); | 132 DCHECK(history_dir_.empty() || cached_at_shutdown_); |
140 } | 133 } |
141 | 134 |
142 // static | 135 // static |
143 void InMemoryURLIndex::InitializeSchemeWhitelist( | 136 void InMemoryURLIndex::InitializeSchemeWhitelist( |
144 std::set<std::string>* whitelist) { | 137 std::set<std::string>* whitelist) { |
145 DCHECK(whitelist); | 138 DCHECK(whitelist); |
146 whitelist->insert(std::string(chrome::kAboutScheme)); | 139 whitelist->insert(std::string(chrome::kAboutScheme)); |
147 whitelist->insert(std::string(chrome::kChromeUIScheme)); | 140 whitelist->insert(std::string(chrome::kChromeUIScheme)); |
148 whitelist->insert(std::string(chrome::kFileScheme)); | 141 whitelist->insert(std::string(chrome::kFileScheme)); |
149 whitelist->insert(std::string(chrome::kFtpScheme)); | 142 whitelist->insert(std::string(chrome::kFtpScheme)); |
150 whitelist->insert(std::string(chrome::kHttpScheme)); | 143 whitelist->insert(std::string(chrome::kHttpScheme)); |
151 whitelist->insert(std::string(chrome::kHttpsScheme)); | 144 whitelist->insert(std::string(chrome::kHttpsScheme)); |
152 whitelist->insert(std::string(chrome::kMailToScheme)); | 145 whitelist->insert(std::string(chrome::kMailToScheme)); |
153 } | 146 } |
154 | 147 |
155 // Indexing | 148 // Indexing |
156 | 149 |
157 void InMemoryURLIndex::Init(const std::string& languages) { | 150 bool InMemoryURLIndex::Init(URLDatabase* history_db, |
| 151 const std::string& languages) { |
158 // TODO(mrossetti): Register for profile/language change notifications. | 152 // TODO(mrossetti): Register for profile/language change notifications. |
159 languages_ = languages; | 153 languages_ = languages; |
160 RestoreFromCache(); | 154 return ReloadFromHistory(history_db, false); |
161 } | 155 } |
162 | 156 |
163 void InMemoryURLIndex::ShutDown() { | 157 void InMemoryURLIndex::ShutDown() { |
| 158 // Write our cache. |
164 SaveToCacheFile(); | 159 SaveToCacheFile(); |
165 cached_at_shutdown_ = true; | 160 cached_at_shutdown_ = true; |
166 } | 161 } |
167 | 162 |
168 void InMemoryURLIndex::Observe(int type, | |
169 const content::NotificationSource& source, | |
170 const content::NotificationDetails& details) { | |
171 switch (type) { | |
172 case chrome::NOTIFICATION_HISTORY_URL_VISITED: | |
173 OnURLVisited(content::Details<URLVisitedDetails>(details).ptr()); | |
174 break; | |
175 case chrome::NOTIFICATION_HISTORY_TYPED_URLS_MODIFIED: | |
176 OnURLsModified( | |
177 content::Details<history::URLsModifiedDetails>(details).ptr()); | |
178 break; | |
179 case chrome::NOTIFICATION_HISTORY_URLS_DELETED: | |
180 OnURLsDeleted( | |
181 content::Details<history::URLsDeletedDetails>(details).ptr()); | |
182 break; | |
183 case chrome::NOTIFICATION_HISTORY_LOADED: { | |
184 HistoryService* history_service = | |
185 profile_->GetHistoryService(Profile::EXPLICIT_ACCESS); | |
186 URLDatabase* history_db = history_service->HistoryDatabase(); | |
187 ReloadFromHistory(history_db); | |
188 } | |
189 break; | |
190 default: | |
191 // For simplicity, the unit tests send us all notifications, even when | |
192 // we haven't registered for them, so don't assert here. | |
193 break; | |
194 } | |
195 } | |
196 | |
197 void InMemoryURLIndex::OnURLVisited(const URLVisitedDetails* details) { | |
198 UpdateURL(details->row); | |
199 } | |
200 | |
201 void InMemoryURLIndex::OnURLsModified(const URLsModifiedDetails* details) { | |
202 for (std::vector<history::URLRow>::const_iterator row = | |
203 details->changed_urls.begin(); | |
204 row != details->changed_urls.end(); ++row) | |
205 UpdateURL(*row); | |
206 } | |
207 | |
208 void InMemoryURLIndex::OnURLsDeleted(const URLsDeletedDetails* details) { | |
209 if (details->all_history) { | |
210 ClearPrivateData(); | |
211 } else { | |
212 for (std::vector<URLRow>::const_iterator row = details->rows.begin(); | |
213 row != details->rows.end(); ++row) | |
214 DeleteURL(*row); | |
215 } | |
216 } | |
217 | |
218 void InMemoryURLIndex::IndexRow(const URLRow& row) { | 163 void InMemoryURLIndex::IndexRow(const URLRow& row) { |
219 const GURL& gurl(row.url()); | 164 const GURL& gurl(row.url()); |
220 | 165 |
221 // Index only URLs with a whitelisted scheme. | 166 // Index only URLs with a whitelisted scheme. |
222 if (!InMemoryURLIndex::URLSchemeIsWhitelisted(gurl)) | 167 if (!InMemoryURLIndex::URLSchemeIsWhitelisted(gurl)) |
223 return; | 168 return; |
224 | 169 |
225 URLID row_id = row.id(); | 170 URLID row_id = row.id(); |
226 // Strip out username and password before saving and indexing. | 171 // Strip out username and password before saving and indexing. |
227 string16 url(net::FormatUrl(gurl, languages_, | 172 string16 url(net::FormatUrl(gurl, languages_, |
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
301 } | 246 } |
302 | 247 |
303 // Complete the removal of references to the word. | 248 // Complete the removal of references to the word. |
304 private_data.word_id_history_map_.erase(word_id); | 249 private_data.word_id_history_map_.erase(word_id); |
305 private_data.word_map_.erase(word); | 250 private_data.word_map_.erase(word); |
306 private_data.word_list_[word_id] = string16(); | 251 private_data.word_list_[word_id] = string16(); |
307 private_data.available_words_.insert(word_id); | 252 private_data.available_words_.insert(word_id); |
308 } | 253 } |
309 } | 254 } |
310 | 255 |
311 void InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db) { | 256 bool InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db, |
| 257 bool clear_cache) { |
312 ClearPrivateData(); | 258 ClearPrivateData(); |
313 | 259 |
314 if (!history_db) | 260 if (!history_db) |
315 return; | 261 return false; |
316 | 262 |
317 base::TimeTicks beginning_time = base::TimeTicks::Now(); | 263 if (clear_cache || !RestoreFromCacheFile()) { |
318 // The index has to be built from scratch. | 264 base::TimeTicks beginning_time = base::TimeTicks::Now(); |
319 URLDatabase::URLEnumerator history_enum; | 265 // The index has to be built from scratch. |
320 if (!history_db->InitURLEnumeratorForSignificant(&history_enum)) | 266 URLDatabase::URLEnumerator history_enum; |
321 return; | 267 if (!history_db->InitURLEnumeratorForSignificant(&history_enum)) |
322 | 268 return false; |
323 URLRow row; | 269 URLRow row; |
324 while (history_enum.GetNextURL(&row)) | 270 while (history_enum.GetNextURL(&row)) |
325 IndexRow(row); | 271 IndexRow(row); |
326 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime", | 272 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime", |
327 base::TimeTicks::Now() - beginning_time); | 273 base::TimeTicks::Now() - beginning_time); |
328 SaveToCacheFile(); | 274 SaveToCacheFile(); |
| 275 } |
| 276 return true; |
329 } | 277 } |
330 | 278 |
331 void InMemoryURLIndex::ClearPrivateData() { | 279 void InMemoryURLIndex::ClearPrivateData() { |
332 private_data_->Clear(); | 280 private_data_->Clear(); |
333 search_term_cache_.clear(); | 281 search_term_cache_.clear(); |
334 } | 282 } |
335 | 283 |
336 void InMemoryURLIndex::RestoreFromCache() { | |
337 ClearPrivateData(); | |
338 if (!RestoreFromCacheFile() && profile_) { | |
339 content::Source<Profile> source(profile_); | |
340 registrar_.Add(this, chrome::NOTIFICATION_HISTORY_LOADED, source); | |
341 } | |
342 } | |
343 | |
344 bool InMemoryURLIndex::RestoreFromCacheFile() { | 284 bool InMemoryURLIndex::RestoreFromCacheFile() { |
345 // TODO(mrossetti): Figure out how to determine if the cache is up-to-date. | 285 // TODO(mrossetti): Figure out how to determine if the cache is up-to-date. |
346 // That is: ensure that the database has not been modified since the cache | 286 // That is: ensure that the database has not been modified since the cache |
347 // was last saved. DB file modification date is inadequate. There are no | 287 // was last saved. DB file modification date is inadequate. There are no |
348 // SQLite table checksums automatically stored. | 288 // SQLite table checksums automatically stored. |
349 // FIXME(mrossetti): Move File IO to another thread. | 289 // FIXME(mrossetti): Move File IO to another thread. |
350 base::ThreadRestrictions::ScopedAllowIO allow_io; | 290 base::ThreadRestrictions::ScopedAllowIO allow_io; |
351 base::TimeTicks beginning_time = base::TimeTicks::Now(); | 291 base::TimeTicks beginning_time = base::TimeTicks::Now(); |
352 FilePath file_path; | 292 FilePath file_path; |
353 if (!GetCacheFilePath(&file_path) || !file_util::PathExists(file_path)) | 293 if (!GetCacheFilePath(&file_path) || !file_util::PathExists(file_path)) |
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
402 int size = data.size(); | 342 int size = data.size(); |
403 if (file_util::WriteFile(file_path, data.c_str(), size) != size) { | 343 if (file_util::WriteFile(file_path, data.c_str(), size) != size) { |
404 LOG(WARNING) << "Failed to write " << file_path.value(); | 344 LOG(WARNING) << "Failed to write " << file_path.value(); |
405 return false; | 345 return false; |
406 } | 346 } |
407 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime", | 347 UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime", |
408 base::TimeTicks::Now() - beginning_time); | 348 base::TimeTicks::Now() - beginning_time); |
409 return true; | 349 return true; |
410 } | 350 } |
411 | 351 |
412 void InMemoryURLIndex::UpdateURL(const URLRow& row) { | 352 void InMemoryURLIndex::UpdateURL(URLID row_id, const URLRow& row) { |
413 // The row may or may not already be in our index. If it is not already | 353 // The row may or may not already be in our index. If it is not already |
414 // indexed and it qualifies then it gets indexed. If it is already | 354 // indexed and it qualifies then it gets indexed. If it is already |
415 // indexed and still qualifies then it gets updated, otherwise it | 355 // indexed and still qualifies then it gets updated, otherwise it |
416 // is deleted from the index. | 356 // is deleted from the index. |
417 HistoryInfoMap::iterator row_pos = | 357 HistoryInfoMap::iterator row_pos = |
418 private_data_->history_info_map_.find(row.id()); | 358 private_data_->history_info_map_.find(row_id); |
419 if (row_pos == private_data_->history_info_map_.end()) { | 359 if (row_pos == private_data_->history_info_map_.end()) { |
420 // This new row should be indexed if it qualifies. | 360 // This new row should be indexed if it qualifies. |
421 if (RowQualifiesAsSignificant(row, base::Time())) | 361 URLRow new_row(row); |
422 IndexRow(row); | 362 new_row.set_id(row_id); |
| 363 if (RowQualifiesAsSignificant(new_row, base::Time())) |
| 364 IndexRow(new_row); |
423 } else if (RowQualifiesAsSignificant(row, base::Time())) { | 365 } else if (RowQualifiesAsSignificant(row, base::Time())) { |
424 // This indexed row still qualifies and will be re-indexed. | 366 // This indexed row still qualifies and will be re-indexed. |
425 // The url won't have changed but the title, visit count, etc. | 367 // The url won't have changed but the title, visit count, etc. |
426 // might have changed. | 368 // might have changed. |
427 URLRow& old_row = row_pos->second; | 369 URLRow& updated_row = row_pos->second; |
428 old_row.set_visit_count(row.visit_count()); | 370 updated_row.set_visit_count(row.visit_count()); |
429 old_row.set_typed_count(row.typed_count()); | 371 updated_row.set_typed_count(row.typed_count()); |
430 old_row.set_last_visit(row.last_visit()); | 372 updated_row.set_last_visit(row.last_visit()); |
431 // While the URL is guaranteed to remain stable, the title may have changed. | 373 // While the URL is guaranteed to remain stable, the title may have changed. |
432 // If so, then we need to update the index with the changed words. | 374 // If so, then we need to update the index with the changed words. |
433 if (old_row.title() != row.title()) { | 375 if (updated_row.title() != row.title()) { |
434 // Clear all words associated with this row and re-index both the | 376 // Clear all words associated with this row and re-index both the |
435 // URL and title. | 377 // URL and title. |
436 RemoveRowWordsFromIndex(row); | 378 RemoveRowWordsFromIndex(updated_row); |
437 old_row.set_title(row.title()); | 379 updated_row.set_title(row.title()); |
438 AddRowWordsToIndex(old_row); | 380 AddRowWordsToIndex(updated_row); |
439 } | 381 } |
440 } else { | 382 } else { |
441 // This indexed row no longer qualifies and will be de-indexed by | 383 // This indexed row no longer qualifies and will be de-indexed by |
442 // clearing all words associated with this row. | 384 // clearing all words associated with this row. |
443 RemoveRowFromIndex(row); | 385 URLRow& removed_row = row_pos->second; |
| 386 RemoveRowFromIndex(removed_row); |
444 } | 387 } |
445 // This invalidates the cache. | 388 // This invalidates the cache. |
446 search_term_cache_.clear(); | 389 search_term_cache_.clear(); |
447 } | 390 } |
448 | 391 |
449 void InMemoryURLIndex::DeleteURL(const URLRow& row) { | 392 void InMemoryURLIndex::DeleteURL(URLID row_id) { |
450 RemoveRowFromIndex(row); | 393 // Note that this does not remove any reference to this row from the |
451 search_term_cache_.clear(); // Invalidate the word cache. | 394 // word_id_history_map_. That map will continue to contain (and return) |
| 395 // hits against this row until that map is rebuilt, but since the |
| 396 // history_info_map_ no longer references the row no erroneous results |
| 397 // will propagate to the user. |
| 398 private_data_->history_info_map_.erase(row_id); |
| 399 // This invalidates the word cache. |
| 400 search_term_cache_.clear(); |
452 } | 401 } |
453 | 402 |
454 // Searching | 403 // Searching |
455 | 404 |
456 ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms( | 405 ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms( |
457 const String16Vector& terms) { | 406 const String16Vector& terms) { |
458 ScoredHistoryMatches scored_items; | 407 ScoredHistoryMatches scored_items; |
459 | 408 |
460 // Do nothing if we have indexed no words (probably because we've not been | 409 // Do nothing if we have indexed no words (probably because we've not been |
461 // initialized yet). | 410 // initialized yet). |
(...skipping 658 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1120 if (iter->has_title()) { | 1069 if (iter->has_title()) { |
1121 string16 title(UTF8ToUTF16(iter->title())); | 1070 string16 title(UTF8ToUTF16(iter->title())); |
1122 url_row.set_title(title); | 1071 url_row.set_title(title); |
1123 } | 1072 } |
1124 private_data_->history_info_map_[history_id] = url_row; | 1073 private_data_->history_info_map_[history_id] = url_row; |
1125 } | 1074 } |
1126 return true; | 1075 return true; |
1127 } | 1076 } |
1128 | 1077 |
1129 } // namespace history | 1078 } // namespace history |
OLD | NEW |