OLD | NEW |
| (Empty) |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "chrome/browser/history/expire_history_backend.h" | |
6 | |
7 #include <algorithm> | |
8 #include <functional> | |
9 #include <limits> | |
10 | |
11 #include "base/bind.h" | |
12 #include "base/compiler_specific.h" | |
13 #include "base/files/file_enumerator.h" | |
14 #include "base/files/file_util.h" | |
15 #include "base/logging.h" | |
16 #include "base/message_loop/message_loop.h" | |
17 #include "components/history/core/browser/history_backend_notifier.h" | |
18 #include "components/history/core/browser/history_client.h" | |
19 #include "components/history/core/browser/history_database.h" | |
20 #include "components/history/core/browser/thumbnail_database.h" | |
21 | |
22 namespace history { | |
23 | |
24 // Helpers -------------------------------------------------------------------- | |
25 | |
26 namespace { | |
27 | |
28 // The number of days by which the expiration threshold is advanced for items | |
29 // that we want to expire early, such as those of AUTO_SUBFRAME transition type. | |
30 // | |
31 // Early expiration stuff is kept around only for edge cases, as subframes | |
32 // don't appear in history and the vast majority of them are ads anyway. The | |
33 // main use case for these is if you're on a site with links to different | |
34 // frames, you'll be able to see those links as visited, and we'll also be | |
35 // able to get redirect information for those URLs. | |
36 // | |
37 // But since these uses are most valuable when you're actually on the site, | |
38 // and because these can take up the bulk of your history, we get a lot of | |
39 // space savings by deleting them quickly. | |
40 const int kEarlyExpirationAdvanceDays = 3; | |
41 | |
42 // Reads all types of visits starting from beginning of time to the given end | |
43 // time. This is the most general reader. | |
44 class AllVisitsReader : public ExpiringVisitsReader { | |
45 public: | |
46 bool Read(base::Time end_time, | |
47 HistoryDatabase* db, | |
48 VisitVector* visits, | |
49 int max_visits) const override { | |
50 DCHECK(db) << "must have a database to operate upon"; | |
51 DCHECK(visits) << "visit vector has to exist in order to populate it"; | |
52 | |
53 db->GetAllVisitsInRange(base::Time(), end_time, max_visits, visits); | |
54 // When we got the maximum number of visits we asked for, we say there could | |
55 // be additional things to expire now. | |
56 return static_cast<int>(visits->size()) == max_visits; | |
57 } | |
58 }; | |
59 | |
60 // Reads only AUTO_SUBFRAME visits, within a computed range. The range is | |
61 // computed as follows: | |
62 // * |begin_time| is read from the meta table. This value is updated whenever | |
63 // there are no more additional visits to expire by this reader. | |
64 // * |end_time| is advanced forward by a constant (kEarlyExpirationAdvanceDay), | |
65 // but not past the current time. | |
66 class AutoSubframeVisitsReader : public ExpiringVisitsReader { | |
67 public: | |
68 bool Read(base::Time end_time, | |
69 HistoryDatabase* db, | |
70 VisitVector* visits, | |
71 int max_visits) const override { | |
72 DCHECK(db) << "must have a database to operate upon"; | |
73 DCHECK(visits) << "visit vector has to exist in order to populate it"; | |
74 | |
75 base::Time begin_time = db->GetEarlyExpirationThreshold(); | |
76 // Advance |end_time| to expire early. | |
77 base::Time early_end_time = end_time + | |
78 base::TimeDelta::FromDays(kEarlyExpirationAdvanceDays); | |
79 | |
80 // We don't want to set the early expiration threshold to a time in the | |
81 // future. | |
82 base::Time now = base::Time::Now(); | |
83 if (early_end_time > now) | |
84 early_end_time = now; | |
85 | |
86 db->GetVisitsInRangeForTransition(begin_time, early_end_time, | |
87 max_visits, | |
88 ui::PAGE_TRANSITION_AUTO_SUBFRAME, | |
89 visits); | |
90 bool more = static_cast<int>(visits->size()) == max_visits; | |
91 if (!more) | |
92 db->UpdateEarlyExpirationThreshold(early_end_time); | |
93 | |
94 return more; | |
95 } | |
96 }; | |
97 | |
98 // The number of visits we will expire very time we check for old items. This | |
99 // Prevents us from doing too much work any given time. | |
100 const int kNumExpirePerIteration = 32; | |
101 | |
102 // The number of seconds between checking for items that should be expired when | |
103 // we think there might be more items to expire. This timeout is used when the | |
104 // last expiration found at least kNumExpirePerIteration and we want to check | |
105 // again "soon." | |
106 const int kExpirationDelaySec = 30; | |
107 | |
108 // The number of minutes between checking, as with kExpirationDelaySec, but | |
109 // when we didn't find enough things to expire last time. If there was no | |
110 // history to expire last iteration, it's likely there is nothing next | |
111 // iteration, so we want to wait longer before checking to avoid wasting CPU. | |
112 const int kExpirationEmptyDelayMin = 5; | |
113 | |
114 } // namespace | |
115 | |
116 | |
117 // ExpireHistoryBackend::DeleteEffects ---------------------------------------- | |
118 | |
119 ExpireHistoryBackend::DeleteEffects::DeleteEffects() { | |
120 } | |
121 | |
122 ExpireHistoryBackend::DeleteEffects::~DeleteEffects() { | |
123 } | |
124 | |
125 | |
126 // ExpireHistoryBackend ------------------------------------------------------- | |
127 | |
128 ExpireHistoryBackend::ExpireHistoryBackend( | |
129 HistoryBackendNotifier* notifier, | |
130 HistoryClient* history_client) | |
131 : notifier_(notifier), | |
132 main_db_(NULL), | |
133 thumb_db_(NULL), | |
134 history_client_(history_client), | |
135 weak_factory_(this) { | |
136 DCHECK(notifier_); | |
137 } | |
138 | |
139 ExpireHistoryBackend::~ExpireHistoryBackend() { | |
140 } | |
141 | |
142 void ExpireHistoryBackend::SetDatabases(HistoryDatabase* main_db, | |
143 ThumbnailDatabase* thumb_db) { | |
144 main_db_ = main_db; | |
145 thumb_db_ = thumb_db; | |
146 } | |
147 | |
148 void ExpireHistoryBackend::DeleteURL(const GURL& url) { | |
149 DeleteURLs(std::vector<GURL>(1, url)); | |
150 } | |
151 | |
152 void ExpireHistoryBackend::DeleteURLs(const std::vector<GURL>& urls) { | |
153 if (!main_db_) | |
154 return; | |
155 | |
156 DeleteEffects effects; | |
157 HistoryClient* history_client = GetHistoryClient(); | |
158 for (std::vector<GURL>::const_iterator url = urls.begin(); url != urls.end(); | |
159 ++url) { | |
160 URLRow url_row; | |
161 if (!main_db_->GetRowForURL(*url, &url_row)) | |
162 continue; // Nothing to delete. | |
163 | |
164 // Collect all the visits and delete them. Note that we don't give up if | |
165 // there are no visits, since the URL could still have an entry that we | |
166 // should delete. | |
167 VisitVector visits; | |
168 main_db_->GetVisitsForURL(url_row.id(), &visits); | |
169 | |
170 DeleteVisitRelatedInfo(visits, &effects); | |
171 | |
172 // We skip ExpireURLsForVisits (since we are deleting from the | |
173 // URL, and not starting with visits in a given time range). We | |
174 // therefore need to call the deletion and favicon update | |
175 // functions manually. | |
176 DeleteOneURL(url_row, | |
177 history_client && history_client->IsBookmarked(*url), | |
178 &effects); | |
179 } | |
180 | |
181 DeleteFaviconsIfPossible(&effects); | |
182 | |
183 BroadcastNotifications(&effects, DELETION_USER_INITIATED); | |
184 } | |
185 | |
186 void ExpireHistoryBackend::ExpireHistoryBetween( | |
187 const std::set<GURL>& restrict_urls, | |
188 base::Time begin_time, | |
189 base::Time end_time) { | |
190 if (!main_db_) | |
191 return; | |
192 | |
193 // Find the affected visits and delete them. | |
194 VisitVector visits; | |
195 main_db_->GetAllVisitsInRange(begin_time, end_time, 0, &visits); | |
196 if (!restrict_urls.empty()) { | |
197 std::set<URLID> url_ids; | |
198 for (std::set<GURL>::const_iterator url = restrict_urls.begin(); | |
199 url != restrict_urls.end(); ++url) | |
200 url_ids.insert(main_db_->GetRowForURL(*url, NULL)); | |
201 VisitVector all_visits; | |
202 all_visits.swap(visits); | |
203 for (VisitVector::iterator visit = all_visits.begin(); | |
204 visit != all_visits.end(); ++visit) { | |
205 if (url_ids.find(visit->url_id) != url_ids.end()) | |
206 visits.push_back(*visit); | |
207 } | |
208 } | |
209 ExpireVisits(visits); | |
210 } | |
211 | |
212 void ExpireHistoryBackend::ExpireHistoryForTimes( | |
213 const std::vector<base::Time>& times) { | |
214 // |times| must be in reverse chronological order and have no | |
215 // duplicates, i.e. each member must be earlier than the one before | |
216 // it. | |
217 DCHECK( | |
218 std::adjacent_find( | |
219 times.begin(), times.end(), std::less_equal<base::Time>()) == | |
220 times.end()); | |
221 | |
222 if (!main_db_) | |
223 return; | |
224 | |
225 // Find the affected visits and delete them. | |
226 VisitVector visits; | |
227 main_db_->GetVisitsForTimes(times, &visits); | |
228 ExpireVisits(visits); | |
229 } | |
230 | |
231 void ExpireHistoryBackend::ExpireVisits(const VisitVector& visits) { | |
232 if (visits.empty()) | |
233 return; | |
234 | |
235 DeleteEffects effects; | |
236 DeleteVisitRelatedInfo(visits, &effects); | |
237 | |
238 // Delete or update the URLs affected. We want to update the visit counts | |
239 // since this is called by the user who wants to delete their recent history, | |
240 // and we don't want to leave any evidence. | |
241 ExpireURLsForVisits(visits, &effects); | |
242 DeleteFaviconsIfPossible(&effects); | |
243 BroadcastNotifications(&effects, DELETION_USER_INITIATED); | |
244 | |
245 // Pick up any bits possibly left over. | |
246 ParanoidExpireHistory(); | |
247 } | |
248 | |
249 void ExpireHistoryBackend::ExpireHistoryBefore(base::Time end_time) { | |
250 if (!main_db_) | |
251 return; | |
252 | |
253 // Expire as much history as possible before the given date. | |
254 ExpireSomeOldHistory(end_time, GetAllVisitsReader(), | |
255 std::numeric_limits<int>::max()); | |
256 ParanoidExpireHistory(); | |
257 } | |
258 | |
259 void ExpireHistoryBackend::InitWorkQueue() { | |
260 DCHECK(work_queue_.empty()) << "queue has to be empty prior to init"; | |
261 | |
262 for (size_t i = 0; i < readers_.size(); i++) | |
263 work_queue_.push(readers_[i]); | |
264 } | |
265 | |
266 const ExpiringVisitsReader* ExpireHistoryBackend::GetAllVisitsReader() { | |
267 if (!all_visits_reader_) | |
268 all_visits_reader_.reset(new AllVisitsReader()); | |
269 return all_visits_reader_.get(); | |
270 } | |
271 | |
272 const ExpiringVisitsReader* | |
273 ExpireHistoryBackend::GetAutoSubframeVisitsReader() { | |
274 if (!auto_subframe_visits_reader_) | |
275 auto_subframe_visits_reader_.reset(new AutoSubframeVisitsReader()); | |
276 return auto_subframe_visits_reader_.get(); | |
277 } | |
278 | |
279 void ExpireHistoryBackend::StartExpiringOldStuff( | |
280 base::TimeDelta expiration_threshold) { | |
281 expiration_threshold_ = expiration_threshold; | |
282 | |
283 // Remove all readers, just in case this was method was called before. | |
284 readers_.clear(); | |
285 // For now, we explicitly add all known readers. If we come up with more | |
286 // reader types (in case we want to expire different types of visits in | |
287 // different ways), we can make it be populated by creator/owner of | |
288 // ExpireHistoryBackend. | |
289 readers_.push_back(GetAllVisitsReader()); | |
290 readers_.push_back(GetAutoSubframeVisitsReader()); | |
291 | |
292 // Initialize the queue with all tasks for the first set of iterations. | |
293 InitWorkQueue(); | |
294 ScheduleExpire(); | |
295 } | |
296 | |
297 void ExpireHistoryBackend::DeleteFaviconsIfPossible(DeleteEffects* effects) { | |
298 if (!thumb_db_) | |
299 return; | |
300 | |
301 for (std::set<favicon_base::FaviconID>::const_iterator i = | |
302 effects->affected_favicons.begin(); | |
303 i != effects->affected_favicons.end(); ++i) { | |
304 if (!thumb_db_->HasMappingFor(*i)) { | |
305 GURL icon_url; | |
306 favicon_base::IconType icon_type; | |
307 if (thumb_db_->GetFaviconHeader(*i, | |
308 &icon_url, | |
309 &icon_type) && | |
310 thumb_db_->DeleteFavicon(*i)) { | |
311 effects->deleted_favicons.insert(icon_url); | |
312 } | |
313 } | |
314 } | |
315 } | |
316 | |
317 void ExpireHistoryBackend::BroadcastNotifications(DeleteEffects* effects, | |
318 DeletionType type) { | |
319 if (!effects->modified_urls.empty()) { | |
320 notifier_->NotifyURLsModified(effects->modified_urls); | |
321 } | |
322 if (!effects->deleted_urls.empty()) { | |
323 notifier_->NotifyURLsDeleted(false, | |
324 type == DELETION_EXPIRED, | |
325 effects->deleted_urls, | |
326 effects->deleted_favicons); | |
327 } | |
328 } | |
329 | |
330 void ExpireHistoryBackend::DeleteVisitRelatedInfo(const VisitVector& visits, | |
331 DeleteEffects* effects) { | |
332 for (size_t i = 0; i < visits.size(); i++) { | |
333 // Delete the visit itself. | |
334 main_db_->DeleteVisit(visits[i]); | |
335 | |
336 // Add the URL row to the affected URL list. | |
337 if (!effects->affected_urls.count(visits[i].url_id)) { | |
338 URLRow row; | |
339 if (main_db_->GetURLRow(visits[i].url_id, &row)) | |
340 effects->affected_urls[visits[i].url_id] = row; | |
341 } | |
342 } | |
343 } | |
344 | |
345 void ExpireHistoryBackend::DeleteOneURL(const URLRow& url_row, | |
346 bool is_bookmarked, | |
347 DeleteEffects* effects) { | |
348 main_db_->DeleteSegmentForURL(url_row.id()); | |
349 | |
350 if (!is_bookmarked) { | |
351 effects->deleted_urls.push_back(url_row); | |
352 | |
353 // Delete stuff that references this URL. | |
354 if (thumb_db_) { | |
355 // Collect shared information. | |
356 std::vector<IconMapping> icon_mappings; | |
357 if (thumb_db_->GetIconMappingsForPageURL(url_row.url(), &icon_mappings)) { | |
358 for (std::vector<IconMapping>::iterator m = icon_mappings.begin(); | |
359 m != icon_mappings.end(); ++m) { | |
360 effects->affected_favicons.insert(m->icon_id); | |
361 } | |
362 // Delete the mapping entries for the url. | |
363 thumb_db_->DeleteIconMappings(url_row.url()); | |
364 } | |
365 } | |
366 // Last, delete the URL entry. | |
367 main_db_->DeleteURLRow(url_row.id()); | |
368 } | |
369 } | |
370 | |
371 namespace { | |
372 | |
373 struct ChangedURL { | |
374 ChangedURL() : visit_count(0), typed_count(0) {} | |
375 int visit_count; | |
376 int typed_count; | |
377 }; | |
378 | |
379 } // namespace | |
380 | |
381 void ExpireHistoryBackend::ExpireURLsForVisits(const VisitVector& visits, | |
382 DeleteEffects* effects) { | |
383 // First find all unique URLs and the number of visits we're deleting for | |
384 // each one. | |
385 std::map<URLID, ChangedURL> changed_urls; | |
386 for (size_t i = 0; i < visits.size(); i++) { | |
387 ChangedURL& cur = changed_urls[visits[i].url_id]; | |
388 // NOTE: This code must stay in sync with HistoryBackend::AddPageVisit(). | |
389 // TODO(pkasting): http://b/1148304 We shouldn't be marking so many URLs as | |
390 // typed, which would help eliminate the need for this code (we still would | |
391 // need to handle RELOAD transitions specially, though). | |
392 ui::PageTransition transition = | |
393 ui::PageTransitionStripQualifier(visits[i].transition); | |
394 if (transition != ui::PAGE_TRANSITION_RELOAD) | |
395 cur.visit_count++; | |
396 if ((transition == ui::PAGE_TRANSITION_TYPED && | |
397 !ui::PageTransitionIsRedirect(visits[i].transition)) || | |
398 transition == ui::PAGE_TRANSITION_KEYWORD_GENERATED) | |
399 cur.typed_count++; | |
400 } | |
401 | |
402 // Check each unique URL with deleted visits. | |
403 HistoryClient* history_client = GetHistoryClient(); | |
404 for (std::map<URLID, ChangedURL>::const_iterator i = changed_urls.begin(); | |
405 i != changed_urls.end(); ++i) { | |
406 // The unique URL rows should already be filled in. | |
407 URLRow& url_row = effects->affected_urls[i->first]; | |
408 if (!url_row.id()) | |
409 continue; // URL row doesn't exist in the database. | |
410 | |
411 // Check if there are any other visits for this URL and update the time | |
412 // (the time change may not actually be synced to disk below when we're | |
413 // archiving). | |
414 VisitRow last_visit; | |
415 if (main_db_->GetMostRecentVisitForURL(url_row.id(), &last_visit)) | |
416 url_row.set_last_visit(last_visit.visit_time); | |
417 else | |
418 url_row.set_last_visit(base::Time()); | |
419 | |
420 // Don't delete URLs with visits still in the DB, or bookmarked. | |
421 bool is_bookmarked = | |
422 (history_client && history_client->IsBookmarked(url_row.url())); | |
423 if (!is_bookmarked && url_row.last_visit().is_null()) { | |
424 // Not bookmarked and no more visits. Nuke the url. | |
425 DeleteOneURL(url_row, is_bookmarked, effects); | |
426 } else { | |
427 // NOTE: The calls to std::max() below are a backstop, but they should | |
428 // never actually be needed unless the database is corrupt (I think). | |
429 url_row.set_visit_count( | |
430 std::max(0, url_row.visit_count() - i->second.visit_count)); | |
431 url_row.set_typed_count( | |
432 std::max(0, url_row.typed_count() - i->second.typed_count)); | |
433 | |
434 // Update the db with the new details. | |
435 main_db_->UpdateURLRow(url_row.id(), url_row); | |
436 | |
437 effects->modified_urls.push_back(url_row); | |
438 } | |
439 } | |
440 } | |
441 | |
442 void ExpireHistoryBackend::ScheduleExpire() { | |
443 base::TimeDelta delay; | |
444 if (work_queue_.empty()) { | |
445 // If work queue is empty, reset the work queue to contain all tasks and | |
446 // schedule next iteration after a longer delay. | |
447 InitWorkQueue(); | |
448 delay = base::TimeDelta::FromMinutes(kExpirationEmptyDelayMin); | |
449 } else { | |
450 delay = base::TimeDelta::FromSeconds(kExpirationDelaySec); | |
451 } | |
452 | |
453 base::MessageLoop::current()->PostDelayedTask( | |
454 FROM_HERE, | |
455 base::Bind(&ExpireHistoryBackend::DoExpireIteration, | |
456 weak_factory_.GetWeakPtr()), | |
457 delay); | |
458 } | |
459 | |
460 void ExpireHistoryBackend::DoExpireIteration() { | |
461 DCHECK(!work_queue_.empty()) << "queue has to be non-empty"; | |
462 | |
463 const ExpiringVisitsReader* reader = work_queue_.front(); | |
464 bool more_to_expire = ExpireSomeOldHistory( | |
465 GetCurrentExpirationTime(), reader, kNumExpirePerIteration); | |
466 | |
467 work_queue_.pop(); | |
468 // If there are more items to expire, add the reader back to the queue, thus | |
469 // creating a new task for future iterations. | |
470 if (more_to_expire) | |
471 work_queue_.push(reader); | |
472 | |
473 ScheduleExpire(); | |
474 } | |
475 | |
476 bool ExpireHistoryBackend::ExpireSomeOldHistory( | |
477 base::Time end_time, | |
478 const ExpiringVisitsReader* reader, | |
479 int max_visits) { | |
480 if (!main_db_) | |
481 return false; | |
482 | |
483 // Add an extra time unit to given end time, because | |
484 // GetAllVisitsInRange, et al. queries' end value is non-inclusive. | |
485 base::Time effective_end_time = | |
486 base::Time::FromInternalValue(end_time.ToInternalValue() + 1); | |
487 | |
488 VisitVector deleted_visits; | |
489 bool more_to_expire = reader->Read(effective_end_time, main_db_, | |
490 &deleted_visits, max_visits); | |
491 | |
492 DeleteEffects deleted_effects; | |
493 DeleteVisitRelatedInfo(deleted_visits, &deleted_effects); | |
494 ExpireURLsForVisits(deleted_visits, &deleted_effects); | |
495 DeleteFaviconsIfPossible(&deleted_effects); | |
496 | |
497 BroadcastNotifications(&deleted_effects, DELETION_EXPIRED); | |
498 | |
499 return more_to_expire; | |
500 } | |
501 | |
502 void ExpireHistoryBackend::ParanoidExpireHistory() { | |
503 // TODO(brettw): Bug 1067331: write this to clean up any errors. | |
504 } | |
505 | |
506 HistoryClient* ExpireHistoryBackend::GetHistoryClient() { | |
507 // We use the history client to determine if a URL is bookmarked. The data is | |
508 // loaded on a separate thread and may not be done by the time we get here. | |
509 // We therefore block until the bookmarks have finished loading. | |
510 if (history_client_) | |
511 history_client_->BlockUntilBookmarksLoaded(); | |
512 return history_client_; | |
513 } | |
514 | |
515 } // namespace history | |
OLD | NEW |