Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(4766)

Unified Diff: chrome/browser/android/offline_pages/recent_tab_helper.cc

Issue 1936613002: Implementing recent pages snapshot capture. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: chrome/browser/android/offline_pages/recent_tab_helper.cc
diff --git a/chrome/browser/android/offline_pages/recent_tab_helper.cc b/chrome/browser/android/offline_pages/recent_tab_helper.cc
index 13983db4ada93d176eae37eeedfb2620fed455bd..ea428feb0796405219487c5e12dc5dc17a38f6d3 100644
--- a/chrome/browser/android/offline_pages/recent_tab_helper.cc
+++ b/chrome/browser/android/offline_pages/recent_tab_helper.cc
@@ -4,15 +4,222 @@
#include "chrome/browser/android/offline_pages/recent_tab_helper.h"
+#include <queue>
+#include <vector>
+
+#include "base/bind.h"
+#include "base/location.h"
+#include "base/logging.h"
+#include "base/macros.h"
+#include "base/thread_task_runner_handle.h"
+#include "base/time/time.h"
+#include "chrome/browser/android/offline_pages/offline_page_mhtml_archiver.h"
+#include "chrome/browser/android/offline_pages/offline_page_model_factory.h"
+#include "components/offline_pages/offline_page_item.h"
+#include "components/offline_pages/offline_page_model.h"
+#include "content/public/browser/browser_context.h"
+#include "content/public/browser/browser_thread.h"
+#include "content/public/browser/navigation_entry.h"
+#include "content/public/browser/navigation_handle.h"
+
DEFINE_WEB_CONTENTS_USER_DATA_KEY(offline_pages::RecentTabHelper);
+namespace {
+const char* kClientNamespace = "last_n";
+
+// Max number of pages to keep. The oldest pages that are over this count are
+// deleted before the next one is saved.
+const size_t kMaxPagesToKeep = 50;
+
+// Predicate for priority_queue used to compute the oldest pages.
+struct ComparePagesForPurge {
+ bool operator()(const offline_pages::OfflinePageItem* left,
+ const offline_pages::OfflinePageItem* right) const {
+ return left->creation_time > right->creation_time;
+ }
+};
+
+} // namespace
+
namespace offline_pages {
RecentTabHelper::RecentTabHelper(content::WebContents* web_contents)
- : content::WebContentsObserver(web_contents) {
+ : content::WebContentsObserver(web_contents),
+ page_model_(nullptr),
+ page_model_is_loaded_(false),
+ weak_ptr_factory_(this) {
+ DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
+ snapshot_controller_.reset(new SnapshotController(
+ base::ThreadTaskRunnerHandle::Get(), this));
+
+ // TODO(dimich): When we have BackgroundOffliner, avoid capturing prerenderer
+ // WebContents with its origin as well.
+ never_do_snapshots_ = web_contents->GetBrowserContext()->IsOffTheRecord();
}
RecentTabHelper::~RecentTabHelper() {
+ if (page_model_)
+ page_model_->RemoveObserver(this);
+}
+
+void RecentTabHelper::DidFinishNavigation(
+ content::NavigationHandle* navigation_handle) {
+ if (navigation_handle->IsInMainFrame() &&
+ navigation_handle->HasCommitted() &&
+ !navigation_handle->IsErrorPage()) {
+ // New navigation, new snapshot session.
+ snapshot_controller_->Reset();
+ snapshot_url_ = GURL::EmptyGURL();
+ }
+}
+
+void RecentTabHelper::DocumentAvailableInMainFrame() {
+ snapshot_controller_->DocumentAvailableInMainFrame();
+}
+
+void RecentTabHelper::DocumentOnLoadCompletedInMainFrame() {
+ snapshot_controller_->DocumentOnLoadCompletedInMainFrame();
+}
+
+void RecentTabHelper::OfflinePageModelLoaded(OfflinePageModel* model) {
+ page_model_is_loaded_ = page_model_->is_loaded();;
+ StartSnapshot();
+}
+
+// This starts a sequence of async operations chained through callbacks:
+// - ensure OfflinePageModel is loaded
+// - compute the set of old 'last_n' pages that have to be purged
+// - delete the pages found in the previous step
+// - snapshot the current web contents
dewittj 2016/04/29 20:29:30 I wonder if the snapshot should not actually wait
Dmitry Titov 2016/04/30 00:13:08 This is going to change as soon as cache pruning b
dewittj 2016/05/02 19:54:27 Okay. Could you document that decision here in th
+// Along the chain, the original URL is passed and compared, to detect
+// possible navigation and cancel snapshot in that case.
+void RecentTabHelper::StartSnapshot() {
+ if (never_do_snapshots_)
+ return;
+
+ snapshot_url_ = web_contents()->GetLastCommittedURL();
+
+ // Lazily get the page model and start its load if not yet loaded. It will
+ // call this method again when it's done loading.
+ if (!page_model_) {
dewittj 2016/04/29 20:29:30 Any reason you do this lazily?
Dmitry Titov 2016/04/30 00:13:08 Mostly because of never_do_snapshot - some tab hel
+ page_model_ = GetPageModel();
+ page_model_->AddObserver(this);
+ page_model_is_loaded_ = page_model_->is_loaded();
+ }
+
+ if (page_model_is_loaded_)
+ ContinueSnapshotWithModel();
+}
+
+void RecentTabHelper::SetArchiverForTest(
+ std::unique_ptr<OfflinePageArchiver> test_archiver) {
+ test_archiver_ = std::move(test_archiver);
+}
+
+void RecentTabHelper::SetTaskRunnerForTest(
+ const scoped_refptr<base::SingleThreadTaskRunner>& task_runner) {
+ snapshot_controller_.reset(new SnapshotController(task_runner, this));
+}
+
+void RecentTabHelper::ContinueSnapshotWithModel() {
+ if (!IsSamePage())
+ return;
+
+ if (!page_model_->CanSavePage(snapshot_url_)) {
+ // TODO(dimich): Add UMA.
+ return;
+ }
+
+ // TODO(dimich): Implement automatic cleanup as per design doc, based on
+ // storage limits and page age.
+ std::vector<int64_t> ids = GetPagesToPurge();
+ page_model_->DeletePagesByOfflineId(
+ ids, base::Bind(&RecentTabHelper::ContinueSnapshotAfterPurge,
+ weak_ptr_factory_.GetWeakPtr()));
+}
+
+void RecentTabHelper::ContinueSnapshotAfterPurge(
+ OfflinePageModel::DeletePageResult result) {
+ // NOT_FOUND is ok because this is what we get when passing empty
+ // vector of ids.
dewittj 2016/04/29 20:29:30 This is probably bad behavior, please open a bug o
Dmitry Titov 2016/04/30 00:13:08 Done. 608057 and TODO.
+ if (result != OfflinePageModel::DeletePageResult::SUCCESS &&
+ result != OfflinePageModel::DeletePageResult::NOT_FOUND) {
+ // If previous pages can't be deleted, don't add new ones.
+ ReportSnapshotCompleted();
+ return;
+ }
+
+ if (!IsSamePage()) {
+ ReportSnapshotCompleted();
+ return;
+ }
+
+ std::unique_ptr<OfflinePageArchiver> archiver(
+ test_archiver_.get() ? test_archiver_.release() :
dewittj 2016/04/29 20:29:30 I had to think about this a bit. |test_archiver_|
Dmitry Titov 2016/04/30 00:13:08 Done. Added a TestArchiverFactory as a local class
+ new OfflinePageMHTMLArchiver(web_contents()));
+
+ page_model_->SavePage(
+ snapshot_url_, client_id(), std::move(archiver),
+ base::Bind(&RecentTabHelper::SavePageCallback,
+ weak_ptr_factory_.GetWeakPtr()));
+}
+
+void RecentTabHelper::SavePageCallback(OfflinePageModel::SavePageResult result,
+ int64_t offline_id) {
+ // TODO(dimich): add UMA, including result.
dewittj 2016/04/29 20:29:30 is there a bug for the uma?
Dmitry Titov 2016/04/30 00:13:08 Done, added bug number.
+ ReportSnapshotCompleted();
+}
+
+OfflinePageModel* RecentTabHelper::GetPageModel() {
+ content::WebContents* contents = web_contents();
+ return OfflinePageModelFactory::GetForBrowserContext(
+ contents->GetBrowserContext());
+}
+
+std::vector<int64_t> RecentTabHelper::GetPagesToPurge() const {
+ std::vector<int64_t> pages_to_purge;
dewittj 2016/04/29 20:29:31 this name is a little ambiguous. It's really a li
Dmitry Titov 2016/04/30 00:13:08 Hmm, this is a list of items not necessarily relat
+ std::vector<int64_t> page_ids =
+ page_model_->GetOfflineIdsForClientId(client_id());
dewittj 2016/04/29 20:29:30 This would be the first usage of multiple offline
Dmitry Titov 2016/04/30 00:13:08 Coudl you clarify? Is there a plan to remove non-u
dewittj 2016/05/02 19:54:27 There isn't a plan to remove non-unique client ids
+
+ // Use priority queue to figure out the set of oldest pages to purge.
+ std::priority_queue<const OfflinePageItem*,
+ std::vector<const OfflinePageItem*>,
+ ComparePagesForPurge> pages_queue;
+
+ for (const auto& offline_id : page_ids) {
+ const OfflinePageItem* page = page_model_->GetPageByOfflineId(offline_id);
+ // If there is already a snapshot of this page, remove it so we don't
+ // have multiple snapshots of the same page.
+ if (page->url == snapshot_url_) {
+ pages_to_purge.push_back(offline_id);
dewittj 2016/04/29 20:29:30 I think that we should wait for a successful snaps
Dmitry Titov 2016/04/30 00:13:08 See comment above. It is probably safer to remove
dewittj 2016/05/02 19:54:27 Acknowledged.
+ } else {
+ pages_queue.push(page);
+ }
+ }
+
+ // Negative counter means nothing else to purge.
+ int count_to_purge =
+ page_ids.size() - kMaxPagesToKeep - pages_to_purge.size();
+
+ for (int count = 0; count < count_to_purge; ++count) {
dewittj 2016/04/29 20:29:30 int -> size_t
Dmitry Titov 2016/04/30 00:13:08 Note count_to_purge can be negative - then the loo
dewittj 2016/05/02 19:54:27 Acknowledged.
+ pages_to_purge.push_back(pages_queue.top()->offline_id);
+ pages_queue.pop();
+ }
+
+ return pages_to_purge;
+}
+
+void RecentTabHelper::ReportSnapshotCompleted() {
+ snapshot_controller_->PendingSnapshotCompleted();
+}
+
+bool RecentTabHelper::IsSamePage() const {
+ return web_contents() &&
+ (web_contents()->GetLastCommittedURL() == snapshot_url_);
+}
+
+ClientId RecentTabHelper::client_id() const {
+ return ClientId(kClientNamespace, "");
}
} // namespace offline_pages

Powered by Google App Engine
This is Rietveld 408576698