Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(261)

Side by Side Diff: chrome/browser/android/offline_pages/recent_tab_helper.cc

Issue 1936613002: Implementing recent pages snapshot capture. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: review comments Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2016 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/android/offline_pages/recent_tab_helper.h" 5 #include "chrome/browser/android/offline_pages/recent_tab_helper.h"
6 6
7 #include <queue>
8 #include <vector>
9
10 #include "base/bind.h"
11 #include "base/location.h"
12 #include "base/logging.h"
13 #include "base/macros.h"
14 #include "base/thread_task_runner_handle.h"
15 #include "base/time/time.h"
16 #include "chrome/browser/android/offline_pages/offline_page_mhtml_archiver.h"
17 #include "chrome/browser/android/offline_pages/offline_page_model_factory.h"
18 #include "components/offline_pages/offline_page_item.h"
19 #include "components/offline_pages/offline_page_model.h"
20 #include "content/public/browser/browser_context.h"
21 #include "content/public/browser/browser_thread.h"
22 #include "content/public/browser/navigation_entry.h"
23 #include "content/public/browser/navigation_handle.h"
24
7 DEFINE_WEB_CONTENTS_USER_DATA_KEY(offline_pages::RecentTabHelper); 25 DEFINE_WEB_CONTENTS_USER_DATA_KEY(offline_pages::RecentTabHelper);
8 26
27 namespace {
// Client namespace under which all snapshots taken by this helper are stored.
// Declared as a const array (rather than a pointer to const) so that the
// constant itself cannot be re-pointed.
const char kClientNamespace[] = "last_n";

// Max number of pages to keep. The oldest pages that are over this count are
// deleted before the next one is saved.
const size_t kMaxPagesToKeep = 50;
33
34 // Predicate for priority_queue used to compute the oldest pages.
35 struct ComparePagesForPurge {
36 bool operator()(const offline_pages::OfflinePageItem* left,
37 const offline_pages::OfflinePageItem* right) const {
38 return left->creation_time > right->creation_time;
39 }
40 };
41
42 } // namespace
43
9 namespace offline_pages { 44 namespace offline_pages {
10 45
11 RecentTabHelper::RecentTabHelper(content::WebContents* web_contents) 46 RecentTabHelper::RecentTabHelper(content::WebContents* web_contents)
12 : content::WebContentsObserver(web_contents) { 47 : content::WebContentsObserver(web_contents),
48 page_model_(nullptr),
49 page_model_is_loaded_(false),
50 weak_ptr_factory_(this) {
51 DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
52 snapshot_controller_.reset(new SnapshotController(
53 base::ThreadTaskRunnerHandle::Get(), this));
54
55 // TODO(dimich): When we have BackgroundOffliner, avoid capturing prerenderer
56 // WebContents with its origin as well.
57 never_do_snapshots_ = web_contents->GetBrowserContext()->IsOffTheRecord();
13 } 58 }
14 59
15 RecentTabHelper::~RecentTabHelper() { 60 RecentTabHelper::~RecentTabHelper() {
61 if (page_model_)
62 page_model_->RemoveObserver(this);
63 }
64
65 void RecentTabHelper::DidFinishNavigation(
66 content::NavigationHandle* navigation_handle) {
67 if (navigation_handle->IsInMainFrame() &&
68 navigation_handle->HasCommitted() &&
69 !navigation_handle->IsErrorPage()) {
70 // New navigation, new snapshot session.
71 snapshot_controller_->Reset();
72 snapshot_url_ = GURL::EmptyGURL();
73 }
74 }
75
76 void RecentTabHelper::DocumentAvailableInMainFrame() {
77 snapshot_controller_->DocumentAvailableInMainFrame();
78 }
79
80 void RecentTabHelper::DocumentOnLoadCompletedInMainFrame() {
81 snapshot_controller_->DocumentOnLoadCompletedInMainFrame();
82 }
83
84 void RecentTabHelper::OfflinePageModelLoaded(OfflinePageModel* model) {
85 page_model_is_loaded_ = page_model_->is_loaded();;
86 StartSnapshot();
87 }
88
89 // This starts a sequence of async operations chained through callbacks:
90 // - ensure OfflinePageModel is loaded
91 // - compute the set of old 'last_n' pages that have to be purged
92 // - delete the pages found in the previous step
93 // - snapshot the current web contents
94 // Along the chain, the original URL is passed and compared, to detect
95 // possible navigation and cancel snapshot in that case.
96 void RecentTabHelper::StartSnapshot() {
97 if (never_do_snapshots_)
98 return;
99
100 snapshot_url_ = web_contents()->GetLastCommittedURL();
101
102 // Lazily get the page model and start its load if not yet loaded. It will
103 // call this method again when it's done loading.
104 if (!page_model_) {
105 page_model_ = GetPageModel();
106 page_model_->AddObserver(this);
107 page_model_is_loaded_ = page_model_->is_loaded();
108 }
109
110 if (page_model_is_loaded_)
111 ContinueSnapshotWithModel();
112 }
113
114 void RecentTabHelper::SetArchiveFactoryForTest(
115 std::unique_ptr<TestArchiveFactory> test_archive_factory) {
116 test_archive_factory_ = std::move(test_archive_factory);
117 }
118
119 void RecentTabHelper::SetTaskRunnerForTest(
120 const scoped_refptr<base::SingleThreadTaskRunner>& task_runner) {
121 snapshot_controller_.reset(new SnapshotController(task_runner, this));
122 }
123
124 void RecentTabHelper::ContinueSnapshotWithModel() {
125 if (!IsSamePage())
126 return;
127
128 if (!page_model_->CanSavePage(snapshot_url_)) {
129 // TODO(dimich): Add UMA. Bug 608112.
130 return;
131 }
132
133 // TODO(dimich): Implement automatic cleanup as per design doc, based on
134 // storage limits and page age.
135 std::vector<int64_t> ids = GetPagesToPurge();
136 page_model_->DeletePagesByOfflineId(
137 ids, base::Bind(&RecentTabHelper::ContinueSnapshotAfterPurge,
138 weak_ptr_factory_.GetWeakPtr()));
139 }
140
141 void RecentTabHelper::ContinueSnapshotAfterPurge(
142 OfflinePageModel::DeletePageResult result) {
143 // NOT_FOUND is because it's what we get when passing empty vector of ids.
144 // TODO(dimich): remove NOT_FOUND when bug 608057 is fixed.
145 if (result != OfflinePageModel::DeletePageResult::SUCCESS &&
146 result != OfflinePageModel::DeletePageResult::NOT_FOUND) {
147 // If previous pages can't be deleted, don't add new ones.
148 ReportSnapshotCompleted();
149 return;
150 }
151
152 if (!IsSamePage()) {
153 ReportSnapshotCompleted();
154 return;
155 }
156
157 // Create either test Archiver or a regular one.
158 std::unique_ptr<OfflinePageArchiver> archiver;
159 if (test_archive_factory_.get()) {
160 archiver = test_archive_factory_->CreateArchiver(web_contents());
161 } else {
162 archiver.reset(new OfflinePageMHTMLArchiver(web_contents()));
163 }
164
165 page_model_->SavePage(
166 snapshot_url_, client_id(), std::move(archiver),
167 base::Bind(&RecentTabHelper::SavePageCallback,
168 weak_ptr_factory_.GetWeakPtr()));
169 }
170
171 void RecentTabHelper::SavePageCallback(OfflinePageModel::SavePageResult result,
172 int64_t offline_id) {
173 // TODO(dimich): add UMA, including result. Bug 608112.
174 ReportSnapshotCompleted();
175 }
176
177 OfflinePageModel* RecentTabHelper::GetPageModel() {
178 content::WebContents* contents = web_contents();
179 return OfflinePageModelFactory::GetForBrowserContext(
180 contents->GetBrowserContext());
181 }
182
183 // Collects folloing pages from lastN namespace:
184 // - the ones with the same online URL
185 // - the oldest pages, enough to keep kMaxPagesToKeep limit.
186 std::vector<int64_t> RecentTabHelper::GetPagesToPurge() const {
187 std::vector<int64_t> pages_to_purge;
188 std::vector<int64_t> page_ids =
189 page_model_->GetOfflineIdsForClientId(client_id());
190
191 // Use priority queue to figure out the set of oldest pages to purge.
192 std::priority_queue<const OfflinePageItem*,
193 std::vector<const OfflinePageItem*>,
194 ComparePagesForPurge> pages_queue;
195
196 for (const auto& offline_id : page_ids) {
197 const OfflinePageItem* page = page_model_->GetPageByOfflineId(offline_id);
198 // If there is already a snapshot of this page, remove it so we don't
199 // have multiple snapshots of the same page.
200 if (page->url == snapshot_url_) {
201 pages_to_purge.push_back(offline_id);
202 } else {
203 pages_queue.push(page);
204 }
205 }
206
207 // Negative counter means nothing else to purge.
208 int count_to_purge =
209 page_ids.size() - kMaxPagesToKeep - pages_to_purge.size();
210
211 for (int count = 0; count < count_to_purge; ++count) {
212 pages_to_purge.push_back(pages_queue.top()->offline_id);
213 pages_queue.pop();
214 }
215
216 return pages_to_purge;
217 }
218
219 void RecentTabHelper::ReportSnapshotCompleted() {
220 snapshot_controller_->PendingSnapshotCompleted();
221 }
222
223 bool RecentTabHelper::IsSamePage() const {
224 return web_contents() &&
225 (web_contents()->GetLastCommittedURL() == snapshot_url_);
226 }
227
228 ClientId RecentTabHelper::client_id() const {
229 return ClientId(kClientNamespace, "");
16 } 230 }
17 231
18 } // namespace offline_pages 232 } // namespace offline_pages
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698