Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1196)

Side by Side Diff: chrome/browser/android/offline_pages/recent_tab_helper.cc

Issue 1936613002: Implementing recent pages snapshot capture. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2016 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/android/offline_pages/recent_tab_helper.h" 5 #include "chrome/browser/android/offline_pages/recent_tab_helper.h"
6 6
7 #include <queue>
8 #include <vector>
9
10 #include "base/bind.h"
11 #include "base/location.h"
12 #include "base/logging.h"
13 #include "base/macros.h"
14 #include "base/thread_task_runner_handle.h"
15 #include "base/time/time.h"
16 #include "chrome/browser/android/offline_pages/offline_page_mhtml_archiver.h"
17 #include "chrome/browser/android/offline_pages/offline_page_model_factory.h"
18 #include "components/offline_pages/offline_page_item.h"
19 #include "components/offline_pages/offline_page_model.h"
20 #include "content/public/browser/browser_context.h"
21 #include "content/public/browser/browser_thread.h"
22 #include "content/public/browser/navigation_entry.h"
23 #include "content/public/browser/navigation_handle.h"
24
7 DEFINE_WEB_CONTENTS_USER_DATA_KEY(offline_pages::RecentTabHelper); 25 DEFINE_WEB_CONTENTS_USER_DATA_KEY(offline_pages::RecentTabHelper);
8 26
// File-local constants and helpers for RecentTabHelper (new-side lines 27-42
// of this diff).
27 namespace {
// Namespace string that client_id() passes to ClientId, both when saving a
// snapshot and when looking up previously saved pages.
// NOTE(review): declared as `const char*`, so the pointer itself is not const.
28 const char* kClientNamespace = "last_n";
29
30 // Max number of pages to keep. The oldest pages that are over this count are
31 // deleted before the next one is saved.
32 const size_t kMaxPagesToKeep = 50;
33
34 // Predicate for priority_queue used to compute the oldest pages.
// Returns true when |left| has the greater creation_time. Used as the
// std::priority_queue comparator in GetPagesToPurge(), this ordering places
// the page with the smallest creation_time at top().
35 struct ComparePagesForPurge {
36 bool operator()(const offline_pages::OfflinePageItem* left,
37 const offline_pages::OfflinePageItem* right) const {
38 return left->creation_time > right->creation_time;
39 }
40 };
41
42 } // namespace
43
9 namespace offline_pages { 44 namespace offline_pages {
10 45
// Constructs the helper as an observer of |web_contents|. The new side of the
// diff starts with no page model attached, creates a SnapshotController on the
// current thread task runner with this object as its second argument, and
// turns snapshots off for off-the-record browser contexts.
11 RecentTabHelper::RecentTabHelper(content::WebContents* web_contents) 46 RecentTabHelper::RecentTabHelper(content::WebContents* web_contents)
12 : content::WebContentsObserver(web_contents) { 47 : content::WebContentsObserver(web_contents),
48 page_model_(nullptr),
49 page_model_is_loaded_(false),
50 weak_ptr_factory_(this) {
// Construction is expected to happen on the browser UI thread.
51 DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
52 snapshot_controller_.reset(new SnapshotController(
53 base::ThreadTaskRunnerHandle::Get(), this));
54
55 // TODO(dimich): When we have BackgroundOffliner, avoid capturing prerenderer
56 // WebContents with its origin as well.
// StartSnapshot() returns immediately when this flag is true.
57 never_do_snapshots_ = web_contents->GetBrowserContext()->IsOffTheRecord();
13 } 58 }
14 59
// Unregisters this helper from the page model. page_model_ is only set by
// StartSnapshot(), so it is still null if no snapshot was ever started.
15 RecentTabHelper::~RecentTabHelper() { 60 RecentTabHelper::~RecentTabHelper() {
61 if (page_model_)
62 page_model_->RemoveObserver(this);
63 }
64
// Called when a navigation finishes. Only a committed, non-error navigation in
// the main frame has any effect: it resets the snapshot controller and clears
// snapshot_url_, so that IsSamePage() no longer matches the previous page.
65 void RecentTabHelper::DidFinishNavigation(
66 content::NavigationHandle* navigation_handle) {
67 if (navigation_handle->IsInMainFrame() &&
68 navigation_handle->HasCommitted() &&
69 !navigation_handle->IsErrorPage()) {
70 // New navigation, new snapshot session.
71 snapshot_controller_->Reset();
72 snapshot_url_ = GURL::EmptyGURL();
73 }
74 }
75
// Forwards the document-available notification to the snapshot controller.
76 void RecentTabHelper::DocumentAvailableInMainFrame() {
77 snapshot_controller_->DocumentAvailableInMainFrame();
78 }
79
// Forwards the onload-completed notification to the snapshot controller.
80 void RecentTabHelper::DocumentOnLoadCompletedInMainFrame() {
81 snapshot_controller_->DocumentOnLoadCompletedInMainFrame();
82 }
83
// Page model observer callback. Refreshes the cached loaded flag and re-enters
// StartSnapshot() to resume the snapshot chain.
// NOTE(review): |model| is unused; the state is read from page_model_ instead.
// NOTE(review): the doubled semicolon below is a stray empty statement.
// NOTE(review): StartSnapshot() assigns snapshot_url_ again from the last
// committed URL, so the URL recorded before the model finished loading is
// overwritten rather than compared - confirm this is intended.
84 void RecentTabHelper::OfflinePageModelLoaded(OfflinePageModel* model) {
85 page_model_is_loaded_ = page_model_->is_loaded();;
86 StartSnapshot();
87 }
88
89 // This starts a sequence of async operations chained through callbacks:
90 // - ensure OfflinePageModel is loaded
91 // - compute the set of old 'last_n' pages that have to be purged
92 // - delete the pages found in the previous step
93 // - snapshot the current web contents
dewittj 2016/04/29 20:29:30 I wonder if the snapshot should not actually wait
Dmitry Titov 2016/04/30 00:13:08 This is going to change as soon as cache pruning b
dewittj 2016/05/02 19:54:27 Okay. Could you document that decision here in th
94 // Along the chain, the original URL is passed and compared, to detect
95 // possible navigation and cancel snapshot in that case.
// Entry point of the chain: records the last committed URL in snapshot_url_,
// attaches to the page model on first use, and proceeds to
// ContinueSnapshotWithModel() once the model reports that it is loaded.
96 void RecentTabHelper::StartSnapshot() {
// never_do_snapshots_ is set in the constructor for off-the-record contexts.
97 if (never_do_snapshots_)
98 return;
99
100 snapshot_url_ = web_contents()->GetLastCommittedURL();
101
102 // Lazily get the page model and start its load if not yet loaded. It will
103 // call this method again when it's done loading.
104 if (!page_model_) {
dewittj 2016/04/29 20:29:30 Any reason you do this lazily?
Dmitry Titov 2016/04/30 00:13:08 Mostly because of never_do_snapshot - some tab hel
105 page_model_ = GetPageModel();
106 page_model_->AddObserver(this);
107 page_model_is_loaded_ = page_model_->is_loaded();
108 }
109
// If the model is not loaded yet, stop here; OfflinePageModelLoaded() calls
// StartSnapshot() again.
110 if (page_model_is_loaded_)
111 ContinueSnapshotWithModel();
112 }
113
// Test hook: takes ownership of |test_archiver| and stores it in
// test_archiver_, which ContinueSnapshotAfterPurge() prefers over creating an
// OfflinePageMHTMLArchiver.
114 void RecentTabHelper::SetArchiverForTest(
115 std::unique_ptr<OfflinePageArchiver> test_archiver) {
116 test_archiver_ = std::move(test_archiver);
117 }
118
// Test hook: replaces the snapshot controller created in the constructor with
// a new SnapshotController built on |task_runner|.
119 void RecentTabHelper::SetTaskRunnerForTest(
120 const scoped_refptr<base::SingleThreadTaskRunner>& task_runner) {
121 snapshot_controller_.reset(new SnapshotController(task_runner, this));
122 }
123
// Second step of the snapshot chain, run once the page model is loaded. Stops
// if the tab has navigated away or the model refuses to save snapshot_url_;
// otherwise asks the model to delete the pages chosen by GetPagesToPurge() and
// continues in ContinueSnapshotAfterPurge().
// NOTE(review): both early returns below exit without calling
// ReportSnapshotCompleted(), unlike the early exits in
// ContinueSnapshotAfterPurge() - confirm the snapshot controller does not
// need to be notified here.
124 void RecentTabHelper::ContinueSnapshotWithModel() {
125 if (!IsSamePage())
126 return;
127
128 if (!page_model_->CanSavePage(snapshot_url_)) {
129 // TODO(dimich): Add UMA.
130 return;
131 }
132
133 // TODO(dimich): Implement automatic cleanup as per design doc, based on
134 // storage limits and page age.
135 std::vector<int64_t> ids = GetPagesToPurge();
// The continuation is bound to a pointer obtained from weak_ptr_factory_.
136 page_model_->DeletePagesByOfflineId(
137 ids, base::Bind(&RecentTabHelper::ContinueSnapshotAfterPurge,
138 weak_ptr_factory_.GetWeakPtr()));
139 }
140
// Continuation invoked with the result of the purge started in
// ContinueSnapshotWithModel(). Every exit that does not start a save calls
// ReportSnapshotCompleted(); otherwise the save is started and
// SavePageCallback() reports completion.
141 void RecentTabHelper::ContinueSnapshotAfterPurge(
142 OfflinePageModel::DeletePageResult result) {
143 // NOT_FOUND is ok because this is what we get when passing empty
144 // vector of ids.
dewittj 2016/04/29 20:29:30 This is probably bad behavior, please open a bug o
Dmitry Titov 2016/04/30 00:13:08 Done. 608057 and TODO.
145 if (result != OfflinePageModel::DeletePageResult::SUCCESS &&
146 result != OfflinePageModel::DeletePageResult::NOT_FOUND) {
147 // If previous pages can't be deleted, don't add new ones.
148 ReportSnapshotCompleted();
149 return;
150 }
151
// Abandon the snapshot if the tab no longer shows snapshot_url_.
152 if (!IsSamePage()) {
153 ReportSnapshotCompleted();
154 return;
155 }
156
// release() empties test_archiver_, so an injected test archiver is consumed
// by the first save; later saves construct an OfflinePageMHTMLArchiver.
// (presumably test_archiver_ is a std::unique_ptr member; its declaration is
// not visible on this page)
157 std::unique_ptr<OfflinePageArchiver> archiver(
158 test_archiver_.get() ? test_archiver_.release() :
dewittj 2016/04/29 20:29:30 I had to think about this a bit. |test_archiver_|
Dmitry Titov 2016/04/30 00:13:08 Done. Added a TestArchiverFactory as a local class
159 new OfflinePageMHTMLArchiver(web_contents()));
160
161 page_model_->SavePage(
162 snapshot_url_, client_id(), std::move(archiver),
163 base::Bind(&RecentTabHelper::SavePageCallback,
164 weak_ptr_factory_.GetWeakPtr()));
165 }
166
// Completion callback for the SavePage() call made in
// ContinueSnapshotAfterPurge(). In this patch set neither |result| nor
// |offline_id| is read; the method only reports that the snapshot finished.
167 void RecentTabHelper::SavePageCallback(OfflinePageModel::SavePageResult result,
168 int64_t offline_id) {
169 // TODO(dimich): add UMA, including result.
dewittj 2016/04/29 20:29:30 is there a bug for the uma?
Dmitry Titov 2016/04/30 00:13:08 Done, added bug number.
170 ReportSnapshotCompleted();
171 }
172
// Returns the OfflinePageModel that OfflinePageModelFactory provides for the
// browser context of the observed web contents.
173 OfflinePageModel* RecentTabHelper::GetPageModel() {
174 content::WebContents* contents = web_contents();
175 return OfflinePageModelFactory::GetForBrowserContext(
176 contents->GetBrowserContext());
177 }
178
// Builds the list of offline ids to delete before the next snapshot is saved:
// every stored page of this client whose URL equals snapshot_url_, plus enough
// of the oldest remaining pages to bring the total down to kMaxPagesToKeep.
179 std::vector<int64_t> RecentTabHelper::GetPagesToPurge() const {
180 std::vector<int64_t> pages_to_purge;
dewittj 2016/04/29 20:29:31 this name is a little ambiguous. It's really a li
Dmitry Titov 2016/04/30 00:13:08 Hmm, this is a list of items not necessarily relat
181 std::vector<int64_t> page_ids =
182 page_model_->GetOfflineIdsForClientId(client_id());
dewittj 2016/04/29 20:29:30 This would be the first usage of multiple offline
Dmitry Titov 2016/04/30 00:13:08 Coudl you clarify? Is there a plan to remove non-u
dewittj 2016/05/02 19:54:27 There isn't a plan to remove non-unique client ids
183
184 // Use priority queue to figure out the set of oldest pages to purge.
185 std::priority_queue<const OfflinePageItem*,
186 std::vector<const OfflinePageItem*>,
187 ComparePagesForPurge> pages_queue;
188
189 for (const auto& offline_id : page_ids) {
// NOTE(review): |page| is dereferenced below without a null check - confirm
// that GetPageByOfflineId() cannot return null for ids the model just listed.
190 const OfflinePageItem* page = page_model_->GetPageByOfflineId(offline_id);
191 // If there is already a snapshot of this page, remove it so we don't
192 // have multiple snapshots of the same page.
193 if (page->url == snapshot_url_) {
194 pages_to_purge.push_back(offline_id);
dewittj 2016/04/29 20:29:30 I think that we should wait for a successful snaps
Dmitry Titov 2016/04/30 00:13:08 See comment above. It is probably safer to remove
dewittj 2016/05/02 19:54:27 Acknowledged.
195 } else {
196 pages_queue.push(page);
197 }
198 }
199
200 // Negative counter means nothing else to purge.
// NOTE(review): size() and kMaxPagesToKeep are both unsigned, so this
// subtraction is evaluated in size_t and only then converted to int. The
// negative case therefore relies on unsigned wraparound followed by a
// narrowing conversion rather than on signed arithmetic.
201 int count_to_purge =
202 page_ids.size() - kMaxPagesToKeep - pages_to_purge.size();
203
// top() is the queued page with the smallest creation_time (see
// ComparePagesForPurge), so pages are taken oldest first.
204 for (int count = 0; count < count_to_purge; ++count) {
dewittj 2016/04/29 20:29:30 int -> size_t
Dmitry Titov 2016/04/30 00:13:08 Note count_to_purge can be negative - then the loo
dewittj 2016/05/02 19:54:27 Acknowledged.
205 pages_to_purge.push_back(pages_queue.top()->offline_id);
206 pages_queue.pop();
207 }
208
209 return pages_to_purge;
210 }
211
// Tells the snapshot controller that the pending snapshot attempt is over.
// It is called on the abort paths of ContinueSnapshotAfterPurge() and from
// SavePageCallback().
212 void RecentTabHelper::ReportSnapshotCompleted() {
213 snapshot_controller_->PendingSnapshotCompleted();
214 }
215
// Returns true when web_contents() is non-null and its last committed URL
// still equals snapshot_url_, i.e. the URL recorded by StartSnapshot().
216 bool RecentTabHelper::IsSamePage() const {
217 return web_contents() &&
218 (web_contents()->GetLastCommittedURL() == snapshot_url_);
219 }
220
// Returns the ClientId used for every snapshot taken by this helper: it is
// built from kClientNamespace and an empty string, so the value is the same
// for all pages (presumably the second argument is the per-client id - its
// meaning is not visible on this page).
221 ClientId RecentTabHelper::client_id() const {
222 return ClientId(kClientNamespace, "");
16 } 223 }
17 224
18 } // namespace offline_pages 225 } // namespace offline_pages
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698