Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(90)

Side by Side Diff: components/dom_distiller/core/distiller.cc

Issue 130543003: Store page no for distilled pages undergoing distillation. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/dom_distiller/core/distiller.h" 5 #include "components/dom_distiller/core/distiller.h"
6 6
7 #include <map> 7 #include <map>
8 8
9 #include "base/bind.h" 9 #include "base/bind.h"
10 #include "base/callback.h" 10 #include "base/callback.h"
(...skipping 23 matching lines...) Expand all
34 34
35 DistillerFactoryImpl::~DistillerFactoryImpl() {} 35 DistillerFactoryImpl::~DistillerFactoryImpl() {}
36 36
37 scoped_ptr<Distiller> DistillerFactoryImpl::CreateDistiller() { 37 scoped_ptr<Distiller> DistillerFactoryImpl::CreateDistiller() {
38 scoped_ptr<DistillerImpl> distiller(new DistillerImpl( 38 scoped_ptr<DistillerImpl> distiller(new DistillerImpl(
39 *distiller_page_factory_, *distiller_url_fetcher_factory_)); 39 *distiller_page_factory_, *distiller_url_fetcher_factory_));
40 distiller->Init(); 40 distiller->Init();
41 return distiller.PassAs<Distiller>(); 41 return distiller.PassAs<Distiller>();
42 } 42 }
43 43
44 DistillerImpl::DistilledPageData::DistilledPageData() {}
45
46 DistillerImpl::DistilledPageData::~DistilledPageData() {}
47
44 DistillerImpl::DistillerImpl( 48 DistillerImpl::DistillerImpl(
45 const DistillerPageFactory& distiller_page_factory, 49 const DistillerPageFactory& distiller_page_factory,
46 const DistillerURLFetcherFactory& distiller_url_fetcher_factory) 50 const DistillerURLFetcherFactory& distiller_url_fetcher_factory)
47 : distiller_url_fetcher_factory_(distiller_url_fetcher_factory), 51 : distiller_url_fetcher_factory_(distiller_url_fetcher_factory) {
48 distillation_in_progress_(false) {
49 page_distiller_.reset(new PageDistiller(distiller_page_factory)); 52 page_distiller_.reset(new PageDistiller(distiller_page_factory));
50 } 53 }
51 54
52 DistillerImpl::~DistillerImpl() { 55 DistillerImpl::~DistillerImpl() { DCHECK(in_progress_pages_.empty()); }
53 DCHECK(image_fetchers_.empty());
54 DCHECK(!distillation_in_progress_);
55 }
56 56
57 void DistillerImpl::Init() { 57 void DistillerImpl::Init() {
58 DCHECK(!distillation_in_progress_); 58 DCHECK(in_progress_pages_.empty());
59 page_distiller_->Init(); 59 page_distiller_->Init();
60 article_proto_.reset(new DistilledArticleProto()); 60 article_proto_.reset(new DistilledArticleProto());
61 } 61 }
62 62
63 void DistillerImpl::AddToDistillationQueue(int page_no, const GURL& url) {
64 if (!IsValidPageNo(page_no) && url.is_valid() &&
cjhopman 2014/02/12 20:39:09 Maybe rename IsValidPageNo to IsUsedPageNumber or
shashi 2014/02/13 01:03:11 Done.
65 article_proto_->pages_size() < kMaxPagesInArticle &&
cjhopman 2014/02/12 20:39:09 Should this account for pages in the queue but not
cjhopman 2014/02/12 20:39:09 Also, article_proto_ isn't actually created until
shashi 2014/02/13 01:03:11 Good catch, I missed that, added a test. On 2014/0
66 processed_urls_.find(url.spec()) == processed_urls_.end()) {
67 in_progress_pages_.insert(page_no);
68 pages_to_be_distilled_[page_no] = url;
69 }
70 }
71
72 bool DistillerImpl::IsValidPageNo(int page_no) const {
73 return in_progress_pages_.find(page_no) != in_progress_pages_.end() ||
74 distilled_pages_index_.find(page_no) != distilled_pages_index_.end();
75 }
76
63 void DistillerImpl::DistillPage(const GURL& url, 77 void DistillerImpl::DistillPage(const GURL& url,
64 const DistillerCallback& distillation_cb) { 78 const DistillerCallback& distillation_cb) {
65 DCHECK(!distillation_in_progress_); 79 DCHECK(in_progress_pages_.empty());
66 distillation_cb_ = distillation_cb; 80 distillation_cb_ = distillation_cb;
67 DistillPage(url); 81
82 AddToDistillationQueue(0, url);
83 DistillNextPage();
68 } 84 }
69 85
70 void DistillerImpl::DistillPage(const GURL& url) { 86 void DistillerImpl::DistillNextPage() {
71 DCHECK(!distillation_in_progress_); 87 if (!pages_to_be_distilled_.empty()) {
72 if (url.is_valid() && article_proto_->pages_size() < kMaxPagesInArticle && 88 std::map<int, GURL>::const_iterator front = pages_to_be_distilled_.begin();
73 processed_urls_.find(url.spec()) == processed_urls_.end()) { 89 int page_no = front->first;
74 distillation_in_progress_ = true; 90 const GURL url = front->second;
75 // Distill the next page. 91 // Distill the next page.
76 DCHECK(url.is_valid()); 92 DCHECK(url.is_valid());
77 DCHECK_LT(article_proto_->pages_size(), kMaxPagesInArticle); 93 DCHECK_LT(article_proto_->pages_size(), kMaxPagesInArticle);
78 page_distiller_->DistillPage( 94 page_distiller_->DistillPage(
79 url, 95 url,
80 base::Bind(&DistillerImpl::OnPageDistillationFinished, 96 base::Bind(&DistillerImpl::OnPageDistillationFinished,
81 base::Unretained(this), 97 base::Unretained(this),
82 url)); 98 page_no));
83 } else {
84 RunDistillerCallbackIfDone();
85 } 99 }
86 } 100 }
87 101
88 void DistillerImpl::OnPageDistillationFinished( 102 void DistillerImpl::OnPageDistillationFinished(
89 const GURL& page_url, 103 int page_no,
90 scoped_ptr<DistilledPageInfo> distilled_page, 104 scoped_ptr<DistilledPageInfo> distilled_page,
91 bool distillation_successful) { 105 bool distillation_successful) {
92 DCHECK(distillation_in_progress_);
93 DCHECK(distilled_page.get()); 106 DCHECK(distilled_page.get());
94 if (!distillation_successful) { 107 DCHECK(IsValidPageNo(page_no));
95 RunDistillerCallbackIfDone(); 108 DCHECK(pages_to_be_distilled_.end() != pages_to_be_distilled_.find(page_no));
cjhopman 2014/02/12 20:39:09 nit: swap the order of this comparison: foo.find(_
shashi 2014/02/13 01:03:11 I also thought so, but replicated the style in thi
96 } else { 109 const GURL page_url = pages_to_be_distilled_[page_no];
97 DistilledPageProto* current_page = article_proto_->add_pages(); 110 pages_to_be_distilled_.erase(page_no);
98 // Set the title of the article as the title of the first page. 111 if (distillation_successful) {
99 if (article_proto_->pages_size() == 1) { 112 DistilledPageProto* current_page = new DistilledPageProto();
100 article_proto_->set_title(distilled_page->title); 113 DistilledPageData* page_data = new DistilledPageData();
101 } 114 page_data->proto.reset(current_page);
115 page_data->page_no = page_no;
116 page_data->title = distilled_page->title;
117 distilled_pages_.push_back(page_data);
118 distilled_pages_index_[page_no] = distilled_pages_.size() - 1;
102 119
103 current_page->set_url(page_url.spec()); 120 current_page->set_url(page_url.spec());
104 current_page->set_html(distilled_page->html); 121 current_page->set_html(distilled_page->html);
105 122
106 GURL next_page_url(distilled_page->next_page_url); 123 GURL next_page_url(distilled_page->next_page_url);
107 if (next_page_url.is_valid()) { 124 if (next_page_url.is_valid()) {
108 // The pages should be in same origin. 125 // The pages should be in same origin.
109 DCHECK_EQ(next_page_url.GetOrigin(), page_url.GetOrigin()); 126 DCHECK_EQ(next_page_url.GetOrigin(), page_url.GetOrigin());
110 } 127 }
111 128
112 processed_urls_.insert(page_url.spec()); 129 processed_urls_.insert(page_url.spec());
113 distillation_in_progress_ = false;
114 int page_number = article_proto_->pages_size();
115 for (size_t img_num = 0; img_num < distilled_page->image_urls.size(); 130 for (size_t img_num = 0; img_num < distilled_page->image_urls.size();
116 ++img_num) { 131 ++img_num) {
117 std::string image_id = 132 std::string image_id =
118 base::IntToString(page_number) + "_" + base::IntToString(img_num); 133 base::IntToString(page_no + 1) + "_" + base::IntToString(img_num);
119 FetchImage(current_page, image_id, distilled_page->image_urls[img_num]); 134 FetchImage(page_data, image_id, distilled_page->image_urls[img_num]);
120 } 135 }
121 DistillPage(next_page_url); 136
137 AddToDistillationQueue(page_no + 1, next_page_url);
138 CheckIfPageDone(page_data);
139 DistillNextPage();
140 } else {
141 in_progress_pages_.erase(page_no);
cjhopman 2014/02/12 20:39:09 I don't think this is properly handled in RunDisti
shashi 2014/02/13 01:03:11 Sorry, I do not understand the comment, can you be
cjhopman 2014/02/13 02:48:48 I mean that I wasn't sure what happens if one page
shashi 2014/02/13 20:09:51 I am not sure about the correct behavior either, c
142 RunDistillerCallbackIfDone();
122 } 143 }
144
145 DCHECK(pages_to_be_distilled_.end() == pages_to_be_distilled_.find(page_no));
cjhopman 2014/02/12 20:39:09 nit: swap order of this comparison
shashi 2014/02/13 01:03:11 Done.
123 } 146 }
124 147
125 void DistillerImpl::FetchImage(DistilledPageProto* distilled_page_proto, 148 void DistillerImpl::FetchImage(DistilledPageData* distilled_page_data,
126 const std::string& image_id, 149 const std::string& image_id,
127 const std::string& item) { 150 const std::string& item) {
151 DCHECK(distilled_page_data);
128 DistillerURLFetcher* fetcher = 152 DistillerURLFetcher* fetcher =
129 distiller_url_fetcher_factory_.CreateDistillerURLFetcher(); 153 distiller_url_fetcher_factory_.CreateDistillerURLFetcher();
130 image_fetchers_.push_back(fetcher); 154 distilled_page_data->image_fetchers_.push_back(fetcher);
155
131 fetcher->FetchURL(item, 156 fetcher->FetchURL(item,
132 base::Bind(&DistillerImpl::OnFetchImageDone, 157 base::Bind(&DistillerImpl::OnFetchImageDone,
133 base::Unretained(this), 158 base::Unretained(this),
134 base::Unretained(distilled_page_proto), 159 base::Unretained(distilled_page_data),
135 base::Unretained(fetcher), 160 base::Unretained(fetcher),
136 image_id)); 161 image_id));
137 } 162 }
138 163
139 void DistillerImpl::OnFetchImageDone(DistilledPageProto* distilled_page_proto, 164 void DistillerImpl::OnFetchImageDone(DistilledPageData* distilled_page_data,
140 DistillerURLFetcher* url_fetcher, 165 DistillerURLFetcher* url_fetcher,
141 const std::string& id, 166 const std::string& id,
142 const std::string& response) { 167 const std::string& response) {
143 DCHECK_GT(article_proto_->pages_size(), 0); 168 DCHECK(distilled_page_data);
144 DCHECK(distilled_page_proto); 169 DCHECK(distilled_page_data->proto);
145 DCHECK(url_fetcher); 170 DCHECK(url_fetcher);
146 ScopedVector<DistillerURLFetcher>::iterator fetcher_it = 171 ScopedVector<DistillerURLFetcher>::iterator fetcher_it =
147 std::find(image_fetchers_.begin(), image_fetchers_.end(), url_fetcher); 172 std::find(distilled_page_data->image_fetchers_.begin(),
173 distilled_page_data->image_fetchers_.end(),
174 url_fetcher);
148 175
149 DCHECK(fetcher_it != image_fetchers_.end()); 176 DCHECK(fetcher_it != distilled_page_data->image_fetchers_.end());
150 // Delete the |url_fetcher| by DeleteSoon since the OnFetchImageDone 177 // Delete the |url_fetcher| by DeleteSoon since the OnFetchImageDone
151 // callback is invoked by the |url_fetcher|. 178 // callback is invoked by the |url_fetcher|.
152 image_fetchers_.weak_erase(fetcher_it); 179 distilled_page_data->image_fetchers_.weak_erase(fetcher_it);
153 base::MessageLoop::current()->DeleteSoon(FROM_HERE, url_fetcher); 180 base::MessageLoop::current()->DeleteSoon(FROM_HERE, url_fetcher);
154 DistilledPageProto_Image* image = distilled_page_proto->add_image(); 181
182 DistilledPageProto_Image* image = distilled_page_data->proto->add_image();
155 image->set_name(id); 183 image->set_name(id);
156 image->set_data(response); 184 image->set_data(response);
157 RunDistillerCallbackIfDone(); 185
186 CheckIfPageDone(distilled_page_data);
187 }
188
189 void DistillerImpl::CheckIfPageDone(
190 const DistilledPageData* distilled_page_data) {
191 DCHECK(distilled_page_data);
192 int page_no = distilled_page_data->page_no;
193 DCHECK(in_progress_pages_.end() != in_progress_pages_.find(page_no));
cjhopman 2014/02/12 20:39:09 nit: swap order
shashi 2014/02/13 01:03:11 Done.
shashi 2014/02/13 01:03:11 Done.
194 if (distilled_page_data->image_fetchers_.empty()) {
195 in_progress_pages_.erase(page_no);
196 RunDistillerCallbackIfDone();
197 }
158 } 198 }
159 199
160 void DistillerImpl::RunDistillerCallbackIfDone() { 200 void DistillerImpl::RunDistillerCallbackIfDone() {
161 if (image_fetchers_.empty() && !distillation_in_progress_) { 201 DCHECK(!distillation_cb_.is_null());
202 if (in_progress_pages_.empty()) {
203 bool first_page = true;
204 // Stitch the pages back into the article.
205 for (std::map<int, size_t>::const_iterator it =
206 distilled_pages_index_.begin();
207 it != distilled_pages_index_.end();) {
208 const DistilledPageData* page_data = distilled_pages_[it->second];
209 *(article_proto_->add_pages()) = *(page_data->proto);
210
211 if (first_page) {
212 article_proto_->set_title(page_data->title);
213 first_page = false;
214 }
215
216 distilled_pages_index_.erase(it++);
217 }
218
219 distilled_pages_.clear();
220
221 DCHECK(distilled_pages_.empty());
222 DCHECK(distilled_pages_index_.empty());
162 distillation_cb_.Run(article_proto_.Pass()); 223 distillation_cb_.Run(article_proto_.Pass());
224 distillation_cb_.Reset();
163 } 225 }
164 } 226 }
165 227
166 } // namespace dom_distiller 228 } // namespace dom_distiller
OLDNEW
« components/dom_distiller/core/distiller.h ('K') | « components/dom_distiller/core/distiller.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698