OLD | NEW |
---|---|
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/dom_distiller/core/distiller.h" | 5 #include "components/dom_distiller/core/distiller.h" |
6 | 6 |
7 #include <map> | 7 #include <map> |
8 | 8 |
9 #include "base/bind.h" | 9 #include "base/bind.h" |
10 #include "base/callback.h" | 10 #include "base/callback.h" |
(...skipping 23 matching lines...) Expand all Loading... | |
34 | 34 |
35 DistillerFactoryImpl::~DistillerFactoryImpl() {} | 35 DistillerFactoryImpl::~DistillerFactoryImpl() {} |
36 | 36 |
37 scoped_ptr<Distiller> DistillerFactoryImpl::CreateDistiller() { | 37 scoped_ptr<Distiller> DistillerFactoryImpl::CreateDistiller() { |
38 scoped_ptr<DistillerImpl> distiller(new DistillerImpl( | 38 scoped_ptr<DistillerImpl> distiller(new DistillerImpl( |
39 *distiller_page_factory_, *distiller_url_fetcher_factory_)); | 39 *distiller_page_factory_, *distiller_url_fetcher_factory_)); |
40 distiller->Init(); | 40 distiller->Init(); |
41 return distiller.PassAs<Distiller>(); | 41 return distiller.PassAs<Distiller>(); |
42 } | 42 } |
43 | 43 |
44 DistillerImpl::DistilledPageData::DistilledPageData() {} | |
45 | |
46 DistillerImpl::DistilledPageData::~DistilledPageData() {} | |
47 | |
44 DistillerImpl::DistillerImpl( | 48 DistillerImpl::DistillerImpl( |
45 const DistillerPageFactory& distiller_page_factory, | 49 const DistillerPageFactory& distiller_page_factory, |
46 const DistillerURLFetcherFactory& distiller_url_fetcher_factory) | 50 const DistillerURLFetcherFactory& distiller_url_fetcher_factory) |
47 : distiller_url_fetcher_factory_(distiller_url_fetcher_factory), | 51 : distiller_url_fetcher_factory_(distiller_url_fetcher_factory) { |
48 distillation_in_progress_(false) { | |
49 page_distiller_.reset(new PageDistiller(distiller_page_factory)); | 52 page_distiller_.reset(new PageDistiller(distiller_page_factory)); |
50 } | 53 } |
51 | 54 |
52 DistillerImpl::~DistillerImpl() { | 55 DistillerImpl::~DistillerImpl() { DCHECK(in_progress_pages_.empty()); } |
53 DCHECK(image_fetchers_.empty()); | |
54 DCHECK(!distillation_in_progress_); | |
55 } | |
56 | 56 |
57 void DistillerImpl::Init() { | 57 void DistillerImpl::Init() { |
58 DCHECK(!distillation_in_progress_); | 58 DCHECK(in_progress_pages_.empty()); |
59 page_distiller_->Init(); | 59 page_distiller_->Init(); |
60 article_proto_.reset(new DistilledArticleProto()); | 60 article_proto_.reset(new DistilledArticleProto()); |
61 } | 61 } |
62 | 62 |
63 void DistillerImpl::AddToDistillationQueue(int page_no, const GURL& url) { | |
64 if (!IsValidPageNo(page_no) && url.is_valid() && | |
cjhopman
2014/02/12 20:39:09
Maybe rename IsValidPageNo to IsUsedPageNumber or
shashi
2014/02/13 01:03:11
Done.
| |
65 article_proto_->pages_size() < kMaxPagesInArticle && | |
cjhopman
2014/02/12 20:39:09
Should this account for pages in the queue but not
cjhopman
2014/02/12 20:39:09
Also, article_proto_ isn't actually created until
shashi
2014/02/13 01:03:11
Good catch, I missed that, added a test.
On 2014/0
| |
66 processed_urls_.find(url.spec()) == processed_urls_.end()) { | |
67 in_progress_pages_.insert(page_no); | |
68 pages_to_be_distilled_[page_no] = url; | |
69 } | |
70 } | |
71 | |
72 bool DistillerImpl::IsValidPageNo(int page_no) const { | |
73 return in_progress_pages_.find(page_no) != in_progress_pages_.end() || | |
74 distilled_pages_index_.find(page_no) != distilled_pages_index_.end(); | |
75 } | |
76 | |
63 void DistillerImpl::DistillPage(const GURL& url, | 77 void DistillerImpl::DistillPage(const GURL& url, |
64 const DistillerCallback& distillation_cb) { | 78 const DistillerCallback& distillation_cb) { |
65 DCHECK(!distillation_in_progress_); | 79 DCHECK(in_progress_pages_.empty()); |
66 distillation_cb_ = distillation_cb; | 80 distillation_cb_ = distillation_cb; |
67 DistillPage(url); | 81 |
82 AddToDistillationQueue(0, url); | |
83 DistillNextPage(); | |
68 } | 84 } |
69 | 85 |
70 void DistillerImpl::DistillPage(const GURL& url) { | 86 void DistillerImpl::DistillNextPage() { |
71 DCHECK(!distillation_in_progress_); | 87 if (!pages_to_be_distilled_.empty()) { |
72 if (url.is_valid() && article_proto_->pages_size() < kMaxPagesInArticle && | 88 std::map<int, GURL>::const_iterator front = pages_to_be_distilled_.begin(); |
73 processed_urls_.find(url.spec()) == processed_urls_.end()) { | 89 int page_no = front->first; |
74 distillation_in_progress_ = true; | 90 const GURL url = front->second; |
75 // Distill the next page. | 91 // Distill the next page. |
76 DCHECK(url.is_valid()); | 92 DCHECK(url.is_valid()); |
77 DCHECK_LT(article_proto_->pages_size(), kMaxPagesInArticle); | 93 DCHECK_LT(article_proto_->pages_size(), kMaxPagesInArticle); |
78 page_distiller_->DistillPage( | 94 page_distiller_->DistillPage( |
79 url, | 95 url, |
80 base::Bind(&DistillerImpl::OnPageDistillationFinished, | 96 base::Bind(&DistillerImpl::OnPageDistillationFinished, |
81 base::Unretained(this), | 97 base::Unretained(this), |
82 url)); | 98 page_no)); |
83 } else { | |
84 RunDistillerCallbackIfDone(); | |
85 } | 99 } |
86 } | 100 } |
87 | 101 |
88 void DistillerImpl::OnPageDistillationFinished( | 102 void DistillerImpl::OnPageDistillationFinished( |
89 const GURL& page_url, | 103 int page_no, |
90 scoped_ptr<DistilledPageInfo> distilled_page, | 104 scoped_ptr<DistilledPageInfo> distilled_page, |
91 bool distillation_successful) { | 105 bool distillation_successful) { |
92 DCHECK(distillation_in_progress_); | |
93 DCHECK(distilled_page.get()); | 106 DCHECK(distilled_page.get()); |
94 if (!distillation_successful) { | 107 DCHECK(IsValidPageNo(page_no)); |
95 RunDistillerCallbackIfDone(); | 108 DCHECK(pages_to_be_distilled_.end() != pages_to_be_distilled_.find(page_no)); |
cjhopman
2014/02/12 20:39:09
nit: swap the order of this comparison: foo.find(_
shashi
2014/02/13 01:03:11
I also thought so, but replicated the style in thi
| |
96 } else { | 109 const GURL page_url = pages_to_be_distilled_[page_no]; |
97 DistilledPageProto* current_page = article_proto_->add_pages(); | 110 pages_to_be_distilled_.erase(page_no); |
98 // Set the title of the article as the title of the first page. | 111 if (distillation_successful) { |
99 if (article_proto_->pages_size() == 1) { | 112 DistilledPageProto* current_page = new DistilledPageProto(); |
100 article_proto_->set_title(distilled_page->title); | 113 DistilledPageData* page_data = new DistilledPageData(); |
101 } | 114 page_data->proto.reset(current_page); |
115 page_data->page_no = page_no; | |
116 page_data->title = distilled_page->title; | |
117 distilled_pages_.push_back(page_data); | |
118 distilled_pages_index_[page_no] = distilled_pages_.size() - 1; | |
102 | 119 |
103 current_page->set_url(page_url.spec()); | 120 current_page->set_url(page_url.spec()); |
104 current_page->set_html(distilled_page->html); | 121 current_page->set_html(distilled_page->html); |
105 | 122 |
106 GURL next_page_url(distilled_page->next_page_url); | 123 GURL next_page_url(distilled_page->next_page_url); |
107 if (next_page_url.is_valid()) { | 124 if (next_page_url.is_valid()) { |
108 // The pages should be in same origin. | 125 // The pages should be in same origin. |
109 DCHECK_EQ(next_page_url.GetOrigin(), page_url.GetOrigin()); | 126 DCHECK_EQ(next_page_url.GetOrigin(), page_url.GetOrigin()); |
110 } | 127 } |
111 | 128 |
112 processed_urls_.insert(page_url.spec()); | 129 processed_urls_.insert(page_url.spec()); |
113 distillation_in_progress_ = false; | |
114 int page_number = article_proto_->pages_size(); | |
115 for (size_t img_num = 0; img_num < distilled_page->image_urls.size(); | 130 for (size_t img_num = 0; img_num < distilled_page->image_urls.size(); |
116 ++img_num) { | 131 ++img_num) { |
117 std::string image_id = | 132 std::string image_id = |
118 base::IntToString(page_number) + "_" + base::IntToString(img_num); | 133 base::IntToString(page_no + 1) + "_" + base::IntToString(img_num); |
119 FetchImage(current_page, image_id, distilled_page->image_urls[img_num]); | 134 FetchImage(page_data, image_id, distilled_page->image_urls[img_num]); |
120 } | 135 } |
121 DistillPage(next_page_url); | 136 |
137 AddToDistillationQueue(page_no + 1, next_page_url); | |
138 CheckIfPageDone(page_data); | |
139 DistillNextPage(); | |
140 } else { | |
141 in_progress_pages_.erase(page_no); | |
cjhopman
2014/02/12 20:39:09
I don't think this is properly handled in RunDisti
shashi
2014/02/13 01:03:11
Sorry, I do not understand the comment, can you be
cjhopman
2014/02/13 02:48:48
I mean that I wasn't sure what happens if one page
shashi
2014/02/13 20:09:51
I am not sure about the correct behavior either, c
| |
142 RunDistillerCallbackIfDone(); | |
122 } | 143 } |
144 | |
145 DCHECK(pages_to_be_distilled_.end() == pages_to_be_distilled_.find(page_no)); | |
cjhopman
2014/02/12 20:39:09
nit: swap order of this comparison
shashi
2014/02/13 01:03:11
Done.
| |
123 } | 146 } |
124 | 147 |
125 void DistillerImpl::FetchImage(DistilledPageProto* distilled_page_proto, | 148 void DistillerImpl::FetchImage(DistilledPageData* distilled_page_data, |
126 const std::string& image_id, | 149 const std::string& image_id, |
127 const std::string& item) { | 150 const std::string& item) { |
151 DCHECK(distilled_page_data); | |
128 DistillerURLFetcher* fetcher = | 152 DistillerURLFetcher* fetcher = |
129 distiller_url_fetcher_factory_.CreateDistillerURLFetcher(); | 153 distiller_url_fetcher_factory_.CreateDistillerURLFetcher(); |
130 image_fetchers_.push_back(fetcher); | 154 distilled_page_data->image_fetchers_.push_back(fetcher); |
155 | |
131 fetcher->FetchURL(item, | 156 fetcher->FetchURL(item, |
132 base::Bind(&DistillerImpl::OnFetchImageDone, | 157 base::Bind(&DistillerImpl::OnFetchImageDone, |
133 base::Unretained(this), | 158 base::Unretained(this), |
134 base::Unretained(distilled_page_proto), | 159 base::Unretained(distilled_page_data), |
135 base::Unretained(fetcher), | 160 base::Unretained(fetcher), |
136 image_id)); | 161 image_id)); |
137 } | 162 } |
138 | 163 |
139 void DistillerImpl::OnFetchImageDone(DistilledPageProto* distilled_page_proto, | 164 void DistillerImpl::OnFetchImageDone(DistilledPageData* distilled_page_data, |
140 DistillerURLFetcher* url_fetcher, | 165 DistillerURLFetcher* url_fetcher, |
141 const std::string& id, | 166 const std::string& id, |
142 const std::string& response) { | 167 const std::string& response) { |
143 DCHECK_GT(article_proto_->pages_size(), 0); | 168 DCHECK(distilled_page_data); |
144 DCHECK(distilled_page_proto); | 169 DCHECK(distilled_page_data->proto); |
145 DCHECK(url_fetcher); | 170 DCHECK(url_fetcher); |
146 ScopedVector<DistillerURLFetcher>::iterator fetcher_it = | 171 ScopedVector<DistillerURLFetcher>::iterator fetcher_it = |
147 std::find(image_fetchers_.begin(), image_fetchers_.end(), url_fetcher); | 172 std::find(distilled_page_data->image_fetchers_.begin(), |
173 distilled_page_data->image_fetchers_.end(), | |
174 url_fetcher); | |
148 | 175 |
149 DCHECK(fetcher_it != image_fetchers_.end()); | 176 DCHECK(fetcher_it != distilled_page_data->image_fetchers_.end()); |
150 // Delete the |url_fetcher| by DeleteSoon since the OnFetchImageDone | 177 // Delete the |url_fetcher| by DeleteSoon since the OnFetchImageDone |
151 // callback is invoked by the |url_fetcher|. | 178 // callback is invoked by the |url_fetcher|. |
152 image_fetchers_.weak_erase(fetcher_it); | 179 distilled_page_data->image_fetchers_.weak_erase(fetcher_it); |
153 base::MessageLoop::current()->DeleteSoon(FROM_HERE, url_fetcher); | 180 base::MessageLoop::current()->DeleteSoon(FROM_HERE, url_fetcher); |
154 DistilledPageProto_Image* image = distilled_page_proto->add_image(); | 181 |
182 DistilledPageProto_Image* image = distilled_page_data->proto->add_image(); | |
155 image->set_name(id); | 183 image->set_name(id); |
156 image->set_data(response); | 184 image->set_data(response); |
157 RunDistillerCallbackIfDone(); | 185 |
186 CheckIfPageDone(distilled_page_data); | |
187 } | |
188 | |
189 void DistillerImpl::CheckIfPageDone( | |
190 const DistilledPageData* distilled_page_data) { | |
191 DCHECK(distilled_page_data); | |
192 int page_no = distilled_page_data->page_no; | |
193 DCHECK(in_progress_pages_.end() != in_progress_pages_.find(page_no)); | |
cjhopman
2014/02/12 20:39:09
nit: swap order
shashi
2014/02/13 01:03:11
Done.
shashi
2014/02/13 01:03:11
Done.
| |
194 if (distilled_page_data->image_fetchers_.empty()) { | |
195 in_progress_pages_.erase(page_no); | |
196 RunDistillerCallbackIfDone(); | |
197 } | |
158 } | 198 } |
159 | 199 |
160 void DistillerImpl::RunDistillerCallbackIfDone() { | 200 void DistillerImpl::RunDistillerCallbackIfDone() { |
161 if (image_fetchers_.empty() && !distillation_in_progress_) { | 201 DCHECK(!distillation_cb_.is_null()); |
202 if (in_progress_pages_.empty()) { | |
203 bool first_page = true; | |
204 // Stitch the pages back into the article. | |
205 for (std::map<int, size_t>::const_iterator it = | |
206 distilled_pages_index_.begin(); | |
207 it != distilled_pages_index_.end();) { | |
208 const DistilledPageData* page_data = distilled_pages_[it->second]; | |
209 *(article_proto_->add_pages()) = *(page_data->proto); | |
210 | |
211 if (first_page) { | |
212 article_proto_->set_title(page_data->title); | |
213 first_page = false; | |
214 } | |
215 | |
216 distilled_pages_index_.erase(it++); | |
217 } | |
218 | |
219 distilled_pages_.clear(); | |
220 | |
221 DCHECK(distilled_pages_.empty()); | |
222 DCHECK(distilled_pages_index_.empty()); | |
162 distillation_cb_.Run(article_proto_.Pass()); | 223 distillation_cb_.Run(article_proto_.Pass()); |
224 distillation_cb_.Reset(); | |
163 } | 225 } |
164 } | 226 } |
165 | 227 |
166 } // namespace dom_distiller | 228 } // namespace dom_distiller |
OLD | NEW |