| OLD | NEW |
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/dom_distiller/core/distiller.h" | 5 #include "components/dom_distiller/core/distiller.h" |
| 6 | 6 |
| 7 #include <map> | 7 #include <map> |
| 8 #include <utility> | 8 #include <utility> |
| 9 #include <vector> | 9 #include <vector> |
| 10 | 10 |
| (...skipping 123 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 134 url)); | 134 url)); |
| 135 } | 135 } |
| 136 } | 136 } |
| 137 | 137 |
| 138 void DistillerImpl::OnPageDistillationFinished( | 138 void DistillerImpl::OnPageDistillationFinished( |
| 139 int page_num, | 139 int page_num, |
| 140 const GURL& page_url, | 140 const GURL& page_url, |
| 141 scoped_ptr<proto::DomDistillerResult> distiller_result, | 141 scoped_ptr<proto::DomDistillerResult> distiller_result, |
| 142 bool distillation_successful) { | 142 bool distillation_successful) { |
| 143 DCHECK(started_pages_index_.find(page_num) != started_pages_index_.end()); | 143 DCHECK(started_pages_index_.find(page_num) != started_pages_index_.end()); |
| 144 if (distillation_successful) { | 144 if (!distillation_successful) { |
| 145 started_pages_index_.erase(page_num); |
| 146 RunDistillerCallbackIfDone(); |
| 147 return; |
| 148 } |
| 145 | 149 |
| 146 if (distiller_result->has_statistics_info() && page_num == 0) { | 150 if (distiller_result->has_statistics_info() && page_num == 0) { |
| 147 if (distiller_result->statistics_info().has_word_count()) { | 151 if (distiller_result->statistics_info().has_word_count()) { |
| 148 UMA_HISTOGRAM_CUSTOM_COUNTS( | 152 UMA_HISTOGRAM_CUSTOM_COUNTS( |
| 149 "DomDistiller.Statistics.FirstPageWordCount", | 153 "DomDistiller.Statistics.FirstPageWordCount", |
| 150 distiller_result->statistics_info().word_count(), | 154 distiller_result->statistics_info().word_count(), |
| 151 1, 4000, 50); | 155 1, 4000, 50); |
| 156 } |
| 157 } |
| 158 |
| 159 DCHECK(distiller_result.get()); |
| 160 DistilledPageData* page_data = |
| 161 GetPageAtIndex(started_pages_index_[page_num]); |
| 162 page_data->distilled_page_proto = |
| 163 new base::RefCountedData<DistilledPageProto>(); |
| 164 page_data->page_num = page_num; |
| 165 if (distiller_result->has_title()) { |
| 166 page_data->distilled_page_proto->data.set_title( |
| 167 distiller_result->title()); |
| 168 } |
| 169 page_data->distilled_page_proto->data.set_url(page_url.spec()); |
| 170 bool content_empty = true; |
| 171 if (distiller_result->has_distilled_content() && |
| 172 distiller_result->distilled_content().has_html()) { |
| 173 page_data->distilled_page_proto->data.set_html( |
| 174 distiller_result->distilled_content().html()); |
| 175 if (!distiller_result->distilled_content().html().empty()) { |
| 176 content_empty = false; |
| 177 } |
| 178 } |
| 179 |
| 180 if (distiller_result->has_timing_info()) { |
| 181 const proto::TimingInfo& distiller_timing_info = |
| 182 distiller_result->timing_info(); |
| 183 DistilledPageProto::TimingInfo timing_info; |
| 184 if (distiller_timing_info.has_markup_parsing_time()) { |
| 185 timing_info.set_name("markup_parsing"); |
| 186 timing_info.set_time(distiller_timing_info.markup_parsing_time()); |
| 187 *page_data->distilled_page_proto->data.add_timing_info() = timing_info; |
| 188 } |
| 189 |
| 190 if (distiller_timing_info.has_document_construction_time()) { |
| 191 timing_info.set_name("document_construction"); |
| 192 timing_info.set_time( |
| 193 distiller_timing_info.document_construction_time()); |
| 194 *page_data->distilled_page_proto->data.add_timing_info() = timing_info; |
| 195 } |
| 196 |
| 197 if (distiller_timing_info.has_article_processing_time()) { |
| 198 timing_info.set_name("article_processing"); |
| 199 timing_info.set_time( |
| 200 distiller_timing_info.article_processing_time()); |
| 201 *page_data->distilled_page_proto->data.add_timing_info() = timing_info; |
| 202 } |
| 203 |
| 204 if (distiller_timing_info.has_formatting_time()) { |
| 205 timing_info.set_name("formatting"); |
| 206 timing_info.set_time( |
| 207 distiller_timing_info.formatting_time()); |
| 208 *page_data->distilled_page_proto->data.add_timing_info() = timing_info; |
| 209 } |
| 210 |
| 211 if (distiller_timing_info.has_total_time()) { |
| 212 timing_info.set_name("total"); |
| 213 timing_info.set_time( |
| 214 distiller_timing_info.total_time()); |
| 215 *page_data->distilled_page_proto->data.add_timing_info() = timing_info; |
| 216 } |
| 217 |
| 218 for (int i = 0; i < distiller_timing_info.other_times_size(); i++) { |
| 219 timing_info.set_name(distiller_timing_info.other_times(i).name()); |
| 220 timing_info.set_time(distiller_timing_info.other_times(i).time()); |
| 221 *page_data->distilled_page_proto->data.add_timing_info() = timing_info; |
| 222 } |
| 223 } |
| 224 |
| 225 if (distiller_result->has_debug_info() && |
| 226 distiller_result->debug_info().has_log()) { |
| 227 page_data->distilled_page_proto->data.mutable_debug_info()->set_log( |
| 228 distiller_result->debug_info().log()); |
| 229 } |
| 230 |
| 231 if (distiller_result->has_text_direction()) { |
| 232 page_data->distilled_page_proto->data.set_text_direction( |
| 233 distiller_result->text_direction()); |
| 234 } else { |
| 235 page_data->distilled_page_proto->data.set_text_direction("auto"); |
| 236 } |
| 237 |
| 238 if (distiller_result->has_pagination_info()) { |
| 239 const proto::PaginationInfo& pagination_info = |
| 240 distiller_result->pagination_info(); |
| 241 // Skip the next page if the first page is empty. |
| 242 if (pagination_info.has_next_page() && |
| 243 (page_num != 0 || !content_empty)) { |
| 244 GURL next_page_url(pagination_info.next_page()); |
| 245 if (next_page_url.is_valid()) { |
| 246 // The pages should be in same origin. |
| 247 DCHECK_EQ(next_page_url.GetOrigin(), page_url.GetOrigin()); |
| 248 AddToDistillationQueue(page_num + 1, next_page_url); |
| 249 page_data->distilled_page_proto->data.mutable_pagination_info()-> |
| 250 set_next_page(next_page_url.spec()); |
| 152 } | 251 } |
| 153 } | 252 } |
| 154 | 253 |
| 155 DCHECK(distiller_result.get()); | 254 if (pagination_info.has_prev_page()) { |
| 156 DistilledPageData* page_data = | 255 GURL prev_page_url(pagination_info.prev_page()); |
| 157 GetPageAtIndex(started_pages_index_[page_num]); | 256 if (prev_page_url.is_valid()) { |
| 158 page_data->distilled_page_proto = | 257 DCHECK_EQ(prev_page_url.GetOrigin(), page_url.GetOrigin()); |
| 159 new base::RefCountedData<DistilledPageProto>(); | 258 AddToDistillationQueue(page_num - 1, prev_page_url); |
| 160 page_data->page_num = page_num; | 259 page_data->distilled_page_proto->data.mutable_pagination_info()-> |
| 161 if (distiller_result->has_title()) { | 260 set_prev_page(prev_page_url.spec()); |
| 162 page_data->distilled_page_proto->data.set_title( | |
| 163 distiller_result->title()); | |
| 164 } | |
| 165 page_data->distilled_page_proto->data.set_url(page_url.spec()); | |
| 166 if (distiller_result->has_distilled_content() && | |
| 167 distiller_result->distilled_content().has_html()) { | |
| 168 page_data->distilled_page_proto->data.set_html( | |
| 169 distiller_result->distilled_content().html()); | |
| 170 } | |
| 171 | |
| 172 if (distiller_result->has_timing_info()) { | |
| 173 const proto::TimingInfo& distiller_timing_info = | |
| 174 distiller_result->timing_info(); | |
| 175 DistilledPageProto::TimingInfo timing_info; | |
| 176 if (distiller_timing_info.has_markup_parsing_time()) { | |
| 177 timing_info.set_name("markup_parsing"); | |
| 178 timing_info.set_time(distiller_timing_info.markup_parsing_time()); | |
| 179 *page_data->distilled_page_proto->data.add_timing_info() = timing_info; | |
| 180 } | |
| 181 | |
| 182 if (distiller_timing_info.has_document_construction_time()) { | |
| 183 timing_info.set_name("document_construction"); | |
| 184 timing_info.set_time( | |
| 185 distiller_timing_info.document_construction_time()); | |
| 186 *page_data->distilled_page_proto->data.add_timing_info() = timing_info; | |
| 187 } | |
| 188 | |
| 189 if (distiller_timing_info.has_article_processing_time()) { | |
| 190 timing_info.set_name("article_processing"); | |
| 191 timing_info.set_time( | |
| 192 distiller_timing_info.article_processing_time()); | |
| 193 *page_data->distilled_page_proto->data.add_timing_info() = timing_info; | |
| 194 } | |
| 195 | |
| 196 if (distiller_timing_info.has_formatting_time()) { | |
| 197 timing_info.set_name("formatting"); | |
| 198 timing_info.set_time( | |
| 199 distiller_timing_info.formatting_time()); | |
| 200 *page_data->distilled_page_proto->data.add_timing_info() = timing_info; | |
| 201 } | |
| 202 | |
| 203 if (distiller_timing_info.has_total_time()) { | |
| 204 timing_info.set_name("total"); | |
| 205 timing_info.set_time( | |
| 206 distiller_timing_info.total_time()); | |
| 207 *page_data->distilled_page_proto->data.add_timing_info() = timing_info; | |
| 208 } | |
| 209 | |
| 210 for (int i = 0; i < distiller_timing_info.other_times_size(); i++) { | |
| 211 timing_info.set_name(distiller_timing_info.other_times(i).name()); | |
| 212 timing_info.set_time(distiller_timing_info.other_times(i).time()); | |
| 213 *page_data->distilled_page_proto->data.add_timing_info() = timing_info; | |
| 214 } | 261 } |
| 215 } | 262 } |
| 216 | 263 |
| 217 if (distiller_result->has_debug_info() && | 264 if (pagination_info.has_canonical_page()) { |
| 218 distiller_result->debug_info().has_log()) { | 265 GURL canonical_page_url(pagination_info.canonical_page()); |
| 219 page_data->distilled_page_proto->data.mutable_debug_info()->set_log( | 266 if (canonical_page_url.is_valid()) { |
| 220 distiller_result->debug_info().log()); | 267 page_data->distilled_page_proto->data.mutable_pagination_info()-> |
| 221 } | 268 set_canonical_page(canonical_page_url.spec()); |
| 222 | |
| 223 if (distiller_result->has_text_direction()) { | |
| 224 page_data->distilled_page_proto->data.set_text_direction( | |
| 225 distiller_result->text_direction()); | |
| 226 } else { | |
| 227 page_data->distilled_page_proto->data.set_text_direction("auto"); | |
| 228 } | |
| 229 | |
| 230 if (distiller_result->has_pagination_info()) { | |
| 231 const proto::PaginationInfo& pagination_info = | |
| 232 distiller_result->pagination_info(); | |
| 233 if (pagination_info.has_next_page()) { | |
| 234 GURL next_page_url(pagination_info.next_page()); | |
| 235 if (next_page_url.is_valid()) { | |
| 236 // The pages should be in same origin. | |
| 237 DCHECK_EQ(next_page_url.GetOrigin(), page_url.GetOrigin()); | |
| 238 AddToDistillationQueue(page_num + 1, next_page_url); | |
| 239 page_data->distilled_page_proto->data.mutable_pagination_info()-> | |
| 240 set_next_page(next_page_url.spec()); | |
| 241 } | |
| 242 } | |
| 243 | |
| 244 if (pagination_info.has_prev_page()) { | |
| 245 GURL prev_page_url(pagination_info.prev_page()); | |
| 246 if (prev_page_url.is_valid()) { | |
| 247 DCHECK_EQ(prev_page_url.GetOrigin(), page_url.GetOrigin()); | |
| 248 AddToDistillationQueue(page_num - 1, prev_page_url); | |
| 249 page_data->distilled_page_proto->data.mutable_pagination_info()-> | |
| 250 set_prev_page(prev_page_url.spec()); | |
| 251 } | |
| 252 } | |
| 253 | |
| 254 if (pagination_info.has_canonical_page()) { | |
| 255 GURL canonical_page_url(pagination_info.canonical_page()); | |
| 256 if (canonical_page_url.is_valid()) { | |
| 257 page_data->distilled_page_proto->data.mutable_pagination_info()-> | |
| 258 set_canonical_page(canonical_page_url.spec()); | |
| 259 } | |
| 260 } | 269 } |
| 261 } | 270 } |
| 271 } |
| 262 | 272 |
| 263 for (int img_num = 0; img_num < distiller_result->content_images_size(); | 273 for (int img_num = 0; img_num < distiller_result->content_images_size(); |
| 264 ++img_num) { | 274 ++img_num) { |
| 265 std::string image_id = | 275 std::string image_id = |
| 266 base::IntToString(page_num + 1) + "_" + base::IntToString(img_num); | 276 base::IntToString(page_num + 1) + "_" + base::IntToString(img_num); |
| 267 FetchImage(page_num, image_id, | 277 FetchImage(page_num, image_id, |
| 268 distiller_result->content_images(img_num).url()); | 278 distiller_result->content_images(img_num).url()); |
| 269 } | 279 } |
| 270 | 280 |
| 271 AddPageIfDone(page_num); | 281 AddPageIfDone(page_num); |
| 272 DistillNextPage(); | 282 DistillNextPage(); |
| 273 } else { | |
| 274 started_pages_index_.erase(page_num); | |
| 275 RunDistillerCallbackIfDone(); | |
| 276 } | |
| 277 } | 283 } |
| 278 | 284 |
| 279 void DistillerImpl::FetchImage(int page_num, | 285 void DistillerImpl::FetchImage(int page_num, |
| 280 const std::string& image_id, | 286 const std::string& image_id, |
| 281 const std::string& image_url) { | 287 const std::string& image_url) { |
| 282 if (!GURL(image_url).is_valid()) return; | 288 if (!GURL(image_url).is_valid()) return; |
| 283 DCHECK(started_pages_index_.find(page_num) != started_pages_index_.end()); | 289 DCHECK(started_pages_index_.find(page_num) != started_pages_index_.end()); |
| 284 DistilledPageData* page_data = GetPageAtIndex(started_pages_index_[page_num]); | 290 DistilledPageData* page_data = GetPageAtIndex(started_pages_index_[page_num]); |
| 285 DistillerURLFetcher* fetcher = | 291 DistillerURLFetcher* fetcher = |
| 286 distiller_url_fetcher_factory_.CreateDistillerURLFetcher(); | 292 distiller_url_fetcher_factory_.CreateDistillerURLFetcher(); |
| (...skipping 101 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 388 DCHECK(finished_pages_index_.empty()); | 394 DCHECK(finished_pages_index_.empty()); |
| 389 | 395 |
| 390 base::AutoReset<bool> dont_delete_this_in_callback(&destruction_allowed_, | 396 base::AutoReset<bool> dont_delete_this_in_callback(&destruction_allowed_, |
| 391 false); | 397 false); |
| 392 finished_cb_.Run(std::move(article_proto)); | 398 finished_cb_.Run(std::move(article_proto)); |
| 393 finished_cb_.Reset(); | 399 finished_cb_.Reset(); |
| 394 } | 400 } |
| 395 } | 401 } |
| 396 | 402 |
| 397 } // namespace dom_distiller | 403 } // namespace dom_distiller |
| OLD | NEW |