| Index: components/dom_distiller/core/distiller.cc
|
| diff --git a/components/dom_distiller/core/distiller.cc b/components/dom_distiller/core/distiller.cc
|
| index c28962b7935e1f5f7d02904631bdafa05f4a00e0..f8605162e418205080af4ccda6a4801d071fd449 100644
|
| --- a/components/dom_distiller/core/distiller.cc
|
| +++ b/components/dom_distiller/core/distiller.cc
|
| @@ -141,139 +141,145 @@ void DistillerImpl::OnPageDistillationFinished(
|
| std::unique_ptr<proto::DomDistillerResult> distiller_result,
|
| bool distillation_successful) {
|
| DCHECK(started_pages_index_.find(page_num) != started_pages_index_.end());
|
| - if (distillation_successful) {
|
| -
|
| - if (distiller_result->has_statistics_info() && page_num == 0) {
|
| - if (distiller_result->statistics_info().has_word_count()) {
|
| - UMA_HISTOGRAM_CUSTOM_COUNTS(
|
| - "DomDistiller.Statistics.FirstPageWordCount",
|
| - distiller_result->statistics_info().word_count(),
|
| - 1, 4000, 50);
|
| - }
|
| - }
|
| + if (!distillation_successful) {
|
| + started_pages_index_.erase(page_num);
|
| + RunDistillerCallbackIfDone();
|
| + return;
|
| + }
|
|
|
| - DCHECK(distiller_result.get());
|
| - DistilledPageData* page_data =
|
| - GetPageAtIndex(started_pages_index_[page_num]);
|
| - page_data->distilled_page_proto =
|
| - new base::RefCountedData<DistilledPageProto>();
|
| - page_data->page_num = page_num;
|
| - if (distiller_result->has_title()) {
|
| - page_data->distilled_page_proto->data.set_title(
|
| - distiller_result->title());
|
| + if (distiller_result->has_statistics_info() && page_num == 0) {
|
| + if (distiller_result->statistics_info().has_word_count()) {
|
| + UMA_HISTOGRAM_CUSTOM_COUNTS(
|
| + "DomDistiller.Statistics.FirstPageWordCount",
|
| + distiller_result->statistics_info().word_count(),
|
| + 1, 4000, 50);
|
| }
|
| - page_data->distilled_page_proto->data.set_url(page_url.spec());
|
| - if (distiller_result->has_distilled_content() &&
|
| - distiller_result->distilled_content().has_html()) {
|
| - page_data->distilled_page_proto->data.set_html(
|
| - distiller_result->distilled_content().html());
|
| - }
|
| -
|
| - if (distiller_result->has_timing_info()) {
|
| - const proto::TimingInfo& distiller_timing_info =
|
| - distiller_result->timing_info();
|
| - DistilledPageProto::TimingInfo timing_info;
|
| - if (distiller_timing_info.has_markup_parsing_time()) {
|
| - timing_info.set_name("markup_parsing");
|
| - timing_info.set_time(distiller_timing_info.markup_parsing_time());
|
| - *page_data->distilled_page_proto->data.add_timing_info() = timing_info;
|
| - }
|
| + }
|
|
|
| - if (distiller_timing_info.has_document_construction_time()) {
|
| - timing_info.set_name("document_construction");
|
| - timing_info.set_time(
|
| - distiller_timing_info.document_construction_time());
|
| - *page_data->distilled_page_proto->data.add_timing_info() = timing_info;
|
| - }
|
| + DCHECK(distiller_result.get());
|
| + DistilledPageData* page_data =
|
| + GetPageAtIndex(started_pages_index_[page_num]);
|
| + page_data->distilled_page_proto =
|
| + new base::RefCountedData<DistilledPageProto>();
|
| + page_data->page_num = page_num;
|
| + if (distiller_result->has_title()) {
|
| + page_data->distilled_page_proto->data.set_title(
|
| + distiller_result->title());
|
| + }
|
| + page_data->distilled_page_proto->data.set_url(page_url.spec());
|
| + bool content_empty = true;
|
| + if (distiller_result->has_distilled_content() &&
|
| + distiller_result->distilled_content().has_html()) {
|
| + page_data->distilled_page_proto->data.set_html(
|
| + distiller_result->distilled_content().html());
|
| + if (!distiller_result->distilled_content().html().empty()) {
|
| + content_empty = false;
|
| + }
|
| + }
|
|
|
| - if (distiller_timing_info.has_article_processing_time()) {
|
| - timing_info.set_name("article_processing");
|
| - timing_info.set_time(
|
| - distiller_timing_info.article_processing_time());
|
| - *page_data->distilled_page_proto->data.add_timing_info() = timing_info;
|
| - }
|
| + if (distiller_result->has_timing_info()) {
|
| + const proto::TimingInfo& distiller_timing_info =
|
| + distiller_result->timing_info();
|
| + DistilledPageProto::TimingInfo timing_info;
|
| + if (distiller_timing_info.has_markup_parsing_time()) {
|
| + timing_info.set_name("markup_parsing");
|
| + timing_info.set_time(distiller_timing_info.markup_parsing_time());
|
| + *page_data->distilled_page_proto->data.add_timing_info() = timing_info;
|
| + }
|
|
|
| - if (distiller_timing_info.has_formatting_time()) {
|
| - timing_info.set_name("formatting");
|
| - timing_info.set_time(
|
| - distiller_timing_info.formatting_time());
|
| - *page_data->distilled_page_proto->data.add_timing_info() = timing_info;
|
| - }
|
| + if (distiller_timing_info.has_document_construction_time()) {
|
| + timing_info.set_name("document_construction");
|
| + timing_info.set_time(
|
| + distiller_timing_info.document_construction_time());
|
| + *page_data->distilled_page_proto->data.add_timing_info() = timing_info;
|
| + }
|
|
|
| - if (distiller_timing_info.has_total_time()) {
|
| - timing_info.set_name("total");
|
| - timing_info.set_time(
|
| - distiller_timing_info.total_time());
|
| - *page_data->distilled_page_proto->data.add_timing_info() = timing_info;
|
| - }
|
| + if (distiller_timing_info.has_article_processing_time()) {
|
| + timing_info.set_name("article_processing");
|
| + timing_info.set_time(
|
| + distiller_timing_info.article_processing_time());
|
| + *page_data->distilled_page_proto->data.add_timing_info() = timing_info;
|
| + }
|
|
|
| - for (int i = 0; i < distiller_timing_info.other_times_size(); i++) {
|
| - timing_info.set_name(distiller_timing_info.other_times(i).name());
|
| - timing_info.set_time(distiller_timing_info.other_times(i).time());
|
| - *page_data->distilled_page_proto->data.add_timing_info() = timing_info;
|
| - }
|
| + if (distiller_timing_info.has_formatting_time()) {
|
| + timing_info.set_name("formatting");
|
| + timing_info.set_time(
|
| + distiller_timing_info.formatting_time());
|
| + *page_data->distilled_page_proto->data.add_timing_info() = timing_info;
|
| }
|
|
|
| - if (distiller_result->has_debug_info() &&
|
| - distiller_result->debug_info().has_log()) {
|
| - page_data->distilled_page_proto->data.mutable_debug_info()->set_log(
|
| - distiller_result->debug_info().log());
|
| + if (distiller_timing_info.has_total_time()) {
|
| + timing_info.set_name("total");
|
| + timing_info.set_time(
|
| + distiller_timing_info.total_time());
|
| + *page_data->distilled_page_proto->data.add_timing_info() = timing_info;
|
| }
|
|
|
| - if (distiller_result->has_text_direction()) {
|
| - page_data->distilled_page_proto->data.set_text_direction(
|
| - distiller_result->text_direction());
|
| - } else {
|
| - page_data->distilled_page_proto->data.set_text_direction("auto");
|
| + for (int i = 0; i < distiller_timing_info.other_times_size(); i++) {
|
| + timing_info.set_name(distiller_timing_info.other_times(i).name());
|
| + timing_info.set_time(distiller_timing_info.other_times(i).time());
|
| + *page_data->distilled_page_proto->data.add_timing_info() = timing_info;
|
| }
|
| + }
|
|
|
| - if (distiller_result->has_pagination_info()) {
|
| - const proto::PaginationInfo& pagination_info =
|
| - distiller_result->pagination_info();
|
| - if (pagination_info.has_next_page()) {
|
| - GURL next_page_url(pagination_info.next_page());
|
| - if (next_page_url.is_valid()) {
|
| - // The pages should be in same origin.
|
| - DCHECK_EQ(next_page_url.GetOrigin(), page_url.GetOrigin());
|
| - AddToDistillationQueue(page_num + 1, next_page_url);
|
| - page_data->distilled_page_proto->data.mutable_pagination_info()->
|
| - set_next_page(next_page_url.spec());
|
| - }
|
| - }
|
| + if (distiller_result->has_debug_info() &&
|
| + distiller_result->debug_info().has_log()) {
|
| + page_data->distilled_page_proto->data.mutable_debug_info()->set_log(
|
| + distiller_result->debug_info().log());
|
| + }
|
|
|
| - if (pagination_info.has_prev_page()) {
|
| - GURL prev_page_url(pagination_info.prev_page());
|
| - if (prev_page_url.is_valid()) {
|
| - DCHECK_EQ(prev_page_url.GetOrigin(), page_url.GetOrigin());
|
| - AddToDistillationQueue(page_num - 1, prev_page_url);
|
| - page_data->distilled_page_proto->data.mutable_pagination_info()->
|
| - set_prev_page(prev_page_url.spec());
|
| - }
|
| + if (distiller_result->has_text_direction()) {
|
| + page_data->distilled_page_proto->data.set_text_direction(
|
| + distiller_result->text_direction());
|
| + } else {
|
| + page_data->distilled_page_proto->data.set_text_direction("auto");
|
| + }
|
| +
|
| + if (distiller_result->has_pagination_info()) {
|
| + const proto::PaginationInfo& pagination_info =
|
| + distiller_result->pagination_info();
|
| + // Skip the next page if the first page is empty.
|
| + if (pagination_info.has_next_page() &&
|
| + (page_num != 0 || !content_empty)) {
|
| + GURL next_page_url(pagination_info.next_page());
|
| + if (next_page_url.is_valid()) {
|
| + // The pages should be in same origin.
|
| + DCHECK_EQ(next_page_url.GetOrigin(), page_url.GetOrigin());
|
| + AddToDistillationQueue(page_num + 1, next_page_url);
|
| + page_data->distilled_page_proto->data.mutable_pagination_info()->
|
| + set_next_page(next_page_url.spec());
|
| }
|
| + }
|
|
|
| - if (pagination_info.has_canonical_page()) {
|
| - GURL canonical_page_url(pagination_info.canonical_page());
|
| - if (canonical_page_url.is_valid()) {
|
| - page_data->distilled_page_proto->data.mutable_pagination_info()->
|
| - set_canonical_page(canonical_page_url.spec());
|
| - }
|
| + if (pagination_info.has_prev_page()) {
|
| + GURL prev_page_url(pagination_info.prev_page());
|
| + if (prev_page_url.is_valid()) {
|
| + DCHECK_EQ(prev_page_url.GetOrigin(), page_url.GetOrigin());
|
| + AddToDistillationQueue(page_num - 1, prev_page_url);
|
| + page_data->distilled_page_proto->data.mutable_pagination_info()->
|
| + set_prev_page(prev_page_url.spec());
|
| }
|
| }
|
|
|
| - for (int img_num = 0; img_num < distiller_result->content_images_size();
|
| - ++img_num) {
|
| - std::string image_id =
|
| - base::IntToString(page_num + 1) + "_" + base::IntToString(img_num);
|
| - FetchImage(page_num, image_id,
|
| - distiller_result->content_images(img_num).url());
|
| + if (pagination_info.has_canonical_page()) {
|
| + GURL canonical_page_url(pagination_info.canonical_page());
|
| + if (canonical_page_url.is_valid()) {
|
| + page_data->distilled_page_proto->data.mutable_pagination_info()->
|
| + set_canonical_page(canonical_page_url.spec());
|
| + }
|
| }
|
| + }
|
|
|
| - AddPageIfDone(page_num);
|
| - DistillNextPage();
|
| - } else {
|
| - started_pages_index_.erase(page_num);
|
| - RunDistillerCallbackIfDone();
|
| + for (int img_num = 0; img_num < distiller_result->content_images_size();
|
| + ++img_num) {
|
| + std::string image_id =
|
| + base::IntToString(page_num + 1) + "_" + base::IntToString(img_num);
|
| + FetchImage(page_num, image_id,
|
| + distiller_result->content_images(img_num).url());
|
| }
|
| +
|
| + AddPageIfDone(page_num);
|
| + DistillNextPage();
|
| }
|
|
|
| void DistillerImpl::FetchImage(int page_num,
|
|
|