Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(687)

Unified Diff: components/dom_distiller/core/distiller.cc

Issue 1891103002: Stop fetching the next page if the first page has no content (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: flip logic | Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | components/dom_distiller/core/distiller_unittest.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
Index: components/dom_distiller/core/distiller.cc
diff --git a/components/dom_distiller/core/distiller.cc b/components/dom_distiller/core/distiller.cc
index c28962b7935e1f5f7d02904631bdafa05f4a00e0..f8605162e418205080af4ccda6a4801d071fd449 100644
--- a/components/dom_distiller/core/distiller.cc
+++ b/components/dom_distiller/core/distiller.cc
@@ -141,139 +141,145 @@ void DistillerImpl::OnPageDistillationFinished(
std::unique_ptr<proto::DomDistillerResult> distiller_result,
bool distillation_successful) {
DCHECK(started_pages_index_.find(page_num) != started_pages_index_.end());
- if (distillation_successful) {
-
- if (distiller_result->has_statistics_info() && page_num == 0) {
- if (distiller_result->statistics_info().has_word_count()) {
- UMA_HISTOGRAM_CUSTOM_COUNTS(
- "DomDistiller.Statistics.FirstPageWordCount",
- distiller_result->statistics_info().word_count(),
- 1, 4000, 50);
- }
- }
+ if (!distillation_successful) {
+ started_pages_index_.erase(page_num);
+ RunDistillerCallbackIfDone();
+ return;
+ }
- DCHECK(distiller_result.get());
- DistilledPageData* page_data =
- GetPageAtIndex(started_pages_index_[page_num]);
- page_data->distilled_page_proto =
- new base::RefCountedData<DistilledPageProto>();
- page_data->page_num = page_num;
- if (distiller_result->has_title()) {
- page_data->distilled_page_proto->data.set_title(
- distiller_result->title());
+ if (distiller_result->has_statistics_info() && page_num == 0) {
+ if (distiller_result->statistics_info().has_word_count()) {
+ UMA_HISTOGRAM_CUSTOM_COUNTS(
+ "DomDistiller.Statistics.FirstPageWordCount",
+ distiller_result->statistics_info().word_count(),
+ 1, 4000, 50);
}
- page_data->distilled_page_proto->data.set_url(page_url.spec());
- if (distiller_result->has_distilled_content() &&
- distiller_result->distilled_content().has_html()) {
- page_data->distilled_page_proto->data.set_html(
- distiller_result->distilled_content().html());
- }
-
- if (distiller_result->has_timing_info()) {
- const proto::TimingInfo& distiller_timing_info =
- distiller_result->timing_info();
- DistilledPageProto::TimingInfo timing_info;
- if (distiller_timing_info.has_markup_parsing_time()) {
- timing_info.set_name("markup_parsing");
- timing_info.set_time(distiller_timing_info.markup_parsing_time());
- *page_data->distilled_page_proto->data.add_timing_info() = timing_info;
- }
+ }
- if (distiller_timing_info.has_document_construction_time()) {
- timing_info.set_name("document_construction");
- timing_info.set_time(
- distiller_timing_info.document_construction_time());
- *page_data->distilled_page_proto->data.add_timing_info() = timing_info;
- }
+ DCHECK(distiller_result.get());
+ DistilledPageData* page_data =
+ GetPageAtIndex(started_pages_index_[page_num]);
+ page_data->distilled_page_proto =
+ new base::RefCountedData<DistilledPageProto>();
+ page_data->page_num = page_num;
+ if (distiller_result->has_title()) {
+ page_data->distilled_page_proto->data.set_title(
+ distiller_result->title());
+ }
+ page_data->distilled_page_proto->data.set_url(page_url.spec());
+ bool content_empty = true;
+ if (distiller_result->has_distilled_content() &&
+ distiller_result->distilled_content().has_html()) {
+ page_data->distilled_page_proto->data.set_html(
+ distiller_result->distilled_content().html());
+ if (!distiller_result->distilled_content().html().empty()) {
+ content_empty = false;
+ }
+ }
- if (distiller_timing_info.has_article_processing_time()) {
- timing_info.set_name("article_processing");
- timing_info.set_time(
- distiller_timing_info.article_processing_time());
- *page_data->distilled_page_proto->data.add_timing_info() = timing_info;
- }
+ if (distiller_result->has_timing_info()) {
+ const proto::TimingInfo& distiller_timing_info =
+ distiller_result->timing_info();
+ DistilledPageProto::TimingInfo timing_info;
+ if (distiller_timing_info.has_markup_parsing_time()) {
+ timing_info.set_name("markup_parsing");
+ timing_info.set_time(distiller_timing_info.markup_parsing_time());
+ *page_data->distilled_page_proto->data.add_timing_info() = timing_info;
+ }
- if (distiller_timing_info.has_formatting_time()) {
- timing_info.set_name("formatting");
- timing_info.set_time(
- distiller_timing_info.formatting_time());
- *page_data->distilled_page_proto->data.add_timing_info() = timing_info;
- }
+ if (distiller_timing_info.has_document_construction_time()) {
+ timing_info.set_name("document_construction");
+ timing_info.set_time(
+ distiller_timing_info.document_construction_time());
+ *page_data->distilled_page_proto->data.add_timing_info() = timing_info;
+ }
- if (distiller_timing_info.has_total_time()) {
- timing_info.set_name("total");
- timing_info.set_time(
- distiller_timing_info.total_time());
- *page_data->distilled_page_proto->data.add_timing_info() = timing_info;
- }
+ if (distiller_timing_info.has_article_processing_time()) {
+ timing_info.set_name("article_processing");
+ timing_info.set_time(
+ distiller_timing_info.article_processing_time());
+ *page_data->distilled_page_proto->data.add_timing_info() = timing_info;
+ }
- for (int i = 0; i < distiller_timing_info.other_times_size(); i++) {
- timing_info.set_name(distiller_timing_info.other_times(i).name());
- timing_info.set_time(distiller_timing_info.other_times(i).time());
- *page_data->distilled_page_proto->data.add_timing_info() = timing_info;
- }
+ if (distiller_timing_info.has_formatting_time()) {
+ timing_info.set_name("formatting");
+ timing_info.set_time(
+ distiller_timing_info.formatting_time());
+ *page_data->distilled_page_proto->data.add_timing_info() = timing_info;
}
- if (distiller_result->has_debug_info() &&
- distiller_result->debug_info().has_log()) {
- page_data->distilled_page_proto->data.mutable_debug_info()->set_log(
- distiller_result->debug_info().log());
+ if (distiller_timing_info.has_total_time()) {
+ timing_info.set_name("total");
+ timing_info.set_time(
+ distiller_timing_info.total_time());
+ *page_data->distilled_page_proto->data.add_timing_info() = timing_info;
}
- if (distiller_result->has_text_direction()) {
- page_data->distilled_page_proto->data.set_text_direction(
- distiller_result->text_direction());
- } else {
- page_data->distilled_page_proto->data.set_text_direction("auto");
+ for (int i = 0; i < distiller_timing_info.other_times_size(); i++) {
+ timing_info.set_name(distiller_timing_info.other_times(i).name());
+ timing_info.set_time(distiller_timing_info.other_times(i).time());
+ *page_data->distilled_page_proto->data.add_timing_info() = timing_info;
}
+ }
- if (distiller_result->has_pagination_info()) {
- const proto::PaginationInfo& pagination_info =
- distiller_result->pagination_info();
- if (pagination_info.has_next_page()) {
- GURL next_page_url(pagination_info.next_page());
- if (next_page_url.is_valid()) {
- // The pages should be in same origin.
- DCHECK_EQ(next_page_url.GetOrigin(), page_url.GetOrigin());
- AddToDistillationQueue(page_num + 1, next_page_url);
- page_data->distilled_page_proto->data.mutable_pagination_info()->
- set_next_page(next_page_url.spec());
- }
- }
+ if (distiller_result->has_debug_info() &&
+ distiller_result->debug_info().has_log()) {
+ page_data->distilled_page_proto->data.mutable_debug_info()->set_log(
+ distiller_result->debug_info().log());
+ }
- if (pagination_info.has_prev_page()) {
- GURL prev_page_url(pagination_info.prev_page());
- if (prev_page_url.is_valid()) {
- DCHECK_EQ(prev_page_url.GetOrigin(), page_url.GetOrigin());
- AddToDistillationQueue(page_num - 1, prev_page_url);
- page_data->distilled_page_proto->data.mutable_pagination_info()->
- set_prev_page(prev_page_url.spec());
- }
+ if (distiller_result->has_text_direction()) {
+ page_data->distilled_page_proto->data.set_text_direction(
+ distiller_result->text_direction());
+ } else {
+ page_data->distilled_page_proto->data.set_text_direction("auto");
+ }
+
+ if (distiller_result->has_pagination_info()) {
+ const proto::PaginationInfo& pagination_info =
+ distiller_result->pagination_info();
+ // Skip the next page if the first page is empty.
+ if (pagination_info.has_next_page() &&
+ (page_num != 0 || !content_empty)) {
+ GURL next_page_url(pagination_info.next_page());
+ if (next_page_url.is_valid()) {
+ // The pages should be in same origin.
+ DCHECK_EQ(next_page_url.GetOrigin(), page_url.GetOrigin());
+ AddToDistillationQueue(page_num + 1, next_page_url);
+ page_data->distilled_page_proto->data.mutable_pagination_info()->
+ set_next_page(next_page_url.spec());
}
+ }
- if (pagination_info.has_canonical_page()) {
- GURL canonical_page_url(pagination_info.canonical_page());
- if (canonical_page_url.is_valid()) {
- page_data->distilled_page_proto->data.mutable_pagination_info()->
- set_canonical_page(canonical_page_url.spec());
- }
+ if (pagination_info.has_prev_page()) {
+ GURL prev_page_url(pagination_info.prev_page());
+ if (prev_page_url.is_valid()) {
+ DCHECK_EQ(prev_page_url.GetOrigin(), page_url.GetOrigin());
+ AddToDistillationQueue(page_num - 1, prev_page_url);
+ page_data->distilled_page_proto->data.mutable_pagination_info()->
+ set_prev_page(prev_page_url.spec());
}
}
- for (int img_num = 0; img_num < distiller_result->content_images_size();
- ++img_num) {
- std::string image_id =
- base::IntToString(page_num + 1) + "_" + base::IntToString(img_num);
- FetchImage(page_num, image_id,
- distiller_result->content_images(img_num).url());
+ if (pagination_info.has_canonical_page()) {
+ GURL canonical_page_url(pagination_info.canonical_page());
+ if (canonical_page_url.is_valid()) {
+ page_data->distilled_page_proto->data.mutable_pagination_info()->
+ set_canonical_page(canonical_page_url.spec());
+ }
}
+ }
- AddPageIfDone(page_num);
- DistillNextPage();
- } else {
- started_pages_index_.erase(page_num);
- RunDistillerCallbackIfDone();
+ for (int img_num = 0; img_num < distiller_result->content_images_size();
+ ++img_num) {
+ std::string image_id =
+ base::IntToString(page_num + 1) + "_" + base::IntToString(img_num);
+ FetchImage(page_num, image_id,
+ distiller_result->content_images(img_num).url());
}
+
+ AddPageIfDone(page_num);
+ DistillNextPage();
}
void DistillerImpl::FetchImage(int page_num,
« no previous file with comments | « no previous file | components/dom_distiller/core/distiller_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698