| Index: components/dom_distiller/content/renderer/distillability_agent.cc
|
| diff --git a/components/dom_distiller/content/renderer/distillability_agent.cc b/components/dom_distiller/content/renderer/distillability_agent.cc
|
| index 660985ee3fa0c0367af8b9af3eda2a723f202e84..93e37361d9f0b01d1a066909ba08a7e9e5508cb3 100644
|
| --- a/components/dom_distiller/content/renderer/distillability_agent.cc
|
| +++ b/components/dom_distiller/content/renderer/distillability_agent.cc
|
| @@ -65,13 +65,14 @@ bool IsBlacklisted(const GURL& url) {
|
|
|
| bool IsDistillablePageAdaboost(WebDocument& doc,
|
| const DistillablePageDetector* detector,
|
| + const DistillablePageDetector* long_page,
|
| bool is_last) {
|
| WebDistillabilityFeatures features = doc.distillabilityFeatures();
|
| GURL parsed_url(doc.url());
|
| if (!parsed_url.is_valid()) {
|
| return false;
|
| }
|
| - bool distillable = detector->Classify(CalculateDerivedFeatures(
|
| + std::vector<double> derived = CalculateDerivedFeatures(
|
| features.openGraph,
|
| parsed_url,
|
| features.elementCount,
|
| @@ -80,7 +81,9 @@ bool IsDistillablePageAdaboost(WebDocument& doc,
|
| features.mozScore,
|
| features.mozScoreAllSqrt,
|
| features.mozScoreAllLinear
|
| - ));
|
| + );
|
| + bool distillable = detector->Classify(derived);
|
| + bool long_article = long_page->Classify(derived);
|
| bool blacklisted = IsBlacklisted(parsed_url);
|
|
|
| int bucket = static_cast<unsigned>(features.isMobileFriendly) |
|
| @@ -93,7 +96,13 @@ bool IsDistillablePageAdaboost(WebDocument& doc,
|
| bucket, 4);
|
| }
|
|
|
| - return distillable && (!features.isMobileFriendly) && (!blacklisted);
|
| + if (blacklisted) {
|
| + return false;
|
| + }
|
| + if (features.isMobileFriendly) {
|
| + return false;
|
| + }
|
| + return distillable && long_article;
|
| }
|
|
|
| bool IsDistillablePage(WebDocument& doc, bool is_last) {
|
| @@ -103,8 +112,9 @@ bool IsDistillablePage(WebDocument& doc, bool is_last) {
|
| case DistillerHeuristicsType::OG_ARTICLE:
|
| return doc.distillabilityFeatures().openGraph;
|
| case DistillerHeuristicsType::ADABOOST_MODEL:
|
| - return IsDistillablePageAdaboost(
|
| - doc, DistillablePageDetector::GetNewModel(), is_last);
|
| + return IsDistillablePageAdaboost(doc,
|
| + DistillablePageDetector::GetNewModel(),
|
| + DistillablePageDetector::GetLongPageModel(), is_last);
|
| case DistillerHeuristicsType::NONE:
|
| default:
|
| return false;
|
|
|