Chromium Code Reviews| Index: components/dom_distiller/content/renderer/distillability_agent.cc |
| diff --git a/components/dom_distiller/content/renderer/distillability_agent.cc b/components/dom_distiller/content/renderer/distillability_agent.cc |
| index 99592ef0b6fef51c3ca712a6de556bd29d3c8e33..8c01a6ed2b928a9cab08de1acae87298d3ee5efb 100644 |
| --- a/components/dom_distiller/content/renderer/distillability_agent.cc |
| +++ b/components/dom_distiller/content/renderer/distillability_agent.cc |
| @@ -52,13 +52,14 @@ bool IsLast(bool is_loaded) { |
| bool IsDistillablePageAdaboost(WebDocument& doc, |
| const DistillablePageDetector* detector, |
| + const DistillablePageDetector* long_page, |
|
mdjones
2016/02/18 17:17:21
If we add any more of these, we can probably just [… comment truncated in the review page excerpt]
wychen
2016/02/18 21:47:23
Acknowledged. It's hard to say whether we need more [… comment truncated in the review page excerpt]
|
| bool is_last) { |
| WebDistillabilityFeatures features = doc.distillabilityFeatures(); |
| GURL parsed_url(doc.url()); |
| if (!parsed_url.is_valid()) { |
| return false; |
| } |
| - bool distillable = detector->Classify(CalculateDerivedFeatures( |
| + std::vector<double> derived = CalculateDerivedFeatures( |
| features.openGraph, |
| parsed_url, |
| features.elementCount, |
| @@ -67,7 +68,9 @@ bool IsDistillablePageAdaboost(WebDocument& doc, |
| features.mozScore, |
| features.mozScoreAllSqrt, |
| features.mozScoreAllLinear |
| - )); |
| + ); |
| + bool distillable = detector->Classify(derived); |
| + bool long_article = long_page->Classify(derived); |
| int bucket = static_cast<unsigned>(features.isMobileFriendly) | |
| (static_cast<unsigned>(distillable) << 1); |
| @@ -78,7 +81,7 @@ bool IsDistillablePageAdaboost(WebDocument& doc, |
| UMA_HISTOGRAM_ENUMERATION("DomDistiller.PageDistillableAfterParsing", |
| bucket, 4); |
| } |
| - return distillable && (!features.isMobileFriendly); |
| + return distillable && long_article && (!features.isMobileFriendly); |
| } |
| bool IsDistillablePage(WebDocument& doc, bool is_last) { |
| @@ -88,8 +91,9 @@ bool IsDistillablePage(WebDocument& doc, bool is_last) { |
| case DistillerHeuristicsType::OG_ARTICLE: |
| return doc.distillabilityFeatures().openGraph; |
| case DistillerHeuristicsType::ADABOOST_MODEL: |
| - return IsDistillablePageAdaboost( |
| - doc, DistillablePageDetector::GetNewModel(), is_last); |
| + return IsDistillablePageAdaboost(doc, |
| + DistillablePageDetector::GetNewModel(), |
| + DistillablePageDetector::GetLongPageModel(), is_last); |
| case DistillerHeuristicsType::NONE: |
| default: |
| return false; |