Index: components/dom_distiller/content/renderer/distillability_agent.cc |
diff --git a/components/dom_distiller/content/renderer/distillability_agent.cc b/components/dom_distiller/content/renderer/distillability_agent.cc |
index 99592ef0b6fef51c3ca712a6de556bd29d3c8e33..8c01a6ed2b928a9cab08de1acae87298d3ee5efb 100644 |
--- a/components/dom_distiller/content/renderer/distillability_agent.cc |
+++ b/components/dom_distiller/content/renderer/distillability_agent.cc |
@@ -52,13 +52,14 @@ bool IsLast(bool is_loaded) { |
bool IsDistillablePageAdaboost(WebDocument& doc, |
const DistillablePageDetector* detector, |
+ const DistillablePageDetector* long_page, |
mdjones
2016/02/18 17:17:21
If we add any more of these, we can probably just…
wychen
2016/02/18 21:47:23
Acknowledged. It's hard to say whether we need more…
|
bool is_last) { |
WebDistillabilityFeatures features = doc.distillabilityFeatures(); |
GURL parsed_url(doc.url()); |
if (!parsed_url.is_valid()) { |
return false; |
} |
- bool distillable = detector->Classify(CalculateDerivedFeatures( |
+ std::vector<double> derived = CalculateDerivedFeatures( |
features.openGraph, |
parsed_url, |
features.elementCount, |
@@ -67,7 +68,9 @@ bool IsDistillablePageAdaboost(WebDocument& doc, |
features.mozScore, |
features.mozScoreAllSqrt, |
features.mozScoreAllLinear |
- )); |
+ ); |
+ bool distillable = detector->Classify(derived); |
+ bool long_article = long_page->Classify(derived); |
int bucket = static_cast<unsigned>(features.isMobileFriendly) | |
(static_cast<unsigned>(distillable) << 1); |
@@ -78,7 +81,7 @@ bool IsDistillablePageAdaboost(WebDocument& doc, |
UMA_HISTOGRAM_ENUMERATION("DomDistiller.PageDistillableAfterParsing", |
bucket, 4); |
} |
- return distillable && (!features.isMobileFriendly); |
+ return distillable && long_article && (!features.isMobileFriendly); |
} |
bool IsDistillablePage(WebDocument& doc, bool is_last) { |
@@ -88,8 +91,9 @@ bool IsDistillablePage(WebDocument& doc, bool is_last) { |
case DistillerHeuristicsType::OG_ARTICLE: |
return doc.distillabilityFeatures().openGraph; |
case DistillerHeuristicsType::ADABOOST_MODEL: |
- return IsDistillablePageAdaboost( |
- doc, DistillablePageDetector::GetNewModel(), is_last); |
+ return IsDistillablePageAdaboost(doc, |
+ DistillablePageDetector::GetNewModel(), |
+ DistillablePageDetector::GetLongPageModel(), is_last); |
case DistillerHeuristicsType::NONE: |
default: |
return false; |