Chromium Code Reviews| Index: components/dom_distiller/content/renderer/distillability_agent.cc |
| diff --git a/components/dom_distiller/content/renderer/distillability_agent.cc b/components/dom_distiller/content/renderer/distillability_agent.cc |
| index a3119f469ccbb831763783ba317ee7aeb9ec2fcc..cfe9bb037c67368e7a5eda09e694919eebd4fda9 100644 |
| --- a/components/dom_distiller/content/renderer/distillability_agent.cc |
| +++ b/components/dom_distiller/content/renderer/distillability_agent.cc |
| @@ -3,11 +3,13 @@ |
| // found in the LICENSE file. |
| #include "base/metrics/histogram.h" |
| +#include "base/strings/string_number_conversions.h" |
| #include "base/strings/string_util.h" |
| #include "components/dom_distiller/content/common/distillability_service.mojom.h" |
| #include "components/dom_distiller/content/renderer/distillability_agent.h" |
| #include "components/dom_distiller/core/distillable_page_detector.h" |
| +#include "components/dom_distiller/core/dom_distiller_features.h" |
| #include "components/dom_distiller/core/experiments.h" |
| #include "components/dom_distiller/core/page_features.h" |
| #include "components/dom_distiller/core/url_utils.h" |
| @@ -65,11 +67,46 @@ bool IsBlacklisted(const GURL& url) { |
| return false; |
| } |
| +std::string DoubleToString(double v) { |
| + // base::DoubleToString() can return values like ".1" (no leading zero), |
| + // which Python's JSON parser rejects as invalid. |
| + std::stringstream ss; |
|
esprehn
2016/05/23 20:19:42
This is terrible; can we get an argument added to…
wychen
2016/05/23 21:40:24
JSON writer takes care of this.
|
| + ss << v; |
| + return ss.str(); |
| +} |
| + |
| +void dumpDistillability(content::RenderFrame* render_frame, |
|
esprehn
2016/05/23 20:19:42
"Dump", I think? Your function name is wrong.
wychen
2016/05/23 21:40:24
Done.
|
| + const std::vector<double>& derived, |
| + double score, |
| + bool distillable, |
| + double long_score, |
| + bool long_page, |
| + bool blacklisted) { |
| + std::string msg = "adaboost_classification = {\n"; |
| + msg += "\"derived_features\": ["; |
| + for (unsigned i = 0; i < derived.size(); i++) { |
| + if (i != 0) { |
| + msg += ", "; |
| + } |
| + msg += DoubleToString(derived[i]); |
| + } |
| + msg += "],\n"; |
| + msg += "\"score\": " + DoubleToString(score) + ",\n"; |
| + msg += "\"distillable\": " + base::IntToString(distillable) + ",\n"; |
| + msg += "\"long_score\": " + DoubleToString(long_score) + ",\n"; |
| + msg += "\"long_page\": " + base::IntToString(long_page) + ",\n"; |
|
esprehn
2016/05/23 20:19:42
Can you use base::Value and base::JSONWriter::Write…
wychen
2016/05/23 21:40:24
Awesome idea! Done. The key order is sorted though
|
| + msg += "\"blacklisted\": " + base::IntToString(blacklisted) + "\n"; |
| + msg += "}"; |
| + render_frame->AddMessageToConsole(content::CONSOLE_MESSAGE_LEVEL_DEBUG, msg); |
| +} |
| + |
| bool IsDistillablePageAdaboost(WebDocument& doc, |
| const DistillablePageDetector* detector, |
| const DistillablePageDetector* long_page, |
| - bool is_last) { |
| - WebDistillabilityFeatures features = doc.distillabilityFeatures(); |
| + bool is_last, |
| + content::RenderFrame* render_frame) { |
| + bool isDevMode = IsDistillabilityDevSet(); |
| + WebDistillabilityFeatures features = doc.distillabilityFeatures(isDevMode); |
| GURL parsed_url(doc.url()); |
| if (!parsed_url.is_valid()) { |
| return false; |
| @@ -84,10 +121,17 @@ bool IsDistillablePageAdaboost(WebDocument& doc, |
| features.mozScoreAllSqrt, |
| features.mozScoreAllLinear |
| ); |
| - bool distillable = detector->Classify(derived); |
| - bool long_article = long_page->Classify(derived); |
| + double score = detector->Score(derived) - detector->GetThreshold(); |
| + double long_score = long_page->Score(derived) - long_page->GetThreshold(); |
| + bool distillable = score > 0; |
| + bool long_article = long_score > 0; |
| bool blacklisted = IsBlacklisted(parsed_url); |
| + if (isDevMode) { |
| + dumpDistillability(render_frame, derived, score, distillable, long_score, |
| + long_article, blacklisted); |
| + } |
| + |
| int bucket = static_cast<unsigned>(features.isMobileFriendly) | |
| (static_cast<unsigned>(distillable) << 1); |
| if (is_last) { |
| @@ -107,16 +151,17 @@ bool IsDistillablePageAdaboost(WebDocument& doc, |
| return distillable && long_article; |
| } |
| -bool IsDistillablePage(WebDocument& doc, bool is_last) { |
| +bool IsDistillablePage(WebDocument& doc, bool is_last, |
| + content::RenderFrame* render_frame) { |
| switch (GetDistillerHeuristicsType()) { |
| case DistillerHeuristicsType::ALWAYS_TRUE: |
| return true; |
| case DistillerHeuristicsType::OG_ARTICLE: |
| - return doc.distillabilityFeatures().openGraph; |
| + return doc.distillabilityFeatures(false).openGraph; |
| case DistillerHeuristicsType::ADABOOST_MODEL: |
| return IsDistillablePageAdaboost(doc, |
| DistillablePageDetector::GetNewModel(), |
| - DistillablePageDetector::GetLongPageModel(), is_last); |
| + DistillablePageDetector::GetLongPageModel(), is_last, render_frame); |
| case DistillerHeuristicsType::NONE: |
| default: |
| return false; |
| @@ -154,7 +199,7 @@ void DistillabilityAgent::DidMeaningfulLayout( |
| mojo::GetProxy(&distillability_service)); |
| DCHECK(distillability_service); |
| distillability_service->NotifyIsDistillable( |
| - IsDistillablePage(doc, is_last), is_last); |
| + IsDistillablePage(doc, is_last, render_frame()), is_last); |
| } |
| DistillabilityAgent::~DistillabilityAgent() {} |