| Index: components/dom_distiller/content/renderer/distillability_agent.cc
|
| diff --git a/components/dom_distiller/content/renderer/distillability_agent.cc b/components/dom_distiller/content/renderer/distillability_agent.cc
|
| index 0534a99cc30b556e6c3fb07fb26646bce12c783e..4ab5ded4426f2c354cbfd020f280fa541d8444cb 100644
|
| --- a/components/dom_distiller/content/renderer/distillability_agent.cc
|
| +++ b/components/dom_distiller/content/renderer/distillability_agent.cc
|
| @@ -2,12 +2,14 @@
|
| // Use of this source code is governed by a BSD-style license that can be
|
| // found in the LICENSE file.
|
|
|
| +#include "base/json/json_writer.h"
|
| #include "base/metrics/histogram.h"
|
| #include "base/strings/string_util.h"
|
|
|
| #include "components/dom_distiller/content/common/distillability_service.mojom.h"
|
| #include "components/dom_distiller/content/renderer/distillability_agent.h"
|
| #include "components/dom_distiller/core/distillable_page_detector.h"
|
| +#include "components/dom_distiller/core/dom_distiller_features.h"
|
| #include "components/dom_distiller/core/experiments.h"
|
| #include "components/dom_distiller/core/page_features.h"
|
| #include "components/dom_distiller/core/url_utils.h"
|
| @@ -73,10 +75,58 @@ bool IsBlacklisted(const GURL& url) {
|
| return false;
|
| }
|
|
|
| +void DumpDistillability(content::RenderFrame* render_frame,  // Builds a JSON dump of the arguments below and logs it to this frame's console.
|
| + const WebDistillabilityFeatures& features,  // Raw features; the fields read below are copied into the "features" sub-dictionary.
|
| + const std::vector<double>& derived,  // Emitted in index order as the "derived_features" list.
|
| + double score,  // Written under key "score".
|
| + bool distillable,  // Written under key "distillable".
|
| + double long_score,  // Written under key "long_score".
|
| + bool long_page,  // Written under key "long_page".
|
| + bool blacklisted) {  // Written under key "blacklisted".
|
| + {  // NOTE(review): this inner scope wraps the entire body and has no visible purpose.
|
| + base::DictionaryValue dict;  // Root object that gets serialized.
|
| + std::string msg;  // Receives the serialized JSON, then the final console text.
|
| +// Copy each raw feature field into a nested dictionary, keyed by the field's name.
|
| + std::unique_ptr<base::DictionaryValue> raw_features(
|
| + new base::DictionaryValue);
|
| + raw_features->SetInteger("isMobileFriendly", features.isMobileFriendly);
|
| + raw_features->SetInteger("openGraph", features.openGraph);
|
| + raw_features->SetInteger("elementCount", features.elementCount);
|
| + raw_features->SetInteger("anchorCount", features.anchorCount);
|
| + raw_features->SetInteger("formCount", features.formCount);
|
| + raw_features->SetInteger("textInputCount", features.textInputCount);
|
| + raw_features->SetInteger("passwordInputCount", features.passwordInputCount);
|
| + raw_features->SetDouble("mozScore", features.mozScore);
|
| + raw_features->SetDouble("mozScoreAllSqrt", features.mozScoreAllSqrt);
|
| + raw_features->SetDouble("mozScoreAllLinear", features.mozScoreAllLinear);
|
| + dict.Set("features", std::move(raw_features));  // Ownership of the nested dictionary moves into |dict|.
|
| +// Append every derived feature value, preserving the vector's order.
|
| + std::unique_ptr<base::ListValue> derived_features(new base::ListValue());
|
| + for (unsigned i = 0; i < derived.size(); i++) {
|
| + derived_features->AppendDouble(derived[i]);
|
| + }
|
| + dict.Set("derived_features", std::move(derived_features));  // Ownership of the list moves into |dict|.
|
| +// Top-level results. NOTE(review): the three bool arguments are stored with SetInteger, so presumably they serialize as 0/1 rather than true/false - confirm this is intended.
|
| + dict.SetDouble("score", score);
|
| + dict.SetInteger("distillable", distillable);
|
| + dict.SetDouble("long_score", long_score);
|
| + dict.SetInteger("long_page", long_page);
|
| + dict.SetInteger("blacklisted", blacklisted);
|
| + base::JSONWriter::WriteWithOptions(dict,
|
| + base::JSONWriter::OPTIONS_PRETTY_PRINT, &msg);  // Serializes |dict| into |msg| with the pretty-print option.
|
| + msg = "adaboost_classification = " + msg;  // Prefix the JSON with a fixed label.
|
| +// Emit the labelled dump to the frame's console at DEBUG level.
|
| + render_frame->AddMessageToConsole(content::CONSOLE_MESSAGE_LEVEL_DEBUG,
|
| + msg);  // |render_frame| is dereferenced without a null check; callers must pass a valid frame.
|
| + }
|
| +}
|
| +
|
| bool IsDistillablePageAdaboost(WebDocument& doc,
|
| const DistillablePageDetector* detector,
|
| const DistillablePageDetector* long_page,
|
| - bool is_last) {
|
| + bool is_last,
|
| + content::RenderFrame* render_frame) {
|
| + bool isDevMode = IsDistillabilityDevSet();
|
| WebDistillabilityFeatures features = doc.distillabilityFeatures();
|
| GURL parsed_url(doc.url());
|
| if (!parsed_url.is_valid()) {
|
| @@ -98,6 +148,11 @@ bool IsDistillablePageAdaboost(WebDocument& doc,
|
| bool long_article = long_score > 0;
|
| bool blacklisted = IsBlacklisted(parsed_url);
|
|
|
| + if (isDevMode) {
|
| + DumpDistillability(render_frame, features, derived, score, distillable,
|
| + long_score, long_article, blacklisted);
|
| + }
|
| +
|
| if (!features.isMobileFriendly) {
|
| int score_int = std::round(score * 100);
|
| if (score > 0) {
|
| @@ -157,7 +212,8 @@ bool IsDistillablePageAdaboost(WebDocument& doc,
|
| return distillable && long_article;
|
| }
|
|
|
| -bool IsDistillablePage(WebDocument& doc, bool is_last) {
|
| +bool IsDistillablePage(WebDocument& doc, bool is_last,
|
| + content::RenderFrame* render_frame) {
|
| switch (GetDistillerHeuristicsType()) {
|
| case DistillerHeuristicsType::ALWAYS_TRUE:
|
| return true;
|
| @@ -166,7 +222,7 @@ bool IsDistillablePage(WebDocument& doc, bool is_last) {
|
| case DistillerHeuristicsType::ADABOOST_MODEL:
|
| return IsDistillablePageAdaboost(doc,
|
| DistillablePageDetector::GetNewModel(),
|
| - DistillablePageDetector::GetLongPageModel(), is_last);
|
| + DistillablePageDetector::GetLongPageModel(), is_last, render_frame);
|
| case DistillerHeuristicsType::NONE:
|
| default:
|
| return false;
|
| @@ -204,7 +260,7 @@ void DistillabilityAgent::DidMeaningfulLayout(
|
| &distillability_service);
|
| DCHECK(distillability_service);
|
| distillability_service->NotifyIsDistillable(
|
| - IsDistillablePage(doc, is_last), is_last);
|
| + IsDistillablePage(doc, is_last, render_frame()), is_last);
|
| }
|
|
|
| DistillabilityAgent::~DistillabilityAgent() {}
|
|
|