| Index: components/dom_distiller/content/renderer/distillability_agent.cc
|
| diff --git a/components/dom_distiller/content/renderer/distillability_agent.cc b/components/dom_distiller/content/renderer/distillability_agent.cc
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..eab0e0a66a9b7c8c5ce571ec63c9348fc88494be
|
| --- /dev/null
|
| +++ b/components/dom_distiller/content/renderer/distillability_agent.cc
|
| @@ -0,0 +1,120 @@
|
| +// Copyright 2015 The Chromium Authors. All rights reserved.
|
| +// Use of this source code is governed by a BSD-style license that can be
|
| +// found in the LICENSE file.
|
| +
|
| +#include "components/dom_distiller/content/common/distiller_messages.h"
|
| +#include "components/dom_distiller/content/renderer/distillability_agent.h"
|
| +#include "components/dom_distiller/core/distillable_page_detector.h"
|
| +#include "components/dom_distiller/core/experiments.h"
|
| +#include "components/dom_distiller/core/page_features.h"
|
| +#include "components/dom_distiller/core/url_utils.h"
|
| +#include "content/public/renderer/render_frame.h"
|
| +
|
| +#include "third_party/WebKit/public/platform/WebDistillability.h"
|
| +#include "third_party/WebKit/public/web/WebDocument.h"
|
| +#include "third_party/WebKit/public/web/WebElement.h"
|
| +#include "third_party/WebKit/public/web/WebLocalFrame.h"
|
| +
|
| +namespace dom_distiller {
|
| +
|
| +using namespace blink;
|
| +
|
| +namespace {
|
| +
|
| +// Decides whether a distillability update has to be sent to the browser for
|
| +// the current layout milestone. A page yields between 0 and 2 updates; see
|
| +// the tests in "distillable_page_utils_browsertest.cc".
|
| +//   - ADABOOST_MODEL: always updates, i.e. both before and after loading.
|
| +//   - ALWAYS_TRUE, OG_ARTICLE: updates only while the page is not yet loaded.
|
| +//   - NONE or any other type: never updates.
|
| +bool NeedToUpdate(bool is_loaded) {
|
| +  const DistillerHeuristicsType heuristics = GetDistillerHeuristicsType();
|
| +
|
| +  // Adaboost is the only heuristic that reports a second time after loading.
|
| +  if (heuristics == DistillerHeuristicsType::ADABOOST_MODEL)
|
| +    return true;
|
| +
|
| +  const bool single_update_heuristic =
|
| +      heuristics == DistillerHeuristicsType::ALWAYS_TRUE ||
|
| +      heuristics == DistillerHeuristicsType::OG_ARTICLE;
|
| +  if (single_update_heuristic)
|
| +    return !is_loaded;
|
| +
|
| +  // DistillerHeuristicsType::NONE and unrecognized values.
|
| +  return false;
|
| +}
|
| +
|
| +// Tells whether the update being sent is the final one for the page. Under
|
| +// the Adaboost heuristic that is only the case once |is_loaded| is true;
|
| +// every other heuristic type treats any update as the last one.
|
| +bool IsLast(bool is_loaded) {
|
| +  const bool is_adaboost =
|
| +      GetDistillerHeuristicsType() == DistillerHeuristicsType::ADABOOST_MODEL;
|
| +  return !is_adaboost || is_loaded;
|
| +}
|
| +
|
| +// Runs |detector| over the distillability features of |doc|.
|
| +// Returns false without classifying when the document URL does not parse as
|
| +// a valid GURL or when the page is reported as mobile-friendly; otherwise
|
| +// returns the result of |detector|->Classify() on the derived features.
|
| +bool IsDistillablePageAdaboost(WebDocument& doc,
|
| +                               const DistillablePageDetector* detector) {
|
| +  WebDistillabilityFeatures page_features = doc.distillabilityFeatures();
|
| +  GURL page_url(doc.url());
|
| +
|
| +  // An invalid URL is rejected first; after that, the adaboost model is only
|
| +  // applied to non-mobile pages.
|
| +  if (!page_url.is_valid() || page_features.isMobileFriendly)
|
| +    return false;
|
| +
|
| +  const auto& derived_features =
|
| +      CalculateDerivedFeatures(page_features.openGraph,
|
| +                               page_url,
|
| +                               page_features.elementCount,
|
| +                               page_features.anchorCount,
|
| +                               page_features.formCount,
|
| +                               page_features.mozScore,
|
| +                               page_features.mozScoreAllSqrt,
|
| +                               page_features.mozScoreAllLinear);
|
| +  return detector->Classify(derived_features);
|
| +}
|
| +
|
| +// Evaluates |doc| with the currently selected distiller heuristic:
|
| +//   - ALWAYS_TRUE: every page counts as distillable.
|
| +//   - OG_ARTICLE: the document's openGraph distillability feature decides.
|
| +//   - ADABOOST_MODEL: the model from DistillablePageDetector::GetNewModel()
|
| +//     decides.
|
| +//   - NONE or any other type: never distillable.
|
| +bool IsDistillablePage(WebDocument& doc) {
|
| +  const DistillerHeuristicsType heuristics = GetDistillerHeuristicsType();
|
| +
|
| +  if (heuristics == DistillerHeuristicsType::ALWAYS_TRUE)
|
| +    return true;
|
| +
|
| +  if (heuristics == DistillerHeuristicsType::OG_ARTICLE)
|
| +    return doc.distillabilityFeatures().openGraph;
|
| +
|
| +  if (heuristics == DistillerHeuristicsType::ADABOOST_MODEL) {
|
| +    const DistillablePageDetector* model =
|
| +        DistillablePageDetector::GetNewModel();
|
| +    return IsDistillablePageAdaboost(doc, model);
|
| +  }
|
| +
|
| +  // DistillerHeuristicsType::NONE and unrecognized values.
|
| +  return false;
|
| +}
|
| +
|
| +} // namespace
|
| +
|
| +// Forwards |render_frame| to the RenderFrameObserver base-class constructor;
|
| +// the agent has no further state to initialize here.
|
| +DistillabilityAgent::DistillabilityAgent(content::RenderFrame* render_frame)
|
| +    : RenderFrameObserver(render_frame) {}
|
| +
|
| +// Reacts to the FinishedParsing and FinishedLoading layout milestones by
|
| +// sending a FrameHostMsg_Distillability message that carries the
|
| +// distillability verdict for the document and whether this is the last
|
| +// update. Nothing is sent for other layout types, for non-main frames, for
|
| +// documents that are null or have a null body, for URLs rejected by
|
| +// url_utils::IsUrlDistillable(), or when NeedToUpdate() declines.
|
| +void DistillabilityAgent::DidMeaningfulLayout(WebMeaningfulLayout layout_type) {
|
| +  const bool finished_parsing =
|
| +      layout_type == WebMeaningfulLayout::FinishedParsing;
|
| +  const bool is_loaded = layout_type == WebMeaningfulLayout::FinishedLoading;
|
| +  if (!finished_parsing && !is_loaded)
|
| +    return;
|
| +
|
| +  content::RenderFrame* const frame = render_frame();
|
| +  DCHECK(frame);
|
| +  if (!frame->IsMainFrame())
|
| +    return;
|
| +
|
| +  DCHECK(frame->GetWebFrame());
|
| +  WebDocument doc = frame->GetWebFrame()->document();
|
| +  if (doc.isNull() || doc.body().isNull())
|
| +    return;
|
| +  if (!url_utils::IsUrlDistillable(doc.url()))
|
| +    return;
|
| +
|
| +  if (!NeedToUpdate(is_loaded))
|
| +    return;
|
| +
|
| +  const bool is_distillable = IsDistillablePage(doc);
|
| +  const bool is_last_update = IsLast(is_loaded);
|
| +  Send(new FrameHostMsg_Distillability(routing_id(), is_distillable,
|
| +                                       is_last_update));
|
| +}
|
| +
|
| +
|
| +// The destructor performs no work of its own.
|
| +DistillabilityAgent::~DistillabilityAgent() = default;
|
| +
|
| +} // namespace dom_distiller
|
|
|