Chromium Code Reviews| Index: components/dom_distiller/content/renderer/distillability_agent.cc |
| diff --git a/components/dom_distiller/content/renderer/distillability_agent.cc b/components/dom_distiller/content/renderer/distillability_agent.cc |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..6ddc0512f41da4d4d464745af9073b617b7d1449 |
| --- /dev/null |
| +++ b/components/dom_distiller/content/renderer/distillability_agent.cc |
| @@ -0,0 +1,129 @@ |
| +// Copyright 2015 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +#include "components/dom_distiller/content/common/distiller_messages.h" |
| +#include "components/dom_distiller/content/renderer/distillability_agent.h" |
| +#include "components/dom_distiller/core/distillable_page_detector.h" |
| +#include "components/dom_distiller/core/experiments.h" |
| +#include "components/dom_distiller/core/page_features.h" |
| +#include "components/dom_distiller/core/url_utils.h" |
| +#include "content/public/renderer/render_frame.h" |
| + |
| +#include "third_party/WebKit/public/platform/WebDistillability.h" |
| +#include "third_party/WebKit/public/web/WebDocument.h" |
| +#include "third_party/WebKit/public/web/WebElement.h" |
| +#include "third_party/WebKit/public/web/WebLocalFrame.h" |
| +#include "third_party/WebKit/public/web/WebNode.h" |
| +#include "third_party/WebKit/public/web/WebNodeList.h" |
| + |
| +namespace dom_distiller { |
| + |
| +using namespace blink; |
| + |
| +namespace { |
| + |
| +// Returns whether a distillability update has to be sent back to the browser |
| +// for the current layout event. is_loaded is true when the event is |
| +// FinishedLoading and false when it is FinishedParsing (see the caller, |
| +// DistillabilityAgent::DidMeaningfulLayout). |
| +// The number of updates per page can be from 0 to 2. See the tests in |
| +// "distillable_page_utils_browsertest.cc". |
| +// Most heuristics types only require one update, sent after parsing. |
| +// Adaboost is the only one doing a second update, which is after loading. |
| +bool needToUpdate(bool is_loaded) {

nyquist
2015/11/11 21:53:45
Nit: NeedToUpdate (and IsLast below)
wychen
2015/11/12 04:59:45
Done.
|
| + switch (GetDistillerHeuristicsType()) { |
| + // Single update: report on the parsing event, stay silent on load. |
| + case DistillerHeuristicsType::ALWAYS_TRUE: |
| + return !is_loaded; |
| + case DistillerHeuristicsType::OG_ARTICLE: |
| + return !is_loaded; |
| + // Two updates: report on both the parsing event and the load event. |
| + case DistillerHeuristicsType::ADABOOST_MODEL: |
| + return true; |
| + // NONE, or any type not listed above: never report. |
| + case DistillerHeuristicsType::NONE: |
| + default: |
| + return false; |
| + } |
| +} |
| + |
| +// Returns whether this update is the last one for the page. |
| +// is_loaded is true when the update is triggered by the FinishedLoading |
| +// event. Only ADABOOST_MODEL looks at it; every other heuristics type |
| +// reports each update as the last one. |
| +bool isLast(bool is_loaded) { |
| + switch (GetDistillerHeuristicsType()) { |
| + case DistillerHeuristicsType::ALWAYS_TRUE: |
| + return true; |
| + case DistillerHeuristicsType::OG_ARTICLE: |
| + return true; |
| + // Adaboost updates twice (see needToUpdate), so only the update sent |
| + // after loading is the last one. |
| + case DistillerHeuristicsType::ADABOOST_MODEL: |
| + return is_loaded;

nyquist
2015/11/11 21:53:45
Optional nit:
Could this just be:
###
if (GetDist
wychen
2015/11/12 04:59:45
Done.
|
| + case DistillerHeuristicsType::NONE: |
| + default: |
| + return true; |
| + } |
| +} |
| + |
| +// Classifies doc with the Adaboost model behind detector. |
| +// Returns false without consulting the model when the document URL does not |
| +// parse into a valid GURL or when the page's distillability features mark it |
| +// as mobile-friendly; otherwise returns the detector's verdict on the |
| +// features derived from the document. |
| +bool IsDistillablePageAdaboost(WebDocument& doc, |
| +                               const DistillablePageDetector* detector) { |
| +  WebDistillabilityFeatures page_features = doc.distillabilityFeatures(); |
| +  GURL page_url(doc.url()); |
| + |
| +  // The adaboost model is only applied to non-mobile pages, and it needs a |
| +  // valid URL as one of its inputs. |
| +  if (!page_url.is_valid() || page_features.isMobileFriendly) |
| +    return false; |
| + |
| +  return detector->Classify( |
| +      CalculateDerivedFeatures(page_features.openGraph, |
| +                               page_url, |
| +                               page_features.elementCount, |
| +                               page_features.anchorCount, |
| +                               page_features.formCount, |
| +                               page_features.mozScore, |
| +                               page_features.mozScoreAllSqrt, |
| +                               page_features.mozScoreAllLinear)); |
| +} |
| + |
| +// Evaluates doc with the currently configured heuristics type: |
| +//   ALWAYS_TRUE    - every page is distillable. |
| +//   OG_ARTICLE     - the openGraph distillability feature decides. |
| +//   ADABOOST_MODEL - the model returned by GetNewModel() decides. |
| +//   anything else  - (including NONE) not distillable. |
| +bool IsDistillablePage(WebDocument& doc) { |
| +  const DistillerHeuristicsType heuristics = GetDistillerHeuristicsType(); |
| + |
| +  if (heuristics == DistillerHeuristicsType::ALWAYS_TRUE) |
| +    return true; |
| + |
| +  if (heuristics == DistillerHeuristicsType::OG_ARTICLE) |
| +    return doc.distillabilityFeatures().openGraph; |
| + |
| +  if (heuristics == DistillerHeuristicsType::ADABOOST_MODEL) { |
| +    return IsDistillablePageAdaboost( |
| +        doc, DistillablePageDetector::GetNewModel()); |
| +  } |
| + |
| +  return false; |
| +} |
| + |
| +} // namespace |
| + |
| +// Constructs the agent for render_frame. The frame is forwarded to the |
| +// RenderFrameObserver base class; nothing else is initialized here. |
| +DistillabilityAgent::DistillabilityAgent( |
| + content::RenderFrame* render_frame) |
| + : RenderFrameObserver(render_frame) { |
| +} |
| + |
| +// Handles a meaningful-layout event. Only FinishedParsing and FinishedLoading |
| +// on the main frame are considered, and only for documents that have a body |
| +// and a URL accepted by url_utils::IsUrlDistillable(). When the active |
| +// heuristics want an update at this stage, a FrameHostMsg_Distillability |
| +// message carrying the distillability verdict and the "last update" flag is |
| +// sent. |
| +void DistillabilityAgent::DidMeaningfulLayout( |
| +    WebMeaningfulLayout layout_type) { |
| +  const bool parsing_done = |
| +      layout_type == WebMeaningfulLayout::FinishedParsing; |
| +  const bool loading_done = |
| +      layout_type == WebMeaningfulLayout::FinishedLoading; |
| +  // Every other layout stage is ignored. |
| +  if (!(parsing_done || loading_done)) |
| +    return; |
| + |
| +  DCHECK(render_frame()); |
| +  if (!render_frame()->IsMainFrame()) |
| +    return; |
| + |
| +  DCHECK(render_frame()->GetWebFrame()); |
| +  WebDocument document = render_frame()->GetWebFrame()->document(); |
| +  if (document.isNull()) |
| +    return; |
| +  if (document.body().isNull()) |
| +    return; |
| +  if (!url_utils::IsUrlDistillable(document.url())) |
| +    return; |
| + |
| +  // Some heuristics report only once (after parsing); skip the rest. |
| +  if (!needToUpdate(loading_done)) |
| +    return; |
| + |
| +  const bool distillable = IsDistillablePage(document); |
| +  const bool last_update = isLast(loading_done); |
| +  Send(new FrameHostMsg_Distillability(routing_id(), distillable, |
| +                                       last_update)); |
| +} |
| + |
| + |
| +// Empty destructor body: no cleanup is performed in this class. |
| +DistillabilityAgent::~DistillabilityAgent() {} |
| + |
| +} // namespace dom_distiller |