Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(198)

Unified Diff: components/dom_distiller/content/renderer/distillability_agent.cc

Issue 1434433002: Pass distillability updates from renderer to browser (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@model
Patch Set: merge depend, and update interface again Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: components/dom_distiller/content/renderer/distillability_agent.cc
diff --git a/components/dom_distiller/content/renderer/distillability_agent.cc b/components/dom_distiller/content/renderer/distillability_agent.cc
new file mode 100644
index 0000000000000000000000000000000000000000..6ddc0512f41da4d4d464745af9073b617b7d1449
--- /dev/null
+++ b/components/dom_distiller/content/renderer/distillability_agent.cc
@@ -0,0 +1,129 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/dom_distiller/content/common/distiller_messages.h"
+#include "components/dom_distiller/content/renderer/distillability_agent.h"
+#include "components/dom_distiller/core/distillable_page_detector.h"
+#include "components/dom_distiller/core/experiments.h"
+#include "components/dom_distiller/core/page_features.h"
+#include "components/dom_distiller/core/url_utils.h"
+#include "content/public/renderer/render_frame.h"
+
+#include "third_party/WebKit/public/platform/WebDistillability.h"
+#include "third_party/WebKit/public/web/WebDocument.h"
+#include "third_party/WebKit/public/web/WebElement.h"
+#include "third_party/WebKit/public/web/WebLocalFrame.h"
+#include "third_party/WebKit/public/web/WebNode.h"
+#include "third_party/WebKit/public/web/WebNodeList.h"
+
+namespace dom_distiller {
+
+using namespace blink;
+
+namespace {
+
+// Decides whether a distillability update should be sent to the browser for
+// the current layout milestone. |is_loaded| is false for the update made
+// after parsing and true for the one made after loading.
+// A page produces between 0 and 2 updates depending on the heuristics type;
+// see the tests in "distillable_page_utils_browsertest.cc".
+//  - ALWAYS_TRUE / OG_ARTICLE: one update, after parsing only.
+//  - ADABOOST_MODEL: one update after parsing and a second after loading.
+//  - NONE or any other value: no update at all.
+bool needToUpdate(bool is_loaded) {
nyquist 2015/11/11 21:53:45 Nit: NeedToUpdate (and IsLast below)
wychen 2015/11/12 04:59:45 Done.
+  switch (GetDistillerHeuristicsType()) {
+    case DistillerHeuristicsType::ADABOOST_MODEL:
+      // Adaboost is evaluated at both milestones.
+      return true;
+    case DistillerHeuristicsType::ALWAYS_TRUE:
+    case DistillerHeuristicsType::OG_ARTICLE:
+      // These only report once, at the parsing milestone.
+      return !is_loaded;
+    case DistillerHeuristicsType::NONE:
+    default:
+      return false;
+  }
+}
+
+// Tells whether the update being sent is the final one for this page.
+// Every heuristics type other than ADABOOST_MODEL is reported as final
+// regardless of |is_loaded|; for ADABOOST_MODEL only the update made after
+// loading (|is_loaded| == true) is final.
+bool isLast(bool is_loaded) {
+  if (GetDistillerHeuristicsType() != DistillerHeuristicsType::ADABOOST_MODEL)
+    return true;
+  return is_loaded;
nyquist 2015/11/11 21:53:45 Optional nit: Could this just be: ### if (GetDist
wychen 2015/11/12 04:59:45 Done.
+}
+
+// Classifies |doc| with |detector| using the distillability features the
+// document reports. Returns false without running the classifier when the
+// document URL does not parse as a valid GURL or when the features mark the
+// page as mobile-friendly. |detector| is dereferenced unconditionally.
+bool IsDistillablePageAdaboost(WebDocument& doc,
+                               const DistillablePageDetector* detector) {
+  WebDistillabilityFeatures features = doc.distillabilityFeatures();
+  GURL parsed_url(doc.url());
+  // The adaboost model is only applied to non-mobile pages with a valid URL.
+  if (!parsed_url.is_valid() || features.isMobileFriendly)
+    return false;
+
+  const auto derived_features = CalculateDerivedFeatures(
+      features.openGraph,
+      parsed_url,
+      features.elementCount,
+      features.anchorCount,
+      features.formCount,
+      features.mozScore,
+      features.mozScoreAllSqrt,
+      features.mozScoreAllLinear);
+  return detector->Classify(derived_features);
+}
+
+// Evaluates the distillability of |doc| according to the active heuristics
+// type:
+//  - ALWAYS_TRUE: every page is distillable.
+//  - OG_ARTICLE: the result is the document's openGraph feature.
+//  - ADABOOST_MODEL: the result comes from IsDistillablePageAdaboost() with
+//    the detector returned by DistillablePageDetector::GetNewModel().
+//  - NONE or any other value: never distillable.
+bool IsDistillablePage(WebDocument& doc) {
+  const auto heuristics = GetDistillerHeuristicsType();
+  if (heuristics == DistillerHeuristicsType::ALWAYS_TRUE)
+    return true;
+  if (heuristics == DistillerHeuristicsType::OG_ARTICLE)
+    return doc.distillabilityFeatures().openGraph;
+  if (heuristics == DistillerHeuristicsType::ADABOOST_MODEL) {
+    return IsDistillablePageAdaboost(doc,
+                                     DistillablePageDetector::GetNewModel());
+  }
+  // DistillerHeuristicsType::NONE and any unrecognized value.
+  return false;
+}
+
+} // namespace
+
+// Forwards |render_frame| to the RenderFrameObserver base class; the agent
+// keeps no additional state of its own.
+DistillabilityAgent::DistillabilityAgent(content::RenderFrame* render_frame)
+    : RenderFrameObserver(render_frame) {}
+
+// Reacts to the FinishedParsing and FinishedLoading layout milestones; any
+// other |layout_type| is ignored. For the main frame only, and only when the
+// document has a body and a distillable URL, sends the browser a
+// FrameHostMsg_Distillability message carrying the classification result and
+// a flag saying whether this is the last update for the page.
+void DistillabilityAgent::DidMeaningfulLayout(
+    WebMeaningfulLayout layout_type) {
+  const bool is_parsed = layout_type == WebMeaningfulLayout::FinishedParsing;
+  const bool is_loaded = layout_type == WebMeaningfulLayout::FinishedLoading;
+  if (!is_parsed && !is_loaded)
+    return;
+
+  DCHECK(render_frame());
+  if (!render_frame()->IsMainFrame())
+    return;
+
+  DCHECK(render_frame()->GetWebFrame());
+  WebDocument doc = render_frame()->GetWebFrame()->document();
+  if (doc.isNull() || doc.body().isNull())
+    return;
+  if (!url_utils::IsUrlDistillable(doc.url()))
+    return;
+
+  // Some heuristics types skip one or both milestones.
+  if (!needToUpdate(is_loaded))
+    return;
+
+  const bool distillable = IsDistillablePage(doc);
+  const bool last_update = isLast(is_loaded);
+  Send(new FrameHostMsg_Distillability(routing_id(),
+                                       distillable, last_update));
+}
+
+
+// Nothing to release here; the definition is kept out of line.
+DistillabilityAgent::~DistillabilityAgent() = default;
+
+} // namespace dom_distiller

Powered by Google App Engine
This is Rietveld 408576698