Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(306)

Side by Side Diff: components/dom_distiller/content/renderer/distillability_agent.cc

Issue 1434433002: Pass distillability updates from renderer to browser (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@model
Patch Set: merge depend, and update interface again Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/dom_distiller/content/common/distiller_messages.h"
6 #include "components/dom_distiller/content/renderer/distillability_agent.h"
7 #include "components/dom_distiller/core/distillable_page_detector.h"
8 #include "components/dom_distiller/core/experiments.h"
9 #include "components/dom_distiller/core/page_features.h"
10 #include "components/dom_distiller/core/url_utils.h"
11 #include "content/public/renderer/render_frame.h"
12
13 #include "third_party/WebKit/public/platform/WebDistillability.h"
14 #include "third_party/WebKit/public/web/WebDocument.h"
15 #include "third_party/WebKit/public/web/WebElement.h"
16 #include "third_party/WebKit/public/web/WebLocalFrame.h"
17 #include "third_party/WebKit/public/web/WebNode.h"
18 #include "third_party/WebKit/public/web/WebNodeList.h"
19
20 namespace dom_distiller {
21
22 using namespace blink;
23
24 namespace {
25
26 // Returns whether it is necessary to send updates back to the browser.
27 // The number of updates can be from 0 to 2. See the tests in
28 // "distillable_page_utils_browsertest.cc".
29 // Most heuristics types only require one update after parsing.
30 // Adaboost is the only one doing the second update, which is after loading.
31 bool needToUpdate(bool is_loaded) {
nyquist 2015/11/11 21:53:45 Nit: NeedToUpdate (and IsLast below)
wychen 2015/11/12 04:59:45 Done.
32 switch (GetDistillerHeuristicsType()) {
33 case DistillerHeuristicsType::ALWAYS_TRUE:
34 return !is_loaded;
35 case DistillerHeuristicsType::OG_ARTICLE:
36 return !is_loaded;
37 case DistillerHeuristicsType::ADABOOST_MODEL:
38 return true;
39 case DistillerHeuristicsType::NONE:
40 default:
41 return false;
42 }
43 }
44
45 // Returns whether this update is the last one for the page.
46 bool isLast(bool is_loaded) {
47 switch (GetDistillerHeuristicsType()) {
48 case DistillerHeuristicsType::ALWAYS_TRUE:
49 return true;
50 case DistillerHeuristicsType::OG_ARTICLE:
51 return true;
52 case DistillerHeuristicsType::ADABOOST_MODEL:
53 return is_loaded;
nyquist 2015/11/11 21:53:45 Optional nit: Could this just be: ### if (GetDist
wychen 2015/11/12 04:59:45 Done.
54 case DistillerHeuristicsType::NONE:
55 default:
56 return true;
57 }
58 }
59
60 bool IsDistillablePageAdaboost(WebDocument& doc,
61 const DistillablePageDetector* detector) {
62 WebDistillabilityFeatures features = doc.distillabilityFeatures();
63 GURL parsed_url(doc.url());
64 if (!parsed_url.is_valid()) {
65 return false;
66 }
67 // The adaboost model is only applied to non-mobile pages.
68 if (features.isMobileFriendly) {
69 return false;
70 }
71 return detector->Classify(CalculateDerivedFeatures(
72 features.openGraph,
73 parsed_url,
74 features.elementCount,
75 features.anchorCount,
76 features.formCount,
77 features.mozScore,
78 features.mozScoreAllSqrt,
79 features.mozScoreAllLinear
80 ));
81 }
82
83 bool IsDistillablePage(WebDocument& doc) {
84 switch (GetDistillerHeuristicsType()) {
85 case DistillerHeuristicsType::ALWAYS_TRUE:
86 return true;
87 case DistillerHeuristicsType::OG_ARTICLE:
88 return doc.distillabilityFeatures().openGraph;
89 case DistillerHeuristicsType::ADABOOST_MODEL:
90 return IsDistillablePageAdaboost(
91 doc, DistillablePageDetector::GetNewModel());
92 case DistillerHeuristicsType::NONE:
93 default:
94 return false;
95 }
96 }
97
98 } // namespace
99
100 DistillabilityAgent::DistillabilityAgent(
101 content::RenderFrame* render_frame)
102 : RenderFrameObserver(render_frame) {
103 }
104
105 void DistillabilityAgent::DidMeaningfulLayout(
106 WebMeaningfulLayout layout_type) {
107 if (layout_type != WebMeaningfulLayout::FinishedParsing &&
108 layout_type != WebMeaningfulLayout::FinishedLoading) {
109 return;
110 }
111
112 DCHECK(render_frame());
113 if (!render_frame()->IsMainFrame()) return;
114 DCHECK(render_frame()->GetWebFrame());
115 WebDocument doc = render_frame()->GetWebFrame()->document();
116 if (doc.isNull() || doc.body().isNull()) return;
117 if (!url_utils::IsUrlDistillable(doc.url())) return;
118
119 bool is_loaded = layout_type == WebMeaningfulLayout::FinishedLoading;
120 if (!needToUpdate(is_loaded)) return;
121
122 Send(new FrameHostMsg_Distillability(routing_id(),
123 IsDistillablePage(doc), isLast(is_loaded)));
124 }
125
126
127 DistillabilityAgent::~DistillabilityAgent() {}
128
129 } // namespace dom_distiller
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698