OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2015 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "components/dom_distiller/content/common/distiller_messages.h" | |
6 #include "components/dom_distiller/content/renderer/distillability_agent.h" | |
7 #include "components/dom_distiller/core/distillable_page_detector.h" | |
8 #include "components/dom_distiller/core/experiments.h" | |
9 #include "components/dom_distiller/core/page_features.h" | |
10 #include "components/dom_distiller/core/url_utils.h" | |
11 #include "content/public/renderer/render_frame.h" | |
12 | |
13 #include "third_party/WebKit/public/platform/WebDistillability.h" | |
14 #include "third_party/WebKit/public/web/WebDocument.h" | |
15 #include "third_party/WebKit/public/web/WebElement.h" | |
16 #include "third_party/WebKit/public/web/WebLocalFrame.h" | |
17 #include "third_party/WebKit/public/web/WebNode.h" | |
18 #include "third_party/WebKit/public/web/WebNodeList.h" | |
19 | |
20 namespace dom_distiller { | |
21 | |
22 using namespace blink; | |
23 | |
24 namespace { | |
25 | |
26 // Returns whether it is necessary to send updates back to the browser. | |
27 // The number of updates can be from 0 to 2. See the tests in | |
28 // "distillable_page_utils_browsertest.cc". | |
29 // Most heuristics types only require one update after parsing. | |
30 // Adaboost is the only one doing the second update, which is after loading. | |
31 bool needToUpdate(bool is_loaded) { | |
nyquist
2015/11/11 21:53:45
Nit: NeedToUpdate (and IsLast below)
wychen
2015/11/12 04:59:45
Done.
| |
32 switch (GetDistillerHeuristicsType()) { | |
33 case DistillerHeuristicsType::ALWAYS_TRUE: | |
34 return !is_loaded; | |
35 case DistillerHeuristicsType::OG_ARTICLE: | |
36 return !is_loaded; | |
37 case DistillerHeuristicsType::ADABOOST_MODEL: | |
38 return true; | |
39 case DistillerHeuristicsType::NONE: | |
40 default: | |
41 return false; | |
42 } | |
43 } | |
44 | |
45 // Returns whether this update is the last one for the page. | |
46 bool isLast(bool is_loaded) { | |
47 switch (GetDistillerHeuristicsType()) { | |
48 case DistillerHeuristicsType::ALWAYS_TRUE: | |
49 return true; | |
50 case DistillerHeuristicsType::OG_ARTICLE: | |
51 return true; | |
52 case DistillerHeuristicsType::ADABOOST_MODEL: | |
53 return is_loaded; | |
nyquist
2015/11/11 21:53:45
Optional nit:
Could this just be:
###
if (GetDist
wychen
2015/11/12 04:59:45
Done.
| |
54 case DistillerHeuristicsType::NONE: | |
55 default: | |
56 return true; | |
57 } | |
58 } | |
59 | |
60 bool IsDistillablePageAdaboost(WebDocument& doc, | |
61 const DistillablePageDetector* detector) { | |
62 WebDistillabilityFeatures features = doc.distillabilityFeatures(); | |
63 GURL parsed_url(doc.url()); | |
64 if (!parsed_url.is_valid()) { | |
65 return false; | |
66 } | |
67 // The adaboost model is only applied to non-mobile pages. | |
68 if (features.isMobileFriendly) { | |
69 return false; | |
70 } | |
71 return detector->Classify(CalculateDerivedFeatures( | |
72 features.openGraph, | |
73 parsed_url, | |
74 features.elementCount, | |
75 features.anchorCount, | |
76 features.formCount, | |
77 features.mozScore, | |
78 features.mozScoreAllSqrt, | |
79 features.mozScoreAllLinear | |
80 )); | |
81 } | |
82 | |
83 bool IsDistillablePage(WebDocument& doc) { | |
84 switch (GetDistillerHeuristicsType()) { | |
85 case DistillerHeuristicsType::ALWAYS_TRUE: | |
86 return true; | |
87 case DistillerHeuristicsType::OG_ARTICLE: | |
88 return doc.distillabilityFeatures().openGraph; | |
89 case DistillerHeuristicsType::ADABOOST_MODEL: | |
90 return IsDistillablePageAdaboost( | |
91 doc, DistillablePageDetector::GetNewModel()); | |
92 case DistillerHeuristicsType::NONE: | |
93 default: | |
94 return false; | |
95 } | |
96 } | |
97 | |
98 } // namespace | |
99 | |
100 DistillabilityAgent::DistillabilityAgent( | |
101 content::RenderFrame* render_frame) | |
102 : RenderFrameObserver(render_frame) { | |
103 } | |
104 | |
105 void DistillabilityAgent::DidMeaningfulLayout( | |
106 WebMeaningfulLayout layout_type) { | |
107 if (layout_type != WebMeaningfulLayout::FinishedParsing && | |
108 layout_type != WebMeaningfulLayout::FinishedLoading) { | |
109 return; | |
110 } | |
111 | |
112 DCHECK(render_frame()); | |
113 if (!render_frame()->IsMainFrame()) return; | |
114 DCHECK(render_frame()->GetWebFrame()); | |
115 WebDocument doc = render_frame()->GetWebFrame()->document(); | |
116 if (doc.isNull() || doc.body().isNull()) return; | |
117 if (!url_utils::IsUrlDistillable(doc.url())) return; | |
118 | |
119 bool is_loaded = layout_type == WebMeaningfulLayout::FinishedLoading; | |
120 if (!needToUpdate(is_loaded)) return; | |
121 | |
122 Send(new FrameHostMsg_Distillability(routing_id(), | |
123 IsDistillablePage(doc), isLast(is_loaded))); | |
124 } | |
125 | |
126 | |
127 DistillabilityAgent::~DistillabilityAgent() {} | |
128 | |
129 } // namespace dom_distiller | |
OLD | NEW |