OLD | NEW |
---|---|
1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "base/metrics/histogram.h" | 5 #include "base/metrics/histogram.h" |
6 | 6 |
7 #include "components/dom_distiller/content/common/distiller_messages.h" | 7 #include "components/dom_distiller/content/common/distiller_messages.h" |
8 #include "components/dom_distiller/content/renderer/distillability_agent.h" | 8 #include "components/dom_distiller/content/renderer/distillability_agent.h" |
9 #include "components/dom_distiller/core/distillable_page_detector.h" | 9 #include "components/dom_distiller/core/distillable_page_detector.h" |
10 #include "components/dom_distiller/core/experiments.h" | 10 #include "components/dom_distiller/core/experiments.h" |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
45 // Returns whether this update is the last one for the page. | 45 // Returns whether this update is the last one for the page. |
// Under the ADABOOST_MODEL heuristics type the result simply mirrors
// |is_loaded|; for every other heuristics type the update is always reported
// as the last one. This function is identical in the OLD and NEW columns.
46 bool IsLast(bool is_loaded) { | 46 bool IsLast(bool is_loaded) { |
47 if (GetDistillerHeuristicsType() == DistillerHeuristicsType::ADABOOST_MODEL) | 47 if (GetDistillerHeuristicsType() == DistillerHeuristicsType::ADABOOST_MODEL) |
48 return is_loaded; | 48 return is_loaded; |
49 | 49 |
50 return true; | 50 return true; |
51 } | 51 } |
52 | 52 |
// Classifies |doc| using features derived from doc.distillabilityFeatures()
// and doc.url(), and records the outcome in a UMA histogram.
//   OLD: a single |detector| decides distillability.
//   NEW: the derived features are computed once and fed to both |detector|
//        and |long_page|; the page is reported distillable only when both
//        classifiers accept it.
// In both versions a mobile-friendly page is never reported as distillable,
// and an invalid URL returns false before any histogram sample is recorded.
// |is_last| only selects which histogram receives the sample.
// NOTE(review): |detector| and |long_page| are dereferenced without a null
// check -- confirm that the model getters used by the caller never return
// null.
53 bool IsDistillablePageAdaboost(WebDocument& doc, | 53 bool IsDistillablePageAdaboost(WebDocument& doc, |
54 const DistillablePageDetector* detector, | 54 const DistillablePageDetector* detector, |
55 const DistillablePageDetector* long_page, | |
mdjones
2016/02/18 17:17:21
If we add any more of these, we can probably just
wychen
2016/02/18 21:47:23
Acknowledged. It's hard to say whether we need mor
| |
55 bool is_last) { | 56 bool is_last) { |
56 WebDistillabilityFeatures features = doc.distillabilityFeatures(); | 57 WebDistillabilityFeatures features = doc.distillabilityFeatures(); |
57 GURL parsed_url(doc.url()); | 58 GURL parsed_url(doc.url()); |
// Bail out early on an unparsable URL; no histogram is recorded on this path.
58 if (!parsed_url.is_valid()) { | 59 if (!parsed_url.is_valid()) { |
59 return false; | 60 return false; |
60 } | 61 } |
// NEW stores the derived feature vector in a local so that it can be passed
// to two classifiers; OLD passed the temporary straight into Classify().
61 bool distillable = detector->Classify(CalculateDerivedFeatures( | 62 std::vector<double> derived = CalculateDerivedFeatures( |
62 features.openGraph, | 63 features.openGraph, |
63 parsed_url, | 64 parsed_url, |
64 features.elementCount, | 65 features.elementCount, |
65 features.anchorCount, | 66 features.anchorCount, |
66 features.formCount, | 67 features.formCount, |
67 features.mozScore, | 68 features.mozScore, |
68 features.mozScoreAllSqrt, | 69 features.mozScoreAllSqrt, |
69 features.mozScoreAllLinear | 70 features.mozScoreAllLinear |
70 )); | 71 ); |
72 bool distillable = detector->Classify(derived); | |
73 bool long_article = long_page->Classify(derived); | |
71 | 74 |
// Histogram bucket layout: bit 0 = isMobileFriendly, bit 1 = |detector|
// verdict. The |long_page| verdict added in NEW is not part of the bucket,
// so the recorded values keep the same meaning in OLD and NEW.
72 int bucket = static_cast<unsigned>(features.isMobileFriendly) | | 75 int bucket = static_cast<unsigned>(features.isMobileFriendly) | |
73 (static_cast<unsigned>(distillable) << 1); | 76 (static_cast<unsigned>(distillable) << 1); |
74 if (is_last) { | 77 if (is_last) { |
75 UMA_HISTOGRAM_ENUMERATION("DomDistiller.PageDistillableAfterLoading", | 78 UMA_HISTOGRAM_ENUMERATION("DomDistiller.PageDistillableAfterLoading", |
76 bucket, 4); | 79 bucket, 4); |
77 } else { | 80 } else { |
78 UMA_HISTOGRAM_ENUMERATION("DomDistiller.PageDistillableAfterParsing", | 81 UMA_HISTOGRAM_ENUMERATION("DomDistiller.PageDistillableAfterParsing", |
79 bucket, 4); | 82 bucket, 4); |
80 } | 83 } |
// NEW additionally requires the |long_page| classifier to accept the page.
81 return distillable && (!features.isMobileFriendly); | 84 return distillable && long_article && (!features.isMobileFriendly); |
82 } | 85 } |
83 | 86 |
// Decides whether |doc| is distillable according to the heuristics type
// returned by GetDistillerHeuristicsType():
//   ALWAYS_TRUE    -> true
//   OG_ARTICLE     -> the openGraph flag of doc.distillabilityFeatures()
//   ADABOOST_MODEL -> IsDistillablePageAdaboost(); NEW also passes the model
//                     returned by DistillablePageDetector::GetLongPageModel()
//   NONE / other   -> false
// |is_last| is only forwarded to IsDistillablePageAdaboost().
84 bool IsDistillablePage(WebDocument& doc, bool is_last) { | 87 bool IsDistillablePage(WebDocument& doc, bool is_last) { |
85 switch (GetDistillerHeuristicsType()) { | 88 switch (GetDistillerHeuristicsType()) { |
86 case DistillerHeuristicsType::ALWAYS_TRUE: | 89 case DistillerHeuristicsType::ALWAYS_TRUE: |
87 return true; | 90 return true; |
88 case DistillerHeuristicsType::OG_ARTICLE: | 91 case DistillerHeuristicsType::OG_ARTICLE: |
89 return doc.distillabilityFeatures().openGraph; | 92 return doc.distillabilityFeatures().openGraph; |
90 case DistillerHeuristicsType::ADABOOST_MODEL: | 93 case DistillerHeuristicsType::ADABOOST_MODEL: |
91 return IsDistillablePageAdaboost( | 94 return IsDistillablePageAdaboost(doc, |
92 doc, DistillablePageDetector::GetNewModel(), is_last); | 95 DistillablePageDetector::GetNewModel(), |
96 DistillablePageDetector::GetLongPageModel(), is_last); | |
93 case DistillerHeuristicsType::NONE: | 97 case DistillerHeuristicsType::NONE: |
94 default: | 98 default: |
95 return false; | 99 return false; |
96 } | 100 } |
97 } | 101 } |
98 | 102 |
99 } // namespace | 103 } // namespace |
100 | 104 |
101 DistillabilityAgent::DistillabilityAgent( | 105 DistillabilityAgent::DistillabilityAgent( |
102 content::RenderFrame* render_frame) | 106 content::RenderFrame* render_frame) |
(...skipping 19 matching lines...) Expand all Loading... | |
122 | 126 |
123 bool is_last = IsLast(is_loaded); | 127 bool is_last = IsLast(is_loaded); |
124 Send(new FrameHostMsg_Distillability(routing_id(), | 128 Send(new FrameHostMsg_Distillability(routing_id(), |
125 IsDistillablePage(doc, is_last), is_last)); | 129 IsDistillablePage(doc, is_last), is_last)); |
126 } | 130 } |
127 | 131 |
128 | 132 |
// Destructor with an empty body; unchanged between OLD and NEW.
129 DistillabilityAgent::~DistillabilityAgent() {} | 133 DistillabilityAgent::~DistillabilityAgent() {} |
130 | 134 |
131 } // namespace dom_distiller | 135 } // namespace dom_distiller |
OLD | NEW |