| OLD | NEW |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "base/json/json_writer.h" |
| 5 #include "base/metrics/histogram.h" | 6 #include "base/metrics/histogram.h" |
| 6 #include "base/strings/string_util.h" | 7 #include "base/strings/string_util.h" |
| 7 | 8 |
| 8 #include "components/dom_distiller/content/common/distillability_service.mojom.h" | 9 #include "components/dom_distiller/content/common/distillability_service.mojom.h" |
| 9 #include "components/dom_distiller/content/renderer/distillability_agent.h" | 10 #include "components/dom_distiller/content/renderer/distillability_agent.h" |
| 10 #include "components/dom_distiller/core/distillable_page_detector.h" | 11 #include "components/dom_distiller/core/distillable_page_detector.h" |
| 12 #include "components/dom_distiller/core/dom_distiller_features.h" |
| 11 #include "components/dom_distiller/core/experiments.h" | 13 #include "components/dom_distiller/core/experiments.h" |
| 12 #include "components/dom_distiller/core/page_features.h" | 14 #include "components/dom_distiller/core/page_features.h" |
| 13 #include "components/dom_distiller/core/url_utils.h" | 15 #include "components/dom_distiller/core/url_utils.h" |
| 14 #include "content/public/renderer/render_frame.h" | 16 #include "content/public/renderer/render_frame.h" |
| 15 #include "services/shell/public/cpp/interface_provider.h" | 17 #include "services/shell/public/cpp/interface_provider.h" |
| 16 #include "third_party/WebKit/public/platform/WebDistillability.h" | 18 #include "third_party/WebKit/public/platform/WebDistillability.h" |
| 17 #include "third_party/WebKit/public/web/WebDocument.h" | 19 #include "third_party/WebKit/public/web/WebDocument.h" |
| 18 #include "third_party/WebKit/public/web/WebElement.h" | 20 #include "third_party/WebKit/public/web/WebElement.h" |
| 19 #include "third_party/WebKit/public/web/WebLocalFrame.h" | 21 #include "third_party/WebKit/public/web/WebLocalFrame.h" |
| 20 | 22 |
| (...skipping 45 matching lines...) |
| 66 | 68 |
| 67 bool IsBlacklisted(const GURL& url) { | 69 bool IsBlacklisted(const GURL& url) { |
| 68 for (size_t i = 0; i < arraysize(kBlacklist); ++i) { | 70 for (size_t i = 0; i < arraysize(kBlacklist); ++i) { |
| 69 if (base::LowerCaseEqualsASCII(url.host(), kBlacklist[i])) { | 71 if (base::LowerCaseEqualsASCII(url.host(), kBlacklist[i])) { |
| 70 return true; | 72 return true; |
| 71 } | 73 } |
| 72 } | 74 } |
| 73 return false; | 75 return false; |
| 74 } | 76 } |
| 75 | 77 |
| 78 void DumpDistillability(content::RenderFrame* render_frame, |
| 79 const WebDistillabilityFeatures& features, |
| 80 const std::vector<double>& derived, |
| 81 double score, |
| 82 bool distillable, |
| 83 double long_score, |
| 84 bool long_page, |
| 85 bool blacklisted) { |
| 86 { |
| 87 base::DictionaryValue dict; |
| 88 std::string msg; |
| 89 |
| 90 std::unique_ptr<base::DictionaryValue> raw_features( |
| 91 new base::DictionaryValue); |
| 92 raw_features->SetInteger("isMobileFriendly", features.isMobileFriendly); |
| 93 raw_features->SetInteger("openGraph", features.openGraph); |
| 94 raw_features->SetInteger("elementCount", features.elementCount); |
| 95 raw_features->SetInteger("anchorCount", features.anchorCount); |
| 96 raw_features->SetInteger("formCount", features.formCount); |
| 97 raw_features->SetInteger("textInputCount", features.textInputCount); |
| 98 raw_features->SetInteger("passwordInputCount", features.passwordInputCount); |
| 99 raw_features->SetDouble("mozScore", features.mozScore); |
| 100 raw_features->SetDouble("mozScoreAllSqrt", features.mozScoreAllSqrt); |
| 101 raw_features->SetDouble("mozScoreAllLinear", features.mozScoreAllLinear); |
| 102 dict.Set("features", std::move(raw_features)); |
| 103 |
| 104 std::unique_ptr<base::ListValue> derived_features(new base::ListValue()); |
| 105 for (unsigned i = 0; i < derived.size(); i++) { |
| 106 derived_features->AppendDouble(derived[i]); |
| 107 } |
| 108 dict.Set("derived_features", std::move(derived_features)); |
| 109 |
| 110 dict.SetDouble("score", score); |
| 111 dict.SetInteger("distillable", distillable); |
| 112 dict.SetDouble("long_score", long_score); |
| 113 dict.SetInteger("long_page", long_page); |
| 114 dict.SetInteger("blacklisted", blacklisted); |
| 115 base::JSONWriter::WriteWithOptions(dict, |
| 116 base::JSONWriter::OPTIONS_PRETTY_PRINT, &msg); |
| 117 msg = "adaboost_classification = " + msg; |
| 118 |
| 119 render_frame->AddMessageToConsole(content::CONSOLE_MESSAGE_LEVEL_DEBUG, |
| 120 msg); |
| 121 } |
| 122 } |
| 123 |
| 76 bool IsDistillablePageAdaboost(WebDocument& doc, | 124 bool IsDistillablePageAdaboost(WebDocument& doc, |
| 77 const DistillablePageDetector* detector, | 125 const DistillablePageDetector* detector, |
| 78 const DistillablePageDetector* long_page, | 126 const DistillablePageDetector* long_page, |
| 79 bool is_last) { | 127 bool is_last, |
| 128 content::RenderFrame* render_frame) { |
| 129 bool isDevMode = IsDistillabilityDevSet(); |
| 80 WebDistillabilityFeatures features = doc.distillabilityFeatures(); | 130 WebDistillabilityFeatures features = doc.distillabilityFeatures(); |
| 81 GURL parsed_url(doc.url()); | 131 GURL parsed_url(doc.url()); |
| 82 if (!parsed_url.is_valid()) { | 132 if (!parsed_url.is_valid()) { |
| 83 return false; | 133 return false; |
| 84 } | 134 } |
| 85 std::vector<double> derived = CalculateDerivedFeatures( | 135 std::vector<double> derived = CalculateDerivedFeatures( |
| 86 features.openGraph, | 136 features.openGraph, |
| 87 parsed_url, | 137 parsed_url, |
| 88 features.elementCount, | 138 features.elementCount, |
| 89 features.anchorCount, | 139 features.anchorCount, |
| 90 features.formCount, | 140 features.formCount, |
| 91 features.mozScore, | 141 features.mozScore, |
| 92 features.mozScoreAllSqrt, | 142 features.mozScoreAllSqrt, |
| 93 features.mozScoreAllLinear | 143 features.mozScoreAllLinear |
| 94 ); | 144 ); |
| 95 double score = detector->Score(derived) - detector->GetThreshold(); | 145 double score = detector->Score(derived) - detector->GetThreshold(); |
| 96 double long_score = long_page->Score(derived) - long_page->GetThreshold(); | 146 double long_score = long_page->Score(derived) - long_page->GetThreshold(); |
| 97 bool distillable = score > 0; | 147 bool distillable = score > 0; |
| 98 bool long_article = long_score > 0; | 148 bool long_article = long_score > 0; |
| 99 bool blacklisted = IsBlacklisted(parsed_url); | 149 bool blacklisted = IsBlacklisted(parsed_url); |
| 100 | 150 |
| 151 if (isDevMode) { |
| 152 DumpDistillability(render_frame, features, derived, score, distillable, |
| 153 long_score, long_article, blacklisted); |
| 154 } |
| 155 |
| 101 if (!features.isMobileFriendly) { | 156 if (!features.isMobileFriendly) { |
| 102 int score_int = std::round(score * 100); | 157 int score_int = std::round(score * 100); |
| 103 if (score > 0) { | 158 if (score > 0) { |
| 104 UMA_HISTOGRAM_COUNTS_1000("DomDistiller.DistillabilityScoreNMF.Positive", | 159 UMA_HISTOGRAM_COUNTS_1000("DomDistiller.DistillabilityScoreNMF.Positive", |
| 105 score_int); | 160 score_int); |
| 106 } else { | 161 } else { |
| 107 UMA_HISTOGRAM_COUNTS_1000("DomDistiller.DistillabilityScoreNMF.Negative", | 162 UMA_HISTOGRAM_COUNTS_1000("DomDistiller.DistillabilityScoreNMF.Negative", |
| 108 -score_int); | 163 -score_int); |
| 109 } | 164 } |
| 110 if (distillable) { | 165 if (distillable) { |
| (...skipping 39 matching lines...) |
| 150 | 205 |
| 151 if (blacklisted) { | 206 if (blacklisted) { |
| 152 return false; | 207 return false; |
| 153 } | 208 } |
| 154 if (features.isMobileFriendly) { | 209 if (features.isMobileFriendly) { |
| 155 return false; | 210 return false; |
| 156 } | 211 } |
| 157 return distillable && long_article; | 212 return distillable && long_article; |
| 158 } | 213 } |
| 159 | 214 |
| 160 bool IsDistillablePage(WebDocument& doc, bool is_last) { | 215 bool IsDistillablePage(WebDocument& doc, bool is_last, |
| 216 content::RenderFrame* render_frame) { |
| 161 switch (GetDistillerHeuristicsType()) { | 217 switch (GetDistillerHeuristicsType()) { |
| 162 case DistillerHeuristicsType::ALWAYS_TRUE: | 218 case DistillerHeuristicsType::ALWAYS_TRUE: |
| 163 return true; | 219 return true; |
| 164 case DistillerHeuristicsType::OG_ARTICLE: | 220 case DistillerHeuristicsType::OG_ARTICLE: |
| 165 return doc.distillabilityFeatures().openGraph; | 221 return doc.distillabilityFeatures().openGraph; |
| 166 case DistillerHeuristicsType::ADABOOST_MODEL: | 222 case DistillerHeuristicsType::ADABOOST_MODEL: |
| 167 return IsDistillablePageAdaboost(doc, | 223 return IsDistillablePageAdaboost(doc, |
| 168 DistillablePageDetector::GetNewModel(), | 224 DistillablePageDetector::GetNewModel(), |
| 169 DistillablePageDetector::GetLongPageModel(), is_last); | 225 DistillablePageDetector::GetLongPageModel(), is_last, render_frame); |
| 170 case DistillerHeuristicsType::NONE: | 226 case DistillerHeuristicsType::NONE: |
| 171 default: | 227 default: |
| 172 return false; | 228 return false; |
| 173 } | 229 } |
| 174 } | 230 } |
| 175 | 231 |
| 176 } // namespace | 232 } // namespace |
| 177 | 233 |
| 178 DistillabilityAgent::DistillabilityAgent( | 234 DistillabilityAgent::DistillabilityAgent( |
| 179 content::RenderFrame* render_frame) | 235 content::RenderFrame* render_frame) |
| (...skipping 17 matching lines...) |
| 197 bool is_loaded = layout_type == WebMeaningfulLayout::FinishedLoading; | 253 bool is_loaded = layout_type == WebMeaningfulLayout::FinishedLoading; |
| 198 if (!NeedToUpdate(is_loaded)) return; | 254 if (!NeedToUpdate(is_loaded)) return; |
| 199 | 255 |
| 200 bool is_last = IsLast(is_loaded); | 256 bool is_last = IsLast(is_loaded); |
| 201 // Connect to Mojo service on browser to notify page distillability. | 257 // Connect to Mojo service on browser to notify page distillability. |
| 202 mojom::DistillabilityServicePtr distillability_service; | 258 mojom::DistillabilityServicePtr distillability_service; |
| 203 render_frame()->GetRemoteInterfaces()->GetInterface( | 259 render_frame()->GetRemoteInterfaces()->GetInterface( |
| 204 &distillability_service); | 260 &distillability_service); |
| 205 DCHECK(distillability_service); | 261 DCHECK(distillability_service); |
| 206 distillability_service->NotifyIsDistillable( | 262 distillability_service->NotifyIsDistillable( |
| 207 IsDistillablePage(doc, is_last), is_last); | 263 IsDistillablePage(doc, is_last, render_frame()), is_last); |
| 208 } | 264 } |
| 209 | 265 |
| 210 DistillabilityAgent::~DistillabilityAgent() {} | 266 DistillabilityAgent::~DistillabilityAgent() {} |
| 211 | 267 |
| 212 void DistillabilityAgent::OnDestruct() { | 268 void DistillabilityAgent::OnDestruct() { |
| 213 delete this; | 269 delete this; |
| 214 } | 270 } |
| 215 | 271 |
| 216 } // namespace dom_distiller | 272 } // namespace dom_distiller |
| OLD | NEW |