| OLD | NEW |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "base/metrics/histogram.h" | 5 #include "base/metrics/histogram.h" |
| 6 | 6 |
| 7 #include "components/dom_distiller/content/common/distiller_messages.h" | 7 #include "components/dom_distiller/content/common/distiller_messages.h" |
| 8 #include "components/dom_distiller/content/renderer/distillability_agent.h" | 8 #include "components/dom_distiller/content/renderer/distillability_agent.h" |
| 9 #include "components/dom_distiller/core/distillable_page_detector.h" | 9 #include "components/dom_distiller/core/distillable_page_detector.h" |
| 10 #include "components/dom_distiller/core/experiments.h" | 10 #include "components/dom_distiller/core/experiments.h" |
| (...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 58 for (size_t i = 0; i < arraysize(kBlacklist); ++i) { | 58 for (size_t i = 0; i < arraysize(kBlacklist); ++i) { |
| 59 if (base::LowerCaseEqualsASCII(url.host(), kBlacklist[i])) { | 59 if (base::LowerCaseEqualsASCII(url.host(), kBlacklist[i])) { |
| 60 return true; | 60 return true; |
| 61 } | 61 } |
| 62 } | 62 } |
| 63 return false; | 63 return false; |
| 64 } | 64 } |
| 65 | 65 |
| 66 bool IsDistillablePageAdaboost(WebDocument& doc, | 66 bool IsDistillablePageAdaboost(WebDocument& doc, |
| 67 const DistillablePageDetector* detector, | 67 const DistillablePageDetector* detector, |
| 68 const DistillablePageDetector* long_page, |
| 68 bool is_last) { | 69 bool is_last) { |
| 69 WebDistillabilityFeatures features = doc.distillabilityFeatures(); | 70 WebDistillabilityFeatures features = doc.distillabilityFeatures(); |
| 70 GURL parsed_url(doc.url()); | 71 GURL parsed_url(doc.url()); |
| 71 if (!parsed_url.is_valid()) { | 72 if (!parsed_url.is_valid()) { |
| 72 return false; | 73 return false; |
| 73 } | 74 } |
| 74 bool distillable = detector->Classify(CalculateDerivedFeatures( | 75 std::vector<double> derived = CalculateDerivedFeatures( |
| 75 features.openGraph, | 76 features.openGraph, |
| 76 parsed_url, | 77 parsed_url, |
| 77 features.elementCount, | 78 features.elementCount, |
| 78 features.anchorCount, | 79 features.anchorCount, |
| 79 features.formCount, | 80 features.formCount, |
| 80 features.mozScore, | 81 features.mozScore, |
| 81 features.mozScoreAllSqrt, | 82 features.mozScoreAllSqrt, |
| 82 features.mozScoreAllLinear | 83 features.mozScoreAllLinear |
| 83 )); | 84 ); |
| 85 bool distillable = detector->Classify(derived); |
| 86 bool long_article = long_page->Classify(derived); |
| 84 bool blacklisted = IsBlacklisted(parsed_url); | 87 bool blacklisted = IsBlacklisted(parsed_url); |
| 85 | 88 |
| 86 int bucket = static_cast<unsigned>(features.isMobileFriendly) | | 89 int bucket = static_cast<unsigned>(features.isMobileFriendly) | |
| 87 (static_cast<unsigned>(distillable) << 1); | 90 (static_cast<unsigned>(distillable) << 1); |
| 88 if (is_last) { | 91 if (is_last) { |
| 89 UMA_HISTOGRAM_ENUMERATION("DomDistiller.PageDistillableAfterLoading", | 92 UMA_HISTOGRAM_ENUMERATION("DomDistiller.PageDistillableAfterLoading", |
| 90 bucket, 4); | 93 bucket, 4); |
| 91 } else { | 94 } else { |
| 92 UMA_HISTOGRAM_ENUMERATION("DomDistiller.PageDistillableAfterParsing", | 95 UMA_HISTOGRAM_ENUMERATION("DomDistiller.PageDistillableAfterParsing", |
| 93 bucket, 4); | 96 bucket, 4); |
| 94 } | 97 } |
| 95 | 98 |
| 96 return distillable && (!features.isMobileFriendly) && (!blacklisted); | 99 if (blacklisted) { |
| 100 return false; |
| 101 } |
| 102 if (features.isMobileFriendly) { |
| 103 return false; |
| 104 } |
| 105 return distillable && long_article; |
| 97 } | 106 } |
| 98 | 107 |
| 99 bool IsDistillablePage(WebDocument& doc, bool is_last) { | 108 bool IsDistillablePage(WebDocument& doc, bool is_last) { |
| 100 switch (GetDistillerHeuristicsType()) { | 109 switch (GetDistillerHeuristicsType()) { |
| 101 case DistillerHeuristicsType::ALWAYS_TRUE: | 110 case DistillerHeuristicsType::ALWAYS_TRUE: |
| 102 return true; | 111 return true; |
| 103 case DistillerHeuristicsType::OG_ARTICLE: | 112 case DistillerHeuristicsType::OG_ARTICLE: |
| 104 return doc.distillabilityFeatures().openGraph; | 113 return doc.distillabilityFeatures().openGraph; |
| 105 case DistillerHeuristicsType::ADABOOST_MODEL: | 114 case DistillerHeuristicsType::ADABOOST_MODEL: |
| 106 return IsDistillablePageAdaboost( | 115 return IsDistillablePageAdaboost(doc, |
| 107 doc, DistillablePageDetector::GetNewModel(), is_last); | 116 DistillablePageDetector::GetNewModel(), |
| 117 DistillablePageDetector::GetLongPageModel(), is_last); |
| 108 case DistillerHeuristicsType::NONE: | 118 case DistillerHeuristicsType::NONE: |
| 109 default: | 119 default: |
| 110 return false; | 120 return false; |
| 111 } | 121 } |
| 112 } | 122 } |
| 113 | 123 |
| 114 } // namespace | 124 } // namespace |
| 115 | 125 |
| 116 DistillabilityAgent::DistillabilityAgent( | 126 DistillabilityAgent::DistillabilityAgent( |
| 117 content::RenderFrame* render_frame) | 127 content::RenderFrame* render_frame) |
| (...skipping 19 matching lines...) Expand all Loading... |
| 137 | 147 |
| 138 bool is_last = IsLast(is_loaded); | 148 bool is_last = IsLast(is_loaded); |
| 139 Send(new FrameHostMsg_Distillability(routing_id(), | 149 Send(new FrameHostMsg_Distillability(routing_id(), |
| 140 IsDistillablePage(doc, is_last), is_last)); | 150 IsDistillablePage(doc, is_last), is_last)); |
| 141 } | 151 } |
| 142 | 152 |
| 143 | 153 |
| 144 DistillabilityAgent::~DistillabilityAgent() {} | 154 DistillabilityAgent::~DistillabilityAgent() {} |
| 145 | 155 |
| 146 } // namespace dom_distiller | 156 } // namespace dom_distiller |
| OLD | NEW |