Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "base/metrics/histogram.h" | 5 #include "base/metrics/histogram.h" |
| 6 | 6 |
| 7 #include "components/dom_distiller/content/common/distiller_messages.h" | 7 #include "components/dom_distiller/content/common/distiller_messages.h" |
| 8 #include "components/dom_distiller/content/renderer/distillability_agent.h" | 8 #include "components/dom_distiller/content/renderer/distillability_agent.h" |
| 9 #include "components/dom_distiller/core/distillable_page_detector.h" | 9 #include "components/dom_distiller/core/distillable_page_detector.h" |
| 10 #include "components/dom_distiller/core/experiments.h" | 10 #include "components/dom_distiller/core/experiments.h" |
| (...skipping 32 matching lines...) Expand 10 before / Expand all / Expand 10 after Loading... | |
| 43 } | 43 } |
| 44 | 44 |
| 45 // Returns whether this update is the last one for the page. | 45 // Returns whether this update is the last one for the page. |
| 46 bool IsLast(bool is_loaded) { | 46 bool IsLast(bool is_loaded) { |
| 47 if (GetDistillerHeuristicsType() == DistillerHeuristicsType::ADABOOST_MODEL) | 47 if (GetDistillerHeuristicsType() == DistillerHeuristicsType::ADABOOST_MODEL) |
| 48 return is_loaded; | 48 return is_loaded; |
| 49 | 49 |
| 50 return true; | 50 return true; |
| 51 } | 51 } |
| 52 | 52 |
| 53 const char* const kBlacklist[] = { | |
|
mdjones
2016/02/18 17:13:27
Being a constant, this could probably be at the top of the file.
| |
| 54 "www.reddit.com" | |
| 55 }; | |
| 56 | |
| 57 bool IsBlacklisted(const GURL& url) { | |
| 58 for (size_t i = 0; i < arraysize(kBlacklist); ++i) { | |
| 59 if (base::LowerCaseEqualsASCII(url.host(), kBlacklist[i])) { | |
| 60 return true; | |
| 61 } | |
| 62 } | |
| 63 return false; | |
| 64 } | |
| 65 | |
| 53 bool IsDistillablePageAdaboost(WebDocument& doc, | 66 bool IsDistillablePageAdaboost(WebDocument& doc, |
| 54 const DistillablePageDetector* detector, | 67 const DistillablePageDetector* detector, |
| 55 bool is_last) { | 68 bool is_last) { |
| 56 WebDistillabilityFeatures features = doc.distillabilityFeatures(); | 69 WebDistillabilityFeatures features = doc.distillabilityFeatures(); |
| 57 GURL parsed_url(doc.url()); | 70 GURL parsed_url(doc.url()); |
| 58 if (!parsed_url.is_valid()) { | 71 if (!parsed_url.is_valid()) { |
| 59 return false; | 72 return false; |
| 60 } | 73 } |
| 61 bool distillable = detector->Classify(CalculateDerivedFeatures( | 74 bool distillable = detector->Classify(CalculateDerivedFeatures( |
| 62 features.openGraph, | 75 features.openGraph, |
| 63 parsed_url, | 76 parsed_url, |
| 64 features.elementCount, | 77 features.elementCount, |
| 65 features.anchorCount, | 78 features.anchorCount, |
| 66 features.formCount, | 79 features.formCount, |
| 67 features.mozScore, | 80 features.mozScore, |
| 68 features.mozScoreAllSqrt, | 81 features.mozScoreAllSqrt, |
| 69 features.mozScoreAllLinear | 82 features.mozScoreAllLinear |
| 70 )); | 83 )); |
| 84 bool blacklisted = IsBlacklisted(parsed_url); | |
| 71 | 85 |
| 72 int bucket = static_cast<unsigned>(features.isMobileFriendly) | | 86 int bucket = static_cast<unsigned>(features.isMobileFriendly) | |
| 73 (static_cast<unsigned>(distillable) << 1); | 87 (static_cast<unsigned>(distillable) << 1); |
| 74 if (is_last) { | 88 if (is_last) { |
| 75 UMA_HISTOGRAM_ENUMERATION("DomDistiller.PageDistillableAfterLoading", | 89 UMA_HISTOGRAM_ENUMERATION("DomDistiller.PageDistillableAfterLoading", |
| 76 bucket, 4); | 90 bucket, 4); |
| 77 } else { | 91 } else { |
| 78 UMA_HISTOGRAM_ENUMERATION("DomDistiller.PageDistillableAfterParsing", | 92 UMA_HISTOGRAM_ENUMERATION("DomDistiller.PageDistillableAfterParsing", |
| 79 bucket, 4); | 93 bucket, 4); |
| 80 } | 94 } |
| 81 return distillable && (!features.isMobileFriendly); | 95 |
| 96 return distillable && (!features.isMobileFriendly) && (!blacklisted); | |
| 82 } | 97 } |
| 83 | 98 |
| 84 bool IsDistillablePage(WebDocument& doc, bool is_last) { | 99 bool IsDistillablePage(WebDocument& doc, bool is_last) { |
| 85 switch (GetDistillerHeuristicsType()) { | 100 switch (GetDistillerHeuristicsType()) { |
| 86 case DistillerHeuristicsType::ALWAYS_TRUE: | 101 case DistillerHeuristicsType::ALWAYS_TRUE: |
| 87 return true; | 102 return true; |
| 88 case DistillerHeuristicsType::OG_ARTICLE: | 103 case DistillerHeuristicsType::OG_ARTICLE: |
| 89 return doc.distillabilityFeatures().openGraph; | 104 return doc.distillabilityFeatures().openGraph; |
| 90 case DistillerHeuristicsType::ADABOOST_MODEL: | 105 case DistillerHeuristicsType::ADABOOST_MODEL: |
| 91 return IsDistillablePageAdaboost( | 106 return IsDistillablePageAdaboost( |
| (...skipping 30 matching lines...) Expand all Loading... | |
| 122 | 137 |
| 123 bool is_last = IsLast(is_loaded); | 138 bool is_last = IsLast(is_loaded); |
| 124 Send(new FrameHostMsg_Distillability(routing_id(), | 139 Send(new FrameHostMsg_Distillability(routing_id(), |
| 125 IsDistillablePage(doc, is_last), is_last)); | 140 IsDistillablePage(doc, is_last), is_last)); |
| 126 } | 141 } |
| 127 | 142 |
| 128 | 143 |
| 129 DistillabilityAgent::~DistillabilityAgent() {} | 144 DistillabilityAgent::~DistillabilityAgent() {} |
| 130 | 145 |
| 131 } // namespace dom_distiller | 146 } // namespace dom_distiller |
| OLD | NEW |