| OLD | NEW |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "base/metrics/histogram.h" | 5 #include "base/metrics/histogram.h" |
| 6 #include "base/strings/string_util.h" | 6 #include "base/strings/string_util.h" |
| 7 | 7 |
| 8 #include "components/dom_distiller/content/common/distillability_service.mojom.h" | 8 #include "components/dom_distiller/content/common/distillability_service.mojom.h" |
| 9 #include "components/dom_distiller/content/renderer/distillability_agent.h" | 9 #include "components/dom_distiller/content/renderer/distillability_agent.h" |
| 10 #include "components/dom_distiller/core/distillable_page_detector.h" | 10 #include "components/dom_distiller/core/distillable_page_detector.h" |
| (...skipping 11 matching lines...) Expand all Loading... |
| 22 namespace dom_distiller { | 22 namespace dom_distiller { |
| 23 | 23 |
| 24 using namespace blink; | 24 using namespace blink; |
| 25 | 25 |
| 26 namespace { | 26 namespace { |
| 27 | 27 |
// Host names on the distillability blacklist; currently a single entry.
// NOTE(review): presumably matched against the page URL's host by
// IsBlacklisted(), whose body is not visible in this view -- confirm.
| 28 const char* const kBlacklist[] = { | 28 const char* const kBlacklist[] = { |
| 29 "www.reddit.com" | 29 "www.reddit.com" |
| 30 }; | 30 }; |
| 31 | 31 |
// Buckets recorded in the "DomDistiller.DistillabilityRejection" histogram.
// One bucket is logged per sample, chosen by the first matching check, in
// this order:
//   NOT_ARTICLE     - |distillable| is false.
//   MOBILE_FRIENDLY - |features.isMobileFriendly| is true.
//   BLACKLISTED     - |blacklisted| is true.
//   TOO_SHORT       - |long_article| is false.
//   NOT_REJECTED    - none of the checks above matched.
// REJECTION_BUCKET_BOUNDARY is only passed as the histogram's boundary value
// and is never logged as a sample itself.
// NOTE(review): enumerator values after NOT_ARTICLE are implicit. Since they
// are used as histogram bucket indices, presumably new entries must be added
// right before REJECTION_BUCKET_BOUNDARY and existing ones never reordered --
// confirm against the UMA enum-histogram guidelines.
| 32 enum RejectionBuckets { |
| 33 NOT_ARTICLE = 0, |
| 34 MOBILE_FRIENDLY, |
| 35 BLACKLISTED, |
| 36 TOO_SHORT, |
| 37 NOT_REJECTED, |
| 38 REJECTION_BUCKET_BOUNDARY |
| 39 }; |
| 40 |
| 32 // Returns whether it is necessary to send updates back to the browser. | 41 // Returns whether it is necessary to send updates back to the browser. |
| 33 // The number of updates can be from 0 to 2. See the tests in | 42 // The number of updates can be from 0 to 2. See the tests in |
| 34 // "distillable_page_utils_browsertest.cc". | 43 // "distillable_page_utils_browsertest.cc". |
| 35 // Most heuristics types only require one update after parsing. | 44 // Most heuristics types only require one update after parsing. |
| 36 // Adaboost is the only one doing the second update, which is after loading. | 45 // Adaboost is the only one doing the second update, which is after loading. |
| 37 bool NeedToUpdate(bool is_loaded) { | 46 bool NeedToUpdate(bool is_loaded) { |
| 38 switch (GetDistillerHeuristicsType()) { | 47 switch (GetDistillerHeuristicsType()) { |
| 39 case DistillerHeuristicsType::ALWAYS_TRUE: | 48 case DistillerHeuristicsType::ALWAYS_TRUE: |
| 40 return !is_loaded; | 49 return !is_loaded; |
| 41 case DistillerHeuristicsType::OG_ARTICLE: | 50 case DistillerHeuristicsType::OG_ARTICLE: |
| (...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 89 bool blacklisted = IsBlacklisted(parsed_url); | 98 bool blacklisted = IsBlacklisted(parsed_url); |
| 90 | 99 |
| 91 int bucket = static_cast<unsigned>(features.isMobileFriendly) | | 100 int bucket = static_cast<unsigned>(features.isMobileFriendly) | |
| 92 (static_cast<unsigned>(distillable) << 1); | 101 (static_cast<unsigned>(distillable) << 1); |
| 93 if (is_last) { | 102 if (is_last) { |
| 94 UMA_HISTOGRAM_ENUMERATION("DomDistiller.PageDistillableAfterLoading", | 103 UMA_HISTOGRAM_ENUMERATION("DomDistiller.PageDistillableAfterLoading", |
| 95 bucket, 4); | 104 bucket, 4); |
| 96 } else { | 105 } else { |
| 97 UMA_HISTOGRAM_ENUMERATION("DomDistiller.PageDistillableAfterParsing", | 106 UMA_HISTOGRAM_ENUMERATION("DomDistiller.PageDistillableAfterParsing", |
| 98 bucket, 4); | 107 bucket, 4); |
| 108 if (!distillable) { |
| 109 UMA_HISTOGRAM_ENUMERATION("DomDistiller.DistillabilityRejection", |
| 110 NOT_ARTICLE, REJECTION_BUCKET_BOUNDARY); |
| 111 } else if (features.isMobileFriendly) { |
| 112 UMA_HISTOGRAM_ENUMERATION("DomDistiller.DistillabilityRejection", |
| 113 MOBILE_FRIENDLY, REJECTION_BUCKET_BOUNDARY); |
| 114 } else if (blacklisted) { |
| 115 UMA_HISTOGRAM_ENUMERATION("DomDistiller.DistillabilityRejection", |
| 116 BLACKLISTED, REJECTION_BUCKET_BOUNDARY); |
| 117 } else if (!long_article) { |
| 118 UMA_HISTOGRAM_ENUMERATION("DomDistiller.DistillabilityRejection", |
| 119 TOO_SHORT, REJECTION_BUCKET_BOUNDARY); |
| 120 } else { |
| 121 UMA_HISTOGRAM_ENUMERATION("DomDistiller.DistillabilityRejection", |
| 122 NOT_REJECTED, REJECTION_BUCKET_BOUNDARY); |
| 123 } |
| 99 } | 124 } |
| 100 | 125 |
| 101 if (blacklisted) { | 126 if (blacklisted) { |
| 102 return false; | 127 return false; |
| 103 } | 128 } |
| 104 if (features.isMobileFriendly) { | 129 if (features.isMobileFriendly) { |
| 105 return false; | 130 return false; |
| 106 } | 131 } |
| 107 return distillable && long_article; | 132 return distillable && long_article; |
| 108 } | 133 } |
| (...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 153 render_frame()->GetServiceRegistry()->ConnectToRemoteService( | 178 render_frame()->GetServiceRegistry()->ConnectToRemoteService( |
| 154 mojo::GetProxy(&distillability_service)); | 179 mojo::GetProxy(&distillability_service)); |
| 155 DCHECK(distillability_service); | 180 DCHECK(distillability_service); |
| 156 distillability_service->NotifyIsDistillable( | 181 distillability_service->NotifyIsDistillable( |
| 157 IsDistillablePage(doc, is_last), is_last); | 182 IsDistillablePage(doc, is_last), is_last); |
| 158 } | 183 } |
| 159 | 184 |
// Out-of-line destructor definition. The body is empty, so no explicit
// cleanup is performed here.
| 160 DistillabilityAgent::~DistillabilityAgent() {} | 185 DistillabilityAgent::~DistillabilityAgent() {} |
| 161 | 186 |
| 162 } // namespace dom_distiller | 187 } // namespace dom_distiller |
| OLD | NEW |