Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1053)

Side by Side Diff: components/dom_distiller/content/renderer/distillability_agent.cc

Issue 1705323004: Support blacklisting domains for Reader Mode (Closed) | Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: move constant | Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2015 The Chromium Authors. All rights reserved. 1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "base/metrics/histogram.h" 5 #include "base/metrics/histogram.h"
6 6
7 #include "components/dom_distiller/content/common/distiller_messages.h" 7 #include "components/dom_distiller/content/common/distiller_messages.h"
8 #include "components/dom_distiller/content/renderer/distillability_agent.h" 8 #include "components/dom_distiller/content/renderer/distillability_agent.h"
9 #include "components/dom_distiller/core/distillable_page_detector.h" 9 #include "components/dom_distiller/core/distillable_page_detector.h"
10 #include "components/dom_distiller/core/experiments.h" 10 #include "components/dom_distiller/core/experiments.h"
11 #include "components/dom_distiller/core/page_features.h" 11 #include "components/dom_distiller/core/page_features.h"
12 #include "components/dom_distiller/core/url_utils.h" 12 #include "components/dom_distiller/core/url_utils.h"
13 #include "content/public/renderer/render_frame.h" 13 #include "content/public/renderer/render_frame.h"
14 14
15 #include "third_party/WebKit/public/platform/WebDistillability.h" 15 #include "third_party/WebKit/public/platform/WebDistillability.h"
16 #include "third_party/WebKit/public/web/WebDocument.h" 16 #include "third_party/WebKit/public/web/WebDocument.h"
17 #include "third_party/WebKit/public/web/WebElement.h" 17 #include "third_party/WebKit/public/web/WebElement.h"
18 #include "third_party/WebKit/public/web/WebLocalFrame.h" 18 #include "third_party/WebKit/public/web/WebLocalFrame.h"
19 19
20 namespace dom_distiller { 20 namespace dom_distiller {
21 21
22 using namespace blink; 22 using namespace blink;
23 23
24 namespace { 24 namespace {
25 25
26 const char* const kBlacklist[] = {
27 "www.reddit.com"
28 };
29
26 // Returns whether it is necessary to send updates back to the browser. 30 // Returns whether it is necessary to send updates back to the browser.
27 // The number of updates can be from 0 to 2. See the tests in 31 // The number of updates can be from 0 to 2. See the tests in
28 // "distillable_page_utils_browsertest.cc". 32 // "distillable_page_utils_browsertest.cc".
29 // Most heuristics types only require one update after parsing. 33 // Most heuristics types only require one update after parsing.
30 // Adaboost is the only one doing the second update, which is after loading. 34 // Adaboost is the only one doing the second update, which is after loading.
31 bool NeedToUpdate(bool is_loaded) { 35 bool NeedToUpdate(bool is_loaded) {
32 switch (GetDistillerHeuristicsType()) { 36 switch (GetDistillerHeuristicsType()) {
33 case DistillerHeuristicsType::ALWAYS_TRUE: 37 case DistillerHeuristicsType::ALWAYS_TRUE:
34 return !is_loaded; 38 return !is_loaded;
35 case DistillerHeuristicsType::OG_ARTICLE: 39 case DistillerHeuristicsType::OG_ARTICLE:
36 return !is_loaded; 40 return !is_loaded;
37 case DistillerHeuristicsType::ADABOOST_MODEL: 41 case DistillerHeuristicsType::ADABOOST_MODEL:
38 return true; 42 return true;
39 case DistillerHeuristicsType::NONE: 43 case DistillerHeuristicsType::NONE:
40 default: 44 default:
41 return false; 45 return false;
42 } 46 }
43 } 47 }
44 48
45 // Returns whether this update is the last one for the page. 49 // Returns whether this update is the last one for the page.
46 bool IsLast(bool is_loaded) { 50 bool IsLast(bool is_loaded) {
47 if (GetDistillerHeuristicsType() == DistillerHeuristicsType::ADABOOST_MODEL) 51 if (GetDistillerHeuristicsType() == DistillerHeuristicsType::ADABOOST_MODEL)
48 return is_loaded; 52 return is_loaded;
49 53
50 return true; 54 return true;
51 } 55 }
52 56
57 bool IsBlacklisted(const GURL& url) {
58 for (size_t i = 0; i < arraysize(kBlacklist); ++i) {
59 if (base::LowerCaseEqualsASCII(url.host(), kBlacklist[i])) {
60 return true;
61 }
62 }
63 return false;
64 }
65
53 bool IsDistillablePageAdaboost(WebDocument& doc, 66 bool IsDistillablePageAdaboost(WebDocument& doc,
54 const DistillablePageDetector* detector, 67 const DistillablePageDetector* detector,
55 bool is_last) { 68 bool is_last) {
56 WebDistillabilityFeatures features = doc.distillabilityFeatures(); 69 WebDistillabilityFeatures features = doc.distillabilityFeatures();
57 GURL parsed_url(doc.url()); 70 GURL parsed_url(doc.url());
58 if (!parsed_url.is_valid()) { 71 if (!parsed_url.is_valid()) {
59 return false; 72 return false;
60 } 73 }
61 bool distillable = detector->Classify(CalculateDerivedFeatures( 74 bool distillable = detector->Classify(CalculateDerivedFeatures(
62 features.openGraph, 75 features.openGraph,
63 parsed_url, 76 parsed_url,
64 features.elementCount, 77 features.elementCount,
65 features.anchorCount, 78 features.anchorCount,
66 features.formCount, 79 features.formCount,
67 features.mozScore, 80 features.mozScore,
68 features.mozScoreAllSqrt, 81 features.mozScoreAllSqrt,
69 features.mozScoreAllLinear 82 features.mozScoreAllLinear
70 )); 83 ));
84 bool blacklisted = IsBlacklisted(parsed_url);
71 85
72 int bucket = static_cast<unsigned>(features.isMobileFriendly) | 86 int bucket = static_cast<unsigned>(features.isMobileFriendly) |
73 (static_cast<unsigned>(distillable) << 1); 87 (static_cast<unsigned>(distillable) << 1);
74 if (is_last) { 88 if (is_last) {
75 UMA_HISTOGRAM_ENUMERATION("DomDistiller.PageDistillableAfterLoading", 89 UMA_HISTOGRAM_ENUMERATION("DomDistiller.PageDistillableAfterLoading",
76 bucket, 4); 90 bucket, 4);
77 } else { 91 } else {
78 UMA_HISTOGRAM_ENUMERATION("DomDistiller.PageDistillableAfterParsing", 92 UMA_HISTOGRAM_ENUMERATION("DomDistiller.PageDistillableAfterParsing",
79 bucket, 4); 93 bucket, 4);
80 } 94 }
81 return distillable && (!features.isMobileFriendly); 95
96 return distillable && (!features.isMobileFriendly) && (!blacklisted);
82 } 97 }
83 98
84 bool IsDistillablePage(WebDocument& doc, bool is_last) { 99 bool IsDistillablePage(WebDocument& doc, bool is_last) {
85 switch (GetDistillerHeuristicsType()) { 100 switch (GetDistillerHeuristicsType()) {
86 case DistillerHeuristicsType::ALWAYS_TRUE: 101 case DistillerHeuristicsType::ALWAYS_TRUE:
87 return true; 102 return true;
88 case DistillerHeuristicsType::OG_ARTICLE: 103 case DistillerHeuristicsType::OG_ARTICLE:
89 return doc.distillabilityFeatures().openGraph; 104 return doc.distillabilityFeatures().openGraph;
90 case DistillerHeuristicsType::ADABOOST_MODEL: 105 case DistillerHeuristicsType::ADABOOST_MODEL:
91 return IsDistillablePageAdaboost( 106 return IsDistillablePageAdaboost(
(...skipping 30 matching lines...) Expand all
122 137
123 bool is_last = IsLast(is_loaded); 138 bool is_last = IsLast(is_loaded);
124 Send(new FrameHostMsg_Distillability(routing_id(), 139 Send(new FrameHostMsg_Distillability(routing_id(),
125 IsDistillablePage(doc, is_last), is_last)); 140 IsDistillablePage(doc, is_last), is_last));
126 } 141 }
127 142
128 143
129 DistillabilityAgent::~DistillabilityAgent() {} 144 DistillabilityAgent::~DistillabilityAgent() {}
130 145
131 } // namespace dom_distiller 146 } // namespace dom_distiller
OLD | NEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698