Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(59)

Side by Side Diff: components/dom_distiller/content/renderer/distillability_agent.cc

Issue 1703313003: Only show Reader Mode infobar on long articles (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: rebase Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright 2015 The Chromium Authors. All rights reserved. 1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "base/metrics/histogram.h" 5 #include "base/metrics/histogram.h"
6 6
7 #include "components/dom_distiller/content/common/distiller_messages.h" 7 #include "components/dom_distiller/content/common/distiller_messages.h"
8 #include "components/dom_distiller/content/renderer/distillability_agent.h" 8 #include "components/dom_distiller/content/renderer/distillability_agent.h"
9 #include "components/dom_distiller/core/distillable_page_detector.h" 9 #include "components/dom_distiller/core/distillable_page_detector.h"
10 #include "components/dom_distiller/core/experiments.h" 10 #include "components/dom_distiller/core/experiments.h"
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after
58 for (size_t i = 0; i < arraysize(kBlacklist); ++i) { 58 for (size_t i = 0; i < arraysize(kBlacklist); ++i) {
59 if (base::LowerCaseEqualsASCII(url.host(), kBlacklist[i])) { 59 if (base::LowerCaseEqualsASCII(url.host(), kBlacklist[i])) {
60 return true; 60 return true;
61 } 61 }
62 } 62 }
63 return false; 63 return false;
64 } 64 }
65 65
66 bool IsDistillablePageAdaboost(WebDocument& doc, 66 bool IsDistillablePageAdaboost(WebDocument& doc,
67 const DistillablePageDetector* detector, 67 const DistillablePageDetector* detector,
68 const DistillablePageDetector* long_page,
68 bool is_last) { 69 bool is_last) {
69 WebDistillabilityFeatures features = doc.distillabilityFeatures(); 70 WebDistillabilityFeatures features = doc.distillabilityFeatures();
70 GURL parsed_url(doc.url()); 71 GURL parsed_url(doc.url());
71 if (!parsed_url.is_valid()) { 72 if (!parsed_url.is_valid()) {
72 return false; 73 return false;
73 } 74 }
74 bool distillable = detector->Classify(CalculateDerivedFeatures( 75 std::vector<double> derived = CalculateDerivedFeatures(
75 features.openGraph, 76 features.openGraph,
76 parsed_url, 77 parsed_url,
77 features.elementCount, 78 features.elementCount,
78 features.anchorCount, 79 features.anchorCount,
79 features.formCount, 80 features.formCount,
80 features.mozScore, 81 features.mozScore,
81 features.mozScoreAllSqrt, 82 features.mozScoreAllSqrt,
82 features.mozScoreAllLinear 83 features.mozScoreAllLinear
83 )); 84 );
85 bool distillable = detector->Classify(derived);
86 bool long_article = long_page->Classify(derived);
84 bool blacklisted = IsBlacklisted(parsed_url); 87 bool blacklisted = IsBlacklisted(parsed_url);
85 88
86 int bucket = static_cast<unsigned>(features.isMobileFriendly) | 89 int bucket = static_cast<unsigned>(features.isMobileFriendly) |
87 (static_cast<unsigned>(distillable) << 1); 90 (static_cast<unsigned>(distillable) << 1);
88 if (is_last) { 91 if (is_last) {
89 UMA_HISTOGRAM_ENUMERATION("DomDistiller.PageDistillableAfterLoading", 92 UMA_HISTOGRAM_ENUMERATION("DomDistiller.PageDistillableAfterLoading",
90 bucket, 4); 93 bucket, 4);
91 } else { 94 } else {
92 UMA_HISTOGRAM_ENUMERATION("DomDistiller.PageDistillableAfterParsing", 95 UMA_HISTOGRAM_ENUMERATION("DomDistiller.PageDistillableAfterParsing",
93 bucket, 4); 96 bucket, 4);
94 } 97 }
95 98
96 return distillable && (!features.isMobileFriendly) && (!blacklisted); 99 if (blacklisted) {
100 return false;
101 }
102 if (features.isMobileFriendly) {
103 return false;
104 }
105 return distillable && long_article;
97 } 106 }
98 107
99 bool IsDistillablePage(WebDocument& doc, bool is_last) { 108 bool IsDistillablePage(WebDocument& doc, bool is_last) {
100 switch (GetDistillerHeuristicsType()) { 109 switch (GetDistillerHeuristicsType()) {
101 case DistillerHeuristicsType::ALWAYS_TRUE: 110 case DistillerHeuristicsType::ALWAYS_TRUE:
102 return true; 111 return true;
103 case DistillerHeuristicsType::OG_ARTICLE: 112 case DistillerHeuristicsType::OG_ARTICLE:
104 return doc.distillabilityFeatures().openGraph; 113 return doc.distillabilityFeatures().openGraph;
105 case DistillerHeuristicsType::ADABOOST_MODEL: 114 case DistillerHeuristicsType::ADABOOST_MODEL:
106 return IsDistillablePageAdaboost( 115 return IsDistillablePageAdaboost(doc,
107 doc, DistillablePageDetector::GetNewModel(), is_last); 116 DistillablePageDetector::GetNewModel(),
117 DistillablePageDetector::GetLongPageModel(), is_last);
108 case DistillerHeuristicsType::NONE: 118 case DistillerHeuristicsType::NONE:
109 default: 119 default:
110 return false; 120 return false;
111 } 121 }
112 } 122 }
113 123
114 } // namespace 124 } // namespace
115 125
116 DistillabilityAgent::DistillabilityAgent( 126 DistillabilityAgent::DistillabilityAgent(
117 content::RenderFrame* render_frame) 127 content::RenderFrame* render_frame)
(...skipping 19 matching lines...) Expand all
137 147
138 bool is_last = IsLast(is_loaded); 148 bool is_last = IsLast(is_loaded);
139 Send(new FrameHostMsg_Distillability(routing_id(), 149 Send(new FrameHostMsg_Distillability(routing_id(),
140 IsDistillablePage(doc, is_last), is_last)); 150 IsDistillablePage(doc, is_last), is_last));
141 } 151 }
142 152
143 153
144 DistillabilityAgent::~DistillabilityAgent() {} 154 DistillabilityAgent::~DistillabilityAgent() {}
145 155
146 } // namespace dom_distiller 156 } // namespace dom_distiller
OLD | NEW
« no previous file with comments | « chrome/test/data/dom_distiller/simple_article_iframe.html ('k') | components/dom_distiller/core/data/long_page_model.bin » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698