Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(674)

Side by Side Diff: components/dom_distiller/content/renderer/distillability_agent.cc

Issue 1612803002: Test distillability on all pages (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@mobile-distillable
Patch Set: update description | Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright 2015 The Chromium Authors. All rights reserved. 1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "base/metrics/histogram.h"
6
5 #include "components/dom_distiller/content/common/distiller_messages.h" 7 #include "components/dom_distiller/content/common/distiller_messages.h"
6 #include "components/dom_distiller/content/renderer/distillability_agent.h" 8 #include "components/dom_distiller/content/renderer/distillability_agent.h"
7 #include "components/dom_distiller/core/distillable_page_detector.h" 9 #include "components/dom_distiller/core/distillable_page_detector.h"
8 #include "components/dom_distiller/core/experiments.h" 10 #include "components/dom_distiller/core/experiments.h"
9 #include "components/dom_distiller/core/page_features.h" 11 #include "components/dom_distiller/core/page_features.h"
10 #include "components/dom_distiller/core/url_utils.h" 12 #include "components/dom_distiller/core/url_utils.h"
11 #include "content/public/renderer/render_frame.h" 13 #include "content/public/renderer/render_frame.h"
12 14
13 #include "third_party/WebKit/public/platform/WebDistillability.h" 15 #include "third_party/WebKit/public/platform/WebDistillability.h"
14 #include "third_party/WebKit/public/web/WebDocument.h" 16 #include "third_party/WebKit/public/web/WebDocument.h"
(...skipping 27 matching lines...) Expand all
42 44
43 // Returns whether this update is the last one for the page. 45 // Returns whether this update is the last one for the page.
44 bool IsLast(bool is_loaded) { 46 bool IsLast(bool is_loaded) {
45 if (GetDistillerHeuristicsType() == DistillerHeuristicsType::ADABOOST_MODEL) 47 if (GetDistillerHeuristicsType() == DistillerHeuristicsType::ADABOOST_MODEL)
46 return is_loaded; 48 return is_loaded;
47 49
48 return true; 50 return true;
49 } 51 }
50 52
51 bool IsDistillablePageAdaboost(WebDocument& doc, 53 bool IsDistillablePageAdaboost(WebDocument& doc,
52 const DistillablePageDetector* detector) { 54 const DistillablePageDetector* detector,
55 bool is_last) {
53 WebDistillabilityFeatures features = doc.distillabilityFeatures(); 56 WebDistillabilityFeatures features = doc.distillabilityFeatures();
54 GURL parsed_url(doc.url()); 57 GURL parsed_url(doc.url());
55 if (!parsed_url.is_valid()) { 58 if (!parsed_url.is_valid()) {
56 return false; 59 return false;
57 } 60 }
58 // The adaboost model is only applied to non-mobile pages. 61 bool distillable = detector->Classify(CalculateDerivedFeatures(
59 if (features.isMobileFriendly) {
60 return false;
61 }
62 return detector->Classify(CalculateDerivedFeatures(
63 features.openGraph, 62 features.openGraph,
64 parsed_url, 63 parsed_url,
65 features.elementCount, 64 features.elementCount,
66 features.anchorCount, 65 features.anchorCount,
67 features.formCount, 66 features.formCount,
68 features.mozScore, 67 features.mozScore,
69 features.mozScoreAllSqrt, 68 features.mozScoreAllSqrt,
70 features.mozScoreAllLinear 69 features.mozScoreAllLinear
71 )); 70 ));
71
72 int bucket = static_cast<unsigned>(features.isMobileFriendly) |
73 (static_cast<unsigned>(distillable) << 1);
74 if (is_last) {
75 UMA_HISTOGRAM_ENUMERATION("DomDistiller.PageDistillableAfterLoading",
76 bucket, 4);
77 } else {
78 UMA_HISTOGRAM_ENUMERATION("DomDistiller.PageDistillableAfterParsing",
79 bucket, 4);
80 }
81 return distillable && (!features.isMobileFriendly);
72 } 82 }
73 83
74 bool IsDistillablePage(WebDocument& doc) { 84 bool IsDistillablePage(WebDocument& doc, bool is_last) {
75 switch (GetDistillerHeuristicsType()) { 85 switch (GetDistillerHeuristicsType()) {
76 case DistillerHeuristicsType::ALWAYS_TRUE: 86 case DistillerHeuristicsType::ALWAYS_TRUE:
77 return true; 87 return true;
78 case DistillerHeuristicsType::OG_ARTICLE: 88 case DistillerHeuristicsType::OG_ARTICLE:
79 return doc.distillabilityFeatures().openGraph; 89 return doc.distillabilityFeatures().openGraph;
80 case DistillerHeuristicsType::ADABOOST_MODEL: 90 case DistillerHeuristicsType::ADABOOST_MODEL:
81 return IsDistillablePageAdaboost( 91 return IsDistillablePageAdaboost(
82 doc, DistillablePageDetector::GetNewModel()); 92 doc, DistillablePageDetector::GetNewModel(), is_last);
83 case DistillerHeuristicsType::NONE: 93 case DistillerHeuristicsType::NONE:
84 default: 94 default:
85 return false; 95 return false;
86 } 96 }
87 } 97 }
88 98
89 } // namespace 99 } // namespace
90 100
91 DistillabilityAgent::DistillabilityAgent( 101 DistillabilityAgent::DistillabilityAgent(
92 content::RenderFrame* render_frame) 102 content::RenderFrame* render_frame)
(...skipping 10 matching lines...) Expand all
103 DCHECK(render_frame()); 113 DCHECK(render_frame());
104 if (!render_frame()->IsMainFrame()) return; 114 if (!render_frame()->IsMainFrame()) return;
105 DCHECK(render_frame()->GetWebFrame()); 115 DCHECK(render_frame()->GetWebFrame());
106 WebDocument doc = render_frame()->GetWebFrame()->document(); 116 WebDocument doc = render_frame()->GetWebFrame()->document();
107 if (doc.isNull() || doc.body().isNull()) return; 117 if (doc.isNull() || doc.body().isNull()) return;
108 if (!url_utils::IsUrlDistillable(doc.url())) return; 118 if (!url_utils::IsUrlDistillable(doc.url())) return;
109 119
110 bool is_loaded = layout_type == WebMeaningfulLayout::FinishedLoading; 120 bool is_loaded = layout_type == WebMeaningfulLayout::FinishedLoading;
111 if (!NeedToUpdate(is_loaded)) return; 121 if (!NeedToUpdate(is_loaded)) return;
112 122
123 bool is_last = IsLast(is_loaded);
113 Send(new FrameHostMsg_Distillability(routing_id(), 124 Send(new FrameHostMsg_Distillability(routing_id(),
114 IsDistillablePage(doc), IsLast(is_loaded))); 125 IsDistillablePage(doc, is_last), is_last));
115 } 126 }
116 127
117 128
118 DistillabilityAgent::~DistillabilityAgent() {} 129 DistillabilityAgent::~DistillabilityAgent() {}
119 130
120 } // namespace dom_distiller 131 } // namespace dom_distiller
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698