Chromium Code Reviews

| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "components/dom_distiller/content/common/distiller_messages.h" | |
| 6 #include "components/dom_distiller/content/renderer/distillability_agent.h" | |
| 7 #include "components/dom_distiller/core/distillable_page_detector.h" | |
| 8 #include "components/dom_distiller/core/experiments.h" | |
| 9 #include "components/dom_distiller/core/page_features.h" | |
| 10 #include "components/dom_distiller/core/url_utils.h" | |
| 11 #include "content/public/renderer/render_frame.h" | |
| 12 | |
| 13 #include "third_party/WebKit/public/platform/WebDistillability.h" | |
| 14 #include "third_party/WebKit/public/web/WebDocument.h" | |
| 15 #include "third_party/WebKit/public/web/WebElement.h" | |
| 16 #include "third_party/WebKit/public/web/WebLocalFrame.h" | |
| 17 #include "third_party/WebKit/public/web/WebNode.h" | |
| 18 #include "third_party/WebKit/public/web/WebNodeList.h" | |
| 19 | |
| 20 namespace dom_distiller { | |
| 21 | |
| 22 using namespace blink; | |
| 23 | |
| 24 namespace { | |
| 25 | |
| 26 // Decides whether an update has to be sent back to the browser. | |
| 27 // A page receives between 0 and 2 updates. See the tests in | |
| 28 // "distillable_page_utils_browsertest.cc". | |
| 29 // Most heuristics types need a single update, sent after parsing. | |
| 30 // Adaboost is the only type that sends a second update, after loading. | |
| 31 bool needToUpdate(bool is_loaded) { | |
|
nyquist
2015/11/11 21:53:45
Nit: NeedToUpdate (and IsLast below)
wychen
2015/11/12 04:59:45
Done.
| |
| 32 const DistillerHeuristicsType heuristics = GetDistillerHeuristicsType(); | |
| 33 if (heuristics == DistillerHeuristicsType::ADABOOST_MODEL) { | |
| 34 return true;  // Updates whether or not the page has loaded. | |
| 35 } | |
| 36 const bool updates_once = | |
| 37 heuristics == DistillerHeuristicsType::ALWAYS_TRUE || | |
| 38 heuristics == DistillerHeuristicsType::OG_ARTICLE; | |
| 39 if (updates_once) { | |
| 40 return !is_loaded;  // Updates only before loading has finished. | |
| 41 } | |
| 42 return false;  // NONE and any unrecognized type never update. | |
| 43 } | |
| 44 | |
| 45 // Returns true when the current update is the final one for the page. | |
| 46 // Every heuristics type except ADABOOST_MODEL is final unconditionally; | |
| 47 // ADABOOST_MODEL is final only when |is_loaded| is true. | |
| 48 bool isLast(bool is_loaded) { | |
| 49 const DistillerHeuristicsType heuristics = GetDistillerHeuristicsType(); | |
| 50 const bool uses_adaboost = | |
| 51 heuristics == DistillerHeuristicsType::ADABOOST_MODEL; | |
| 52 if (uses_adaboost) { | |
| 53 // With adaboost, only the update made after loading is the last one. | |
| 54 return is_loaded; | |
|
nyquist
2015/11/11 21:53:45
Optional nit:
Could this just be:
###
if (GetDist
wychen
2015/11/12 04:59:45
Done.
| |
| 55 } | |
| 56 // ALWAYS_TRUE, OG_ARTICLE, NONE and any unrecognized type. | |
| 57 return true; | |
| 58 } | |
| 59 | |
| 60 // Applies |detector| to |doc|. Invalid URLs and mobile pages yield false. | |
| 61 bool IsDistillablePageAdaboost(WebDocument& doc, | |
| 62 const DistillablePageDetector* detector) { | |
| 63 // Collect the document statistics first, then parse the document URL. | |
| 64 const WebDistillabilityFeatures stats = doc.distillabilityFeatures(); | |
| 65 const GURL page_url(doc.url()); | |
| 66 // The adaboost model is only applied to non-mobile pages with a valid URL. | |
| 67 if (!page_url.is_valid() || stats.isMobileFriendly) { | |
| 68 return false; | |
| 69 } | |
| 70 const auto derived_features = CalculateDerivedFeatures( | |
| 71 stats.openGraph, | |
| 72 page_url, | |
| 73 stats.elementCount, | |
| 74 stats.anchorCount, | |
| 75 stats.formCount, | |
| 76 stats.mozScore, | |
| 77 stats.mozScoreAllSqrt, | |
| 78 stats.mozScoreAllLinear); | |
| 79 // The classifier's verdict on the derived features is the result. | |
| 80 return detector->Classify(derived_features); | |
| 81 } | |
| 82 | |
| 83 // Evaluates |doc| with the heuristic chosen by GetDistillerHeuristicsType(). | |
| 84 bool IsDistillablePage(WebDocument& doc) { | |
| 85 const DistillerHeuristicsType heuristics = GetDistillerHeuristicsType(); | |
| 86 if (heuristics == DistillerHeuristicsType::ALWAYS_TRUE) { | |
| 87 return true; | |
| 88 } | |
| 89 if (heuristics == DistillerHeuristicsType::OG_ARTICLE) { | |
| 90 return doc.distillabilityFeatures().openGraph; | |
| 91 } | |
| 92 if (heuristics != DistillerHeuristicsType::ADABOOST_MODEL) { | |
| 93 return false;  // NONE and any unrecognized heuristics type. | |
| 94 } | |
| 95 return IsDistillablePageAdaboost(doc, DistillablePageDetector::GetNewModel()); | |
| 96 } | |
| 97 | |
| 98 } // namespace | |
| 99 | |
| 100 // Hands |render_frame| to the RenderFrameObserver base class; no other setup | |
| 101 // is performed here. | |
| 102 DistillabilityAgent::DistillabilityAgent(content::RenderFrame* render_frame) | |
| 103 : RenderFrameObserver(render_frame) {} | |
| 104 | |
| 105 // Sends the page's distillability to the browser once parsing or loading of | |
| 106 // the main frame has finished, if the heuristics type calls for an update. | |
| 107 void DistillabilityAgent::DidMeaningfulLayout( | |
| 108 WebMeaningfulLayout layout_type) { | |
| 109 const bool is_parsed = layout_type == WebMeaningfulLayout::FinishedParsing; | |
| 110 const bool is_loaded = layout_type == WebMeaningfulLayout::FinishedLoading; | |
| 111 if (!is_parsed && !is_loaded) return;  // Other layout types are ignored. | |
| 112 | |
| 113 DCHECK(render_frame()); | |
| 114 if (!render_frame()->IsMainFrame()) return; | |
| 115 DCHECK(render_frame()->GetWebFrame()); | |
| 116 WebDocument document = render_frame()->GetWebFrame()->document(); | |
| 117 if (document.isNull() || document.body().isNull()) return; | |
| 118 if (!url_utils::IsUrlDistillable(document.url())) return; | |
| 119 if (!needToUpdate(is_loaded)) return; | |
| 120 | |
| 121 const bool is_distillable = IsDistillablePage(document); | |
| 122 Send(new FrameHostMsg_Distillability(routing_id(), is_distillable, | |
| 123 isLast(is_loaded))); | |
| 124 } | |
| 125 | |
| 126 // No explicit cleanup is performed on destruction. | |
| 127 DistillabilityAgent::~DistillabilityAgent() = default; | |
| 128 | |
| 129 } // namespace dom_distiller | |
| OLD | NEW |