OLD | NEW |
(Empty) | |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "components/dom_distiller/content/common/distiller_messages.h" |
| 6 #include "components/dom_distiller/content/renderer/distillability_agent.h" |
| 7 #include "components/dom_distiller/core/distillable_page_detector.h" |
| 8 #include "components/dom_distiller/core/experiments.h" |
| 9 #include "components/dom_distiller/core/page_features.h" |
| 10 #include "components/dom_distiller/core/url_utils.h" |
| 11 #include "content/public/renderer/render_frame.h" |
| 12 |
| 13 #include "third_party/WebKit/public/platform/WebDistillability.h" |
| 14 #include "third_party/WebKit/public/web/WebDocument.h" |
| 15 #include "third_party/WebKit/public/web/WebElement.h" |
| 16 #include "third_party/WebKit/public/web/WebLocalFrame.h" |
| 17 |
| 18 namespace dom_distiller { |
| 19 |
| 20 using namespace blink; |
| 21 |
| 22 namespace { |
| 23 |
| 24 // Returns whether it is necessary to send updates back to the browser. |
| 25 // The number of updates can be from 0 to 2. See the tests in |
| 26 // "distillable_page_utils_browsertest.cc". |
| 27 // Most heuristics types only require one update after parsing. |
| 28 // Adaboost is the only one doing the second update, which is after loading. |
| 29 bool NeedToUpdate(bool is_loaded) { |
| 30 switch (GetDistillerHeuristicsType()) { |
| 31 case DistillerHeuristicsType::ALWAYS_TRUE: |
| 32 return !is_loaded; |
| 33 case DistillerHeuristicsType::OG_ARTICLE: |
| 34 return !is_loaded; |
| 35 case DistillerHeuristicsType::ADABOOST_MODEL: |
| 36 return true; |
| 37 case DistillerHeuristicsType::NONE: |
| 38 default: |
| 39 return false; |
| 40 } |
| 41 } |
| 42 |
| 43 // Returns whether this update is the last one for the page. |
| 44 bool IsLast(bool is_loaded) { |
| 45 if (GetDistillerHeuristicsType() == DistillerHeuristicsType::ADABOOST_MODEL) |
| 46 return is_loaded; |
| 47 |
| 48 return true; |
| 49 } |
| 50 |
| 51 bool IsDistillablePageAdaboost(WebDocument& doc, |
| 52 const DistillablePageDetector* detector) { |
| 53 WebDistillabilityFeatures features = doc.distillabilityFeatures(); |
| 54 GURL parsed_url(doc.url()); |
| 55 if (!parsed_url.is_valid()) { |
| 56 return false; |
| 57 } |
| 58 // The adaboost model is only applied to non-mobile pages. |
| 59 if (features.isMobileFriendly) { |
| 60 return false; |
| 61 } |
| 62 return detector->Classify(CalculateDerivedFeatures( |
| 63 features.openGraph, |
| 64 parsed_url, |
| 65 features.elementCount, |
| 66 features.anchorCount, |
| 67 features.formCount, |
| 68 features.mozScore, |
| 69 features.mozScoreAllSqrt, |
| 70 features.mozScoreAllLinear |
| 71 )); |
| 72 } |
| 73 |
| 74 bool IsDistillablePage(WebDocument& doc) { |
| 75 switch (GetDistillerHeuristicsType()) { |
| 76 case DistillerHeuristicsType::ALWAYS_TRUE: |
| 77 return true; |
| 78 case DistillerHeuristicsType::OG_ARTICLE: |
| 79 return doc.distillabilityFeatures().openGraph; |
| 80 case DistillerHeuristicsType::ADABOOST_MODEL: |
| 81 return IsDistillablePageAdaboost( |
| 82 doc, DistillablePageDetector::GetNewModel()); |
| 83 case DistillerHeuristicsType::NONE: |
| 84 default: |
| 85 return false; |
| 86 } |
| 87 } |
| 88 |
| 89 } // namespace |
| 90 |
| 91 DistillabilityAgent::DistillabilityAgent( |
| 92 content::RenderFrame* render_frame) |
| 93 : RenderFrameObserver(render_frame) { |
| 94 } |
| 95 |
| 96 void DistillabilityAgent::DidMeaningfulLayout( |
| 97 WebMeaningfulLayout layout_type) { |
| 98 if (layout_type != WebMeaningfulLayout::FinishedParsing && |
| 99 layout_type != WebMeaningfulLayout::FinishedLoading) { |
| 100 return; |
| 101 } |
| 102 |
| 103 DCHECK(render_frame()); |
| 104 if (!render_frame()->IsMainFrame()) return; |
| 105 DCHECK(render_frame()->GetWebFrame()); |
| 106 WebDocument doc = render_frame()->GetWebFrame()->document(); |
| 107 if (doc.isNull() || doc.body().isNull()) return; |
| 108 if (!url_utils::IsUrlDistillable(doc.url())) return; |
| 109 |
| 110 bool is_loaded = layout_type == WebMeaningfulLayout::FinishedLoading; |
| 111 if (!NeedToUpdate(is_loaded)) return; |
| 112 |
| 113 Send(new FrameHostMsg_Distillability(routing_id(), |
| 114 IsDistillablePage(doc), IsLast(is_loaded))); |
| 115 } |
| 116 |
| 117 |
| 118 DistillabilityAgent::~DistillabilityAgent() {} |
| 119 |
| 120 } // namespace dom_distiller |
OLD | NEW |