OLD | NEW |
(Empty) | |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "components/dom_distiller/content/common/distiller_messages.h" |
| 6 #include "components/dom_distiller/content/renderer/distillability_agent.h" |
| 7 |
| 8 #include "content/public/renderer/render_frame.h" |
| 9 |
| 10 #include "third_party/WebKit/public/web/WebDocument.h" |
| 11 #include "third_party/WebKit/public/web/WebElement.h" |
| 12 #include "third_party/WebKit/public/web/WebLocalFrame.h" |
| 13 #include "third_party/WebKit/public/web/WebNode.h" |
| 14 #include "third_party/WebKit/public/web/WebNodeList.h" |
| 15 |
| 16 namespace dom_distiller { |
| 17 |
| 18 namespace { |
| 19 |
| 20 using namespace blink; |
| 21 |
| 22 class ExtractFeatureWalker { |
| 23 public: |
| 24 ExtractFeatureWalker() |
| 25 : numElements(0), |
| 26 numAnchors(0), |
| 27 numForms(0) { |
| 28 } |
| 29 void walk(WebElement root) { |
| 30 blink::WebNodeList children = root.childNodes(); |
| 31 for (unsigned i = 0; i < children.length(); ++i) { |
| 32 WebNode child = children.item(i); |
| 33 if (!child.isElementNode()) |
| 34 continue; |
| 35 WebElement elem = child.to<WebElement>(); |
| 36 numElements++; |
| 37 if (elem.hasHTMLTagName("a")) { |
| 38 numAnchors++; |
| 39 } |
| 40 if (elem.hasHTMLTagName("form")) { |
| 41 numForms++; |
| 42 } |
| 43 walk(elem); |
| 44 } |
| 45 } |
| 46 int numElements; |
| 47 int numAnchors; |
| 48 int numForms; |
| 49 }; |
| 50 |
| 51 bool hasOGArticle(WebElement head) { |
| 52 blink::WebNodeList children = head.childNodes(); |
| 53 for (unsigned i = 0; i < children.length(); ++i) { |
| 54 WebNode child = children.item(i); |
| 55 if (!child.isElementNode()) |
| 56 continue; |
| 57 WebElement elem = child.to<WebElement>(); |
| 58 if (!elem.hasHTMLTagName("meta")) |
| 59 continue; |
| 60 if ((elem.getAttribute("name").equals("og:type")) || |
| 61 (elem.getAttribute("property").equals("og:type"))) { |
| 62 std::string content = elem.getAttribute("content").utf8(); |
| 63 if (base::ToUpperASCII(content) == "ARTICLE") { |
| 64 return true; |
| 65 } |
| 66 } |
| 67 } |
| 68 return false; |
| 69 }; |
| 70 |
| 71 } // namespace |
| 72 |
| 73 DistillabilityAgent::DistillabilityAgent( |
| 74 content::RenderFrame* render_frame) |
| 75 : RenderFrameObserver(render_frame) { |
| 76 } |
| 77 |
| 78 bool DistillabilityAgent::OnMessageReceived(const IPC::Message& msg) { |
| 79 bool handled = true; |
| 80 IPC_BEGIN_MESSAGE_MAP(DistillabilityAgent, msg) |
| 81 IPC_MESSAGE_HANDLER(FrameMsg_ExtractFeatureRequest, |
| 82 OnExtractFeatureRequest) |
| 83 IPC_MESSAGE_UNHANDLED(handled = false) |
| 84 IPC_END_MESSAGE_MAP() |
| 85 return handled; |
| 86 } |
| 87 |
| 88 void DistillabilityAgent::OnExtractFeatureRequest(int id) { |
| 89 FrameHostMsg_ExtractFeatureResponse_Params result; |
| 90 DCHECK(render_frame()); |
| 91 if (!render_frame() || !render_frame()->GetWebFrame()) { |
| 92 Send(new FrameHostMsg_ExtractFeatureResponse(routing_id(), id, result)); |
| 93 return; |
| 94 } |
| 95 WebDocument doc = render_frame()->GetWebFrame()->document(); |
| 96 if (doc.isNull() || doc.body().isNull()) { |
| 97 Send(new FrameHostMsg_ExtractFeatureResponse(routing_id(), id, result)); |
| 98 return; |
| 99 } |
| 100 |
| 101 result.isOGArticle = hasOGArticle(doc.head()); |
| 102 result.url = doc.url().string().utf8(); |
| 103 |
| 104 ExtractFeatureWalker walker; |
| 105 walker.walk(doc.body()); |
| 106 |
| 107 result.numElements = walker.numElements; |
| 108 result.numAnchors = walker.numAnchors; |
| 109 result.numForms = walker.numForms; |
| 110 |
| 111 result.innerText = doc.contentAsTextForTesting().utf8(); |
| 112 result.textContent = doc.body().textContent().utf8(); |
| 113 result.innerHTML = doc.body().innerHTML().utf8(); |
| 114 |
| 115 Send(new FrameHostMsg_ExtractFeatureResponse(routing_id(), id, result)); |
| 116 } |
| 117 |
| 118 DistillabilityAgent::~DistillabilityAgent() {} |
| 119 |
| 120 } // namespace dom_distiller |
OLD | NEW |