Index: components/dom_distiller/content/renderer/distillability_agent.cc |
diff --git a/components/dom_distiller/content/renderer/distillability_agent.cc b/components/dom_distiller/content/renderer/distillability_agent.cc |
new file mode 100644 |
index 0000000000000000000000000000000000000000..d70f61787e14d48b57e3d38b1176b56b7c3ffcc9 |
--- /dev/null |
+++ b/components/dom_distiller/content/renderer/distillability_agent.cc |
@@ -0,0 +1,120 @@ |
+// Copyright 2015 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#include "components/dom_distiller/content/common/distiller_messages.h" |
+#include "components/dom_distiller/content/renderer/distillability_agent.h" |
+ |
+#include "content/public/renderer/render_frame.h" |
+ |
+#include "third_party/WebKit/public/web/WebDocument.h" |
+#include "third_party/WebKit/public/web/WebElement.h" |
+#include "third_party/WebKit/public/web/WebLocalFrame.h" |
+#include "third_party/WebKit/public/web/WebNode.h" |
+#include "third_party/WebKit/public/web/WebNodeList.h" |
+ |
+namespace dom_distiller { |
+ |
+namespace { |
+ |
+using namespace blink; |
+ |
+// Recursively counts the element nodes found underneath a root element, |
+// keeping separate tallies for <a> and <form> elements. The root element |
+// itself is never counted. Results are read from the public counters. |
+class ExtractFeatureWalker { |
+ public: |
+  ExtractFeatureWalker() : numElements(0), numAnchors(0), numForms(0) {} |
+ |
+  // Walks all descendants of |root| depth-first and updates the counters. |
+  // Children that are not element nodes are skipped. |
+  void walk(WebElement root) { |
+    blink::WebNodeList kids = root.childNodes(); |
+    const unsigned kidCount = kids.length(); |
+    for (unsigned idx = 0; idx != kidCount; ++idx) { |
+      WebNode node = kids.item(idx); |
+      if (node.isElementNode()) { |
+        WebElement element = node.to<WebElement>(); |
+        ++numElements; |
+        // An element cannot be both an anchor and a form. |
+        if (element.hasHTMLTagName("a")) |
+          ++numAnchors; |
+        else if (element.hasHTMLTagName("form")) |
+          ++numForms; |
+        walk(element); |
+      } |
+    } |
+  } |
+ |
+  int numElements;  // Every element descendant visited so far. |
+  int numAnchors;   // Subset of numElements that are <a>. |
+  int numForms;     // Subset of numElements that are <form>. |
+}; |
+ |
+// Returns true if |head| has a direct <meta> child whose "name" or |
+// "property" attribute is exactly "og:type" and whose "content" attribute |
+// equals "article" (compared ASCII case-insensitively). Only direct children |
+// of |head| are examined. A null |head| yields false. |
+bool hasOGArticle(WebElement head) { |
+  // The caller only verifies that the document has a body; a document can |
+  // still lack a <head>, and childNodes() must not be called on a null node. |
+  if (head.isNull()) |
+    return false; |
+ |
+  blink::WebNodeList children = head.childNodes(); |
+  for (unsigned i = 0; i < children.length(); ++i) { |
+    WebNode child = children.item(i); |
+    if (!child.isElementNode()) |
+      continue; |
+    WebElement elem = child.to<WebElement>(); |
+    if (!elem.hasHTMLTagName("meta")) |
+      continue; |
+    // Accept both spellings seen in the wild: name="og:type" and the |
+    // Open Graph style property="og:type". |
+    if (elem.getAttribute("name").equals("og:type") || |
+        elem.getAttribute("property").equals("og:type")) { |
+      std::string content = elem.getAttribute("content").utf8(); |
+      if (base::ToUpperASCII(content) == "ARTICLE") |
+        return true; |
+    } |
+  } |
+  return false; |
+} |
+ |
+} // namespace |
+ |
+// Registers this agent as an observer of |render_frame|. |
+DistillabilityAgent::DistillabilityAgent(content::RenderFrame* render_frame) |
+    : RenderFrameObserver(render_frame) {} |
+ |
+// Routes |msg| to its handler. Returns true when the message is one this |
+// agent handles (currently only FrameMsg_ExtractFeatureRequest), and false |
+// for any other message. |
+bool DistillabilityAgent::OnMessageReceived(const IPC::Message& msg) { |
+  bool was_handled = true; |
+  IPC_BEGIN_MESSAGE_MAP(DistillabilityAgent, msg) |
+    IPC_MESSAGE_HANDLER(FrameMsg_ExtractFeatureRequest, OnExtractFeatureRequest) |
+    IPC_MESSAGE_UNHANDLED(was_handled = false) |
+  IPC_END_MESSAGE_MAP() |
+  return was_handled; |
+} |
+ |
+// Handles FrameMsg_ExtractFeatureRequest. Collects features of the frame's |
+// document (og:type article flag, URL, element/anchor/form counts and three |
+// text renderings of the content) and replies with |
+// FrameHostMsg_ExtractFeatureResponse carrying the same |id|. When the |
+// frame, the document or its body is unavailable, a default-constructed |
+// result is sent instead so the requester always gets a reply. |
+void DistillabilityAgent::OnExtractFeatureRequest(int id) { |
+  FrameHostMsg_ExtractFeatureResponse_Params result; |
+  DCHECK(render_frame()); |
+  if (!render_frame() || !render_frame()->GetWebFrame()) { |
+    Send(new FrameHostMsg_ExtractFeatureResponse(routing_id(), id, result)); |
+    return; |
+  } |
+  WebDocument doc = render_frame()->GetWebFrame()->document(); |
+  if (doc.isNull() || doc.body().isNull()) { |
+    Send(new FrameHostMsg_ExtractFeatureResponse(routing_id(), id, result)); |
+    return; |
+  } |
+ |
+  // Having a body does not guarantee a <head>, so only scan the head when it |
+  // exists; otherwise the page is simply not flagged as an og:type article. |
+  WebElement head = doc.head(); |
+  result.isOGArticle = !head.isNull() && hasOGArticle(head); |
+  result.url = doc.url().string().utf8(); |
+ |
+  ExtractFeatureWalker walker; |
+  walker.walk(doc.body()); |
+ |
+  result.numElements = walker.numElements; |
+  result.numAnchors = walker.numAnchors; |
+  result.numForms = walker.numForms; |
+ |
+  // NOTE(review): contentAsTextForTesting() is, by its name, a test-only |
+  // API; confirm that using it in this non-test path is intended. |
+  result.innerText = doc.contentAsTextForTesting().utf8(); |
+  result.textContent = doc.body().textContent().utf8(); |
+  result.innerHTML = doc.body().innerHTML().utf8(); |
+ |
+  Send(new FrameHostMsg_ExtractFeatureResponse(routing_id(), id, result)); |
+} |
+ |
+// The destructor body is intentionally empty. |
+DistillabilityAgent::~DistillabilityAgent() { |
+} |
+ |
+} // namespace dom_distiller |