| Index: components/dom_distiller/content/renderer/distillability_agent.cc
|
| diff --git a/components/dom_distiller/content/renderer/distillability_agent.cc b/components/dom_distiller/content/renderer/distillability_agent.cc
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..d70f61787e14d48b57e3d38b1176b56b7c3ffcc9
|
| --- /dev/null
|
| +++ b/components/dom_distiller/content/renderer/distillability_agent.cc
|
| @@ -0,0 +1,120 @@
|
| +// Copyright 2015 The Chromium Authors. All rights reserved.
|
| +// Use of this source code is governed by a BSD-style license that can be
|
| +// found in the LICENSE file.
|
| +
|
| +#include "components/dom_distiller/content/common/distiller_messages.h"
|
| +#include "components/dom_distiller/content/renderer/distillability_agent.h"
|
| +
|
| +#include "content/public/renderer/render_frame.h"
|
| +
|
| +#include "third_party/WebKit/public/web/WebDocument.h"
|
| +#include "third_party/WebKit/public/web/WebElement.h"
|
| +#include "third_party/WebKit/public/web/WebLocalFrame.h"
|
| +#include "third_party/WebKit/public/web/WebNode.h"
|
| +#include "third_party/WebKit/public/web/WebNodeList.h"
|
| +
|
| +namespace dom_distiller {
|
| +
|
| +namespace {
|
| +
|
| +using namespace blink;
|
| +
|
| +// Tallies simple structural features of a DOM subtree: the number of element
|
| +// nodes it contains and how many of those are <a> or <form> elements. The
|
| +// counters start at zero and accumulate across calls to walk().
|
| +class ExtractFeatureWalker {
|
| + public:
|
| +  ExtractFeatureWalker()
|
| +      : numElements(0),
|
| +        numAnchors(0),
|
| +        numForms(0) {
|
| +  }
|
| +
|
| +  // Visits every element descendant of |root| (|root| itself is not counted)
|
| +  // and updates the counters. A null |root| is ignored.
|
| +  //
|
| +  // The traversal is iterative: it follows firstChild/nextSibling/parentNode
|
| +  // links instead of making one recursive call per nesting level, so a very
|
| +  // deeply nested tree cannot exhaust the native stack.
|
| +  void walk(WebElement root) {
|
| +    if (root.isNull())
|
| +      return;
|
| +    WebNode node = root.firstChild();
|
| +    while (!node.isNull()) {
|
| +      if (node.isElementNode()) {
|
| +        WebElement elem = node.to<WebElement>();
|
| +        ++numElements;
|
| +        if (elem.hasHTMLTagName("a"))
|
| +          ++numAnchors;
|
| +        if (elem.hasHTMLTagName("form"))
|
| +          ++numForms;
|
| +        // Descend before moving sideways. Only element nodes are entered,
|
| +        // matching the set of nodes the counters are defined over.
|
| +        WebNode firstChild = elem.firstChild();
|
| +        if (!firstChild.isNull()) {
|
| +          node = firstChild;
|
| +          continue;
|
| +        }
|
| +      }
|
| +      // Nothing (more) below |node|: advance to its next sibling, climbing
|
| +      // towards |root| until an ancestor with an unvisited sibling is found.
|
| +      WebNode next = node.nextSibling();
|
| +      while (next.isNull()) {
|
| +        node = node.parentNode();
|
| +        // Reaching |root| again means the whole subtree has been visited.
|
| +        if (node.isNull() || node.equals(root))
|
| +          return;
|
| +        next = node.nextSibling();
|
| +      }
|
| +      node = next;
|
| +    }
|
| +  }
|
| +
|
| +  int numElements;  // Element nodes visited so far.
|
| +  int numAnchors;   // Of those, elements with the HTML tag name "a".
|
| +  int numForms;     // Of those, elements with the HTML tag name "form".
|
| +};
|
| +
|
| +// Returns true when |head| has a direct <meta> child whose "name" or
|
| +// "property" attribute equals "og:type" and whose "content" attribute is
|
| +// "article", compared ASCII case-insensitively. Only direct children of
|
| +// |head| are inspected.
|
| +//
|
| +// Returns false when |head| is a null handle. The caller only verifies that
|
| +// the document has a body, so a document without a <head> element can reach
|
| +// this function and the null handle must not be dereferenced.
|
| +bool hasOGArticle(WebElement head) {
|
| +  if (head.isNull())
|
| +    return false;
|
| +
|
| +  WebNodeList children = head.childNodes();
|
| +  for (unsigned i = 0; i < children.length(); ++i) {
|
| +    WebNode child = children.item(i);
|
| +    if (!child.isElementNode())
|
| +      continue;
|
| +    WebElement elem = child.to<WebElement>();
|
| +    if (!elem.hasHTMLTagName("meta"))
|
| +      continue;
|
| +    // The type may be declared through either attribute; accept both.
|
| +    if (elem.getAttribute("name").equals("og:type") ||
|
| +        elem.getAttribute("property").equals("og:type")) {
|
| +      std::string content = elem.getAttribute("content").utf8();
|
| +      if (base::ToUpperASCII(content) == "ARTICLE")
|
| +        return true;
|
| +    }
|
| +  }
|
| +  return false;
|
| +}
|
| +
|
| +} // namespace
|
| +
|
| +// Forwards |render_frame| to the RenderFrameObserver base class; no other
|
| +// state is initialized here.
|
| +DistillabilityAgent::DistillabilityAgent(content::RenderFrame* render_frame)
|
| +    : RenderFrameObserver(render_frame) {}
|
| +
|
| +// Dispatches |msg| through the IPC message map below.
|
| +// FrameMsg_ExtractFeatureRequest is routed to OnExtractFeatureRequest(); any
|
| +// other message is reported as unhandled by returning false.
|
| +bool DistillabilityAgent::OnMessageReceived(const IPC::Message& msg) {
|
| +  // Assume the message is ours; the UNHANDLED entry clears the flag otherwise.
|
| +  bool was_handled = true;
|
| +  IPC_BEGIN_MESSAGE_MAP(DistillabilityAgent, msg)
|
| +    IPC_MESSAGE_HANDLER(FrameMsg_ExtractFeatureRequest,
|
| +                        OnExtractFeatureRequest)
|
| +    IPC_MESSAGE_UNHANDLED(was_handled = false)
|
| +  IPC_END_MESSAGE_MAP()
|
| +  return was_handled;
|
| +}
|
| +
|
| +// Handles an extract-feature request identified by |id|. Collects features
|
| +// from the frame's current document and sends them back in a
|
| +// FrameHostMsg_ExtractFeatureResponse carrying the same |id|.
|
| +//
|
| +// Exactly one response is sent per request. If the frame, its WebFrame, the
|
| +// document or the document body is unavailable, the response carries the
|
| +// params object exactly as it was default-constructed.
|
| +void DistillabilityAgent::OnExtractFeatureRequest(int id) {
|
| +  FrameHostMsg_ExtractFeatureResponse_Params response;
|
| +  DCHECK(render_frame());
|
| +  if (render_frame() && render_frame()->GetWebFrame()) {
|
| +    WebDocument document = render_frame()->GetWebFrame()->document();
|
| +    if (!document.isNull() && !document.body().isNull()) {
|
| +      response.isOGArticle = hasOGArticle(document.head());
|
| +      response.url = document.url().string().utf8();
|
| +
|
| +      // Structural counts are gathered over the body subtree only.
|
| +      ExtractFeatureWalker counter;
|
| +      counter.walk(document.body());
|
| +      response.numElements = counter.numElements;
|
| +      response.numAnchors = counter.numAnchors;
|
| +      response.numForms = counter.numForms;
|
| +
|
| +      // Three textual views of the page: the document's content as text,
|
| +      // the body's textContent, and the body's serialized inner HTML.
|
| +      response.innerText = document.contentAsTextForTesting().utf8();
|
| +      response.textContent = document.body().textContent().utf8();
|
| +      response.innerHTML = document.body().innerHTML().utf8();
|
| +    }
|
| +  }
|
| +  Send(new FrameHostMsg_ExtractFeatureResponse(routing_id(), id, response));
|
| +}
|
| +
|
| +// Nothing to release explicitly; the body is intentionally empty.
|
| +DistillabilityAgent::~DistillabilityAgent() {
|
| +}
|
| +
|
| +} // namespace dom_distiller
|
|
|