Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(445)

Unified Diff: components/dom_distiller/content/renderer/distillability_agent.cc

Issue 1248643004: Test distillability without JavaScript (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@early
Patch Set: move tests, remove dbg msg Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: components/dom_distiller/content/renderer/distillability_agent.cc
diff --git a/components/dom_distiller/content/renderer/distillability_agent.cc b/components/dom_distiller/content/renderer/distillability_agent.cc
new file mode 100644
index 0000000000000000000000000000000000000000..d70f61787e14d48b57e3d38b1176b56b7c3ffcc9
--- /dev/null
+++ b/components/dom_distiller/content/renderer/distillability_agent.cc
@@ -0,0 +1,120 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/dom_distiller/content/common/distiller_messages.h"
+#include "components/dom_distiller/content/renderer/distillability_agent.h"
+
+#include "content/public/renderer/render_frame.h"
+
+#include "third_party/WebKit/public/web/WebDocument.h"
+#include "third_party/WebKit/public/web/WebElement.h"
+#include "third_party/WebKit/public/web/WebLocalFrame.h"
+#include "third_party/WebKit/public/web/WebNode.h"
+#include "third_party/WebKit/public/web/WebNodeList.h"
+
+namespace dom_distiller {
+
+namespace {
+
+using namespace blink;
+
+class ExtractFeatureWalker {
+public:
+ ExtractFeatureWalker()
+ : numElements(0),
+ numAnchors(0),
+ numForms(0) {
+ }
+ void walk(WebElement root) {
+ blink::WebNodeList children = root.childNodes();
+ for (unsigned i = 0; i < children.length(); ++i) {
+ WebNode child = children.item(i);
+ if (!child.isElementNode())
+ continue;
+ WebElement elem = child.to<WebElement>();
+ numElements++;
+ if (elem.hasHTMLTagName("a")) {
+ numAnchors++;
+ }
+ if (elem.hasHTMLTagName("form")) {
+ numForms++;
+ }
+ walk(elem);
+ }
+ }
+ int numElements;
+ int numAnchors;
+ int numForms;
+};
+
+bool hasOGArticle(WebElement head) {
+ blink::WebNodeList children = head.childNodes();
+ for (unsigned i = 0; i < children.length(); ++i) {
+ WebNode child = children.item(i);
+ if (!child.isElementNode())
+ continue;
+ WebElement elem = child.to<WebElement>();
+ if (!elem.hasHTMLTagName("meta"))
+ continue;
+ if ((elem.getAttribute("name").equals("og:type")) ||
+ (elem.getAttribute("property").equals("og:type"))) {
+ std::string content = elem.getAttribute("content").utf8();
+ if (base::ToUpperASCII(content) == "ARTICLE") {
+ return true;
+ }
+ }
+ }
+ return false;
+};
+
+} // namespace
+
+DistillabilityAgent::DistillabilityAgent(
+ content::RenderFrame* render_frame)
+ : RenderFrameObserver(render_frame) {
+}
+
+bool DistillabilityAgent::OnMessageReceived(const IPC::Message& msg) {
+ bool handled = true;
+ IPC_BEGIN_MESSAGE_MAP(DistillabilityAgent, msg)
+ IPC_MESSAGE_HANDLER(FrameMsg_ExtractFeatureRequest,
+ OnExtractFeatureRequest)
+ IPC_MESSAGE_UNHANDLED(handled = false)
+ IPC_END_MESSAGE_MAP()
+ return handled;
+}
+
+void DistillabilityAgent::OnExtractFeatureRequest(int id) {
+ FrameHostMsg_ExtractFeatureResponse_Params result;
+ DCHECK(render_frame());
+ if (!render_frame() || !render_frame()->GetWebFrame()) {
+ Send(new FrameHostMsg_ExtractFeatureResponse(routing_id(), id, result));
+ return;
+ }
+ WebDocument doc = render_frame()->GetWebFrame()->document();
+ if (doc.isNull() || doc.body().isNull()) {
+ Send(new FrameHostMsg_ExtractFeatureResponse(routing_id(), id, result));
+ return;
+ }
+
+ result.isOGArticle = hasOGArticle(doc.head());
+ result.url = doc.url().string().utf8();
+
+ ExtractFeatureWalker walker;
+ walker.walk(doc.body());
+
+ result.numElements = walker.numElements;
+ result.numAnchors = walker.numAnchors;
+ result.numForms = walker.numForms;
+
+ result.innerText = doc.contentAsTextForTesting().utf8();
+ result.textContent = doc.body().textContent().utf8();
+ result.innerHTML = doc.body().innerHTML().utf8();
+
+ Send(new FrameHostMsg_ExtractFeatureResponse(routing_id(), id, result));
+}
+
+DistillabilityAgent::~DistillabilityAgent() {}
+
+} // namespace dom_distiller

Powered by Google App Engine
This is Rietveld 408576698