Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(310)

Unified Diff: headless/public/util/dom_tree_extractor_browsertest.cc

Issue 2385653003: Add a utility class for extracting details of the DOM (Closed)
Patch Set: Refactor Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: headless/public/util/dom_tree_extractor_browsertest.cc
diff --git a/headless/public/util/dom_tree_extractor_browsertest.cc b/headless/public/util/dom_tree_extractor_browsertest.cc
new file mode 100644
index 0000000000000000000000000000000000000000..c3a62e7772cc756145ef0ceb93aacfad9608d9fd
--- /dev/null
+++ b/headless/public/util/dom_tree_extractor_browsertest.cc
@@ -0,0 +1,484 @@
+// Copyright 2016 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "headless/public/util/dom_tree_extractor.h"
+
+#include <memory>
+#include "base/json/json_writer.h"
+#include "base/strings/string_util.h"
+#include "content/public/browser/render_widget_host_view.h"
+#include "content/public/browser/web_contents.h"
+#include "content/public/test/browser_test.h"
+#include "headless/lib/browser/headless_web_contents_impl.h"
+#include "headless/public/domains/browser.h"
+#include "headless/public/domains/emulation.h"
+#include "headless/public/domains/network.h"
+#include "headless/public/domains/page.h"
+#include "headless/public/headless_browser.h"
+#include "headless/public/headless_devtools_client.h"
+#include "headless/public/headless_devtools_target.h"
+#include "headless/test/headless_browser_test.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "url/gurl.h"
+
+namespace headless {
+
+// Browser test for DomTreeExtractor. Loads /dom_tree_test.html from the
+// embedded test server, extracts the DOM tree once the page's load event has
+// fired, and compares a normalized JSON dump of every extracted node against
+// the golden strings in OnDomTreeExtracted().
+class DomTreeExtractorBrowserTest : public HeadlessAsyncDevTooledBrowserTest,
+                                    public page::Observer {
+ public:
+  // Starts the embedded test server, registers this fixture as a page
+  // observer, enables the page domain and navigates to the test page. The
+  // test continues in OnLoadEventFired().
+  void RunDevTooledTest() override {
+    EXPECT_TRUE(embedded_test_server()->Start());
+    devtools_client_->GetPage()->AddObserver(this);
+    devtools_client_->GetPage()->Enable();
+    devtools_client_->GetPage()->Navigate(
+        embedded_test_server()->GetURL("/dom_tree_test.html").spec());
+  }
+
+  // page::Observer override. Unregisters this observer, then creates the
+  // extractor and requests the DOM tree; the result is delivered to
+  // OnDomTreeExtracted().
+  void OnLoadEventFired(const page::LoadEventFiredParams& params) override {
+    devtools_client_->GetPage()->RemoveObserver(this);
+
+    // base::Unretained relies on |extractor_|, which is owned by |this|, not
+    // invoking the callback after this fixture has been destroyed.
+    extractor_.reset(new DomTreeExtractor(devtools_client_.get()));
+    extractor_->ExtractDomTree(
+        base::Bind(&DomTreeExtractorBrowserTest::OnDomTreeExtracted,
+                   base::Unretained(this)));
+  }
+
+  // Receives the extracted tree, converts every node into a dictionary with
+  // run-dependent values normalized, merges in the layout data, and compares
+  // the pretty-printed JSON of each node with |expected_results|.
+  //
+  // Only non-fatal failures are used here: a fatal ASSERT_* would return from
+  // this callback before FinishAsynchronousTest() at the end is reached
+  // (presumably leaving the asynchronous test to run into its timeout).
+  void OnDomTreeExtracted(DomTreeExtractor::DomTree dom_tree) {
+    GURL::Replacements replace_port;
+    replace_port.SetPortStr("");
+
+    std::vector<std::unique_ptr<base::DictionaryValue>> dom_nodes(
+        dom_tree.dom_nodes_.size());
+
+    // For convenience flatten the dom tree into an array.
+    for (size_t i = 0; i < dom_tree.dom_nodes_.size(); i++) {
+      dom::Node* node = const_cast<dom::Node*>(dom_tree.dom_nodes_[i]);
+
+      dom_nodes[i].reset(
+          static_cast<base::DictionaryValue*>(node->Serialize().release()));
+
+      // Convert child & content document pointers into indexes.
+      if (node->HasChildren()) {
+        std::unique_ptr<base::ListValue> children(new base::ListValue());
+        for (const std::unique_ptr<dom::Node>& child : *node->GetChildren()) {
+          children->AppendInteger(
+              dom_tree.node_id_to_index_[child->GetNodeId()]);
+        }
+        dom_nodes[i]->Set("childIndices", std::move(children));
+        dom_nodes[i]->Remove("children", nullptr);
+      }
+
+      if (node->HasContentDocument()) {
+        dom_nodes[i]->SetInteger(
+            "contentDocumentIndex",
+            dom_tree
+                .node_id_to_index_[node->GetContentDocument()->GetNodeId()]);
+        dom_nodes[i]->Remove("contentDocument", nullptr);
+      }
+
+      // The child list is already represented by "childIndices" above.
+      dom_nodes[i]->Remove("childNodeCount", nullptr);
+
+      // Frame IDs are random.
+      if (dom_nodes[i]->HasKey("frameId"))
+        dom_nodes[i]->SetString("frameId", "?");
+
+      // Ports are random.
+      std::string url;
+      if (dom_nodes[i]->GetString("baseURL", &url)) {
+        dom_nodes[i]->SetString(
+            "baseURL", GURL(url).ReplaceComponents(replace_port).spec());
+      }
+
+      if (dom_nodes[i]->GetString("documentURL", &url)) {
+        dom_nodes[i]->SetString(
+            "documentURL", GURL(url).ReplaceComponents(replace_port).spec());
+      }
+    }
+
+    // Merge LayoutTreeNode data into the dictionaries.
+    for (const dom::LayoutTreeNode* layout_node : dom_tree.layout_tree_nodes_) {
+      auto it =
+          dom_tree.node_id_to_index_.find(layout_node->GetBackendNodeId());
+      if (it == dom_tree.node_id_to_index_.end()) {
+        // Record the problem and keep going so that the remaining checks run
+        // and FinishAsynchronousTest() is still reached.
+        ADD_FAILURE() << "No DOM node index for layout node with backend id "
+                      << layout_node->GetBackendNodeId();
+        continue;
+      }
+
+      base::DictionaryValue* node_dict = dom_nodes[it->second].get();
+      node_dict->Set("boundingBox", layout_node->GetBoundingBox()->Serialize());
+
+      if (layout_node->HasLayoutText())
+        node_dict->SetString("layoutText", layout_node->GetLayoutText());
+
+      if (layout_node->HasInlineTextNodes()) {
+        std::unique_ptr<base::ListValue> inline_text_nodes(
+            new base::ListValue());
+        for (const std::unique_ptr<dom::InlineTextBox>& inline_text_box :
+             *layout_node->GetInlineTextNodes()) {
+          size_t index = inline_text_nodes->GetSize();
+          inline_text_nodes->Set(index, inline_text_box->Serialize());
+        }
+        node_dict->Set("inlineTextNodes", std::move(inline_text_nodes));
+      }
+    }
+
+    // Golden output, one entry per flattened node. Single quotes stand in for
+    // the double quotes of the real JSON (see the replacement below).
+    const std::vector<std::string> expected_results = {
+        "{\n"
+        " 'baseURL': 'http://127.0.0.1/dom_tree_test.html',\n"
+        " 'childIndices': [ 1 ],\n"
+        " 'documentURL': 'http://127.0.0.1/dom_tree_test.html',\n"
+        " 'localName': '',\n"
+        " 'nodeId': 1,\n"
+        " 'nodeName': '#document',\n"
+        " 'nodeType': 9,\n"
+        " 'nodeValue': '',\n"
+        " 'xmlVersion': ''\n"
+        "}\n",
+
+        "{\n"
+        " 'attributes': [ ],\n"
+        " 'childIndices': [ 2, 5 ],\n"
+        " 'frameId': '?',\n"
+        " 'localName': 'html',\n"
+        " 'nodeId': 2,\n"
+        " 'nodeName': 'HTML',\n"
+        " 'nodeType': 1,\n"
+        " 'nodeValue': ''\n"
+        "}\n",
+
+        "{\n"
+        " 'attributes': [ ],\n"
+        " 'boundingBox': {\n"
+        " 'height': 600.0,\n"
+        " 'width': 800.0,\n"
+        " 'x': 0.0,\n"
+        " 'y': 0.0\n"
+        " },\n"
+        " 'childIndices': [ 3 ],\n"
+        " 'localName': 'head',\n"
+        " 'nodeId': 3,\n"
+        " 'nodeName': 'HEAD',\n"
+        " 'nodeType': 1,\n"
+        " 'nodeValue': ''\n"
+        "}\n",
+
+        "{\n"
+        " 'attributes': [ ],\n"
+        " 'boundingBox': {\n"
+        " 'height': 584.0,\n"
+        " 'width': 784.0,\n"
+        " 'x': 8.0,\n"
+        " 'y': 8.0\n"
+        " },\n"
+        " 'childIndices': [ 4 ],\n"
+        " 'localName': 'title',\n"
+        " 'nodeId': 5,\n"
+        " 'nodeName': 'TITLE',\n"
+        " 'nodeType': 1,\n"
+        " 'nodeValue': ''\n"
+        "}\n",
+
+        "{\n"
+        " 'boundingBox': {\n"
+        " 'height': 226.0,\n"
+        " 'width': 784.0,\n"
+        " 'x': 8.0,\n"
+        " 'y': 8.0\n"
+        " },\n"
+        " 'localName': '',\n"
+        " 'nodeId': 6,\n"
+        " 'nodeName': '#text',\n"
+        " 'nodeType': 3,\n"
+        " 'nodeValue': 'Hello world!'\n"
+        "}\n",
+
+        "{\n"
+        " 'attributes': [ ],\n"
+        " 'boundingBox': {\n"
+        " 'height': 600.0,\n"
+        " 'width': 800.0,\n"
+        " 'x': 0.0,\n"
+        " 'y': 0.0\n"
+        " },\n"
+        " 'childIndices': [ 6 ],\n"
+        " 'localName': 'body',\n"
+        " 'nodeId': 4,\n"
+        " 'nodeName': 'BODY',\n"
+        " 'nodeType': 1,\n"
+        " 'nodeValue': ''\n"
+        "}\n",
+
+        "{\n"
+        " 'attributes': [ 'id', 'id1' ],\n"
+        " 'boundingBox': {\n"
+        " 'height': 17.0,\n"
+        " 'width': 73.0,\n"
+        " 'x': 8.0,\n"
+        " 'y': 198.0\n"
+        " },\n"
+        " 'childIndices': [ 7, 8, 15 ],\n"
+        " 'inlineTextNodes': [ {\n"
+        " 'boundingBox': {\n"
+        " 'height': 17.0,\n"
+        " 'width': 72.4375,\n"
+        " 'x': 8.0,\n"
+        " 'y': 198.0\n"
+        " },\n"
+        " 'numCharacters': 11,\n"
+        " 'startCharacterIndex': 1\n"
+        " } ],\n"
+        " 'layoutText': '\\nSome text.\\n',\n"
+        " 'localName': 'div',\n"
+        " 'nodeId': 7,\n"
+        " 'nodeName': 'DIV',\n"
+        " 'nodeType': 1,\n"
+        " 'nodeValue': ''\n"
+        "}\n",
+
+        "{\n"
+        " 'boundingBox': {\n"
+        " 'height': 200.0,\n"
+        " 'width': 400.0,\n"
+        " 'x': 82.0,\n"
+        " 'y': 10.0\n"
+        " },\n"
+        " 'localName': '',\n"
+        " 'nodeId': 8,\n"
+        " 'nodeName': '#text',\n"
+        " 'nodeType': 3,\n"
+        " 'nodeValue': '\\nSome text.\\n'\n"
+        "}\n",
+
+        "{\n"
+        " 'attributes': [ 'src', '/iframe.html', 'width', '400', 'height',"
+        " '200' ],\n"
+        " 'boundingBox': {\n"
+        " 'height': 171.0,\n"
+        " 'width': 384.0,\n"
+        " 'x': 90.0,\n"
+        " 'y': 18.0\n"
+        " },\n"
+        " 'childIndices': [ ],\n"
+        " 'contentDocumentIndex': 9,\n"
+        " 'frameId': '?',\n"
+        " 'localName': 'iframe',\n"
+        " 'nodeId': 9,\n"
+        " 'nodeName': 'IFRAME',\n"
+        " 'nodeType': 1,\n"
+        " 'nodeValue': ''\n"
+        "}\n",
+
+        "{\n"
+        " 'baseURL': 'http://127.0.0.1/iframe.html',\n"
+        " 'boundingBox': {\n"
+        " 'height': 37.0,\n"
+        " 'width': 384.0,\n"
+        " 'x': 90.0,\n"
+        " 'y': 18.0\n"
+        " },\n"
+        " 'childIndices': [ 10 ],\n"
+        " 'documentURL': 'http://127.0.0.1/iframe.html',\n"
+        " 'localName': '',\n"
+        " 'nodeId': 10,\n"
+        " 'nodeName': '#document',\n"
+        " 'nodeType': 9,\n"
+        " 'nodeValue': '',\n"
+        " 'xmlVersion': ''\n"
+        "}\n",
+
+        "{\n"
+        " 'attributes': [ ],\n"
+        " 'boundingBox': {\n"
+        " 'height': 36.0,\n"
+        " 'width': 308.0,\n"
+        " 'x': 8.0,\n"
+        " 'y': 8.0\n"
+        " },\n"
+        " 'childIndices': [ 11, 12 ],\n"
+        " 'frameId': '?',\n"
+        " 'inlineTextNodes': [ {\n"
+        " 'boundingBox': {\n"
+        " 'height': 36.0,\n"
+        " 'width': 307.734375,\n"
+        " 'x': 8.0,\n"
+        " 'y': 8.0\n"
+        " },\n"
+        " 'numCharacters': 22,\n"
+        " 'startCharacterIndex': 0\n"
+        " } ],\n"
+        " 'layoutText': 'Hello from the iframe!',\n"
+        " 'localName': 'html',\n"
+        " 'nodeId': 11,\n"
+        " 'nodeName': 'HTML',\n"
+        " 'nodeType': 1,\n"
+        " 'nodeValue': ''\n"
+        "}\n",
+
+        "{\n"
+        " 'attributes': [ ],\n"
+        " 'boundingBox': {\n"
+        " 'height': 204.0,\n"
+        " 'width': 405.0,\n"
+        " 'x': 80.0,\n"
+        " 'y': 8.0\n"
+        " },\n"
+        " 'childIndices': [ ],\n"
+        " 'localName': 'head',\n"
+        " 'nodeId': 12,\n"
+        " 'nodeName': 'HEAD',\n"
+        " 'nodeType': 1,\n"
+        " 'nodeValue': ''\n"
+        "}\n",
+
+        "{\n"
+        " 'attributes': [ ],\n"
+        " 'boundingBox': {\n"
+        " 'height': 0.0,\n"
+        " 'width': 0.0,\n"
+        " 'x': 0.0,\n"
+        " 'y': 0.0\n"
+        " },\n"
+        " 'childIndices': [ 13 ],\n"
+        " 'layoutText': '\\n',\n"
+        " 'localName': 'body',\n"
+        " 'nodeId': 13,\n"
+        " 'nodeName': 'BODY',\n"
+        " 'nodeType': 1,\n"
+        " 'nodeValue': ''\n"
+        "}\n",
+
+        "{\n"
+        " 'attributes': [ ],\n"
+        " 'boundingBox': {\n"
+        " 'height': 18.0,\n"
+        " 'width': 784.0,\n"
+        " 'x': 8.0,\n"
+        " 'y': 216.0\n"
+        " },\n"
+        " 'childIndices': [ 14 ],\n"
+        " 'localName': 'h1',\n"
+        " 'nodeId': 14,\n"
+        " 'nodeName': 'H1',\n"
+        " 'nodeType': 1,\n"
+        " 'nodeValue': ''\n"
+        "}\n",
+
+        "{\n"
+        " 'boundingBox': {\n"
+        " 'height': 18.0,\n"
+        " 'width': 784.0,\n"
+        " 'x': 8.0,\n"
+        " 'y': 216.0\n"
+        " },\n"
+        " 'localName': '',\n"
+        " 'nodeId': 15,\n"
+        " 'nodeName': '#text',\n"
+        " 'nodeType': 3,\n"
+        " 'nodeValue': 'Hello from the iframe!'\n"
+        "}\n",
+
+        "{\n"
+        " 'attributes': [ 'id', 'id2' ],\n"
+        " 'boundingBox': {\n"
+        " 'height': 18.0,\n"
+        " 'width': 784.0,\n"
+        " 'x': 8.0,\n"
+        " 'y': 216.0\n"
+        " },\n"
+        " 'childIndices': [ 16 ],\n"
+        " 'localName': 'div',\n"
+        " 'nodeId': 16,\n"
+        " 'nodeName': 'DIV',\n"
+        " 'nodeType': 1,\n"
+        " 'nodeValue': ''\n"
+        "}\n",
+
+        "{\n"
+        " 'attributes': [ 'id', 'id3' ],\n"
+        " 'boundingBox': {\n"
+        " 'height': 17.0,\n"
+        " 'width': 53.0,\n"
+        " 'x': 8.0,\n"
+        " 'y': 216.0\n"
+        " },\n"
+        " 'childIndices': [ 17 ],\n"
+        " 'localName': 'div',\n"
+        " 'nodeId': 17,\n"
+        " 'nodeName': 'DIV',\n"
+        " 'nodeType': 1,\n"
+        " 'nodeValue': ''\n"
+        "}\n",
+
+        "{\n"
+        " 'attributes': [ 'id', 'id4' ],\n"
+        " 'boundingBox': {\n"
+        " 'height': 17.0,\n"
+        " 'width': 53.0,\n"
+        " 'x': 8.0,\n"
+        " 'y': 216.0\n"
+        " },\n"
+        " 'childIndices': [ 18 ],\n"
+        " 'inlineTextNodes': [ {\n"
+        " 'boundingBox': {\n"
+        " 'height': 17.0,\n"
+        " 'width': 52.421875,\n"
+        " 'x': 8.0,\n"
+        " 'y': 216.0\n"
+        " },\n"
+        " 'numCharacters': 7,\n"
+        " 'startCharacterIndex': 0\n"
+        " } ],\n"
+        " 'layoutText': 'Google!',\n"
+        " 'localName': 'div',\n"
+        " 'nodeId': 18,\n"
+        " 'nodeName': 'DIV',\n"
+        " 'nodeType': 1,\n"
+        " 'nodeValue': ''\n"
+        "}\n",
+
+        "{\n"
+        " 'attributes': [ 'href', 'https://www.google.com' ],\n"
+        " 'boundingBox': {\n"
+        " 'height': 0.0,\n"
+        " 'width': 0.0,\n"
+        " 'x': 0.0,\n"
+        " 'y': 0.0\n"
+        " },\n"
+        " 'childIndices': [ 19 ],\n"
+        " 'layoutText': '\\n ',\n"
+        " 'localName': 'a',\n"
+        " 'nodeId': 19,\n"
+        " 'nodeName': 'A',\n"
+        " 'nodeType': 1,\n"
+        " 'nodeValue': ''\n"
+        "}\n",
+
+        "{\n"
+        " 'localName': '',\n"
+        " 'nodeId': 20,\n"
+        " 'nodeName': '#text',\n"
+        " 'nodeType': 3,\n"
+        " 'nodeValue': 'Google!'\n"
+        "}\n"};
+
+    EXPECT_EQ(expected_results.size(), dom_nodes.size());
+
+    // A size mismatch has been reported above; only indices valid in both
+    // containers are compared so that no fatal bounds assertion is needed.
+    for (size_t i = 0; i < dom_nodes.size() && i < expected_results.size();
+         i++) {
+      std::string result_json;
+      base::JSONWriter::WriteWithOptions(
+          *dom_nodes[i], base::JSONWriter::OPTIONS_PRETTY_PRINT, &result_json);
+
+      // C++11 raw string literals are not allowed here, so the expectations
+      // are written with single quotes; convert the output to match.
+      base::ReplaceChars(result_json, "\"", "'", &result_json);
+
+      EXPECT_EQ(expected_results[i], result_json) << " Node # " << i;
+    }
+
+    FinishAsynchronousTest();
+  }
+
+  // Created in OnLoadEventFired(); kept as a member so that it stays alive
+  // until the extraction callback has run.
+  std::unique_ptr<DomTreeExtractor> extractor_;
+};
+
+// NOTE(review): this macro is not defined in this file; it presumably
+// declares the browser test that runs the DomTreeExtractorBrowserTest
+// fixture — confirm against headless/test/headless_browser_test.h.
+HEADLESS_ASYNC_DEVTOOLED_TEST_F(DomTreeExtractorBrowserTest);
+
+} // namespace headless

Powered by Google App Engine
This is Rietveld 408576698