Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(304)

Unified Diff: headless/public/util/dom_tree_extractor.cc

Issue 2385653003: Add a utility class for extracting details of the DOM (Closed)
Patch Set: Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: headless/public/util/dom_tree_extractor.cc
diff --git a/headless/public/util/dom_tree_extractor.cc b/headless/public/util/dom_tree_extractor.cc
new file mode 100644
index 0000000000000000000000000000000000000000..82b4a6bcf4d1f4b8f3a0c3be7ea807bcb14cc13c
--- /dev/null
+++ b/headless/public/util/dom_tree_extractor.cc
@@ -0,0 +1,168 @@
+// Copyright 2016 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "headless/public/util/dom_tree_extractor.h"
+
+#include "base/bind.h"
+#include "base/json/json_writer.h"
+#include "headless/public/headless_devtools_client.h"
+
+namespace headless {
+
+// Creates an idle extractor that talks to |devtools_client|. All state flags
+// start out false: no child nodes have been received, this object has not
+// been added as a DOM observer, and no extraction is in progress.
+// NOTE(review): |devtools_client_| is still dereferenced in the destructor
+// when an observer was registered, so the client presumably has to outlive
+// this object -- confirm against the header / callers.
+DomTreeExtractor::DomTreeExtractor(HeadlessDevToolsClient* devtools_client)
+ : child_nodes_fetched_(false),
+ dom_observer_registered_(false),
+ work_in_progress_(false),
+ devtools_client_(devtools_client),
+ weak_factory_(this) {}
+
+// Removes this object from the DOM domain's observer list, but only if it was
+// ever added (tracked by |dom_observer_registered_|).
+DomTreeExtractor::~DomTreeExtractor() {
+  if (!dom_observer_registered_)
+    return;
+
+  devtools_client_->GetDOM()->RemoveObserver(this);
+}
+
+// Kicks off an asynchronous extraction. |callback| is stored and run by
+// ExtractDomTree() once every piece of data has arrived. Starting a second
+// extraction while one is still in flight is a programming error (DCHECK).
+void DomTreeExtractor::ExtractDom(DomResultCB callback) {
+  DCHECK(!work_in_progress_);
+  work_in_progress_ = true;
+  callback_ = std::move(callback);
+
+  // Fetching the DOM nodes takes two steps: the Document itself (which only
+  // contains a few nodes) is requested here, and OnRootDocumentFetched() then
+  // requests all of its children, including any iframe content documents.
+  auto on_root_document = base::Bind(&DomTreeExtractor::OnRootDocumentFetched,
+                                     weak_factory_.GetWeakPtr());
+  devtools_client_->GetDOM()->GetDocument(std::move(on_root_document));
+
+  // The layout tree is requested right away as well; its reply is handled by
+  // OnLayoutTreeNodesFetched().
+  auto on_layout_tree = base::Bind(&DomTreeExtractor::OnLayoutTreeNodesFetched,
+                                   weak_factory_.GetWeakPtr());
+  devtools_client_->GetDOM()->GetExperimental()->GetLayoutTreeNodes(
+      dom::GetLayoutTreeNodesParams::Builder().Build(),
+      std::move(on_layout_tree));
+}
+
+// Completion callback for the GetDocument request issued by ExtractDom().
+// Stores the root document, makes sure this object is observing the DOM
+// domain, and then asks for the rest of the tree below the root.
+void DomTreeExtractor::OnRootDocumentFetched(
+    std::unique_ptr<dom::GetDocumentResult> result) {
+  document_result_ = std::move(result);
+
+  // This runs once per extraction, but the destructor removes the observer
+  // only once, so register only the first time through.
+  if (!dom_observer_registered_) {
+    devtools_client_->GetDOM()->AddObserver(this);
Sami 2016/09/30 10:56:03 Probably should avoid doing this twice?
alex clarke (OOO till 29th) 2016/09/30 13:16:32 Done.
+    dom_observer_registered_ = true;
+  }
+
+  // Request all children of the root, including iframe content documents.
+  // No reply callback is passed here; the nodes are picked up in
+  // OnSetChildNodes().
+  devtools_client_->GetDOM()->RequestChildNodes(
+      dom::RequestChildNodesParams::Builder()
+          .SetNodeId(document_result_->GetRoot()->GetNodeId())
+          .SetDepth(-1)
+          .SetTraverseFrames(true)
+          .Build());
+}
+
+// Completion callback for the GetLayoutTreeNodes request issued by
+// ExtractDom(). Keeps |result| in |layout_tree_result_| and then checks
+// whether everything needed to build the final output has arrived.
+void DomTreeExtractor::OnLayoutTreeNodesFetched(
+ std::unique_ptr<dom::GetLayoutTreeNodesResult> result) {
+ layout_tree_result_ = std::move(result);
+ MaybeExtractDomTree();
+}
+
+// Observer notification carrying a batch of child nodes. Only the batch whose
+// parent is the root of the document currently being extracted is used: its
+// nodes are moved under the root and extraction proceeds if all other data
+// has arrived.
+void DomTreeExtractor::OnSetChildNodes(const dom::SetChildNodesParams& params) {
+  // This object stays registered as an observer after an extraction has
+  // finished, at which point |document_result_| has been reset. Without this
+  // check a late or unrelated notification would dereference null below.
+  if (!document_result_)
+    return;
+
+  // Ignore nodes we're not looking for.
+  if (params.GetParentId() != document_result_->GetRoot()->GetNodeId()) {
+    LOG(WARNING) << "Received unexpected child nodes for parent id "
+                 << params.GetParentId();
+    return;
+  }
+
+  // Move the missing children into the |document_result_|. Both accessors
+  // only hand out const pointers, hence the const_casts; |params| is left
+  // with a moved-from node vector afterwards.
+  dom::Node* parent_node = const_cast<dom::Node*>(document_result_->GetRoot());
+  std::vector<std::unique_ptr<dom::Node>>* child_nodes =
+      const_cast<std::vector<std::unique_ptr<dom::Node>>*>(params.GetNodes());
+  parent_node->SetChildren(std::move(*child_nodes));
+
+  child_nodes_fetched_ = true;
+  MaybeExtractDomTree();
+}
+
+// Builds and delivers the result once the root document, its child nodes and
+// the layout tree have all been received; does nothing until then.
+void DomTreeExtractor::MaybeExtractDomTree() {
+  const bool have_everything =
+      document_result_ && layout_tree_result_ && child_nodes_fetched_;
+  if (!have_everything)
+    return;
+
+  EnumerateNodes(document_result_->GetRoot());
+  ExtractDomTree();
+}
+
+// Depth-first walk that records |node| in |nodes_| and assigns it an index in
+// |node_id_to_index_|, then recurses into its content document (if any)
+// followed by its children.
+void DomTreeExtractor::EnumerateNodes(const dom::Node* node) {
+  // The index handed out is the number of ids recorded before this node.
+  const size_t next_index = node_id_to_index_.size();
+  node_id_to_index_[node->GetNodeId()] = next_index;
+  nodes_.push_back(node);
+
+  if (node->HasContentDocument())
+    EnumerateNodes(node->GetContentDocument());
+
+  if (!node->HasChildren())
+    return;
+
+  for (const auto& child_node : *node->GetChildren())
+    EnumerateNodes(child_node.get());
+}
+
+// Turns the fetched data into the final result and delivers it:
+//   1. serializes every node recorded by EnumerateNodes() into a flat array
+//      of dictionaries, replacing nested children / content documents with
+//      indices into that array,
+//   2. merges bounding box and text information from the layout tree into
+//      the matching dictionaries,
+//   3. resets all per-extraction state, and
+//   4. runs |callback_| with the array.
+void DomTreeExtractor::ExtractDomTree() {
+  // One slot per enumerated node; the loop below indexes by position in
+  // |nodes_|, so size from |nodes_| rather than from the id map.
+  std::vector<std::unique_ptr<base::DictionaryValue>> dom_nodes(
+      nodes_.size());
+
+  // Serialize DOM nodes into a flat array.
+  for (size_t i = 0; i < nodes_.size(); i++) {
+    dom::Node* node = const_cast<dom::Node*>(nodes_[i]);
+    dom_nodes[i].reset(
+        static_cast<base::DictionaryValue*>(node->Serialize().release()));
+
+    // Replace the nested "children" list with a list of flat-array indices.
+    if (node->HasChildren()) {
+      std::unique_ptr<base::ListValue> children(new base::ListValue());
+      for (const std::unique_ptr<dom::Node>& child : *node->GetChildren()) {
+        children->AppendInteger(node_id_to_index_[child->GetNodeId()]);
+      }
+      dom_nodes[i]->Set("childIndices", std::move(children));
Sami 2016/09/30 10:56:03 typo: indices
alex clarke (OOO till 29th) 2016/09/30 13:16:32 Done.
+      dom_nodes[i]->Remove("children", nullptr);
+    }
+
+    // Likewise replace a nested content document with its flat-array index.
+    if (node->HasContentDocument()) {
+      dom_nodes[i]->SetInteger(
+          "contentDocumentIndex",
+          node_id_to_index_[node->GetContentDocument()->GetNodeId()]);
+      dom_nodes[i]->Remove("contentDocument", nullptr);
+    }
+
+    dom_nodes[i]->Remove("childNodeCount", nullptr);
+  }
+
+  // Merge in the layout tree.
Sami 2016/09/30 10:56:03 nit: layout tree
alex clarke (OOO till 29th) 2016/09/30 13:16:32 Done.
+  for (const std::unique_ptr<dom::LayoutTreeNode>& layout_node :
+       *layout_tree_result_->GetLayoutTreeNodes()) {
+    // Layout nodes that do not map to an enumerated DOM node are skipped.
+    std::unordered_map<NodeId, size_t>::const_iterator it =
+        node_id_to_index_.find(layout_node->GetBackendNodeId());
+    if (it == node_id_to_index_.end())
+      continue;
+
+    base::DictionaryValue* node_dict = dom_nodes[it->second].get();
+    node_dict->Set("boundingBox", layout_node->GetBoundingBox()->Serialize());
+
+    if (layout_node->HasLayoutText())
+      node_dict->SetString("layoutText", layout_node->GetLayoutText());
+
+    if (layout_node->HasInlineTextNodes()) {
+      std::unique_ptr<base::ListValue> inline_text_nodes(new base::ListValue());
+      for (const std::unique_ptr<dom::InlineTextBox>& inline_text_box :
+           *layout_node->GetInlineTextNodes()) {
+        size_t index = inline_text_nodes->GetSize();
+        inline_text_nodes->Set(index, inline_text_box->Serialize());
+      }
+      node_dict->Set("inlineTextNodes", std::move(inline_text_nodes));
+    }
+  }
+
+  // Reset all per-extraction state before running the callback so that the
+  // extractor is already reusable when the callback runs. The id map must be
+  // cleared too: EnumerateNodes() derives indices from its size, so stale
+  // entries would shift every index of the next extraction.
+  nodes_.clear();
+  node_id_to_index_.clear();
+  document_result_.reset();
+  layout_tree_result_.reset();
+  child_nodes_fetched_ = false;
+  work_in_progress_ = false;
+
+  callback_.Run(std::move(dom_nodes));
+}
+
+} // namespace headless

Powered by Google App Engine
This is Rietveld 408576698