Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1076)

Unified Diff: headless/public/util/dom_tree_extractor.cc

Issue 2385653003: Add a utility class for extracting details of the DOM (Closed)
Patch Set: Refactor Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: headless/public/util/dom_tree_extractor.cc
diff --git a/headless/public/util/dom_tree_extractor.cc b/headless/public/util/dom_tree_extractor.cc
new file mode 100644
index 0000000000000000000000000000000000000000..f6e96893221ffbb778800de3f2926b6df0d10c19
--- /dev/null
+++ b/headless/public/util/dom_tree_extractor.cc
@@ -0,0 +1,145 @@
+// Copyright 2016 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "headless/public/util/dom_tree_extractor.h"
+
+#include "base/bind.h"
+#include "base/json/json_writer.h"
+#include "headless/public/headless_devtools_client.h"
+
+namespace headless {
+
+DomTreeExtractor::DomTreeExtractor(HeadlessDevToolsClient* devtools_client)
+ : child_nodes_fetched_(false),
+ dom_observer_registered_(false),
+ work_in_progress_(false),
+ devtools_client_(devtools_client),
+ weak_factory_(this) {}
+
+// Unregisters from the DOM domain if an observer registration is still live.
+DomTreeExtractor::~DomTreeExtractor() {
+  if (!dom_observer_registered_)
+    return;
+
+  devtools_client_->GetDOM()->RemoveObserver(this);
+}
+
+// Starts an asynchronous extraction. |callback| is stored and run from
+// MaybeExtractDomTree() once the document, its child nodes and the layout tree
+// have all arrived. Must not be called while an extraction is in progress.
+void DomTreeExtractor::ExtractDomTree(DomResultCB callback) {
+  DCHECK(!work_in_progress_);
+  work_in_progress_ = true;
+  callback_ = std::move(callback);
+
+  auto* dom_domain = devtools_client_->GetDOM();
+
+  // Fetching the DOM nodes is a two step process. First we fetch the Document
+  // (which only contains a few nodes) and then we fetch all its children
Sami 2016/10/03 10:42:18 s/it's/its/
alex clarke (OOO till 29th) 2016/10/19 16:23:26 Acknowledged.
+  // including any iframe content documents.
+  dom_domain->GetDocument(base::Bind(&DomTreeExtractor::OnRootDocumentFetched,
+                                     weak_factory_.GetWeakPtr()));
+
+  // The layout tree is requested right away rather than waiting for the
+  // document reply; both callbacks are bound to a weak pointer to |this|.
+  auto layout_params = dom::GetLayoutTreeNodesParams::Builder().Build();
+  dom_domain->GetExperimental()->GetLayoutTreeNodes(
+      std::move(layout_params),
+      base::Bind(&DomTreeExtractor::OnLayoutTreeNodesFetched,
+                 weak_factory_.GetWeakPtr()));
+}
+
+// Reply handler for GetDocument(). Stores the document, registers |this| as a
+// DOM observer and then requests the children of the document root.
+void DomTreeExtractor::OnRootDocumentFetched(
+    std::unique_ptr<dom::GetDocumentResult> result) {
+  dom_tree_.document_result_ = std::move(result);
+
+  // The observer has to be in place before the child nodes are requested.
+  DCHECK(!dom_observer_registered_);
+  auto* dom_domain = devtools_client_->GetDOM();
+  dom_domain->AddObserver(this);
+  dom_observer_registered_ = true;
+
+  // NOTE(review): a depth of -1 presumably means "unlimited" - confirm against
+  // the DevTools protocol documentation.
+  const auto root_node_id = dom_tree_.document_result_->GetRoot()->GetNodeId();
+  dom_domain->RequestChildNodes(
Sami 2016/10/03 10:42:18 Curious: does this work on a document that is comp
alex clarke (OOO till 29th) 2016/10/19 16:23:26 It would have. Anyway it's gone now, since we can
+      dom::RequestChildNodesParams::Builder()
+          .SetNodeId(root_node_id)
+          .SetDepth(-1)
+          .SetTraverseFrames(true)
+          .Build());
+}
+
+// Reply handler for GetLayoutTreeNodes(): takes ownership of the layout tree
+// result and then checks whether the extraction can be completed.
+void DomTreeExtractor::OnLayoutTreeNodesFetched(
+    std::unique_ptr<dom::GetLayoutTreeNodesResult> result) {
+  dom_tree_.layout_tree_result_ = std::move(result);
+
+  // The DOM side may or may not be finished yet; the shared completion check
+  // decides.
+  MaybeExtractDomTree();
+}
+
+// DOM observer notification carrying the children of some parent node. Only
+// the notification for the document root is consumed: its nodes are moved into
+// the stored document, the observer is removed, and completion is re-checked.
+void DomTreeExtractor::OnSetChildNodes(const dom::SetChildNodesParams& params) {
+  const dom::Node* root = dom_tree_.document_result_->GetRoot();
+
+  // Anything that is not parented to the document root is not ours.
+  if (params.GetParentId() != root->GetNodeId()) {
+    LOG(WARNING) << "Received unexpected child nodes for parent id "
+                 << params.GetParentId();
+    return;
+  }
+
+  // Both the root node and the notification payload are only reachable
+  // through const accessors, so constness is cast away in order to move the
+  // children into |dom_tree_.document_result_|.
+  auto* incoming_children =
+      const_cast<std::vector<std::unique_ptr<dom::Node>>*>(params.GetNodes());
+  const_cast<dom::Node*>(root)->SetChildren(std::move(*incoming_children));
+
+  // No further notifications are wanted once the root's children are in.
+  if (dom_observer_registered_) {
+    devtools_client_->GetDOM()->RemoveObserver(this);
+    dom_observer_registered_ = false;
+  }
+
+  child_nodes_fetched_ = true;
+  MaybeExtractDomTree();
+}
+
+// Completes the extraction if, and only if, the document, the layout tree and
+// the root's child nodes have all been received. Otherwise does nothing.
+void DomTreeExtractor::MaybeExtractDomTree() {
+  const bool ready = dom_tree_.document_result_ &&
+                     dom_tree_.layout_tree_result_ && child_nodes_fetched_;
+  if (!ready)
+    return;
+
+  // Build the flat node list and id index first; the layout pass relies on
+  // that index.
+  EnumerateNodes(dom_tree_.document_result_->GetRoot());
+  ExtractLayoutTreeNodes();
+
+  // The flags are cleared before the callback runs, and no member is touched
+  // after Run().
+  child_nodes_fetched_ = false;
+  work_in_progress_ = false;
+
+  callback_.Run(std::move(dom_tree_));
+}
+
+// Appends |node| to |dom_tree_.dom_nodes_| and records its position in
+// |dom_tree_.node_id_to_index_|, then recurses first into the node's content
+// document (if any) and then into its children, in order.
+void DomTreeExtractor::EnumerateNodes(const dom::Node* node) {
+  // The index is the slot |node| is about to occupy in |dom_nodes_|. It is
+  // taken from the vector rather than from the map's size so that the two can
+  // never drift apart, even if a node id were to be encountered twice.
+  const size_t index = dom_tree_.dom_nodes_.size();
+  dom_tree_.node_id_to_index_[node->GetNodeId()] = index;
+  dom_tree_.dom_nodes_.push_back(node);
+
+  if (node->HasContentDocument())
+    EnumerateNodes(node->GetContentDocument());
+
+  if (!node->HasChildren())
+    return;
+
+  for (const auto& child : *node->GetChildren())
+    EnumerateNodes(child.get());
+}
+
+// Fills |dom_tree_.layout_tree_nodes_| with pointers to those layout tree
+// nodes whose id is present in |dom_tree_.node_id_to_index_|. Must run after
+// EnumerateNodes() has populated that index.
+void DomTreeExtractor::ExtractLayoutTreeNodes() {
+  const auto& all_layout_nodes =
+      *dom_tree_.layout_tree_result_->GetLayoutTreeNodes();
+
+  // Upper bound: at most every layout node is kept.
+  dom_tree_.layout_tree_nodes_.reserve(all_layout_nodes.size());
+
+  // NOTE(review): the index is keyed by dom::Node::GetNodeId() while the
+  // lookup uses GetBackendNodeId() - confirm these share an id space.
+  const auto& known_ids = dom_tree_.node_id_to_index_;
+  for (const auto& layout_node : all_layout_nodes) {
+    // Layout nodes without a matching DOM node are skipped.
+    if (known_ids.find(layout_node->GetBackendNodeId()) != known_ids.end())
+      dom_tree_.layout_tree_nodes_.push_back(layout_node.get());
+  }
+}
+
+// Construction and destruction need no custom logic; they are defined out of
+// line here and simply defaulted.
+DomTreeExtractor::DomTree::DomTree() = default;
+DomTreeExtractor::DomTree::~DomTree() = default;
+
+DomTreeExtractor::DomTree::DomTree(DomTree&& other) {
Sami 2016/10/03 10:42:18 Could this be "= default"?
alex clarke (OOO till 29th) 2016/10/19 16:23:26 Done.
+ dom_nodes_ = std::move(other.dom_nodes_);
+ node_id_to_index_ = std::move(other.node_id_to_index_);
+ layout_tree_nodes_ = std::move(other.layout_tree_nodes_);
+ document_result_ = std::move(other.document_result_);
+ layout_tree_result_ = std::move(other.layout_tree_result_);
+}
+
+} // namespace headless

Powered by Google App Engine
This is Rietveld 408576698