Index: headless/public/util/dom_tree_extractor.cc |
diff --git a/headless/public/util/dom_tree_extractor.cc b/headless/public/util/dom_tree_extractor.cc |
new file mode 100644 |
index 0000000000000000000000000000000000000000..f6e96893221ffbb778800de3f2926b6df0d10c19 |
--- /dev/null |
+++ b/headless/public/util/dom_tree_extractor.cc |
@@ -0,0 +1,145 @@ |
+// Copyright 2016 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#include "headless/public/util/dom_tree_extractor.h" |
+ |
+#include "base/bind.h" |
+#include "base/json/json_writer.h" |
+#include "headless/public/headless_devtools_client.h" |
+ |
+namespace headless { |
+ |
+// Creates an idle extractor that will issue its DevTools requests through |
+// devtools_client. Every progress flag starts out false, and weak_factory_ is |
+// constructed with this object so the callbacks bound later in this file can |
+// take weak pointers from it. |
+DomTreeExtractor::DomTreeExtractor(HeadlessDevToolsClient* devtools_client) |
+    : child_nodes_fetched_(false), |
+      dom_observer_registered_(false), |
+      work_in_progress_(false), |
+      devtools_client_(devtools_client), |
+      weak_factory_(this) {} |
+ |
+// Unregisters this object from the DOM domain if it is still listed as an |
+// observer, i.e. if it is destroyed between OnRootDocumentFetched() and |
+// OnSetChildNodes(). |
+DomTreeExtractor::~DomTreeExtractor() { |
+  if (!dom_observer_registered_) |
+    return; |
+ |
+  devtools_client_->GetDOM()->RemoveObserver(this); |
+} |
+ |
+// Kicks off an extraction. Two requests are issued back to back: one for the |
+// root document and one for the layout tree nodes. callback is stored and run |
+// by MaybeExtractDomTree() once all the pieces have arrived. Starting a second |
+// extraction while one is in flight trips the DCHECK below. |
+void DomTreeExtractor::ExtractDomTree(DomResultCB callback) { |
+  DCHECK(!work_in_progress_); |
+  callback_ = std::move(callback); |
+  work_in_progress_ = true; |
+ |
+  // Fetching the DOM nodes is a two step process. First we fetch the Document |
+  // (which only contains a few nodes) and then we fetch all its children |
Sami
2016/10/03 10:42:18
s/it's/its/
alex clarke (OOO till 29th)
2016/10/19 16:23:26
Acknowledged.
|
+  // including any iframe content documents. |
+  auto on_document_fetched = base::Bind( |
+      &DomTreeExtractor::OnRootDocumentFetched, weak_factory_.GetWeakPtr()); |
+  devtools_client_->GetDOM()->GetDocument(std::move(on_document_fetched)); |
+ |
+  // The layout tree request is independent of the document request, so it is |
+  // sent right away rather than after the document reply. |
+  auto on_layout_fetched = |
+      base::Bind(&DomTreeExtractor::OnLayoutTreeNodesFetched, |
+                 weak_factory_.GetWeakPtr()); |
+  devtools_client_->GetDOM()->GetExperimental()->GetLayoutTreeNodes( |
+      dom::GetLayoutTreeNodesParams::Builder().Build(), |
+      std::move(on_layout_fetched)); |
+} |
+ |
+// Reply handler for the GetDocument request. Keeps the result, registers this |
+// object as a DOM observer and then requests the children of the root node; |
+// OnSetChildNodes() consumes the nodes reported for that root. |
+void DomTreeExtractor::OnRootDocumentFetched( |
+    std::unique_ptr<dom::GetDocumentResult> result) { |
+  dom_tree_.document_result_ = std::move(result); |
+ |
+  // Start observing before the request below is sent. |
+  DCHECK(!dom_observer_registered_); |
+  devtools_client_->GetDOM()->AddObserver(this); |
+  dom_observer_registered_ = true; |
+ |
+  const auto root_node_id = |
+      dom_tree_.document_result_->GetRoot()->GetNodeId(); |
+ |
+  // NOTE(review): per the DevTools protocol a depth of -1 asks for the entire |
+  // subtree; confirm against the protocol version in use. |
+  devtools_client_->GetDOM()->RequestChildNodes( |
Sami
2016/10/03 10:42:18
Curious: does this work on a document that is comp
alex clarke (OOO till 29th)
2016/10/19 16:23:26
It would have. Anyway it's gone now, since we can
|
+      dom::RequestChildNodesParams::Builder() |
+          .SetNodeId(root_node_id) |
+          .SetDepth(-1) |
+          .SetTraverseFrames(true) |
+          .Build()); |
+} |
+ |
+// Reply handler for the GetLayoutTreeNodes request: takes ownership of the |
+// result and then checks whether the extraction can now be completed. |
+void DomTreeExtractor::OnLayoutTreeNodesFetched( |
+    std::unique_ptr<dom::GetLayoutTreeNodesResult> result) { |
+  dom_tree_.layout_tree_result_ = std::move(result); |
+ |
+  // The document and its children may or may not have arrived yet. |
+  MaybeExtractDomTree(); |
+} |
+ |
+// DOM observer notification carrying the child nodes of params.GetParentId(). |
+// Only the notification for the root document node is consumed: its nodes are |
+// moved onto the root, this object stops observing, and the extraction is |
+// completed if everything else has arrived. Any other parent id is logged and |
+// ignored. |
+void DomTreeExtractor::OnSetChildNodes(const dom::SetChildNodesParams& params) { |
+  const dom::Node* root = dom_tree_.document_result_->GetRoot(); |
+ |
+  // Ignore nodes we're not looking for. |
+  if (params.GetParentId() != root->GetNodeId()) { |
+    LOG(WARNING) << "Received unexpected child nodes for parent id " |
+                 << params.GetParentId(); |
+    return; |
+  } |
+ |
+  // Both the root and the incoming node list are obtained through const |
+  // accessors, so constness is cast away on each in order to move the children |
+  // out of |params| and into the stored document. |
+  auto* mutable_root = const_cast<dom::Node*>(root); |
+  auto* incoming_children = |
+      const_cast<std::vector<std::unique_ptr<dom::Node>>*>(params.GetNodes()); |
+  mutable_root->SetChildren(std::move(*incoming_children)); |
+ |
+  // Stop observing now that the expected notification has been handled. |
+  if (dom_observer_registered_) { |
+    devtools_client_->GetDOM()->RemoveObserver(this); |
+    dom_observer_registered_ = false; |
+  } |
+ |
+  child_nodes_fetched_ = true; |
+  MaybeExtractDomTree(); |
+} |
+ |
+// Completes the extraction once the document, its child nodes and the layout |
+// tree result are all present; does nothing otherwise. On completion the DOM |
+// and layout nodes are indexed, the progress flags are reset, and dom_tree_ |
+// is moved into callback_. |
+void DomTreeExtractor::MaybeExtractDomTree() { |
+  const bool everything_arrived = dom_tree_.document_result_ && |
+                                  dom_tree_.layout_tree_result_ && |
+                                  child_nodes_fetched_; |
+  if (!everything_arrived) |
+    return; |
+ |
+  EnumerateNodes(dom_tree_.document_result_->GetRoot()); |
+  ExtractLayoutTreeNodes(); |
+ |
+  // Reset the state before running the callback; no member is touched after |
+  // Run() returns. |
+  child_nodes_fetched_ = false; |
+  work_in_progress_ = false; |
+ |
+  callback_.Run(std::move(dom_tree_)); |
+} |
+ |
+// Walks the tree rooted at node in pre-order, appending each visited node to |
+// dom_tree_.dom_nodes_ and recording its position there under its node id in |
+// dom_tree_.node_id_to_index_. A node's content document, when present, is |
+// visited before the node's children. |
+void DomTreeExtractor::EnumerateNodes(const dom::Node* node) { |
+  // The index is the node's position in |dom_nodes_|. It is read from the |
+  // vector itself rather than from the map's size so the two cannot drift |
+  // apart if a node id is ever encountered more than once. |
+  const size_t index = dom_tree_.dom_nodes_.size(); |
+  dom_tree_.dom_nodes_.push_back(node); |
+  dom_tree_.node_id_to_index_[node->GetNodeId()] = index; |
+ |
+  if (node->HasContentDocument()) |
+    EnumerateNodes(node->GetContentDocument()); |
+ |
+  if (node->HasChildren()) { |
+    for (const std::unique_ptr<dom::Node>& child : *node->GetChildren()) { |
+      EnumerateNodes(child.get()); |
+    } |
+  } |
+} |
+ |
+// Fills dom_tree_.layout_tree_nodes_ with pointers to those entries of the |
+// layout tree result whose id is present in dom_tree_.node_id_to_index_. The |
+// pointed-to objects remain owned by dom_tree_.layout_tree_result_. |
+void DomTreeExtractor::ExtractLayoutTreeNodes() { |
+  const auto& all_layout_nodes = |
+      *dom_tree_.layout_tree_result_->GetLayoutTreeNodes(); |
+  dom_tree_.layout_tree_nodes_.reserve(all_layout_nodes.size()); |
+ |
+  // Only extract layout tree nodes that map to a DOM node. |
+  // NOTE(review): the map is populated with GetNodeId() in EnumerateNodes() |
+  // but queried with GetBackendNodeId() here - confirm both ids share the same |
+  // id space. |
+  const auto& known_ids = dom_tree_.node_id_to_index_; |
+  for (const auto& candidate : all_layout_nodes) { |
+    if (known_ids.find(candidate->GetBackendNodeId()) != known_ids.end()) |
+      dom_tree_.layout_tree_nodes_.push_back(candidate.get()); |
+  } |
+} |
+ |
+// Out-of-line default constructor and destructor for DomTree; neither does |
+// anything beyond what the members do on their own. |
+DomTreeExtractor::DomTree::DomTree() = default; |
+DomTreeExtractor::DomTree::~DomTree() = default; |
+ |
+// Move constructor: transfers the node list, the id-to-index map, the layout |
+// node list and both owned DevTools results out of other. It is defaulted so |
+// that each member is move-constructed directly instead of being |
+// default-constructed and then move-assigned. |
+DomTreeExtractor::DomTree::DomTree(DomTree&& other) = default; |
Sami
2016/10/03 10:42:18
Could this be "= default"?
alex clarke (OOO till 29th)
2016/10/19 16:23:26
Done.
|
+ |
+} // namespace headless |