Index: headless/public/util/dom_tree_extractor.cc |
diff --git a/headless/public/util/dom_tree_extractor.cc b/headless/public/util/dom_tree_extractor.cc |
new file mode 100644 |
index 0000000000000000000000000000000000000000..82b4a6bcf4d1f4b8f3a0c3be7ea807bcb14cc13c |
--- /dev/null |
+++ b/headless/public/util/dom_tree_extractor.cc |
@@ -0,0 +1,168 @@ |
+// Copyright 2016 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#include "headless/public/util/dom_tree_extractor.h" |
+ |
+#include "base/bind.h" |
+#include "base/json/json_writer.h" |
+#include "headless/public/headless_devtools_client.h" |
+ |
+namespace headless { |
+ |
+// Creates an extractor that issues its DOM requests through |
+// |devtools_client|. The pointer is stored as-is and is used by later calls |
+// on this object. All progress flags start out false. |
+DomTreeExtractor::DomTreeExtractor(HeadlessDevToolsClient* devtools_client) |
+ : child_nodes_fetched_(false), |
+ dom_observer_registered_(false), |
+ work_in_progress_(false), |
+ devtools_client_(devtools_client), |
+ weak_factory_(this) {} |
+ |
+// Unregisters this object from the DOM domain's observer list, but only if it |
+// was ever registered there. |
+DomTreeExtractor::~DomTreeExtractor() { |
+  if (!dom_observer_registered_) |
+    return; |
+ |
+  devtools_client_->GetDOM()->RemoveObserver(this); |
+} |
+ |
+// Starts an extraction and remembers |callback| so it can be run with the |
+// result. Must not be called while a previous extraction is still running. |
+void DomTreeExtractor::ExtractDom(DomResultCB callback) { |
+  DCHECK(!work_in_progress_); |
+  work_in_progress_ = true; |
+  callback_ = std::move(callback); |
+ |
+  auto* dom_domain = devtools_client_->GetDOM(); |
+ |
+  // The DOM nodes arrive in two steps: the Document itself (which only |
+  // contains a few nodes) is requested here, and once it is available all of |
+  // its children, including any iframe content documents, are requested. |
+  dom_domain->GetDocument(base::Bind( |
+      &DomTreeExtractor::OnRootDocumentFetched, weak_factory_.GetWeakPtr())); |
+ |
+  // The layout tree is requested alongside the document. |
+  dom_domain->GetExperimental()->GetLayoutTreeNodes( |
+      dom::GetLayoutTreeNodesParams::Builder().Build(), |
+      base::Bind(&DomTreeExtractor::OnLayoutTreeNodesFetched, |
+                 weak_factory_.GetWeakPtr())); |
+} |
+ |
+// Receives the root document, keeps it in |document_result_| and requests the |
+// full subtree below the root (unlimited depth, traversing frames). The child |
+// nodes are delivered through OnSetChildNodes(), so this object has to be |
+// registered as a DOM observer before the request is sent. |
+void DomTreeExtractor::OnRootDocumentFetched( |
+    std::unique_ptr<dom::GetDocumentResult> result) { |
+  document_result_ = std::move(result); |
+ |
+  // This runs once per extraction, but the observer is only removed in the |
+  // destructor, so register it on the first extraction only. |
+  if (!dom_observer_registered_) { |
+    devtools_client_->GetDOM()->AddObserver(this); |
+    dom_observer_registered_ = true; |
+  } |
Sami
2016/09/30 10:56:03
Probably should avoid doing this twice?
alex clarke (OOO till 29th)
2016/09/30 13:16:32
Done.
|
+ |
+  devtools_client_->GetDOM()->RequestChildNodes( |
+      dom::RequestChildNodesParams::Builder() |
+          .SetNodeId(document_result_->GetRoot()->GetNodeId()) |
+          .SetDepth(-1) |
+          .SetTraverseFrames(true) |
+          .Build()); |
+} |
+ |
+// Receives the layout tree, keeps it in |layout_tree_result_| and then checks |
+// whether everything needed to build the final result has arrived. |
+void DomTreeExtractor::OnLayoutTreeNodesFetched( |
+ std::unique_ptr<dom::GetLayoutTreeNodesResult> result) { |
+ layout_tree_result_ = std::move(result); |
+ MaybeExtractDomTree(); |
+} |
+ |
+// DOM observer hook: receives a batch of child nodes for some parent node. |
+// Only the batch for the root of |document_result_| is used; its nodes are |
+// moved into the root so the document holds the whole tree. |
+void DomTreeExtractor::OnSetChildNodes(const dom::SetChildNodesParams& params) { |
+  // The observer stays registered between extractions, while |
+  // |document_result_| is reset at the end of each one. Drop events that |
+  // arrive when there is no document to attach them to instead of |
+  // dereferencing a null pointer below. |
+  if (!document_result_) |
+    return; |
+ |
+  // Ignore nodes we're not looking for. |
+  if (params.GetParentId() != document_result_->GetRoot()->GetNodeId()) { |
+    LOG(WARNING) << "Received unexpected child nodes for parent id " |
+                 << params.GetParentId(); |
+    return; |
+  } |
+ |
+  // Move the missing children into the |document_result_|. Both objects are |
+  // only exposed through const accessors, hence the const_casts; |params| is |
+  // left without its nodes afterwards. |
+  dom::Node* parent_node = const_cast<dom::Node*>(document_result_->GetRoot()); |
+  std::vector<std::unique_ptr<dom::Node>>* child_nodes = |
+      const_cast<std::vector<std::unique_ptr<dom::Node>>*>(params.GetNodes()); |
+  parent_node->SetChildren(std::move(*child_nodes)); |
+ |
+  child_nodes_fetched_ = true; |
+  MaybeExtractDomTree(); |
+} |
+ |
+// Builds and delivers the result once the document, the layout tree and the |
+// root's child nodes have all arrived; does nothing until then. |
+void DomTreeExtractor::MaybeExtractDomTree() { |
+  if (!document_result_ || !layout_tree_result_ || !child_nodes_fetched_) |
+    return; |
+ |
+  EnumerateNodes(document_result_->GetRoot()); |
+  ExtractDomTree(); |
+} |
+ |
+// Recursively visits |node| and everything below it, appending each node to |
+// |nodes_| and recording its position there in |node_id_to_index_|. A node's |
+// content document (if any) is visited before its children. |
+void DomTreeExtractor::EnumerateNodes(const dom::Node* node) { |
+  // Allocate an index and record the node pointer. The index is the node's |
+  // position in |nodes_|; deriving it from the map's size instead would go |
+  // wrong whenever the map already holds entries that are not in |nodes_|. |
+  size_t index = nodes_.size(); |
+  node_id_to_index_[node->GetNodeId()] = index; |
+  nodes_.push_back(node); |
+ |
+  if (node->HasContentDocument()) |
+    EnumerateNodes(node->GetContentDocument()); |
+ |
+  if (node->HasChildren()) { |
+    for (const std::unique_ptr<dom::Node>& child : *node->GetChildren()) { |
+      EnumerateNodes(child.get()); |
+    } |
+  } |
+} |
+ |
+// Flattens the enumerated DOM nodes into a list of dictionaries, merges the |
+// layout tree data into them and passes the list to |callback_|. |
+// |
+// Links between nodes are rewritten as indices into the flat list |
+// ("childIndices", "contentDocumentIndex"); the nested "children" and |
+// "contentDocument" entries and "childNodeCount" are removed. All |
+// per-extraction state is reset before the callback runs. |
+void DomTreeExtractor::ExtractDomTree() { |
+  // One slot per enumerated node, so every index used below is in range. |
+  std::vector<std::unique_ptr<base::DictionaryValue>> dom_nodes( |
+      nodes_.size()); |
+ |
+  // Serialize DOM nodes into a flat array. |
+  for (size_t i = 0; i < nodes_.size(); i++) { |
+    dom::Node* node = const_cast<dom::Node*>(nodes_[i]); |
+    // The cast relies on Serialize() producing a dictionary value. |
+    dom_nodes[i].reset( |
+        static_cast<base::DictionaryValue*>(node->Serialize().release())); |
+ |
+    if (node->HasChildren()) { |
+      std::unique_ptr<base::ListValue> children(new base::ListValue()); |
+      for (const std::unique_ptr<dom::Node>& child : *node->GetChildren()) { |
+        children->AppendInteger(node_id_to_index_[child->GetNodeId()]); |
+      } |
+      dom_nodes[i]->Set("childIndices", std::move(children)); |
Sami
2016/09/30 10:56:03
typo: indices
alex clarke (OOO till 29th)
2016/09/30 13:16:32
Done.
|
+      dom_nodes[i]->Remove("children", nullptr); |
+    } |
+ |
+    if (node->HasContentDocument()) { |
+      dom_nodes[i]->SetInteger( |
+          "contentDocumentIndex", |
+          node_id_to_index_[node->GetContentDocument()->GetNodeId()]); |
+      dom_nodes[i]->Remove("contentDocument", nullptr); |
+    } |
+ |
+    dom_nodes[i]->Remove("childNodeCount", nullptr); |
+  } |
+ |
+  // Merge in the layout tree. |
Sami
2016/09/30 10:56:03
nit: layout tree
alex clarke (OOO till 29th)
2016/09/30 13:16:32
Done.
|
+  // NOTE(review): the lookup key is the layout node's backend node id, while |
+  // the map is keyed by dom::Node::GetNodeId() -- confirm both ids come from |
+  // the same id space. Layout nodes without a matching DOM node are skipped. |
+  for (const std::unique_ptr<dom::LayoutTreeNode>& layout_node : |
+       *layout_tree_result_->GetLayoutTreeNodes()) { |
+    std::unordered_map<NodeId, size_t>::const_iterator it = |
+        node_id_to_index_.find(layout_node->GetBackendNodeId()); |
+    if (it == node_id_to_index_.end()) |
+      continue; |
+ |
+    base::DictionaryValue* node_dict = dom_nodes[it->second].get(); |
+    node_dict->Set("boundingBox", layout_node->GetBoundingBox()->Serialize()); |
+ |
+    if (layout_node->HasLayoutText()) |
+      node_dict->SetString("layoutText", layout_node->GetLayoutText()); |
+ |
+    if (layout_node->HasInlineTextNodes()) { |
+      std::unique_ptr<base::ListValue> inline_text_nodes(new base::ListValue()); |
+      for (const std::unique_ptr<dom::InlineTextBox>& inline_text_box : |
+           *layout_node->GetInlineTextNodes()) { |
+        // Writing at the current size places the box at the end of the list. |
+        size_t index = inline_text_nodes->GetSize(); |
+        inline_text_nodes->Set(index, inline_text_box->Serialize()); |
+      } |
+      node_dict->Set("inlineTextNodes", std::move(inline_text_nodes)); |
+    } |
+  } |
+ |
+  // Reset everything that belongs to this extraction so the next ExtractDom() |
+  // starts from a clean slate. The id map has to be emptied together with |
+  // |nodes_|, otherwise indices left over from this run would leak into the |
+  // next one. |
+  nodes_.clear(); |
+  node_id_to_index_.clear(); |
+  document_result_.reset(); |
+  layout_tree_result_.reset(); |
+  child_nodes_fetched_ = false; |
+  work_in_progress_ = false; |
+ |
+  // Run the callback last: by now a new extraction may be started. |
+  callback_.Run(std::move(dom_nodes)); |
+} |
+ |
+} // namespace headless |