Chromium Code Reviews| Index: headless/public/util/dom_tree_extractor.cc |
| diff --git a/headless/public/util/dom_tree_extractor.cc b/headless/public/util/dom_tree_extractor.cc |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..82b4a6bcf4d1f4b8f3a0c3be7ea807bcb14cc13c |
| --- /dev/null |
| +++ b/headless/public/util/dom_tree_extractor.cc |
| @@ -0,0 +1,168 @@ |
| +// Copyright 2016 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +#include "headless/public/util/dom_tree_extractor.h" |
| + |
| +#include "base/bind.h" |
| +#include "base/json/json_writer.h" |
| +#include "headless/public/headless_devtools_client.h" |
| + |
| +namespace headless { |
| + |
| +// Creates an extractor that talks to DevTools through |devtools_client|. The |
| +// raw pointer is stored and dereferenced later, including in the destructor; |
| +// presumably the client has to outlive this object -- TODO confirm against |
| +// the header. All three progress flags start out false, and the weak pointer |
| +// factory is bound to this instance. |
| +DomTreeExtractor::DomTreeExtractor(HeadlessDevToolsClient* devtools_client) |
| + : child_nodes_fetched_(false), |
| + dom_observer_registered_(false), |
| + work_in_progress_(false), |
| + devtools_client_(devtools_client), |
| + weak_factory_(this) {} |
| + |
| +// Stops observing DOM domain events, but only when OnRootDocumentFetched() |
| +// actually registered this object as an observer. |
| +DomTreeExtractor::~DomTreeExtractor() { |
| +  if (!dom_observer_registered_) |
| +    return; |
| + |
| +  devtools_client_->GetDOM()->RemoveObserver(this); |
| +} |
| + |
| +// Kicks off an asynchronous extraction. |callback| is stored and later run by |
| +// ExtractDomTree() with the flattened node list. Starting a second extraction |
| +// while one is still in flight is a programming error (DCHECKed). |
| +void DomTreeExtractor::ExtractDom(DomResultCB callback) { |
| +  DCHECK(!work_in_progress_); |
| +  work_in_progress_ = true; |
| +  callback_ = std::move(callback); |
| + |
| +  // The DOM arrives in two stages: GetDocument() returns the root document, |
| +  // which only contains a few nodes; all of its children, including any |
| +  // iframe content documents, are requested from OnRootDocumentFetched(). |
| +  auto on_document_fetched = |
| +      base::Bind(&DomTreeExtractor::OnRootDocumentFetched, |
| +                 weak_factory_.GetWeakPtr()); |
| +  devtools_client_->GetDOM()->GetDocument(std::move(on_document_fetched)); |
| + |
| +  // The layout tree is requested right away, without waiting for the DOM. |
| +  auto on_layout_fetched = |
| +      base::Bind(&DomTreeExtractor::OnLayoutTreeNodesFetched, |
| +                 weak_factory_.GetWeakPtr()); |
| +  devtools_client_->GetDOM()->GetExperimental()->GetLayoutTreeNodes( |
| +      dom::GetLayoutTreeNodesParams::Builder().Build(), |
| +      std::move(on_layout_fetched)); |
| +} |
| + |
| +// Completion handler for the GetDocument() request issued by ExtractDom(). |
| +// Stores the result, makes sure this object observes DOM events, and then |
| +// requests the children of the root node with SetDepth(-1) and frame |
| +// traversal enabled. The children are delivered through OnSetChildNodes(). |
| +void DomTreeExtractor::OnRootDocumentFetched( |
| +    std::unique_ptr<dom::GetDocumentResult> result) { |
| +  document_result_ = std::move(result); |
| + |
| +  // The observer is only removed in the destructor, so register at most once |
| +  // even when the same extractor is used for several extractions. |
| +  if (!dom_observer_registered_) { |
| +    devtools_client_->GetDOM()->AddObserver(this); |

Sami
2016/09/30 10:56:03
Probably should avoid doing this twice?
alex clarke (OOO till 29th)
2016/09/30 13:16:32
Done.
|
| +    dom_observer_registered_ = true; |
| +  } |
| + |
| +  devtools_client_->GetDOM()->RequestChildNodes( |
| +      dom::RequestChildNodesParams::Builder() |
| +          .SetNodeId(document_result_->GetRoot()->GetNodeId()) |
| +          .SetDepth(-1) |
| +          .SetTraverseFrames(true) |
| +          .Build()); |
| +} |
| + |
| +// Completion handler for the GetLayoutTreeNodes() request issued by |
| +// ExtractDom(). Takes ownership of |result| and then checks whether the DOM |
| +// side has finished as well, in which case the extraction is completed. |
| +void DomTreeExtractor::OnLayoutTreeNodesFetched( |
| + std::unique_ptr<dom::GetLayoutTreeNodesResult> result) { |
| + layout_tree_result_ = std::move(result); |
| + MaybeExtractDomTree(); |
| +} |
| + |
| +// DOM observer hook that receives child nodes from the backend. Only the |
| +// children of the root document requested by OnRootDocumentFetched() are |
| +// accepted; they are moved into the stored document so that the tree held by |
| +// this object is complete. |
| +void DomTreeExtractor::OnSetChildNodes(const dom::SetChildNodesParams& params) { |
| +  // This object stays registered as an observer after an extraction has |
| +  // finished, whereas the stored document is released at that point. Events |
| +  // arriving while no document is held have nothing to attach to, so drop |
| +  // them instead of dereferencing a null pointer below. |
| +  if (!document_result_) |
| +    return; |
| + |
| +  // Ignore nodes we're not looking for. |
| +  if (params.GetParentId() != document_result_->GetRoot()->GetNodeId()) { |
| +    LOG(WARNING) << "Received unexpected child nodes for parent id " |
| +                 << params.GetParentId(); |
| +    return; |
| +  } |
| + |
| +  // Move the missing children into the stored document. Both accessors only |
| +  // hand out const pointers, hence the casts; note that this empties the |
| +  // node list inside |params|. |
| +  dom::Node* parent_node = const_cast<dom::Node*>(document_result_->GetRoot()); |
| +  std::vector<std::unique_ptr<dom::Node>>* child_nodes = |
| +      const_cast<std::vector<std::unique_ptr<dom::Node>>*>(params.GetNodes()); |
| +  parent_node->SetChildren(std::move(*child_nodes)); |
| + |
| +  child_nodes_fetched_ = true; |
| +  MaybeExtractDomTree(); |
| +} |
| + |
| +// Finishes the extraction once the root document, its child nodes and the |
| +// layout tree have all arrived; does nothing while any of them is missing. |
| +void DomTreeExtractor::MaybeExtractDomTree() { |
| +  if (!document_result_ || !layout_tree_result_ || !child_nodes_fetched_) |
| +    return; |
| + |
| +  EnumerateNodes(document_result_->GetRoot()); |
| +  ExtractDomTree(); |
| +} |
| + |
| +// Walks the tree rooted at |node| in pre-order, appending every node to |
| +// |nodes_| and recording its position there under its node id. A content |
| +// document, if present, is visited before the node's children. |
| +void DomTreeExtractor::EnumerateNodes(const dom::Node* node) { |
| +  // The index has to be the node's position in |nodes_|, because that is how |
| +  // ExtractDomTree() addresses the flat array. Deriving it from the size of |
| +  // the id map instead would go out of step as soon as the map holds an |
| +  // entry that has no counterpart in |nodes_|. |
| +  const size_t index = nodes_.size(); |
| +  node_id_to_index_[node->GetNodeId()] = index; |
| +  nodes_.push_back(node); |
| + |
| +  if (node->HasContentDocument()) |
| +    EnumerateNodes(node->GetContentDocument()); |
| + |
| +  if (node->HasChildren()) { |
| +    for (const std::unique_ptr<dom::Node>& child : *node->GetChildren()) |
| +      EnumerateNodes(child.get()); |
| +  } |
| +} |
| + |
| +// Turns the fetched data into the final result: every enumerated DOM node is |
| +// serialized into a dictionary stored at the node's index, nested children |
| +// and content documents are replaced by indices into that flat array, and |
| +// layout information is merged into the matching entries. Afterwards all |
| +// per-extraction state is reset and the array is passed to |callback_|. |
| +void DomTreeExtractor::ExtractDomTree() { |
| +  // Sized from |nodes_| because the loop below indexes by position in it. |
| +  std::vector<std::unique_ptr<base::DictionaryValue>> dom_nodes( |
| +      nodes_.size()); |
| + |
| +  // Serialize DOM nodes into a flat array. |
| +  for (size_t i = 0; i < nodes_.size(); i++) { |
| +    dom::Node* node = const_cast<dom::Node*>(nodes_[i]); |
| +    dom_nodes[i].reset( |
| +        static_cast<base::DictionaryValue*>(node->Serialize().release())); |
| + |
| +    if (node->HasChildren()) { |
| +      std::unique_ptr<base::ListValue> children(new base::ListValue()); |
| +      for (const std::unique_ptr<dom::Node>& child : *node->GetChildren()) { |
| +        children->AppendInteger(node_id_to_index_[child->GetNodeId()]); |
| +      } |
| +      // NOTE(review): the key is misspelled ("Indicies"). It is part of the |
| +      // emitted data, so rename it only together with whatever reads it. |
| +      dom_nodes[i]->Set("childIndicies", std::move(children)); |

Sami
2016/09/30 10:56:03
typo: indices
alex clarke (OOO till 29th)
2016/09/30 13:16:32
Done.
|
| +      dom_nodes[i]->Remove("children", nullptr); |
| +    } |
| + |
| +    if (node->HasContentDocument()) { |
| +      dom_nodes[i]->SetInteger( |
| +          "contentDocumentIndex", |
| +          node_id_to_index_[node->GetContentDocument()->GetNodeId()]); |
| +      dom_nodes[i]->Remove("contentDocument", nullptr); |
| +    } |
| + |
| +    dom_nodes[i]->Remove("childNodeCount", nullptr); |
| +  } |
| + |
| +  // Merge in the layout tree. |

Sami
2016/09/30 10:56:03
nit: layout tree
alex clarke (OOO till 29th)
2016/09/30 13:16:32
Done.
|
| +  // NOTE(review): the map is filled with GetNodeId() keys but queried with |
| +  // GetBackendNodeId() here -- confirm that both use the same id space. |
| +  for (const std::unique_ptr<dom::LayoutTreeNode>& layout_node : |
| +       *layout_tree_result_->GetLayoutTreeNodes()) { |
| +    const auto it = node_id_to_index_.find(layout_node->GetBackendNodeId()); |
| +    if (it == node_id_to_index_.end()) |
| +      continue; |
| + |
| +    base::DictionaryValue* node_dict = dom_nodes[it->second].get(); |
| +    node_dict->Set("boundingBox", layout_node->GetBoundingBox()->Serialize()); |
| + |
| +    if (layout_node->HasLayoutText()) |
| +      node_dict->SetString("layoutText", layout_node->GetLayoutText()); |
| + |
| +    if (layout_node->HasInlineTextNodes()) { |
| +      std::unique_ptr<base::ListValue> inline_text_nodes(new base::ListValue()); |
| +      for (const std::unique_ptr<dom::InlineTextBox>& inline_text_box : |
| +           *layout_node->GetInlineTextNodes()) { |
| +        size_t index = inline_text_nodes->GetSize(); |
| +        inline_text_nodes->Set(index, inline_text_box->Serialize()); |
| +      } |
| +      node_dict->Set("inlineTextNodes", std::move(inline_text_nodes)); |
| +    } |
| +  } |
| + |
| +  // Reset everything that belongs to this extraction before running the |
| +  // callback. The id map has to be emptied together with |nodes_|; entries |
| +  // left behind would otherwise leak into the next extraction. |
| +  nodes_.clear(); |
| +  node_id_to_index_.clear(); |
| +  document_result_.reset(); |
| +  layout_tree_result_.reset(); |
| +  child_nodes_fetched_ = false; |
| +  work_in_progress_ = false; |
| + |
| +  callback_.Run(std::move(dom_nodes)); |
| +} |
| + |
| +} // namespace headless |