 Chromium Code Reviews
 Chromium Code Reviews Issue 2385653003:
  Add a utility class for extracting details of the DOM  (Closed)
    
  
    Issue 2385653003:
  Add a utility class for extracting details of the DOM  (Closed) 
  | OLD | NEW | 
|---|---|
| (Empty) | |
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "headless/public/util/dom_tree_extractor.h" | |
| 6 | |
| 7 #include "base/bind.h" | |
| 8 #include "base/json/json_writer.h" | |
| 9 #include "headless/public/headless_devtools_client.h" | |
| 10 | |
| 11 namespace headless { | |
| 12 | |
| 13 DomTreeExtractor::DomTreeExtractor(HeadlessDevToolsClient* devtools_client) | |
| 14 : child_nodes_fetched_(false), | |
| 15 dom_observer_registered_(false), | |
| 16 work_in_progress_(false), | |
| 17 devtools_client_(devtools_client), | |
| 18 weak_factory_(this) {} | |
| 19 | |
| 20 DomTreeExtractor::~DomTreeExtractor() { | |
| 21 if (dom_observer_registered_) | |
| 22 devtools_client_->GetDOM()->RemoveObserver(this); | |
| 23 } | |
| 24 | |
| 25 void DomTreeExtractor::ExtractDom(DomResultCB callback) { | |
| 26 DCHECK(!work_in_progress_); | |
| 27 work_in_progress_ = true; | |
| 28 | |
| 29 callback_ = std::move(callback); | |
| 30 | |
| 31 // Fetching the DOM nodes is a two step process. First we fetch the Document | |
| 32 // (which only contains a few nodes) and then we fetch all it's children | |
| 33 // including any iframe content documents. | |
| 34 devtools_client_->GetDOM()->GetDocument(base::Bind( | |
| 35 &DomTreeExtractor::OnRootDocumentFetched, weak_factory_.GetWeakPtr())); | |
| 36 | |
| 37 devtools_client_->GetDOM()->GetExperimental()->GetLayoutTreeNodes( | |
| 38 dom::GetLayoutTreeNodesParams::Builder().Build(), | |
| 39 base::Bind(&DomTreeExtractor::OnLayoutTreeNodesFetched, | |
| 40 weak_factory_.GetWeakPtr())); | |
| 41 } | |
| 42 | |
| 43 void DomTreeExtractor::OnRootDocumentFetched( | |
| 44 std::unique_ptr<dom::GetDocumentResult> result) { | |
| 45 document_result_ = std::move(result); | |
| 46 | |
| 47 devtools_client_->GetDOM()->AddObserver(this); | |
| 
Sami
2016/09/30 10:56:03
Probably should avoid doing this twice?
 
alex clarke (OOO till 29th)
2016/09/30 13:16:32
Done.
 | |
| 48 dom_observer_registered_ = true; | |
| 49 | |
| 50 devtools_client_->GetDOM()->RequestChildNodes( | |
| 51 dom::RequestChildNodesParams::Builder() | |
| 52 .SetNodeId(document_result_->GetRoot()->GetNodeId()) | |
| 53 .SetDepth(-1) | |
| 54 .SetTraverseFrames(true) | |
| 55 .Build()); | |
| 56 } | |
| 57 | |
| 58 void DomTreeExtractor::OnLayoutTreeNodesFetched( | |
| 59 std::unique_ptr<dom::GetLayoutTreeNodesResult> result) { | |
| 60 layout_tree_result_ = std::move(result); | |
| 61 MaybeExtractDomTree(); | |
| 62 } | |
| 63 | |
| 64 void DomTreeExtractor::OnSetChildNodes(const dom::SetChildNodesParams& params) { | |
| 65 // Ignore nodes we're not looking for. | |
| 66 if (params.GetParentId() != document_result_->GetRoot()->GetNodeId()) { | |
| 67 LOG(WARNING) << "Received unexpected child nodes for parent id " | |
| 68 << params.GetParentId(); | |
| 69 return; | |
| 70 } | |
| 71 | |
| 72 // Move the missing children into the |document_result_|. | |
| 73 dom::Node* parent_node = const_cast<dom::Node*>(document_result_->GetRoot()); | |
| 74 std::vector<std::unique_ptr<dom::Node>>* child_nodes = | |
| 75 const_cast<std::vector<std::unique_ptr<dom::Node>>*>(params.GetNodes()); | |
| 76 parent_node->SetChildren(std::move(*child_nodes)); | |
| 77 | |
| 78 child_nodes_fetched_ = true; | |
| 79 MaybeExtractDomTree(); | |
| 80 } | |
| 81 | |
| 82 void DomTreeExtractor::MaybeExtractDomTree() { | |
| 83 if (document_result_ && layout_tree_result_ && child_nodes_fetched_) { | |
| 84 EnumerateNodes(document_result_->GetRoot()); | |
| 85 ExtractDomTree(); | |
| 86 } | |
| 87 } | |
| 88 | |
| 89 void DomTreeExtractor::EnumerateNodes(const dom::Node* node) { | |
| 90 // Allocate an index and record the node pointer. | |
| 91 size_t index = node_id_to_index_.size(); | |
| 92 node_id_to_index_[node->GetNodeId()] = index; | |
| 93 nodes_.push_back(node); | |
| 94 | |
| 95 if (node->HasContentDocument()) | |
| 96 EnumerateNodes(node->GetContentDocument()); | |
| 97 | |
| 98 if (node->HasChildren()) { | |
| 99 for (const std::unique_ptr<dom::Node>& child : *node->GetChildren()) { | |
| 100 EnumerateNodes(child.get()); | |
| 101 } | |
| 102 } | |
| 103 } | |
| 104 | |
| 105 void DomTreeExtractor::ExtractDomTree() { | |
| 106 std::vector<std::unique_ptr<base::DictionaryValue>> dom_nodes( | |
| 107 node_id_to_index_.size()); | |
| 108 | |
| 109 // Serialize DOM nodes into a flat array. | |
| 110 for (size_t i = 0; i < nodes_.size(); i++) { | |
| 111 dom::Node* node = const_cast<dom::Node*>(nodes_[i]); | |
| 112 dom_nodes[i].reset( | |
| 113 static_cast<base::DictionaryValue*>(node->Serialize().release())); | |
| 114 | |
| 115 if (node->HasChildren()) { | |
| 116 std::unique_ptr<base::ListValue> children(new base::ListValue()); | |
| 117 for (const std::unique_ptr<dom::Node>& child : *node->GetChildren()) { | |
| 118 children->AppendInteger(node_id_to_index_[child->GetNodeId()]); | |
| 119 } | |
| 120 dom_nodes[i]->Set("childIndicies", std::move(children)); | |
| 
Sami
2016/09/30 10:56:03
typo: indices
 
alex clarke (OOO till 29th)
2016/09/30 13:16:32
Done.
 | |
| 121 dom_nodes[i]->Remove("children", nullptr); | |
| 122 } | |
| 123 | |
| 124 if (node->HasContentDocument()) { | |
| 125 dom_nodes[i]->SetInteger( | |
| 126 "contentDocumentIndex", | |
| 127 node_id_to_index_[node->GetContentDocument()->GetNodeId()]); | |
| 128 dom_nodes[i]->Remove("contentDocument", nullptr); | |
| 129 } | |
| 130 | |
| 131 dom_nodes[i]->Remove("childNodeCount", nullptr); | |
| 132 } | |
| 133 | |
| 134 // Merge in Render Tree. | |
| 
Sami
2016/09/30 10:56:03
nit: layout tree
 
alex clarke (OOO till 29th)
2016/09/30 13:16:32
Done.
 | |
| 135 for (const std::unique_ptr<dom::LayoutTreeNode>& layout_node : | |
| 136 *layout_tree_result_->GetLayoutTreeNodes()) { | |
| 137 std::unordered_map<NodeId, size_t>::const_iterator it = | |
| 138 node_id_to_index_.find(layout_node->GetBackendNodeId()); | |
| 139 if (it == node_id_to_index_.end()) | |
| 140 continue; | |
| 141 | |
| 142 base::DictionaryValue* node_dict = dom_nodes[it->second].get(); | |
| 143 node_dict->Set("boundingBox", layout_node->GetBoundingBox()->Serialize()); | |
| 144 | |
| 145 if (layout_node->HasLayoutText()) | |
| 146 node_dict->SetString("layoutText", layout_node->GetLayoutText()); | |
| 147 | |
| 148 if (layout_node->HasInlineTextNodes()) { | |
| 149 std::unique_ptr<base::ListValue> inline_text_nodes(new base::ListValue()); | |
| 150 for (const std::unique_ptr<dom::InlineTextBox>& inline_text_box : | |
| 151 *layout_node->GetInlineTextNodes()) { | |
| 152 size_t index = inline_text_nodes->GetSize(); | |
| 153 inline_text_nodes->Set(index, inline_text_box->Serialize()); | |
| 154 } | |
| 155 node_dict->Set("inlineTextNodes", std::move(inline_text_nodes)); | |
| 156 } | |
| 157 } | |
| 158 | |
| 159 nodes_.clear(); | |
| 160 document_result_.reset(); | |
| 161 layout_tree_result_.reset(); | |
| 162 child_nodes_fetched_ = false; | |
| 163 work_in_progress_ = false; | |
| 164 | |
| 165 callback_.Run(std::move(dom_nodes)); | |
| 166 } | |
| 167 | |
| 168 } // namespace headless | |
| OLD | NEW |