OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2016 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "headless/public/util/dom_tree_extractor.h" | |
6 | |
7 #include "base/bind.h" | |
8 #include "base/json/json_writer.h" | |
9 #include "headless/public/headless_devtools_client.h" | |
10 | |
11 namespace headless { | |
12 | |
13 DomTreeExtractor::DomTreeExtractor(HeadlessDevToolsClient* devtools_client) | |
14 : child_nodes_fetched_(false), | |
15 dom_observer_registered_(false), | |
16 work_in_progress_(false), | |
17 devtools_client_(devtools_client), | |
18 weak_factory_(this) {} | |
19 | |
20 DomTreeExtractor::~DomTreeExtractor() { | |
21 if (dom_observer_registered_) | |
22 devtools_client_->GetDOM()->RemoveObserver(this); | |
23 } | |
24 | |
25 void DomTreeExtractor::ExtractDom(DomResultCB callback) { | |
26 DCHECK(!work_in_progress_); | |
27 work_in_progress_ = true; | |
28 | |
29 callback_ = std::move(callback); | |
30 | |
31 // Fetching the DOM nodes is a two step process. First we fetch the Document | |
32 // (which only contains a few nodes) and then we fetch all it's children | |
33 // including any iframe content documents. | |
34 devtools_client_->GetDOM()->GetDocument(base::Bind( | |
35 &DomTreeExtractor::OnRootDocumentFetched, weak_factory_.GetWeakPtr())); | |
36 | |
37 devtools_client_->GetDOM()->GetExperimental()->GetLayoutTreeNodes( | |
38 dom::GetLayoutTreeNodesParams::Builder().Build(), | |
39 base::Bind(&DomTreeExtractor::OnLayoutTreeNodesFetched, | |
40 weak_factory_.GetWeakPtr())); | |
41 } | |
42 | |
43 void DomTreeExtractor::OnRootDocumentFetched( | |
44 std::unique_ptr<dom::GetDocumentResult> result) { | |
45 document_result_ = std::move(result); | |
46 | |
47 devtools_client_->GetDOM()->AddObserver(this); | |
Sami
2016/09/30 10:56:03
Probably should avoid doing this twice?
alex clarke (OOO till 29th)
2016/09/30 13:16:32
Done.
| |
48 dom_observer_registered_ = true; | |
49 | |
50 devtools_client_->GetDOM()->RequestChildNodes( | |
51 dom::RequestChildNodesParams::Builder() | |
52 .SetNodeId(document_result_->GetRoot()->GetNodeId()) | |
53 .SetDepth(-1) | |
54 .SetTraverseFrames(true) | |
55 .Build()); | |
56 } | |
57 | |
58 void DomTreeExtractor::OnLayoutTreeNodesFetched( | |
59 std::unique_ptr<dom::GetLayoutTreeNodesResult> result) { | |
60 layout_tree_result_ = std::move(result); | |
61 MaybeExtractDomTree(); | |
62 } | |
63 | |
64 void DomTreeExtractor::OnSetChildNodes(const dom::SetChildNodesParams& params) { | |
65 // Ignore nodes we're not looking for. | |
66 if (params.GetParentId() != document_result_->GetRoot()->GetNodeId()) { | |
67 LOG(WARNING) << "Received unexpected child nodes for parent id " | |
68 << params.GetParentId(); | |
69 return; | |
70 } | |
71 | |
72 // Move the missing children into the |document_result_|. | |
73 dom::Node* parent_node = const_cast<dom::Node*>(document_result_->GetRoot()); | |
74 std::vector<std::unique_ptr<dom::Node>>* child_nodes = | |
75 const_cast<std::vector<std::unique_ptr<dom::Node>>*>(params.GetNodes()); | |
76 parent_node->SetChildren(std::move(*child_nodes)); | |
77 | |
78 child_nodes_fetched_ = true; | |
79 MaybeExtractDomTree(); | |
80 } | |
81 | |
82 void DomTreeExtractor::MaybeExtractDomTree() { | |
83 if (document_result_ && layout_tree_result_ && child_nodes_fetched_) { | |
84 EnumerateNodes(document_result_->GetRoot()); | |
85 ExtractDomTree(); | |
86 } | |
87 } | |
88 | |
89 void DomTreeExtractor::EnumerateNodes(const dom::Node* node) { | |
90 // Allocate an index and record the node pointer. | |
91 size_t index = node_id_to_index_.size(); | |
92 node_id_to_index_[node->GetNodeId()] = index; | |
93 nodes_.push_back(node); | |
94 | |
95 if (node->HasContentDocument()) | |
96 EnumerateNodes(node->GetContentDocument()); | |
97 | |
98 if (node->HasChildren()) { | |
99 for (const std::unique_ptr<dom::Node>& child : *node->GetChildren()) { | |
100 EnumerateNodes(child.get()); | |
101 } | |
102 } | |
103 } | |
104 | |
105 void DomTreeExtractor::ExtractDomTree() { | |
106 std::vector<std::unique_ptr<base::DictionaryValue>> dom_nodes( | |
107 node_id_to_index_.size()); | |
108 | |
109 // Serialize DOM nodes into a flat array. | |
110 for (size_t i = 0; i < nodes_.size(); i++) { | |
111 dom::Node* node = const_cast<dom::Node*>(nodes_[i]); | |
112 dom_nodes[i].reset( | |
113 static_cast<base::DictionaryValue*>(node->Serialize().release())); | |
114 | |
115 if (node->HasChildren()) { | |
116 std::unique_ptr<base::ListValue> children(new base::ListValue()); | |
117 for (const std::unique_ptr<dom::Node>& child : *node->GetChildren()) { | |
118 children->AppendInteger(node_id_to_index_[child->GetNodeId()]); | |
119 } | |
120 dom_nodes[i]->Set("childIndicies", std::move(children)); | |
Sami
2016/09/30 10:56:03
typo: indices
alex clarke (OOO till 29th)
2016/09/30 13:16:32
Done.
| |
121 dom_nodes[i]->Remove("children", nullptr); | |
122 } | |
123 | |
124 if (node->HasContentDocument()) { | |
125 dom_nodes[i]->SetInteger( | |
126 "contentDocumentIndex", | |
127 node_id_to_index_[node->GetContentDocument()->GetNodeId()]); | |
128 dom_nodes[i]->Remove("contentDocument", nullptr); | |
129 } | |
130 | |
131 dom_nodes[i]->Remove("childNodeCount", nullptr); | |
132 } | |
133 | |
134 // Merge in Render Tree. | |
Sami
2016/09/30 10:56:03
nit: layout tree
alex clarke (OOO till 29th)
2016/09/30 13:16:32
Done.
| |
135 for (const std::unique_ptr<dom::LayoutTreeNode>& layout_node : | |
136 *layout_tree_result_->GetLayoutTreeNodes()) { | |
137 std::unordered_map<NodeId, size_t>::const_iterator it = | |
138 node_id_to_index_.find(layout_node->GetBackendNodeId()); | |
139 if (it == node_id_to_index_.end()) | |
140 continue; | |
141 | |
142 base::DictionaryValue* node_dict = dom_nodes[it->second].get(); | |
143 node_dict->Set("boundingBox", layout_node->GetBoundingBox()->Serialize()); | |
144 | |
145 if (layout_node->HasLayoutText()) | |
146 node_dict->SetString("layoutText", layout_node->GetLayoutText()); | |
147 | |
148 if (layout_node->HasInlineTextNodes()) { | |
149 std::unique_ptr<base::ListValue> inline_text_nodes(new base::ListValue()); | |
150 for (const std::unique_ptr<dom::InlineTextBox>& inline_text_box : | |
151 *layout_node->GetInlineTextNodes()) { | |
152 size_t index = inline_text_nodes->GetSize(); | |
153 inline_text_nodes->Set(index, inline_text_box->Serialize()); | |
154 } | |
155 node_dict->Set("inlineTextNodes", std::move(inline_text_nodes)); | |
156 } | |
157 } | |
158 | |
159 nodes_.clear(); | |
160 document_result_.reset(); | |
161 layout_tree_result_.reset(); | |
162 child_nodes_fetched_ = false; | |
163 work_in_progress_ = false; | |
164 | |
165 callback_.Run(std::move(dom_nodes)); | |
166 } | |
167 | |
168 } // namespace headless | |
OLD | NEW |