Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(186)

Side by Side Diff: headless/public/util/dom_tree_extractor.cc

Issue 2385653003: Add a utility class for extracting details of the DOM (Closed)
Patch Set: Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "headless/public/util/dom_tree_extractor.h"
6
7 #include "base/bind.h"
8 #include "base/json/json_writer.h"
9 #include "headless/public/headless_devtools_client.h"
10
11 namespace headless {
12
13 DomTreeExtractor::DomTreeExtractor(HeadlessDevToolsClient* devtools_client)
14 : child_nodes_fetched_(false),
15 dom_observer_registered_(false),
16 work_in_progress_(false),
17 devtools_client_(devtools_client),
18 weak_factory_(this) {}
19
20 DomTreeExtractor::~DomTreeExtractor() {
21 if (dom_observer_registered_)
22 devtools_client_->GetDOM()->RemoveObserver(this);
23 }
24
25 void DomTreeExtractor::ExtractDom(DomResultCB callback) {
26 DCHECK(!work_in_progress_);
27 work_in_progress_ = true;
28
29 callback_ = std::move(callback);
30
31 // Fetching the DOM nodes is a two step process. First we fetch the Document
32 // (which only contains a few nodes) and then we fetch all it's children
33 // including any iframe content documents.
34 devtools_client_->GetDOM()->GetDocument(base::Bind(
35 &DomTreeExtractor::OnRootDocumentFetched, weak_factory_.GetWeakPtr()));
36
37 devtools_client_->GetDOM()->GetExperimental()->GetLayoutTreeNodes(
38 dom::GetLayoutTreeNodesParams::Builder().Build(),
39 base::Bind(&DomTreeExtractor::OnLayoutTreeNodesFetched,
40 weak_factory_.GetWeakPtr()));
41 }
42
43 void DomTreeExtractor::OnRootDocumentFetched(
44 std::unique_ptr<dom::GetDocumentResult> result) {
45 document_result_ = std::move(result);
46
47 devtools_client_->GetDOM()->AddObserver(this);
Sami 2016/09/30 10:56:03 Probably should avoid doing this twice?
alex clarke (OOO till 29th) 2016/09/30 13:16:32 Done.
48 dom_observer_registered_ = true;
49
50 devtools_client_->GetDOM()->RequestChildNodes(
51 dom::RequestChildNodesParams::Builder()
52 .SetNodeId(document_result_->GetRoot()->GetNodeId())
53 .SetDepth(-1)
54 .SetTraverseFrames(true)
55 .Build());
56 }
57
58 void DomTreeExtractor::OnLayoutTreeNodesFetched(
59 std::unique_ptr<dom::GetLayoutTreeNodesResult> result) {
60 layout_tree_result_ = std::move(result);
61 MaybeExtractDomTree();
62 }
63
64 void DomTreeExtractor::OnSetChildNodes(const dom::SetChildNodesParams& params) {
65 // Ignore nodes we're not looking for.
66 if (params.GetParentId() != document_result_->GetRoot()->GetNodeId()) {
67 LOG(WARNING) << "Received unexpected child nodes for parent id "
68 << params.GetParentId();
69 return;
70 }
71
72 // Move the missing children into the |document_result_|.
73 dom::Node* parent_node = const_cast<dom::Node*>(document_result_->GetRoot());
74 std::vector<std::unique_ptr<dom::Node>>* child_nodes =
75 const_cast<std::vector<std::unique_ptr<dom::Node>>*>(params.GetNodes());
76 parent_node->SetChildren(std::move(*child_nodes));
77
78 child_nodes_fetched_ = true;
79 MaybeExtractDomTree();
80 }
81
82 void DomTreeExtractor::MaybeExtractDomTree() {
83 if (document_result_ && layout_tree_result_ && child_nodes_fetched_) {
84 EnumerateNodes(document_result_->GetRoot());
85 ExtractDomTree();
86 }
87 }
88
89 void DomTreeExtractor::EnumerateNodes(const dom::Node* node) {
90 // Allocate an index and record the node pointer.
91 size_t index = node_id_to_index_.size();
92 node_id_to_index_[node->GetNodeId()] = index;
93 nodes_.push_back(node);
94
95 if (node->HasContentDocument())
96 EnumerateNodes(node->GetContentDocument());
97
98 if (node->HasChildren()) {
99 for (const std::unique_ptr<dom::Node>& child : *node->GetChildren()) {
100 EnumerateNodes(child.get());
101 }
102 }
103 }
104
105 void DomTreeExtractor::ExtractDomTree() {
106 std::vector<std::unique_ptr<base::DictionaryValue>> dom_nodes(
107 node_id_to_index_.size());
108
109 // Serialize DOM nodes into a flat array.
110 for (size_t i = 0; i < nodes_.size(); i++) {
111 dom::Node* node = const_cast<dom::Node*>(nodes_[i]);
112 dom_nodes[i].reset(
113 static_cast<base::DictionaryValue*>(node->Serialize().release()));
114
115 if (node->HasChildren()) {
116 std::unique_ptr<base::ListValue> children(new base::ListValue());
117 for (const std::unique_ptr<dom::Node>& child : *node->GetChildren()) {
118 children->AppendInteger(node_id_to_index_[child->GetNodeId()]);
119 }
120 dom_nodes[i]->Set("childIndicies", std::move(children));
Sami 2016/09/30 10:56:03 typo: indices
alex clarke (OOO till 29th) 2016/09/30 13:16:32 Done.
121 dom_nodes[i]->Remove("children", nullptr);
122 }
123
124 if (node->HasContentDocument()) {
125 dom_nodes[i]->SetInteger(
126 "contentDocumentIndex",
127 node_id_to_index_[node->GetContentDocument()->GetNodeId()]);
128 dom_nodes[i]->Remove("contentDocument", nullptr);
129 }
130
131 dom_nodes[i]->Remove("childNodeCount", nullptr);
132 }
133
134 // Merge in Render Tree.
Sami 2016/09/30 10:56:03 nit: layout tree
alex clarke (OOO till 29th) 2016/09/30 13:16:32 Done.
135 for (const std::unique_ptr<dom::LayoutTreeNode>& layout_node :
136 *layout_tree_result_->GetLayoutTreeNodes()) {
137 std::unordered_map<NodeId, size_t>::const_iterator it =
138 node_id_to_index_.find(layout_node->GetBackendNodeId());
139 if (it == node_id_to_index_.end())
140 continue;
141
142 base::DictionaryValue* node_dict = dom_nodes[it->second].get();
143 node_dict->Set("boundingBox", layout_node->GetBoundingBox()->Serialize());
144
145 if (layout_node->HasLayoutText())
146 node_dict->SetString("layoutText", layout_node->GetLayoutText());
147
148 if (layout_node->HasInlineTextNodes()) {
149 std::unique_ptr<base::ListValue> inline_text_nodes(new base::ListValue());
150 for (const std::unique_ptr<dom::InlineTextBox>& inline_text_box :
151 *layout_node->GetInlineTextNodes()) {
152 size_t index = inline_text_nodes->GetSize();
153 inline_text_nodes->Set(index, inline_text_box->Serialize());
154 }
155 node_dict->Set("inlineTextNodes", std::move(inline_text_nodes));
156 }
157 }
158
159 nodes_.clear();
160 document_result_.reset();
161 layout_tree_result_.reset();
162 child_nodes_fetched_ = false;
163 work_in_progress_ = false;
164
165 callback_.Run(std::move(dom_nodes));
166 }
167
168 } // namespace headless
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698