Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(49)

Side by Side Diff: headless/public/util/dom_tree_extractor_browsertest.cc

Issue 2385653003: Add a utility class for extracting details of the DOM (Closed)
Patch Set: Refactor Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "headless/public/util/dom_tree_extractor.h"
6
7 #include <memory>
8 #include "base/json/json_writer.h"
9 #include "base/strings/string_util.h"
10 #include "content/public/browser/render_widget_host_view.h"
11 #include "content/public/browser/web_contents.h"
12 #include "content/public/test/browser_test.h"
13 #include "headless/lib/browser/headless_web_contents_impl.h"
14 #include "headless/public/domains/browser.h"
15 #include "headless/public/domains/emulation.h"
16 #include "headless/public/domains/network.h"
17 #include "headless/public/domains/page.h"
18 #include "headless/public/headless_browser.h"
19 #include "headless/public/headless_devtools_client.h"
20 #include "headless/public/headless_devtools_target.h"
21 #include "headless/test/headless_browser_test.h"
22 #include "testing/gtest/include/gtest/gtest.h"
23 #include "url/gurl.h"
24
25 namespace headless {
26
27 class DomTreeExtractorBrowserTest : public HeadlessAsyncDevTooledBrowserTest,
28 public page::Observer {
29 public:
30 void RunDevTooledTest() override {
31 EXPECT_TRUE(embedded_test_server()->Start());
32 devtools_client_->GetPage()->AddObserver(this);
33 devtools_client_->GetPage()->Enable();
34 devtools_client_->GetPage()->Navigate(
35 embedded_test_server()->GetURL("/dom_tree_test.html").spec());
36 }
37
38 void OnLoadEventFired(const page::LoadEventFiredParams& params) override {
39 devtools_client_->GetPage()->RemoveObserver(this);
40
41 extractor_.reset(new DomTreeExtractor(devtools_client_.get()));
42 extractor_->ExtractDomTree(
43 base::Bind(&DomTreeExtractorBrowserTest::OnDomTreeExtracted,
44 base::Unretained(this)));
45 }
46
47 void OnDomTreeExtracted(DomTreeExtractor::DomTree dom_tree) {
48 GURL::Replacements replace_port;
49 replace_port.SetPortStr("");
50
51 std::vector<std::unique_ptr<base::DictionaryValue>> dom_nodes(
52 dom_tree.dom_nodes_.size());
53
54 // For convenience flatten the dom tree into an array.
55 for (size_t i = 0; i < dom_tree.dom_nodes_.size(); i++) {
56 dom::Node* node = const_cast<dom::Node*>(dom_tree.dom_nodes_[i]);
57
58 dom_nodes[i].reset(
59 static_cast<base::DictionaryValue*>(node->Serialize().release()));
60
61 // Convert child & content document pointers into indexes.
62 if (node->HasChildren()) {
63 std::unique_ptr<base::ListValue> children(new base::ListValue());
64 for (const std::unique_ptr<dom::Node>& child : *node->GetChildren()) {
65 children->AppendInteger(
66 dom_tree.node_id_to_index_[child->GetNodeId()]);
67 }
68 dom_nodes[i]->Set("childIndices", std::move(children));
69 dom_nodes[i]->Remove("children", nullptr);
70 }
71
72 if (node->HasContentDocument()) {
73 dom_nodes[i]->SetInteger(
74 "contentDocumentIndex",
75 dom_tree
76 .node_id_to_index_[node->GetContentDocument()->GetNodeId()]);
77 dom_nodes[i]->Remove("contentDocument", nullptr);
78 }
79
80 dom_nodes[i]->Remove("childNodeCount", nullptr);
81
82 // Frame IDs are random.
83 if (dom_nodes[i]->HasKey("frameId"))
84 dom_nodes[i]->SetString("frameId", "?");
85
86 // Ports are random.
87 std::string url;
88 if (dom_nodes[i]->GetString("baseURL", &url)) {
89 dom_nodes[i]->SetString(
90 "baseURL", GURL(url).ReplaceComponents(replace_port).spec());
91 }
92
93 if (dom_nodes[i]->GetString("documentURL", &url)) {
94 dom_nodes[i]->SetString(
95 "documentURL", GURL(url).ReplaceComponents(replace_port).spec());
96 }
97 }
98
99 // Merge LayoutTreeNode data into the dictionaries.
100 for (const dom::LayoutTreeNode* layout_node : dom_tree.layout_tree_nodes_) {
101 auto it =
102 dom_tree.node_id_to_index_.find(layout_node->GetBackendNodeId());
103 ASSERT_TRUE(it != dom_tree.node_id_to_index_.end());
104
105 base::DictionaryValue* node_dict = dom_nodes[it->second].get();
106 node_dict->Set("boundingBox", layout_node->GetBoundingBox()->Serialize());
107
108 if (layout_node->HasLayoutText())
109 node_dict->SetString("layoutText", layout_node->GetLayoutText());
110
111 if (layout_node->HasInlineTextNodes()) {
112 std::unique_ptr<base::ListValue> inline_text_nodes(
113 new base::ListValue());
114 for (const std::unique_ptr<dom::InlineTextBox>& inline_text_box :
115 *layout_node->GetInlineTextNodes()) {
116 size_t index = inline_text_nodes->GetSize();
117 inline_text_nodes->Set(index, inline_text_box->Serialize());
118 }
119 node_dict->Set("inlineTextNodes", std::move(inline_text_nodes));
120 }
121 }
122
123 const std::vector<std::string> expected_results = {
124 "{\n"
125 " 'baseURL': 'http://127.0.0.1/dom_tree_test.html',\n"
126 " 'childIndices': [ 1 ],\n"
127 " 'documentURL': 'http://127.0.0.1/dom_tree_test.html',\n"
128 " 'localName': '',\n"
129 " 'nodeId': 1,\n"
130 " 'nodeName': '#document',\n"
131 " 'nodeType': 9,\n"
132 " 'nodeValue': '',\n"
133 " 'xmlVersion': ''\n"
134 "}\n",
135
136 "{\n"
137 " 'attributes': [ ],\n"
138 " 'childIndices': [ 2, 5 ],\n"
139 " 'frameId': '?',\n"
140 " 'localName': 'html',\n"
141 " 'nodeId': 2,\n"
142 " 'nodeName': 'HTML',\n"
143 " 'nodeType': 1,\n"
144 " 'nodeValue': ''\n"
145 "}\n",
146
147 "{\n"
148 " 'attributes': [ ],\n"
149 " 'boundingBox': {\n"
150 " 'height': 600.0,\n"
151 " 'width': 800.0,\n"
152 " 'x': 0.0,\n"
153 " 'y': 0.0\n"
154 " },\n"
155 " 'childIndices': [ 3 ],\n"
156 " 'localName': 'head',\n"
157 " 'nodeId': 3,\n"
158 " 'nodeName': 'HEAD',\n"
159 " 'nodeType': 1,\n"
160 " 'nodeValue': ''\n"
161 "}\n",
162
163 "{\n"
164 " 'attributes': [ ],\n"
165 " 'boundingBox': {\n"
166 " 'height': 584.0,\n"
167 " 'width': 784.0,\n"
168 " 'x': 8.0,\n"
169 " 'y': 8.0\n"
170 " },\n"
171 " 'childIndices': [ 4 ],\n"
172 " 'localName': 'title',\n"
173 " 'nodeId': 5,\n"
174 " 'nodeName': 'TITLE',\n"
175 " 'nodeType': 1,\n"
176 " 'nodeValue': ''\n"
177 "}\n",
178
179 "{\n"
180 " 'boundingBox': {\n"
181 " 'height': 226.0,\n"
182 " 'width': 784.0,\n"
183 " 'x': 8.0,\n"
184 " 'y': 8.0\n"
185 " },\n"
186 " 'localName': '',\n"
187 " 'nodeId': 6,\n"
188 " 'nodeName': '#text',\n"
189 " 'nodeType': 3,\n"
190 " 'nodeValue': 'Hello world!'\n"
191 "}\n",
192
193 "{\n"
194 " 'attributes': [ ],\n"
195 " 'boundingBox': {\n"
196 " 'height': 600.0,\n"
197 " 'width': 800.0,\n"
198 " 'x': 0.0,\n"
199 " 'y': 0.0\n"
200 " },\n"
201 " 'childIndices': [ 6 ],\n"
202 " 'localName': 'body',\n"
203 " 'nodeId': 4,\n"
204 " 'nodeName': 'BODY',\n"
205 " 'nodeType': 1,\n"
206 " 'nodeValue': ''\n"
207 "}\n",
208
209 "{\n"
210 " 'attributes': [ 'id', 'id1' ],\n"
211 " 'boundingBox': {\n"
212 " 'height': 17.0,\n"
213 " 'width': 73.0,\n"
214 " 'x': 8.0,\n"
215 " 'y': 198.0\n"
216 " },\n"
217 " 'childIndices': [ 7, 8, 15 ],\n"
218 " 'inlineTextNodes': [ {\n"
219 " 'boundingBox': {\n"
220 " 'height': 17.0,\n"
221 " 'width': 72.4375,\n"
222 " 'x': 8.0,\n"
223 " 'y': 198.0\n"
224 " },\n"
225 " 'numCharacters': 11,\n"
226 " 'startCharacterIndex': 1\n"
227 " } ],\n"
228 " 'layoutText': '\\nSome text.\\n',\n"
229 " 'localName': 'div',\n"
230 " 'nodeId': 7,\n"
231 " 'nodeName': 'DIV',\n"
232 " 'nodeType': 1,\n"
233 " 'nodeValue': ''\n"
234 "}\n",
235
236 "{\n"
237 " 'boundingBox': {\n"
238 " 'height': 200.0,\n"
239 " 'width': 400.0,\n"
240 " 'x': 82.0,\n"
241 " 'y': 10.0\n"
242 " },\n"
243 " 'localName': '',\n"
244 " 'nodeId': 8,\n"
245 " 'nodeName': '#text',\n"
246 " 'nodeType': 3,\n"
247 " 'nodeValue': '\\nSome text.\\n'\n"
248 "}\n",
249
250 "{\n"
251 " 'attributes': [ 'src', '/iframe.html', 'width', '400', 'height',"
252 " '200' ],\n"
253 " 'boundingBox': {\n"
254 " 'height': 171.0,\n"
255 " 'width': 384.0,\n"
256 " 'x': 90.0,\n"
257 " 'y': 18.0\n"
258 " },\n"
259 " 'childIndices': [ ],\n"
260 " 'contentDocumentIndex': 9,\n"
261 " 'frameId': '?',\n"
262 " 'localName': 'iframe',\n"
263 " 'nodeId': 9,\n"
264 " 'nodeName': 'IFRAME',\n"
265 " 'nodeType': 1,\n"
266 " 'nodeValue': ''\n"
267 "}\n",
268
269 "{\n"
270 " 'baseURL': 'http://127.0.0.1/iframe.html',\n"
271 " 'boundingBox': {\n"
272 " 'height': 37.0,\n"
273 " 'width': 384.0,\n"
274 " 'x': 90.0,\n"
275 " 'y': 18.0\n"
276 " },\n"
277 " 'childIndices': [ 10 ],\n"
278 " 'documentURL': 'http://127.0.0.1/iframe.html',\n"
279 " 'localName': '',\n"
280 " 'nodeId': 10,\n"
281 " 'nodeName': '#document',\n"
282 " 'nodeType': 9,\n"
283 " 'nodeValue': '',\n"
284 " 'xmlVersion': ''\n"
285 "}\n",
286
287 "{\n"
288 " 'attributes': [ ],\n"
289 " 'boundingBox': {\n"
290 " 'height': 36.0,\n"
291 " 'width': 308.0,\n"
292 " 'x': 8.0,\n"
293 " 'y': 8.0\n"
294 " },\n"
295 " 'childIndices': [ 11, 12 ],\n"
296 " 'frameId': '?',\n"
297 " 'inlineTextNodes': [ {\n"
298 " 'boundingBox': {\n"
299 " 'height': 36.0,\n"
300 " 'width': 307.734375,\n"
301 " 'x': 8.0,\n"
302 " 'y': 8.0\n"
303 " },\n"
304 " 'numCharacters': 22,\n"
305 " 'startCharacterIndex': 0\n"
306 " } ],\n"
307 " 'layoutText': 'Hello from the iframe!',\n"
308 " 'localName': 'html',\n"
309 " 'nodeId': 11,\n"
310 " 'nodeName': 'HTML',\n"
311 " 'nodeType': 1,\n"
312 " 'nodeValue': ''\n"
313 "}\n",
314
315 "{\n"
316 " 'attributes': [ ],\n"
317 " 'boundingBox': {\n"
318 " 'height': 204.0,\n"
319 " 'width': 405.0,\n"
320 " 'x': 80.0,\n"
321 " 'y': 8.0\n"
322 " },\n"
323 " 'childIndices': [ ],\n"
324 " 'localName': 'head',\n"
325 " 'nodeId': 12,\n"
326 " 'nodeName': 'HEAD',\n"
327 " 'nodeType': 1,\n"
328 " 'nodeValue': ''\n"
329 "}\n",
330
331 "{\n"
332 " 'attributes': [ ],\n"
333 " 'boundingBox': {\n"
334 " 'height': 0.0,\n"
335 " 'width': 0.0,\n"
336 " 'x': 0.0,\n"
337 " 'y': 0.0\n"
338 " },\n"
339 " 'childIndices': [ 13 ],\n"
340 " 'layoutText': '\\n',\n"
341 " 'localName': 'body',\n"
342 " 'nodeId': 13,\n"
343 " 'nodeName': 'BODY',\n"
344 " 'nodeType': 1,\n"
345 " 'nodeValue': ''\n"
346 "}\n",
347
348 "{\n"
349 " 'attributes': [ ],\n"
350 " 'boundingBox': {\n"
351 " 'height': 18.0,\n"
352 " 'width': 784.0,\n"
353 " 'x': 8.0,\n"
354 " 'y': 216.0\n"
355 " },\n"
356 " 'childIndices': [ 14 ],\n"
357 " 'localName': 'h1',\n"
358 " 'nodeId': 14,\n"
359 " 'nodeName': 'H1',\n"
360 " 'nodeType': 1,\n"
361 " 'nodeValue': ''\n"
362 "}\n",
363
364 "{\n"
365 " 'boundingBox': {\n"
366 " 'height': 18.0,\n"
367 " 'width': 784.0,\n"
368 " 'x': 8.0,\n"
369 " 'y': 216.0\n"
370 " },\n"
371 " 'localName': '',\n"
372 " 'nodeId': 15,\n"
373 " 'nodeName': '#text',\n"
374 " 'nodeType': 3,\n"
375 " 'nodeValue': 'Hello from the iframe!'\n"
376 "}\n",
377
378 "{\n"
379 " 'attributes': [ 'id', 'id2' ],\n"
380 " 'boundingBox': {\n"
381 " 'height': 18.0,\n"
382 " 'width': 784.0,\n"
383 " 'x': 8.0,\n"
384 " 'y': 216.0\n"
385 " },\n"
386 " 'childIndices': [ 16 ],\n"
387 " 'localName': 'div',\n"
388 " 'nodeId': 16,\n"
389 " 'nodeName': 'DIV',\n"
390 " 'nodeType': 1,\n"
391 " 'nodeValue': ''\n"
392 "}\n",
393
394 "{\n"
395 " 'attributes': [ 'id', 'id3' ],\n"
396 " 'boundingBox': {\n"
397 " 'height': 17.0,\n"
398 " 'width': 53.0,\n"
399 " 'x': 8.0,\n"
400 " 'y': 216.0\n"
401 " },\n"
402 " 'childIndices': [ 17 ],\n"
403 " 'localName': 'div',\n"
404 " 'nodeId': 17,\n"
405 " 'nodeName': 'DIV',\n"
406 " 'nodeType': 1,\n"
407 " 'nodeValue': ''\n"
408 "}\n",
409
410 "{\n"
411 " 'attributes': [ 'id', 'id4' ],\n"
412 " 'boundingBox': {\n"
413 " 'height': 17.0,\n"
414 " 'width': 53.0,\n"
415 " 'x': 8.0,\n"
416 " 'y': 216.0\n"
417 " },\n"
418 " 'childIndices': [ 18 ],\n"
419 " 'inlineTextNodes': [ {\n"
420 " 'boundingBox': {\n"
421 " 'height': 17.0,\n"
422 " 'width': 52.421875,\n"
423 " 'x': 8.0,\n"
424 " 'y': 216.0\n"
425 " },\n"
426 " 'numCharacters': 7,\n"
427 " 'startCharacterIndex': 0\n"
428 " } ],\n"
429 " 'layoutText': 'Google!',\n"
430 " 'localName': 'div',\n"
431 " 'nodeId': 18,\n"
432 " 'nodeName': 'DIV',\n"
433 " 'nodeType': 1,\n"
434 " 'nodeValue': ''\n"
435 "}\n",
436
437 "{\n"
438 " 'attributes': [ 'href', 'https://www.google.com' ],\n"
439 " 'boundingBox': {\n"
440 " 'height': 0.0,\n"
441 " 'width': 0.0,\n"
442 " 'x': 0.0,\n"
443 " 'y': 0.0\n"
444 " },\n"
445 " 'childIndices': [ 19 ],\n"
446 " 'layoutText': '\\n ',\n"
447 " 'localName': 'a',\n"
448 " 'nodeId': 19,\n"
449 " 'nodeName': 'A',\n"
450 " 'nodeType': 1,\n"
451 " 'nodeValue': ''\n"
452 "}\n",
453
454 "{\n"
455 " 'localName': '',\n"
456 " 'nodeId': 20,\n"
457 " 'nodeName': '#text',\n"
458 " 'nodeType': 3,\n"
459 " 'nodeValue': 'Google!'\n"
460 "}\n"};
461
462 EXPECT_EQ(expected_results.size(), dom_nodes.size());
463
464 for (size_t i = 0; i < dom_nodes.size(); i++) {
465 std::string result_json;
466 base::JSONWriter::WriteWithOptions(
467 *dom_nodes[i], base::JSONWriter::OPTIONS_PRETTY_PRINT, &result_json);
468
469 // Not allowed to use C++11 string literals so we make do as best we can.
470 base::ReplaceChars(result_json, "\"", "'", &result_json);
471
472 ASSERT_LT(i, expected_results.size());
473 EXPECT_EQ(expected_results[i], result_json) << " Node # " << i;
474 }
475
476 FinishAsynchronousTest();
477 }
478
479 std::unique_ptr<DomTreeExtractor> extractor_;
480 };
481
482 HEADLESS_ASYNC_DEVTOOLED_TEST_F(DomTreeExtractorBrowserTest);  // NOTE(review): presumably registers the fixture above as a browser test -- confirm against the macro's definition.
483
484 } // namespace headless
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698