OLD | NEW |
(Empty) | |
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "headless/public/util/dom_tree_extractor.h" |
| 6 |
| 7 #include <memory> |
| 8 #include "base/json/json_writer.h" |
| 9 #include "base/strings/string_util.h" |
| 10 #include "content/public/browser/render_widget_host_view.h" |
| 11 #include "content/public/browser/web_contents.h" |
| 12 #include "content/public/test/browser_test.h" |
| 13 #include "headless/lib/browser/headless_web_contents_impl.h" |
| 14 #include "headless/public/domains/browser.h" |
| 15 #include "headless/public/domains/emulation.h" |
| 16 #include "headless/public/domains/network.h" |
| 17 #include "headless/public/domains/page.h" |
| 18 #include "headless/public/headless_browser.h" |
| 19 #include "headless/public/headless_devtools_client.h" |
| 20 #include "headless/public/headless_devtools_target.h" |
| 21 #include "headless/test/headless_browser_test.h" |
| 22 #include "testing/gtest/include/gtest/gtest.h" |
| 23 #include "url/gurl.h" |
| 24 |
| 25 namespace headless { |
| 26 |
| 27 class DomTreeExtractorBrowserTest : public HeadlessAsyncDevTooledBrowserTest, |
| 28 public page::Observer { |
| 29 public: |
| 30 void RunDevTooledTest() override { |
| 31 EXPECT_TRUE(embedded_test_server()->Start()); |
| 32 devtools_client_->GetPage()->AddObserver(this); |
| 33 devtools_client_->GetPage()->Enable(); |
| 34 devtools_client_->GetPage()->Navigate( |
| 35 embedded_test_server()->GetURL("/dom_tree_test.html").spec()); |
| 36 } |
| 37 |
| 38 void OnLoadEventFired(const page::LoadEventFiredParams& params) override { |
| 39 devtools_client_->GetPage()->RemoveObserver(this); |
| 40 |
| 41 extractor_.reset(new DomTreeExtractor(devtools_client_.get())); |
| 42 extractor_->ExtractDomTree( |
| 43 base::Bind(&DomTreeExtractorBrowserTest::OnDomTreeExtracted, |
| 44 base::Unretained(this))); |
| 45 } |
| 46 |
| 47 void OnDomTreeExtracted(DomTreeExtractor::DomTree dom_tree) { |
| 48 GURL::Replacements replace_port; |
| 49 replace_port.SetPortStr(""); |
| 50 |
| 51 std::vector<std::unique_ptr<base::DictionaryValue>> dom_nodes( |
| 52 dom_tree.dom_nodes_.size()); |
| 53 |
| 54 // For convenience flatten the dom tree into an array. |
| 55 for (size_t i = 0; i < dom_tree.dom_nodes_.size(); i++) { |
| 56 dom::Node* node = const_cast<dom::Node*>(dom_tree.dom_nodes_[i]); |
| 57 |
| 58 dom_nodes[i].reset( |
| 59 static_cast<base::DictionaryValue*>(node->Serialize().release())); |
| 60 |
| 61 // Convert child & content document pointers into indexes. |
| 62 if (node->HasChildren()) { |
| 63 std::unique_ptr<base::ListValue> children(new base::ListValue()); |
| 64 for (const std::unique_ptr<dom::Node>& child : *node->GetChildren()) { |
| 65 children->AppendInteger( |
| 66 dom_tree.node_id_to_index_[child->GetNodeId()]); |
| 67 } |
| 68 dom_nodes[i]->Set("childIndices", std::move(children)); |
| 69 dom_nodes[i]->Remove("children", nullptr); |
| 70 } |
| 71 |
| 72 if (node->HasContentDocument()) { |
| 73 dom_nodes[i]->SetInteger( |
| 74 "contentDocumentIndex", |
| 75 dom_tree |
| 76 .node_id_to_index_[node->GetContentDocument()->GetNodeId()]); |
| 77 dom_nodes[i]->Remove("contentDocument", nullptr); |
| 78 } |
| 79 |
| 80 dom_nodes[i]->Remove("childNodeCount", nullptr); |
| 81 |
| 82 // Frame IDs are random. |
| 83 if (dom_nodes[i]->HasKey("frameId")) |
| 84 dom_nodes[i]->SetString("frameId", "?"); |
| 85 |
| 86 // Ports are random. |
| 87 std::string url; |
| 88 if (dom_nodes[i]->GetString("baseURL", &url)) { |
| 89 dom_nodes[i]->SetString( |
| 90 "baseURL", GURL(url).ReplaceComponents(replace_port).spec()); |
| 91 } |
| 92 |
| 93 if (dom_nodes[i]->GetString("documentURL", &url)) { |
| 94 dom_nodes[i]->SetString( |
| 95 "documentURL", GURL(url).ReplaceComponents(replace_port).spec()); |
| 96 } |
| 97 } |
| 98 |
| 99 // Merge LayoutTreeNode data into the dictionaries. |
| 100 for (const dom::LayoutTreeNode* layout_node : dom_tree.layout_tree_nodes_) { |
| 101 auto it = |
| 102 dom_tree.node_id_to_index_.find(layout_node->GetBackendNodeId()); |
| 103 ASSERT_TRUE(it != dom_tree.node_id_to_index_.end()); |
| 104 |
| 105 base::DictionaryValue* node_dict = dom_nodes[it->second].get(); |
| 106 node_dict->Set("boundingBox", layout_node->GetBoundingBox()->Serialize()); |
| 107 |
| 108 if (layout_node->HasLayoutText()) |
| 109 node_dict->SetString("layoutText", layout_node->GetLayoutText()); |
| 110 |
| 111 if (layout_node->HasInlineTextNodes()) { |
| 112 std::unique_ptr<base::ListValue> inline_text_nodes( |
| 113 new base::ListValue()); |
| 114 for (const std::unique_ptr<dom::InlineTextBox>& inline_text_box : |
| 115 *layout_node->GetInlineTextNodes()) { |
| 116 size_t index = inline_text_nodes->GetSize(); |
| 117 inline_text_nodes->Set(index, inline_text_box->Serialize()); |
| 118 } |
| 119 node_dict->Set("inlineTextNodes", std::move(inline_text_nodes)); |
| 120 } |
| 121 } |
| 122 |
| 123 const std::vector<std::string> expected_results = { |
| 124 "{\n" |
| 125 " 'baseURL': 'http://127.0.0.1/dom_tree_test.html',\n" |
| 126 " 'childIndices': [ 1 ],\n" |
| 127 " 'documentURL': 'http://127.0.0.1/dom_tree_test.html',\n" |
| 128 " 'localName': '',\n" |
| 129 " 'nodeId': 1,\n" |
| 130 " 'nodeName': '#document',\n" |
| 131 " 'nodeType': 9,\n" |
| 132 " 'nodeValue': '',\n" |
| 133 " 'xmlVersion': ''\n" |
| 134 "}\n", |
| 135 |
| 136 "{\n" |
| 137 " 'attributes': [ ],\n" |
| 138 " 'childIndices': [ 2, 5 ],\n" |
| 139 " 'frameId': '?',\n" |
| 140 " 'localName': 'html',\n" |
| 141 " 'nodeId': 2,\n" |
| 142 " 'nodeName': 'HTML',\n" |
| 143 " 'nodeType': 1,\n" |
| 144 " 'nodeValue': ''\n" |
| 145 "}\n", |
| 146 |
| 147 "{\n" |
| 148 " 'attributes': [ ],\n" |
| 149 " 'boundingBox': {\n" |
| 150 " 'height': 600.0,\n" |
| 151 " 'width': 800.0,\n" |
| 152 " 'x': 0.0,\n" |
| 153 " 'y': 0.0\n" |
| 154 " },\n" |
| 155 " 'childIndices': [ 3 ],\n" |
| 156 " 'localName': 'head',\n" |
| 157 " 'nodeId': 3,\n" |
| 158 " 'nodeName': 'HEAD',\n" |
| 159 " 'nodeType': 1,\n" |
| 160 " 'nodeValue': ''\n" |
| 161 "}\n", |
| 162 |
| 163 "{\n" |
| 164 " 'attributes': [ ],\n" |
| 165 " 'boundingBox': {\n" |
| 166 " 'height': 584.0,\n" |
| 167 " 'width': 784.0,\n" |
| 168 " 'x': 8.0,\n" |
| 169 " 'y': 8.0\n" |
| 170 " },\n" |
| 171 " 'childIndices': [ 4 ],\n" |
| 172 " 'localName': 'title',\n" |
| 173 " 'nodeId': 5,\n" |
| 174 " 'nodeName': 'TITLE',\n" |
| 175 " 'nodeType': 1,\n" |
| 176 " 'nodeValue': ''\n" |
| 177 "}\n", |
| 178 |
| 179 "{\n" |
| 180 " 'boundingBox': {\n" |
| 181 " 'height': 226.0,\n" |
| 182 " 'width': 784.0,\n" |
| 183 " 'x': 8.0,\n" |
| 184 " 'y': 8.0\n" |
| 185 " },\n" |
| 186 " 'localName': '',\n" |
| 187 " 'nodeId': 6,\n" |
| 188 " 'nodeName': '#text',\n" |
| 189 " 'nodeType': 3,\n" |
| 190 " 'nodeValue': 'Hello world!'\n" |
| 191 "}\n", |
| 192 |
| 193 "{\n" |
| 194 " 'attributes': [ ],\n" |
| 195 " 'boundingBox': {\n" |
| 196 " 'height': 600.0,\n" |
| 197 " 'width': 800.0,\n" |
| 198 " 'x': 0.0,\n" |
| 199 " 'y': 0.0\n" |
| 200 " },\n" |
| 201 " 'childIndices': [ 6 ],\n" |
| 202 " 'localName': 'body',\n" |
| 203 " 'nodeId': 4,\n" |
| 204 " 'nodeName': 'BODY',\n" |
| 205 " 'nodeType': 1,\n" |
| 206 " 'nodeValue': ''\n" |
| 207 "}\n", |
| 208 |
| 209 "{\n" |
| 210 " 'attributes': [ 'id', 'id1' ],\n" |
| 211 " 'boundingBox': {\n" |
| 212 " 'height': 17.0,\n" |
| 213 " 'width': 73.0,\n" |
| 214 " 'x': 8.0,\n" |
| 215 " 'y': 198.0\n" |
| 216 " },\n" |
| 217 " 'childIndices': [ 7, 8, 15 ],\n" |
| 218 " 'inlineTextNodes': [ {\n" |
| 219 " 'boundingBox': {\n" |
| 220 " 'height': 17.0,\n" |
| 221 " 'width': 72.4375,\n" |
| 222 " 'x': 8.0,\n" |
| 223 " 'y': 198.0\n" |
| 224 " },\n" |
| 225 " 'numCharacters': 11,\n" |
| 226 " 'startCharacterIndex': 1\n" |
| 227 " } ],\n" |
| 228 " 'layoutText': '\\nSome text.\\n',\n" |
| 229 " 'localName': 'div',\n" |
| 230 " 'nodeId': 7,\n" |
| 231 " 'nodeName': 'DIV',\n" |
| 232 " 'nodeType': 1,\n" |
| 233 " 'nodeValue': ''\n" |
| 234 "}\n", |
| 235 |
| 236 "{\n" |
| 237 " 'boundingBox': {\n" |
| 238 " 'height': 200.0,\n" |
| 239 " 'width': 400.0,\n" |
| 240 " 'x': 82.0,\n" |
| 241 " 'y': 10.0\n" |
| 242 " },\n" |
| 243 " 'localName': '',\n" |
| 244 " 'nodeId': 8,\n" |
| 245 " 'nodeName': '#text',\n" |
| 246 " 'nodeType': 3,\n" |
| 247 " 'nodeValue': '\\nSome text.\\n'\n" |
| 248 "}\n", |
| 249 |
| 250 "{\n" |
| 251 " 'attributes': [ 'src', '/iframe.html', 'width', '400', 'height'," |
| 252 " '200' ],\n" |
| 253 " 'boundingBox': {\n" |
| 254 " 'height': 171.0,\n" |
| 255 " 'width': 384.0,\n" |
| 256 " 'x': 90.0,\n" |
| 257 " 'y': 18.0\n" |
| 258 " },\n" |
| 259 " 'childIndices': [ ],\n" |
| 260 " 'contentDocumentIndex': 9,\n" |
| 261 " 'frameId': '?',\n" |
| 262 " 'localName': 'iframe',\n" |
| 263 " 'nodeId': 9,\n" |
| 264 " 'nodeName': 'IFRAME',\n" |
| 265 " 'nodeType': 1,\n" |
| 266 " 'nodeValue': ''\n" |
| 267 "}\n", |
| 268 |
| 269 "{\n" |
| 270 " 'baseURL': 'http://127.0.0.1/iframe.html',\n" |
| 271 " 'boundingBox': {\n" |
| 272 " 'height': 37.0,\n" |
| 273 " 'width': 384.0,\n" |
| 274 " 'x': 90.0,\n" |
| 275 " 'y': 18.0\n" |
| 276 " },\n" |
| 277 " 'childIndices': [ 10 ],\n" |
| 278 " 'documentURL': 'http://127.0.0.1/iframe.html',\n" |
| 279 " 'localName': '',\n" |
| 280 " 'nodeId': 10,\n" |
| 281 " 'nodeName': '#document',\n" |
| 282 " 'nodeType': 9,\n" |
| 283 " 'nodeValue': '',\n" |
| 284 " 'xmlVersion': ''\n" |
| 285 "}\n", |
| 286 |
| 287 "{\n" |
| 288 " 'attributes': [ ],\n" |
| 289 " 'boundingBox': {\n" |
| 290 " 'height': 36.0,\n" |
| 291 " 'width': 308.0,\n" |
| 292 " 'x': 8.0,\n" |
| 293 " 'y': 8.0\n" |
| 294 " },\n" |
| 295 " 'childIndices': [ 11, 12 ],\n" |
| 296 " 'frameId': '?',\n" |
| 297 " 'inlineTextNodes': [ {\n" |
| 298 " 'boundingBox': {\n" |
| 299 " 'height': 36.0,\n" |
| 300 " 'width': 307.734375,\n" |
| 301 " 'x': 8.0,\n" |
| 302 " 'y': 8.0\n" |
| 303 " },\n" |
| 304 " 'numCharacters': 22,\n" |
| 305 " 'startCharacterIndex': 0\n" |
| 306 " } ],\n" |
| 307 " 'layoutText': 'Hello from the iframe!',\n" |
| 308 " 'localName': 'html',\n" |
| 309 " 'nodeId': 11,\n" |
| 310 " 'nodeName': 'HTML',\n" |
| 311 " 'nodeType': 1,\n" |
| 312 " 'nodeValue': ''\n" |
| 313 "}\n", |
| 314 |
| 315 "{\n" |
| 316 " 'attributes': [ ],\n" |
| 317 " 'boundingBox': {\n" |
| 318 " 'height': 204.0,\n" |
| 319 " 'width': 405.0,\n" |
| 320 " 'x': 80.0,\n" |
| 321 " 'y': 8.0\n" |
| 322 " },\n" |
| 323 " 'childIndices': [ ],\n" |
| 324 " 'localName': 'head',\n" |
| 325 " 'nodeId': 12,\n" |
| 326 " 'nodeName': 'HEAD',\n" |
| 327 " 'nodeType': 1,\n" |
| 328 " 'nodeValue': ''\n" |
| 329 "}\n", |
| 330 |
| 331 "{\n" |
| 332 " 'attributes': [ ],\n" |
| 333 " 'boundingBox': {\n" |
| 334 " 'height': 0.0,\n" |
| 335 " 'width': 0.0,\n" |
| 336 " 'x': 0.0,\n" |
| 337 " 'y': 0.0\n" |
| 338 " },\n" |
| 339 " 'childIndices': [ 13 ],\n" |
| 340 " 'layoutText': '\\n',\n" |
| 341 " 'localName': 'body',\n" |
| 342 " 'nodeId': 13,\n" |
| 343 " 'nodeName': 'BODY',\n" |
| 344 " 'nodeType': 1,\n" |
| 345 " 'nodeValue': ''\n" |
| 346 "}\n", |
| 347 |
| 348 "{\n" |
| 349 " 'attributes': [ ],\n" |
| 350 " 'boundingBox': {\n" |
| 351 " 'height': 18.0,\n" |
| 352 " 'width': 784.0,\n" |
| 353 " 'x': 8.0,\n" |
| 354 " 'y': 216.0\n" |
| 355 " },\n" |
| 356 " 'childIndices': [ 14 ],\n" |
| 357 " 'localName': 'h1',\n" |
| 358 " 'nodeId': 14,\n" |
| 359 " 'nodeName': 'H1',\n" |
| 360 " 'nodeType': 1,\n" |
| 361 " 'nodeValue': ''\n" |
| 362 "}\n", |
| 363 |
| 364 "{\n" |
| 365 " 'boundingBox': {\n" |
| 366 " 'height': 18.0,\n" |
| 367 " 'width': 784.0,\n" |
| 368 " 'x': 8.0,\n" |
| 369 " 'y': 216.0\n" |
| 370 " },\n" |
| 371 " 'localName': '',\n" |
| 372 " 'nodeId': 15,\n" |
| 373 " 'nodeName': '#text',\n" |
| 374 " 'nodeType': 3,\n" |
| 375 " 'nodeValue': 'Hello from the iframe!'\n" |
| 376 "}\n", |
| 377 |
| 378 "{\n" |
| 379 " 'attributes': [ 'id', 'id2' ],\n" |
| 380 " 'boundingBox': {\n" |
| 381 " 'height': 18.0,\n" |
| 382 " 'width': 784.0,\n" |
| 383 " 'x': 8.0,\n" |
| 384 " 'y': 216.0\n" |
| 385 " },\n" |
| 386 " 'childIndices': [ 16 ],\n" |
| 387 " 'localName': 'div',\n" |
| 388 " 'nodeId': 16,\n" |
| 389 " 'nodeName': 'DIV',\n" |
| 390 " 'nodeType': 1,\n" |
| 391 " 'nodeValue': ''\n" |
| 392 "}\n", |
| 393 |
| 394 "{\n" |
| 395 " 'attributes': [ 'id', 'id3' ],\n" |
| 396 " 'boundingBox': {\n" |
| 397 " 'height': 17.0,\n" |
| 398 " 'width': 53.0,\n" |
| 399 " 'x': 8.0,\n" |
| 400 " 'y': 216.0\n" |
| 401 " },\n" |
| 402 " 'childIndices': [ 17 ],\n" |
| 403 " 'localName': 'div',\n" |
| 404 " 'nodeId': 17,\n" |
| 405 " 'nodeName': 'DIV',\n" |
| 406 " 'nodeType': 1,\n" |
| 407 " 'nodeValue': ''\n" |
| 408 "}\n", |
| 409 |
| 410 "{\n" |
| 411 " 'attributes': [ 'id', 'id4' ],\n" |
| 412 " 'boundingBox': {\n" |
| 413 " 'height': 17.0,\n" |
| 414 " 'width': 53.0,\n" |
| 415 " 'x': 8.0,\n" |
| 416 " 'y': 216.0\n" |
| 417 " },\n" |
| 418 " 'childIndices': [ 18 ],\n" |
| 419 " 'inlineTextNodes': [ {\n" |
| 420 " 'boundingBox': {\n" |
| 421 " 'height': 17.0,\n" |
| 422 " 'width': 52.421875,\n" |
| 423 " 'x': 8.0,\n" |
| 424 " 'y': 216.0\n" |
| 425 " },\n" |
| 426 " 'numCharacters': 7,\n" |
| 427 " 'startCharacterIndex': 0\n" |
| 428 " } ],\n" |
| 429 " 'layoutText': 'Google!',\n" |
| 430 " 'localName': 'div',\n" |
| 431 " 'nodeId': 18,\n" |
| 432 " 'nodeName': 'DIV',\n" |
| 433 " 'nodeType': 1,\n" |
| 434 " 'nodeValue': ''\n" |
| 435 "}\n", |
| 436 |
| 437 "{\n" |
| 438 " 'attributes': [ 'href', 'https://www.google.com' ],\n" |
| 439 " 'boundingBox': {\n" |
| 440 " 'height': 0.0,\n" |
| 441 " 'width': 0.0,\n" |
| 442 " 'x': 0.0,\n" |
| 443 " 'y': 0.0\n" |
| 444 " },\n" |
| 445 " 'childIndices': [ 19 ],\n" |
| 446 " 'layoutText': '\\n ',\n" |
| 447 " 'localName': 'a',\n" |
| 448 " 'nodeId': 19,\n" |
| 449 " 'nodeName': 'A',\n" |
| 450 " 'nodeType': 1,\n" |
| 451 " 'nodeValue': ''\n" |
| 452 "}\n", |
| 453 |
| 454 "{\n" |
| 455 " 'localName': '',\n" |
| 456 " 'nodeId': 20,\n" |
| 457 " 'nodeName': '#text',\n" |
| 458 " 'nodeType': 3,\n" |
| 459 " 'nodeValue': 'Google!'\n" |
| 460 "}\n"}; |
| 461 |
| 462 EXPECT_EQ(expected_results.size(), dom_nodes.size()); |
| 463 |
| 464 for (size_t i = 0; i < dom_nodes.size(); i++) { |
| 465 std::string result_json; |
| 466 base::JSONWriter::WriteWithOptions( |
| 467 *dom_nodes[i], base::JSONWriter::OPTIONS_PRETTY_PRINT, &result_json); |
| 468 |
| 469 // Not allowed to use C++11 string literals so we make do as best we can. |
| 470 base::ReplaceChars(result_json, "\"", "'", &result_json); |
| 471 |
| 472 ASSERT_LT(i, expected_results.size()); |
| 473 EXPECT_EQ(expected_results[i], result_json) << " Node # " << i; |
| 474 } |
| 475 |
| 476 FinishAsynchronousTest(); |
| 477 } |
| 478 |
| 479 std::unique_ptr<DomTreeExtractor> extractor_; |
| 480 }; |
| 481 |
// Passes the fixture to HEADLESS_ASYNC_DEVTOOLED_TEST_F. The macro is not
// defined in this file; presumably it comes from
// headless/test/headless_browser_test.h and registers the fixture as a
// browser test -- confirm.
| 482 HEADLESS_ASYNC_DEVTOOLED_TEST_F(DomTreeExtractorBrowserTest); |
| 483 |
| 484 } // namespace headless |
OLD | NEW |