| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "base/compiler_specific.h" | |
| 6 #include "base/file_util.h" | |
| 7 #include "base/files/file_path.h" | |
| 8 #include "base/hash_tables.h" | |
| 9 #include "base/string_util.h" | |
| 10 #include "base/utf_string_conversions.h" | |
| 11 #include "net/base/net_util.h" | |
| 12 #include "net/url_request/url_request_context.h" | |
| 13 #include "third_party/WebKit/Source/Platform/chromium/public/WebCString.h" | |
| 14 #include "third_party/WebKit/Source/Platform/chromium/public/WebData.h" | |
| 15 #include "third_party/WebKit/Source/Platform/chromium/public/WebString.h" | |
| 16 #include "third_party/WebKit/Source/Platform/chromium/public/WebURL.h" | |
| 17 #include "third_party/WebKit/Source/Platform/chromium/public/WebVector.h" | |
| 18 #include "third_party/WebKit/Source/WebKit/chromium/public/WebDocument.h" | |
| 19 #include "third_party/WebKit/Source/WebKit/chromium/public/WebElement.h" | |
| 20 #include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h" | |
| 21 #include "third_party/WebKit/Source/WebKit/chromium/public/WebNode.h" | |
| 22 #include "third_party/WebKit/Source/WebKit/chromium/public/WebNodeCollection.h" | |
| 23 #include "third_party/WebKit/Source/WebKit/chromium/public/WebNodeList.h" | |
| 24 #include "third_party/WebKit/Source/WebKit/chromium/public/WebPageSerializer.h" | |
| 25 #include "third_party/WebKit/Source/WebKit/chromium/public/WebPageSerializerClie
nt.h" | |
| 26 #include "third_party/WebKit/Source/WebKit/chromium/public/WebView.h" | |
| 27 #include "webkit/base/file_path_string_conversions.h" | |
| 28 #include "webkit/glue/dom_operations.h" | |
| 29 #include "webkit/tools/test_shell/simple_resource_loader_bridge.h" | |
| 30 #include "webkit/tools/test_shell/test_shell_test.h" | |
| 31 | |
| 32 using WebKit::WebCString; | |
| 33 using WebKit::WebData; | |
| 34 using WebKit::WebDocument; | |
| 35 using WebKit::WebElement; | |
| 36 using WebKit::WebFrame; | |
| 37 using WebKit::WebNode; | |
| 38 using WebKit::WebNodeCollection; | |
| 39 using WebKit::WebNodeList; | |
| 40 using WebKit::WebPageSerializer; | |
| 41 using WebKit::WebPageSerializerClient; | |
| 42 using WebKit::WebNode; | |
| 43 using WebKit::WebString; | |
| 44 using WebKit::WebURL; | |
| 45 using WebKit::WebView; | |
| 46 using WebKit::WebVector; | |
| 47 | |
| 48 namespace { | |
| 49 | |
| 50 // Iterate recursively over sub-frames to find one with with a given url. | |
| 51 WebFrame* FindSubFrameByURL(WebView* web_view, const GURL& url) { | |
| 52 if (!web_view->mainFrame()) | |
| 53 return NULL; | |
| 54 | |
| 55 std::vector<WebFrame*> stack; | |
| 56 stack.push_back(web_view->mainFrame()); | |
| 57 | |
| 58 while (!stack.empty()) { | |
| 59 WebFrame* current_frame = stack.back(); | |
| 60 stack.pop_back(); | |
| 61 if (GURL(current_frame->document().url()) == url) | |
| 62 return current_frame; | |
| 63 WebNodeCollection all = current_frame->document().all(); | |
| 64 for (WebNode node = all.firstItem(); | |
| 65 !node.isNull(); node = all.nextItem()) { | |
| 66 if (!node.isElementNode()) | |
| 67 continue; | |
| 68 // Check frame tag and iframe tag | |
| 69 WebElement element = node.to<WebElement>(); | |
| 70 if (!element.hasTagName("frame") && !element.hasTagName("iframe")) | |
| 71 continue; | |
| 72 WebFrame* sub_frame = WebFrame::fromFrameOwnerElement(element); | |
| 73 if (sub_frame) | |
| 74 stack.push_back(sub_frame); | |
| 75 } | |
| 76 } | |
| 77 return NULL; | |
| 78 } | |
| 79 | |
| 80 class DomSerializerTests : public TestShellTest, | |
| 81 public WebPageSerializerClient { | |
| 82 public: | |
| 83 DomSerializerTests() | |
| 84 : local_directory_name_(FILE_PATH_LITERAL("./dummy_files/")) { } | |
| 85 | |
| 86 // DomSerializerDelegate. | |
| 87 virtual void didSerializeDataForFrame(const WebURL& frame_web_url, | |
| 88 const WebCString& data, | |
| 89 PageSerializationStatus status) { | |
| 90 | |
| 91 GURL frame_url(frame_web_url); | |
| 92 // If the all frames are finished saving, check all finish status | |
| 93 if (status == WebPageSerializerClient::AllFramesAreFinished) { | |
| 94 SerializationFinishStatusMap::iterator it = | |
| 95 serialization_finish_status_.begin(); | |
| 96 for (; it != serialization_finish_status_.end(); ++it) | |
| 97 ASSERT_TRUE(it->second); | |
| 98 serialized_ = true; | |
| 99 return; | |
| 100 } | |
| 101 | |
| 102 // Check finish status of current frame. | |
| 103 SerializationFinishStatusMap::iterator it = | |
| 104 serialization_finish_status_.find(frame_url.spec()); | |
| 105 // New frame, set initial status as false. | |
| 106 if (it == serialization_finish_status_.end()) | |
| 107 serialization_finish_status_[frame_url.spec()] = false; | |
| 108 | |
| 109 it = serialization_finish_status_.find(frame_url.spec()); | |
| 110 ASSERT_TRUE(it != serialization_finish_status_.end()); | |
| 111 // In process frame, finish status should be false. | |
| 112 ASSERT_FALSE(it->second); | |
| 113 | |
| 114 // Add data to corresponding frame's content. | |
| 115 serialized_frame_map_[frame_url.spec()] += data.data(); | |
| 116 | |
| 117 // Current frame is completed saving, change the finish status. | |
| 118 if (status == WebPageSerializerClient::CurrentFrameIsFinished) | |
| 119 it->second = true; | |
| 120 } | |
| 121 | |
| 122 bool HasSerializedFrame(const GURL& frame_url) { | |
| 123 return serialized_frame_map_.find(frame_url.spec()) != | |
| 124 serialized_frame_map_.end(); | |
| 125 } | |
| 126 | |
| 127 const std::string& GetSerializedContentForFrame( | |
| 128 const GURL& frame_url) { | |
| 129 return serialized_frame_map_[frame_url.spec()]; | |
| 130 } | |
| 131 | |
| 132 // Load web page according to specific URL. | |
| 133 void LoadPageFromURL(const GURL& page_url) { | |
| 134 // Load the test file. | |
| 135 test_shell_->ResetTestController(); | |
| 136 test_shell_->LoadURL(page_url); | |
| 137 test_shell_->WaitTestFinished(); | |
| 138 } | |
| 139 | |
| 140 // Load web page according to input content and relative URLs within | |
| 141 // the document. | |
| 142 void LoadContents(const std::string& contents, | |
| 143 const GURL& base_url, | |
| 144 const WebString encoding_info) { | |
| 145 test_shell_->ResetTestController(); | |
| 146 // If input encoding is empty, use UTF-8 as default encoding. | |
| 147 if (encoding_info.isEmpty()) { | |
| 148 test_shell_->webView()->mainFrame()->loadHTMLString(contents, base_url); | |
| 149 } else { | |
| 150 WebData data(contents.data(), contents.length()); | |
| 151 | |
| 152 // Do not use WebFrame.LoadHTMLString because it assumes that input | |
| 153 // html contents use UTF-8 encoding. | |
| 154 // TODO(darin): This should use WebFrame::loadData. | |
| 155 WebFrame* web_frame = | |
| 156 test_shell_->webView()->mainFrame(); | |
| 157 | |
| 158 ASSERT_TRUE(web_frame != NULL); | |
| 159 | |
| 160 web_frame->loadData(data, "text/html", encoding_info, base_url); | |
| 161 } | |
| 162 | |
| 163 test_shell_->WaitTestFinished(); | |
| 164 } | |
| 165 | |
| 166 // Serialize page DOM according to specific page URL. The parameter | |
| 167 // recursive_serialization indicates whether we will serialize all | |
| 168 // sub-frames. | |
| 169 void SerializeDomForURL(const GURL& page_url, | |
| 170 bool recursive_serialization) { | |
| 171 // Find corresponding WebFrame according to page_url. | |
| 172 WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), | |
| 173 page_url); | |
| 174 ASSERT_TRUE(web_frame != NULL); | |
| 175 // Add input file URl to links_. | |
| 176 links_.assign(&page_url,1); | |
| 177 // Add dummy file path to local_path_. | |
| 178 WebString file_path = webkit_base::FilePathStringToWebString( | |
| 179 FILE_PATH_LITERAL("c:\\dummy.htm")); | |
| 180 local_paths_.assign(&file_path, 1); | |
| 181 // Start serializing DOM. | |
| 182 bool result = WebPageSerializer::serialize(web_frame, | |
| 183 recursive_serialization, | |
| 184 static_cast<WebPageSerializerClient*>(this), | |
| 185 links_, | |
| 186 local_paths_, | |
| 187 webkit_base::FilePathToWebString(local_directory_name_)); | |
| 188 ASSERT_TRUE(result); | |
| 189 ASSERT_TRUE(serialized_); | |
| 190 } | |
| 191 | |
| 192 private: | |
| 193 // Map frame_url to corresponding serialized_content. | |
| 194 typedef base::hash_map<std::string, std::string> SerializedFrameContentMap; | |
| 195 SerializedFrameContentMap serialized_frame_map_; | |
| 196 // Map frame_url to corresponding status of serialization finish. | |
| 197 typedef base::hash_map<std::string, bool> SerializationFinishStatusMap; | |
| 198 SerializationFinishStatusMap serialization_finish_status_; | |
| 199 // Flag indicates whether the process of serializing DOM is finished or not. | |
| 200 bool serialized_; | |
| 201 // The links_ contain dummy original URLs of all saved links. | |
| 202 WebVector<WebURL> links_; | |
| 203 // The local_paths_ contain dummy corresponding local file paths of all saved | |
| 204 // links, which matched links_ one by one. | |
| 205 WebVector<WebString> local_paths_; | |
| 206 // The local_directory_name_ is dummy relative path of directory which | |
| 207 // contain all saved auxiliary files included all sub frames and resources. | |
| 208 const base::FilePath local_directory_name_; | |
| 209 | |
| 210 protected: | |
| 211 // testing::Test | |
| 212 virtual void SetUp() { | |
| 213 TestShellTest::SetUp(); | |
| 214 serialized_ = false; | |
| 215 } | |
| 216 | |
| 217 virtual void TearDown() { | |
| 218 TestShellTest::TearDown(); | |
| 219 } | |
| 220 }; | |
| 221 | |
| 222 // Helper function that test whether the first node in the doc is a doc type | |
| 223 // node. | |
| 224 bool HasDocType(const WebDocument& doc) { | |
| 225 WebNode node = doc.firstChild(); | |
| 226 if (node.isNull()) | |
| 227 return false; | |
| 228 return node.nodeType() == WebNode::DocumentTypeNode; | |
| 229 } | |
| 230 | |
| 231 // Helper function for checking whether input node is META tag. Return true | |
| 232 // means it is META element, otherwise return false. The parameter charset_info | |
| 233 // return actual charset info if the META tag has charset declaration. | |
| 234 bool IsMetaElement(const WebNode& node, std::string& charset_info) { | |
| 235 if (!node.isElementNode()) | |
| 236 return false; | |
| 237 const WebElement meta = node.toConst<WebElement>(); | |
| 238 if (!meta.hasTagName("meta")) | |
| 239 return false; | |
| 240 charset_info.erase(0, charset_info.length()); | |
| 241 // Check the META charset declaration. | |
| 242 WebString httpEquiv = meta.getAttribute("http-equiv"); | |
| 243 if (LowerCaseEqualsASCII(httpEquiv, "content-type")) { | |
| 244 std::string content = meta.getAttribute("content").utf8(); | |
| 245 int pos = content.find("charset", 0); | |
| 246 if (pos > -1) { | |
| 247 // Add a dummy charset declaration to charset_info, which indicates this | |
| 248 // META tag has charset declaration although we do not get correct value | |
| 249 // yet. | |
| 250 charset_info.append("has-charset-declaration"); | |
| 251 int remaining_length = content.length() - pos - 7; | |
| 252 if (!remaining_length) | |
| 253 return true; | |
| 254 int start_pos = pos + 7; | |
| 255 // Find "=" symbol. | |
| 256 while (remaining_length--) | |
| 257 if (content[start_pos++] == L'=') | |
| 258 break; | |
| 259 // Skip beginning space. | |
| 260 while (remaining_length) { | |
| 261 if (content[start_pos] > 0x0020) | |
| 262 break; | |
| 263 ++start_pos; | |
| 264 --remaining_length; | |
| 265 } | |
| 266 if (!remaining_length) | |
| 267 return true; | |
| 268 int end_pos = start_pos; | |
| 269 // Now we find out the start point of charset info. Search the end point. | |
| 270 while (remaining_length--) { | |
| 271 if (content[end_pos] <= 0x0020 || content[end_pos] == L';') | |
| 272 break; | |
| 273 ++end_pos; | |
| 274 } | |
| 275 // Get actual charset info. | |
| 276 charset_info = content.substr(start_pos, end_pos - start_pos); | |
| 277 return true; | |
| 278 } | |
| 279 } | |
| 280 return true; | |
| 281 } | |
| 282 | |
| 283 // If original contents have document type, the serialized contents also have | |
| 284 // document type. | |
| 285 TEST_F(DomSerializerTests, SerializeHTMLDOMWithDocType) { | |
| 286 base::FilePath page_file_path = data_dir_; | |
| 287 page_file_path = page_file_path.AppendASCII("dom_serializer"); | |
| 288 page_file_path = page_file_path.AppendASCII("youtube_1.htm"); | |
| 289 GURL file_url = net::FilePathToFileURL(page_file_path); | |
| 290 ASSERT_TRUE(file_url.SchemeIsFile()); | |
| 291 // Load the test file. | |
| 292 LoadPageFromURL(file_url); | |
| 293 // Make sure original contents have document type. | |
| 294 WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); | |
| 295 ASSERT_TRUE(web_frame != NULL); | |
| 296 WebDocument doc = web_frame->document(); | |
| 297 ASSERT_TRUE(HasDocType(doc)); | |
| 298 // Do serialization. | |
| 299 SerializeDomForURL(file_url, false); | |
| 300 // Load the serialized contents. | |
| 301 ASSERT_TRUE(HasSerializedFrame(file_url)); | |
| 302 const std::string& serialized_contents = | |
| 303 GetSerializedContentForFrame(file_url); | |
| 304 LoadContents(serialized_contents, file_url, | |
| 305 web_frame->document().encoding()); | |
| 306 // Make sure serialized contents still have document type. | |
| 307 web_frame = test_shell_->webView()->mainFrame(); | |
| 308 doc = web_frame->document(); | |
| 309 ASSERT_TRUE(HasDocType(doc)); | |
| 310 } | |
| 311 | |
| 312 // If original contents do not have document type, the serialized contents | |
| 313 // also do not have document type. | |
| 314 TEST_F(DomSerializerTests, SerializeHTMLDOMWithoutDocType) { | |
| 315 base::FilePath page_file_path = data_dir_; | |
| 316 page_file_path = page_file_path.AppendASCII("dom_serializer"); | |
| 317 page_file_path = page_file_path.AppendASCII("youtube_2.htm"); | |
| 318 GURL file_url = net::FilePathToFileURL(page_file_path); | |
| 319 ASSERT_TRUE(file_url.SchemeIsFile()); | |
| 320 // Load the test file. | |
| 321 LoadPageFromURL(file_url); | |
| 322 // Make sure original contents do not have document type. | |
| 323 WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); | |
| 324 ASSERT_TRUE(web_frame != NULL); | |
| 325 WebDocument doc = web_frame->document(); | |
| 326 ASSERT_TRUE(!HasDocType(doc)); | |
| 327 // Do serialization. | |
| 328 SerializeDomForURL(file_url, false); | |
| 329 // Load the serialized contents. | |
| 330 ASSERT_TRUE(HasSerializedFrame(file_url)); | |
| 331 const std::string& serialized_contents = | |
| 332 GetSerializedContentForFrame(file_url); | |
| 333 LoadContents(serialized_contents, file_url, | |
| 334 web_frame->document().encoding()); | |
| 335 // Make sure serialized contents do not have document type. | |
| 336 web_frame = test_shell_->webView()->mainFrame(); | |
| 337 doc = web_frame->document(); | |
| 338 ASSERT_TRUE(!HasDocType(doc)); | |
| 339 } | |
| 340 | |
| 341 // Serialize XML document which has all 5 built-in entities. After | |
| 342 // finishing serialization, the serialized contents should be same | |
| 343 // with original XML document. | |
| 344 TEST_F(DomSerializerTests, SerializeXMLDocWithBuiltInEntities) { | |
| 345 base::FilePath page_file_path = data_dir_; | |
| 346 page_file_path = page_file_path.AppendASCII("dom_serializer"); | |
| 347 page_file_path = page_file_path.AppendASCII("note.html"); | |
| 348 base::FilePath xml_file_path = data_dir_; | |
| 349 xml_file_path = xml_file_path.AppendASCII("dom_serializer"); | |
| 350 xml_file_path = xml_file_path.AppendASCII("note.xml"); | |
| 351 // Read original contents for later comparison. | |
| 352 std::string original_contents; | |
| 353 ASSERT_TRUE(file_util::ReadFileToString(xml_file_path, &original_contents)); | |
| 354 // Get file URL. | |
| 355 GURL file_url = net::FilePathToFileURL(page_file_path); | |
| 356 GURL xml_file_url = net::FilePathToFileURL(xml_file_path); | |
| 357 ASSERT_TRUE(file_url.SchemeIsFile()); | |
| 358 // Load the test file. | |
| 359 LoadPageFromURL(file_url); | |
| 360 // Do serialization. | |
| 361 SerializeDomForURL(xml_file_url, false); | |
| 362 // Compare the serialized contents with original contents. | |
| 363 ASSERT_TRUE(HasSerializedFrame(xml_file_url)); | |
| 364 const std::string& serialized_contents = | |
| 365 GetSerializedContentForFrame(xml_file_url); | |
| 366 ASSERT_EQ(original_contents, serialized_contents); | |
| 367 } | |
| 368 | |
| 369 // When serializing DOM, we add MOTW declaration before html tag. | |
| 370 TEST_F(DomSerializerTests, SerializeHTMLDOMWithAddingMOTW) { | |
| 371 base::FilePath page_file_path = data_dir_; | |
| 372 page_file_path = page_file_path.AppendASCII("dom_serializer"); | |
| 373 page_file_path = page_file_path.AppendASCII("youtube_2.htm"); | |
| 374 // Read original contents for later comparison . | |
| 375 std::string original_contents; | |
| 376 ASSERT_TRUE(file_util::ReadFileToString(page_file_path, &original_contents)); | |
| 377 // Get file URL. | |
| 378 GURL file_url = net::FilePathToFileURL(page_file_path); | |
| 379 ASSERT_TRUE(file_url.SchemeIsFile()); | |
| 380 // Make sure original contents does not have MOTW; | |
| 381 std::string motw_declaration = | |
| 382 WebPageSerializer::generateMarkOfTheWebDeclaration(file_url).utf8(); | |
| 383 ASSERT_FALSE(motw_declaration.empty()); | |
| 384 // The encoding of original contents is ISO-8859-1, so we convert the MOTW | |
| 385 // declaration to ASCII and search whether original contents has it or not. | |
| 386 ASSERT_TRUE(std::string::npos == | |
| 387 original_contents.find(motw_declaration)); | |
| 388 // Load the test file. | |
| 389 LoadPageFromURL(file_url); | |
| 390 // Do serialization. | |
| 391 SerializeDomForURL(file_url, false); | |
| 392 // Make sure the serialized contents have MOTW ; | |
| 393 ASSERT_TRUE(HasSerializedFrame(file_url)); | |
| 394 const std::string& serialized_contents = | |
| 395 GetSerializedContentForFrame(file_url); | |
| 396 ASSERT_FALSE(std::string::npos == | |
| 397 serialized_contents.find(motw_declaration)); | |
| 398 } | |
| 399 | |
| 400 // When serializing DOM, we will add the META which have correct charset | |
| 401 // declaration as first child of HEAD element for resolving WebKit bug: | |
| 402 // http://bugs.webkit.org/show_bug.cgi?id=16621 even the original document | |
| 403 // does not have META charset declaration. | |
| 404 TEST_F(DomSerializerTests, SerializeHTMLDOMWithNoMetaCharsetInOriginalDoc) { | |
| 405 base::FilePath page_file_path = data_dir_; | |
| 406 page_file_path = page_file_path.AppendASCII("dom_serializer"); | |
| 407 page_file_path = page_file_path.AppendASCII("youtube_1.htm"); | |
| 408 // Get file URL. | |
| 409 GURL file_url = net::FilePathToFileURL(page_file_path); | |
| 410 ASSERT_TRUE(file_url.SchemeIsFile()); | |
| 411 // Load the test file. | |
| 412 LoadPageFromURL(file_url); | |
| 413 | |
| 414 // Make sure there is no META charset declaration in original document. | |
| 415 WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); | |
| 416 ASSERT_TRUE(web_frame != NULL); | |
| 417 WebDocument doc = web_frame->document(); | |
| 418 ASSERT_TRUE(doc.isHTMLDocument()); | |
| 419 WebElement head_element = doc.head(); | |
| 420 ASSERT_TRUE(!head_element.isNull()); | |
| 421 // Go through all children of HEAD element. | |
| 422 for (WebNode child = head_element.firstChild(); !child.isNull(); | |
| 423 child = child.nextSibling()) { | |
| 424 std::string charset_info; | |
| 425 if (IsMetaElement(child, charset_info)) | |
| 426 ASSERT_TRUE(charset_info.empty()); | |
| 427 } | |
| 428 // Do serialization. | |
| 429 SerializeDomForURL(file_url, false); | |
| 430 | |
| 431 // Load the serialized contents. | |
| 432 ASSERT_TRUE(HasSerializedFrame(file_url)); | |
| 433 const std::string& serialized_contents = | |
| 434 GetSerializedContentForFrame(file_url); | |
| 435 LoadContents(serialized_contents, file_url, | |
| 436 web_frame->document().encoding()); | |
| 437 // Make sure the first child of HEAD element is META which has charset | |
| 438 // declaration in serialized contents. | |
| 439 web_frame = test_shell_->webView()->mainFrame(); | |
| 440 ASSERT_TRUE(web_frame != NULL); | |
| 441 doc = web_frame->document(); | |
| 442 ASSERT_TRUE(doc.isHTMLDocument()); | |
| 443 head_element = doc.head(); | |
| 444 ASSERT_TRUE(!head_element.isNull()); | |
| 445 WebNode meta_node = head_element.firstChild(); | |
| 446 ASSERT_TRUE(!meta_node.isNull()); | |
| 447 // Get meta charset info. | |
| 448 std::string charset_info2; | |
| 449 ASSERT_TRUE(IsMetaElement(meta_node, charset_info2)); | |
| 450 ASSERT_TRUE(!charset_info2.empty()); | |
| 451 ASSERT_EQ(charset_info2, | |
| 452 std::string(web_frame->document().encoding().utf8())); | |
| 453 | |
| 454 // Make sure no more additional META tags which have charset declaration. | |
| 455 for (WebNode child = meta_node.nextSibling(); !child.isNull(); | |
| 456 child = child.nextSibling()) { | |
| 457 std::string charset_info; | |
| 458 if (IsMetaElement(child, charset_info)) | |
| 459 ASSERT_TRUE(charset_info.empty()); | |
| 460 } | |
| 461 } | |
| 462 | |
| 463 // When serializing DOM, if the original document has multiple META charset | |
| 464 // declaration, we will add the META which have correct charset declaration | |
| 465 // as first child of HEAD element and remove all original META charset | |
| 466 // declarations. | |
| 467 TEST_F(DomSerializerTests, | |
| 468 SerializeHTMLDOMWithMultipleMetaCharsetInOriginalDoc) { | |
| 469 base::FilePath page_file_path = data_dir_; | |
| 470 page_file_path = page_file_path.AppendASCII("dom_serializer"); | |
| 471 page_file_path = page_file_path.AppendASCII("youtube_2.htm"); | |
| 472 // Get file URL. | |
| 473 GURL file_url = net::FilePathToFileURL(page_file_path); | |
| 474 ASSERT_TRUE(file_url.SchemeIsFile()); | |
| 475 // Load the test file. | |
| 476 LoadPageFromURL(file_url); | |
| 477 | |
| 478 // Make sure there are multiple META charset declarations in original | |
| 479 // document. | |
| 480 WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); | |
| 481 ASSERT_TRUE(web_frame != NULL); | |
| 482 WebDocument doc = web_frame->document(); | |
| 483 ASSERT_TRUE(doc.isHTMLDocument()); | |
| 484 WebElement head_ele = doc.head(); | |
| 485 ASSERT_TRUE(!head_ele.isNull()); | |
| 486 // Go through all children of HEAD element. | |
| 487 int charset_declaration_count = 0; | |
| 488 for (WebNode child = head_ele.firstChild(); !child.isNull(); | |
| 489 child = child.nextSibling()) { | |
| 490 std::string charset_info; | |
| 491 if (IsMetaElement(child, charset_info) && !charset_info.empty()) | |
| 492 charset_declaration_count++; | |
| 493 } | |
| 494 // The original doc has more than META tags which have charset declaration. | |
| 495 ASSERT_TRUE(charset_declaration_count > 1); | |
| 496 | |
| 497 // Do serialization. | |
| 498 SerializeDomForURL(file_url, false); | |
| 499 | |
| 500 // Load the serialized contents. | |
| 501 ASSERT_TRUE(HasSerializedFrame(file_url)); | |
| 502 const std::string& serialized_contents = | |
| 503 GetSerializedContentForFrame(file_url); | |
| 504 LoadContents(serialized_contents, file_url, | |
| 505 web_frame->document().encoding()); | |
| 506 // Make sure only first child of HEAD element is META which has charset | |
| 507 // declaration in serialized contents. | |
| 508 web_frame = test_shell_->webView()->mainFrame(); | |
| 509 ASSERT_TRUE(web_frame != NULL); | |
| 510 doc = web_frame->document(); | |
| 511 ASSERT_TRUE(doc.isHTMLDocument()); | |
| 512 head_ele = doc.head(); | |
| 513 ASSERT_TRUE(!head_ele.isNull()); | |
| 514 WebNode meta_node = head_ele.firstChild(); | |
| 515 ASSERT_TRUE(!meta_node.isNull()); | |
| 516 // Get meta charset info. | |
| 517 std::string charset_info2; | |
| 518 ASSERT_TRUE(IsMetaElement(meta_node, charset_info2)); | |
| 519 ASSERT_TRUE(!charset_info2.empty()); | |
| 520 ASSERT_EQ(charset_info2, | |
| 521 std::string(web_frame->document().encoding().utf8())); | |
| 522 | |
| 523 // Make sure no more additional META tags which have charset declaration. | |
| 524 for (WebNode child = meta_node.nextSibling(); !child.isNull(); | |
| 525 child = child.nextSibling()) { | |
| 526 std::string charset_info; | |
| 527 if (IsMetaElement(child, charset_info)) | |
| 528 ASSERT_TRUE(charset_info.empty()); | |
| 529 } | |
| 530 } | |
| 531 | |
| 532 // Test situation of html entities in text when serializing HTML DOM. | |
| 533 TEST_F(DomSerializerTests, SerializeHTMLDOMWithEntitiesInText) { | |
| 534 base::FilePath page_file_path = data_dir_; | |
| 535 page_file_path = page_file_path.AppendASCII( | |
| 536 "dom_serializer/htmlentities_in_text.htm"); | |
| 537 // Get file URL. The URL is dummy URL to identify the following loading | |
| 538 // actions. The test content is in constant:original_contents. | |
| 539 GURL file_url = net::FilePathToFileURL(page_file_path); | |
| 540 ASSERT_TRUE(file_url.SchemeIsFile()); | |
| 541 // Test contents. | |
| 542 static const char* const original_contents = | |
| 543 "<html><body>&<>\"\'</body></html>"; | |
| 544 // Load the test contents. | |
| 545 LoadContents(original_contents, file_url, WebString()); | |
| 546 | |
| 547 // Get BODY's text content in DOM. | |
| 548 WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); | |
| 549 ASSERT_TRUE(web_frame != NULL); | |
| 550 WebDocument doc = web_frame->document(); | |
| 551 ASSERT_TRUE(doc.isHTMLDocument()); | |
| 552 WebElement body_ele = doc.body(); | |
| 553 ASSERT_TRUE(!body_ele.isNull()); | |
| 554 WebNode text_node = body_ele.firstChild(); | |
| 555 ASSERT_TRUE(text_node.isTextNode()); | |
| 556 ASSERT_TRUE(std::string(text_node.createMarkup().utf8()) == | |
| 557 "&<>\"\'"); | |
| 558 // Do serialization. | |
| 559 SerializeDomForURL(file_url, false); | |
| 560 // Compare the serialized contents with original contents. | |
| 561 ASSERT_TRUE(HasSerializedFrame(file_url)); | |
| 562 const std::string& serialized_contents = | |
| 563 GetSerializedContentForFrame(file_url); | |
| 564 // Compare the serialized contents with original contents to make sure | |
| 565 // they are same. | |
| 566 // Because we add MOTW when serializing DOM, so before comparison, we also | |
| 567 // need to add MOTW to original_contents. | |
| 568 std::string original_str = | |
| 569 WebPageSerializer::generateMarkOfTheWebDeclaration(file_url).utf8(); | |
| 570 original_str += original_contents; | |
| 571 // Since WebCore now inserts a new HEAD element if there is no HEAD element | |
| 572 // when creating BODY element. (Please see HTMLParser::bodyCreateErrorCheck.) | |
| 573 // We need to append the HEAD content and corresponding META content if we | |
| 574 // find WebCore-generated HEAD element. | |
| 575 if (!doc.head().isNull()) { | |
| 576 WebString encoding = web_frame->document().encoding(); | |
| 577 std::string htmlTag("<html>"); | |
| 578 std::string::size_type pos = original_str.find(htmlTag); | |
| 579 ASSERT_NE(std::string::npos, pos); | |
| 580 pos += htmlTag.length(); | |
| 581 std::string head_part("<head>"); | |
| 582 head_part += | |
| 583 WebPageSerializer::generateMetaCharsetDeclaration(encoding).utf8(); | |
| 584 head_part += "</head>"; | |
| 585 original_str.insert(pos, head_part); | |
| 586 } | |
| 587 ASSERT_EQ(original_str, serialized_contents); | |
| 588 } | |
| 589 | |
| 590 // Test situation of html entities in attribute value when serializing | |
| 591 // HTML DOM. | |
| 592 // This test started to fail at WebKit r65388. See http://crbug.com/52279. | |
| 593 TEST_F(DomSerializerTests, SerializeHTMLDOMWithEntitiesInAttributeValue) { | |
| 594 base::FilePath page_file_path = data_dir_; | |
| 595 page_file_path = page_file_path.AppendASCII( | |
| 596 "dom_serializer/htmlentities_in_attribute_value.htm"); | |
| 597 // Get file URL. The URL is dummy URL to identify the following loading | |
| 598 // actions. The test content is in constant:original_contents. | |
| 599 GURL file_url = net::FilePathToFileURL(page_file_path); | |
| 600 ASSERT_TRUE(file_url.SchemeIsFile()); | |
| 601 // Test contents. | |
| 602 static const char* const original_contents = | |
| 603 "<html><body title=\"&<>"'\"></body></html>"; | |
| 604 // Load the test contents. | |
| 605 LoadContents(original_contents, file_url, WebString()); | |
| 606 // Get value of BODY's title attribute in DOM. | |
| 607 WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); | |
| 608 ASSERT_TRUE(web_frame != NULL); | |
| 609 WebDocument doc = web_frame->document(); | |
| 610 ASSERT_TRUE(doc.isHTMLDocument()); | |
| 611 WebElement body_ele = doc.body(); | |
| 612 ASSERT_TRUE(!body_ele.isNull()); | |
| 613 WebString value = body_ele.getAttribute("title"); | |
| 614 ASSERT_TRUE(std::string(value.utf8()) == "&<>\"\'"); | |
| 615 // Do serialization. | |
| 616 SerializeDomForURL(file_url, false); | |
| 617 // Compare the serialized contents with original contents. | |
| 618 ASSERT_TRUE(HasSerializedFrame(file_url)); | |
| 619 const std::string& serialized_contents = | |
| 620 GetSerializedContentForFrame(file_url); | |
| 621 // Compare the serialized contents with original contents to make sure | |
| 622 // they are same. | |
| 623 std::string original_str = | |
| 624 WebPageSerializer::generateMarkOfTheWebDeclaration(file_url).utf8(); | |
| 625 original_str += original_contents; | |
| 626 if (!doc.isNull()) { | |
| 627 WebString encoding = web_frame->document().encoding(); | |
| 628 std::string htmlTag("<html>"); | |
| 629 std::string::size_type pos = original_str.find(htmlTag); | |
| 630 ASSERT_NE(std::string::npos, pos); | |
| 631 pos += htmlTag.length(); | |
| 632 std::string head_part("<head>"); | |
| 633 head_part += | |
| 634 WebPageSerializer::generateMetaCharsetDeclaration(encoding).utf8(); | |
| 635 head_part += "</head>"; | |
| 636 original_str.insert(pos, head_part); | |
| 637 } | |
| 638 ASSERT_EQ(original_str, serialized_contents); | |
| 639 } | |
| 640 | |
| 641 // Test situation of non-standard HTML entities when serializing HTML DOM. | |
| 642 // This test started to fail at WebKit r65351. See http://crbug.com/52279. | |
| 643 TEST_F(DomSerializerTests, SerializeHTMLDOMWithNonStandardEntities) { | |
| 644 // Make a test file URL and load it. | |
| 645 base::FilePath page_file_path = data_dir_; | |
| 646 page_file_path = page_file_path.AppendASCII("dom_serializer"); | |
| 647 page_file_path = page_file_path.AppendASCII("nonstandard_htmlentities.htm"); | |
| 648 GURL file_url = net::FilePathToFileURL(page_file_path); | |
| 649 LoadPageFromURL(file_url); | |
| 650 | |
| 651 // Get value of BODY's title attribute in DOM. | |
| 652 WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); | |
| 653 WebDocument doc = web_frame->document(); | |
| 654 ASSERT_TRUE(doc.isHTMLDocument()); | |
| 655 WebElement body_element = doc.body(); | |
| 656 // Unescaped string for "%⊅¹'". | |
| 657 static const wchar_t parsed_value[] = { | |
| 658 '%', 0x2285, 0x00b9, '\'', 0 | |
| 659 }; | |
| 660 WebString value = body_element.getAttribute("title"); | |
| 661 ASSERT_TRUE(UTF16ToWide(value) == parsed_value); | |
| 662 ASSERT_TRUE(UTF16ToWide(body_element.innerText()) == parsed_value); | |
| 663 | |
| 664 // Do serialization. | |
| 665 SerializeDomForURL(file_url, false); | |
| 666 // Check the serialized string. | |
| 667 ASSERT_TRUE(HasSerializedFrame(file_url)); | |
| 668 const std::string& serialized_contents = | |
| 669 GetSerializedContentForFrame(file_url); | |
| 670 // Confirm that the serialized string has no non-standard HTML entities. | |
| 671 ASSERT_EQ(std::string::npos, serialized_contents.find("%")); | |
| 672 ASSERT_EQ(std::string::npos, serialized_contents.find("⊅")); | |
| 673 ASSERT_EQ(std::string::npos, serialized_contents.find("¹")); | |
| 674 ASSERT_EQ(std::string::npos, serialized_contents.find("'")); | |
| 675 } | |
| 676 | |
| 677 // Test situation of BASE tag in original document when serializing HTML DOM. | |
| 678 // When serializing, we should comment the BASE tag, append a new BASE tag. | |
| 679 // rewrite all the savable URLs to relative local path, and change other URLs | |
| 680 // to absolute URLs. | |
| 681 TEST_F(DomSerializerTests, SerializeHTMLDOMWithBaseTag) { | |
| 682 // There are total 2 available base tags in this test file. | |
| 683 const int kTotalBaseTagCountInTestFile = 2; | |
| 684 | |
| 685 base::FilePath page_file_path = | |
| 686 data_dir_.AppendASCII("dom_serializer").AsEndingWithSeparator(); | |
| 687 | |
| 688 // Get page dir URL which is base URL of this file. | |
| 689 GURL path_dir_url = net::FilePathToFileURL(page_file_path); | |
| 690 // Get file path. | |
| 691 page_file_path = | |
| 692 page_file_path.AppendASCII("html_doc_has_base_tag.htm"); | |
| 693 // Get file URL. | |
| 694 GURL file_url = net::FilePathToFileURL(page_file_path); | |
| 695 ASSERT_TRUE(file_url.SchemeIsFile()); | |
| 696 // Load the test file. | |
| 697 LoadPageFromURL(file_url); | |
| 698 // Since for this test, we assume there is no savable sub-resource links for | |
| 699 // this test file, also all links are relative URLs in this test file, so we | |
| 700 // need to check those relative URLs and make sure document has BASE tag. | |
| 701 WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); | |
| 702 ASSERT_TRUE(web_frame != NULL); | |
| 703 WebDocument doc = web_frame->document(); | |
| 704 ASSERT_TRUE(doc.isHTMLDocument()); | |
| 705 // Go through all descent nodes. | |
| 706 WebNodeCollection all = doc.all(); | |
| 707 int original_base_tag_count = 0; | |
| 708 for (WebNode node = all.firstItem(); !node.isNull(); | |
| 709 node = all.nextItem()) { | |
| 710 if (!node.isElementNode()) | |
| 711 continue; | |
| 712 WebElement element = node.to<WebElement>(); | |
| 713 if (element.hasTagName("base")) { | |
| 714 original_base_tag_count++; | |
| 715 } else { | |
| 716 // Get link. | |
| 717 WebString value = | |
| 718 webkit_glue::GetSubResourceLinkFromElement(element); | |
| 719 if (value.isNull() && element.hasTagName("a")) { | |
| 720 value = element.getAttribute("href"); | |
| 721 if (value.isEmpty()) | |
| 722 value = WebString(); | |
| 723 } | |
| 724 // Each link is relative link. | |
| 725 if (!value.isNull()) { | |
| 726 GURL link(value.utf8()); | |
| 727 ASSERT_TRUE(link.scheme().empty()); | |
| 728 } | |
| 729 } | |
| 730 } | |
| 731 ASSERT_EQ(original_base_tag_count, kTotalBaseTagCountInTestFile); | |
| 732 // Make sure in original document, the base URL is not equal with the | |
| 733 // |path_dir_url|. | |
| 734 GURL original_base_url(doc.baseURL()); | |
| 735 ASSERT_NE(original_base_url, path_dir_url); | |
| 736 | |
| 737 // Do serialization. | |
| 738 SerializeDomForURL(file_url, false); | |
| 739 | |
| 740 // Load the serialized contents. | |
| 741 ASSERT_TRUE(HasSerializedFrame(file_url)); | |
| 742 const std::string& serialized_contents = | |
| 743 GetSerializedContentForFrame(file_url); | |
| 744 LoadContents(serialized_contents, file_url, | |
| 745 web_frame->document().encoding()); | |
| 746 | |
| 747 // Make sure all links are absolute URLs and doc there are some number of | |
| 748 // BASE tags in serialized HTML data. Each of those BASE tags have same base | |
| 749 // URL which is as same as URL of current test file. | |
| 750 web_frame = test_shell_->webView()->mainFrame(); | |
| 751 ASSERT_TRUE(web_frame != NULL); | |
| 752 doc = web_frame->document(); | |
| 753 ASSERT_TRUE(doc.isHTMLDocument()); | |
| 754 // Go through all descent nodes. | |
| 755 all = doc.all(); | |
| 756 int new_base_tag_count = 0; | |
| 757 for (WebNode node = all.firstItem(); !node.isNull(); | |
| 758 node = all.nextItem()) { | |
| 759 if (!node.isElementNode()) | |
| 760 continue; | |
| 761 WebElement element = node.to<WebElement>(); | |
| 762 if (element.hasTagName("base")) { | |
| 763 new_base_tag_count++; | |
| 764 } else { | |
| 765 // Get link. | |
| 766 WebString value = | |
| 767 webkit_glue::GetSubResourceLinkFromElement(element); | |
| 768 if (value.isNull() && element.hasTagName("a")) { | |
| 769 value = element.getAttribute("href"); | |
| 770 if (value.isEmpty()) | |
| 771 value = WebString(); | |
| 772 } | |
| 773 // Each link is absolute link. | |
| 774 if (!value.isNull()) { | |
| 775 GURL link(std::string(value.utf8())); | |
| 776 ASSERT_FALSE(link.scheme().empty()); | |
| 777 } | |
| 778 } | |
| 779 } | |
| 780 // We have one more added BASE tag which is generated by JavaScript. | |
| 781 ASSERT_EQ(new_base_tag_count, original_base_tag_count + 1); | |
| 782 // Make sure in new document, the base URL is equal with the |path_dir_url|. | |
| 783 GURL new_base_url(doc.baseURL()); | |
| 784 ASSERT_EQ(new_base_url, path_dir_url); | |
| 785 } | |
| 786 | |
| 787 // Serializing page which has an empty HEAD tag. | |
| 788 TEST_F(DomSerializerTests, SerializeHTMLDOMWithEmptyHead) { | |
| 789 base::FilePath page_file_path = data_dir_; | |
| 790 page_file_path = page_file_path.AppendASCII("dom_serializer"); | |
| 791 page_file_path = page_file_path.AppendASCII("empty_head.htm"); | |
| 792 GURL file_url = net::FilePathToFileURL(page_file_path); | |
| 793 ASSERT_TRUE(file_url.SchemeIsFile()); | |
| 794 | |
| 795 // Load the test html content. | |
| 796 static const char* const empty_head_contents = | |
| 797 "<html><head></head><body>hello world</body></html>"; | |
| 798 LoadContents(empty_head_contents, file_url, WebString()); | |
| 799 | |
| 800 // Make sure the head tag is empty. | |
| 801 WebFrame* web_frame = test_shell_->webView()->mainFrame(); | |
| 802 ASSERT_TRUE(web_frame != NULL); | |
| 803 WebDocument doc = web_frame->document(); | |
| 804 ASSERT_TRUE(doc.isHTMLDocument()); | |
| 805 WebElement head_element = doc.head(); | |
| 806 ASSERT_TRUE(!head_element.isNull()); | |
| 807 ASSERT_TRUE(!head_element.hasChildNodes()); | |
| 808 ASSERT_TRUE(head_element.childNodes().length() == 0); | |
| 809 | |
| 810 // Do serialization. | |
| 811 SerializeDomForURL(file_url, false); | |
| 812 // Make sure the serialized contents have META ; | |
| 813 ASSERT_TRUE(HasSerializedFrame(file_url)); | |
| 814 const std::string& serialized_contents = | |
| 815 GetSerializedContentForFrame(file_url); | |
| 816 | |
| 817 // Reload serialized contents and make sure there is only one META tag. | |
| 818 LoadContents(serialized_contents, file_url, web_frame->document().encoding()); | |
| 819 web_frame = test_shell_->webView()->mainFrame(); | |
| 820 ASSERT_TRUE(web_frame != NULL); | |
| 821 doc = web_frame->document(); | |
| 822 ASSERT_TRUE(doc.isHTMLDocument()); | |
| 823 head_element = doc.head(); | |
| 824 ASSERT_TRUE(!head_element.isNull()); | |
| 825 ASSERT_TRUE(head_element.hasChildNodes()); | |
| 826 ASSERT_TRUE(head_element.childNodes().length() == 1); | |
| 827 WebNode meta_node = head_element.firstChild(); | |
| 828 ASSERT_TRUE(!meta_node.isNull()); | |
| 829 // Get meta charset info. | |
| 830 std::string charset_info; | |
| 831 ASSERT_TRUE(IsMetaElement(meta_node, charset_info)); | |
| 832 ASSERT_TRUE(!charset_info.empty()); | |
| 833 ASSERT_EQ(charset_info, | |
| 834 std::string(web_frame->document().encoding().utf8())); | |
| 835 | |
| 836 // Check the body's first node is text node and its contents are | |
| 837 // "hello world" | |
| 838 WebElement body_element = doc.body(); | |
| 839 ASSERT_TRUE(!body_element.isNull()); | |
| 840 WebNode text_node = body_element.firstChild(); | |
| 841 ASSERT_TRUE(text_node.isTextNode()); | |
| 842 WebString text_node_contents = text_node.nodeValue(); | |
| 843 ASSERT_TRUE(std::string(text_node_contents.utf8()) == "hello world"); | |
| 844 } | |
| 845 | |
| 846 // Test that we don't crash when the page contains an iframe that | |
| 847 // was handled as a download (http://crbug.com/42212). | |
| 848 TEST_F(DomSerializerTests, SerializeDocumentWithDownloadedIFrame) { | |
| 849 base::FilePath page_file_path = data_dir_; | |
| 850 page_file_path = page_file_path.AppendASCII("dom_serializer"); | |
| 851 page_file_path = page_file_path.AppendASCII("iframe-src-is-exe.htm"); | |
| 852 GURL file_url = net::FilePathToFileURL(page_file_path); | |
| 853 ASSERT_TRUE(file_url.SchemeIsFile()); | |
| 854 // Load the test file. | |
| 855 LoadPageFromURL(file_url); | |
| 856 // Do a recursive serialization. We pass if we don't crash. | |
| 857 SerializeDomForURL(file_url, true); | |
| 858 } | |
| 859 | |
| 860 TEST_F(DomSerializerTests, SubResourceForElementsInNonHTMLNamespace) { | |
| 861 base::FilePath page_file_path = data_dir_; | |
| 862 page_file_path = page_file_path.AppendASCII("dom_serializer"); | |
| 863 page_file_path = page_file_path.AppendASCII("non_html_namespace.htm"); | |
| 864 GURL file_url = net::FilePathToFileURL(page_file_path); | |
| 865 LoadPageFromURL(file_url); | |
| 866 WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); | |
| 867 ASSERT_TRUE(web_frame != NULL); | |
| 868 WebDocument doc = web_frame->document(); | |
| 869 WebNode lastNodeInBody = doc.body().lastChild(); | |
| 870 ASSERT_EQ(WebNode::ElementNode, lastNodeInBody.nodeType()); | |
| 871 WebString uri = webkit_glue::GetSubResourceLinkFromElement( | |
| 872 lastNodeInBody.to<WebElement>()); | |
| 873 EXPECT_TRUE(uri.isNull()); | |
| 874 } | |
| 875 | |
| 876 } // namespace | |
| OLD | NEW |