OLD | NEW |
| (Empty) |
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "base/compiler_specific.h" | |
6 #include "base/file_util.h" | |
7 #include "base/files/file_path.h" | |
8 #include "base/hash_tables.h" | |
9 #include "base/string_util.h" | |
10 #include "base/utf_string_conversions.h" | |
11 #include "net/base/net_util.h" | |
12 #include "net/url_request/url_request_context.h" | |
13 #include "third_party/WebKit/Source/Platform/chromium/public/WebCString.h" | |
14 #include "third_party/WebKit/Source/Platform/chromium/public/WebData.h" | |
15 #include "third_party/WebKit/Source/Platform/chromium/public/WebString.h" | |
16 #include "third_party/WebKit/Source/Platform/chromium/public/WebURL.h" | |
17 #include "third_party/WebKit/Source/Platform/chromium/public/WebVector.h" | |
18 #include "third_party/WebKit/Source/WebKit/chromium/public/WebDocument.h" | |
19 #include "third_party/WebKit/Source/WebKit/chromium/public/WebElement.h" | |
20 #include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h" | |
21 #include "third_party/WebKit/Source/WebKit/chromium/public/WebNode.h" | |
22 #include "third_party/WebKit/Source/WebKit/chromium/public/WebNodeCollection.h" | |
23 #include "third_party/WebKit/Source/WebKit/chromium/public/WebNodeList.h" | |
24 #include "third_party/WebKit/Source/WebKit/chromium/public/WebPageSerializer.h" | |
25 #include "third_party/WebKit/Source/WebKit/chromium/public/WebPageSerializerClie
nt.h" | |
26 #include "third_party/WebKit/Source/WebKit/chromium/public/WebView.h" | |
27 #include "webkit/base/file_path_string_conversions.h" | |
28 #include "webkit/glue/dom_operations.h" | |
29 #include "webkit/tools/test_shell/simple_resource_loader_bridge.h" | |
30 #include "webkit/tools/test_shell/test_shell_test.h" | |
31 | |
32 using WebKit::WebCString; | |
33 using WebKit::WebData; | |
34 using WebKit::WebDocument; | |
35 using WebKit::WebElement; | |
36 using WebKit::WebFrame; | |
37 using WebKit::WebNode; | |
38 using WebKit::WebNodeCollection; | |
39 using WebKit::WebNodeList; | |
40 using WebKit::WebPageSerializer; | |
41 using WebKit::WebPageSerializerClient; | |
42 using WebKit::WebNode; | |
43 using WebKit::WebString; | |
44 using WebKit::WebURL; | |
45 using WebKit::WebView; | |
46 using WebKit::WebVector; | |
47 | |
48 namespace { | |
49 | |
50 // Iterate recursively over sub-frames to find one with with a given url. | |
51 WebFrame* FindSubFrameByURL(WebView* web_view, const GURL& url) { | |
52 if (!web_view->mainFrame()) | |
53 return NULL; | |
54 | |
55 std::vector<WebFrame*> stack; | |
56 stack.push_back(web_view->mainFrame()); | |
57 | |
58 while (!stack.empty()) { | |
59 WebFrame* current_frame = stack.back(); | |
60 stack.pop_back(); | |
61 if (GURL(current_frame->document().url()) == url) | |
62 return current_frame; | |
63 WebNodeCollection all = current_frame->document().all(); | |
64 for (WebNode node = all.firstItem(); | |
65 !node.isNull(); node = all.nextItem()) { | |
66 if (!node.isElementNode()) | |
67 continue; | |
68 // Check frame tag and iframe tag | |
69 WebElement element = node.to<WebElement>(); | |
70 if (!element.hasTagName("frame") && !element.hasTagName("iframe")) | |
71 continue; | |
72 WebFrame* sub_frame = WebFrame::fromFrameOwnerElement(element); | |
73 if (sub_frame) | |
74 stack.push_back(sub_frame); | |
75 } | |
76 } | |
77 return NULL; | |
78 } | |
79 | |
80 class DomSerializerTests : public TestShellTest, | |
81 public WebPageSerializerClient { | |
82 public: | |
83 DomSerializerTests() | |
84 : local_directory_name_(FILE_PATH_LITERAL("./dummy_files/")) { } | |
85 | |
86 // DomSerializerDelegate. | |
87 virtual void didSerializeDataForFrame(const WebURL& frame_web_url, | |
88 const WebCString& data, | |
89 PageSerializationStatus status) { | |
90 | |
91 GURL frame_url(frame_web_url); | |
92 // If the all frames are finished saving, check all finish status | |
93 if (status == WebPageSerializerClient::AllFramesAreFinished) { | |
94 SerializationFinishStatusMap::iterator it = | |
95 serialization_finish_status_.begin(); | |
96 for (; it != serialization_finish_status_.end(); ++it) | |
97 ASSERT_TRUE(it->second); | |
98 serialized_ = true; | |
99 return; | |
100 } | |
101 | |
102 // Check finish status of current frame. | |
103 SerializationFinishStatusMap::iterator it = | |
104 serialization_finish_status_.find(frame_url.spec()); | |
105 // New frame, set initial status as false. | |
106 if (it == serialization_finish_status_.end()) | |
107 serialization_finish_status_[frame_url.spec()] = false; | |
108 | |
109 it = serialization_finish_status_.find(frame_url.spec()); | |
110 ASSERT_TRUE(it != serialization_finish_status_.end()); | |
111 // In process frame, finish status should be false. | |
112 ASSERT_FALSE(it->second); | |
113 | |
114 // Add data to corresponding frame's content. | |
115 serialized_frame_map_[frame_url.spec()] += data.data(); | |
116 | |
117 // Current frame is completed saving, change the finish status. | |
118 if (status == WebPageSerializerClient::CurrentFrameIsFinished) | |
119 it->second = true; | |
120 } | |
121 | |
122 bool HasSerializedFrame(const GURL& frame_url) { | |
123 return serialized_frame_map_.find(frame_url.spec()) != | |
124 serialized_frame_map_.end(); | |
125 } | |
126 | |
127 const std::string& GetSerializedContentForFrame( | |
128 const GURL& frame_url) { | |
129 return serialized_frame_map_[frame_url.spec()]; | |
130 } | |
131 | |
132 // Load web page according to specific URL. | |
133 void LoadPageFromURL(const GURL& page_url) { | |
134 // Load the test file. | |
135 test_shell_->ResetTestController(); | |
136 test_shell_->LoadURL(page_url); | |
137 test_shell_->WaitTestFinished(); | |
138 } | |
139 | |
140 // Load web page according to input content and relative URLs within | |
141 // the document. | |
142 void LoadContents(const std::string& contents, | |
143 const GURL& base_url, | |
144 const WebString encoding_info) { | |
145 test_shell_->ResetTestController(); | |
146 // If input encoding is empty, use UTF-8 as default encoding. | |
147 if (encoding_info.isEmpty()) { | |
148 test_shell_->webView()->mainFrame()->loadHTMLString(contents, base_url); | |
149 } else { | |
150 WebData data(contents.data(), contents.length()); | |
151 | |
152 // Do not use WebFrame.LoadHTMLString because it assumes that input | |
153 // html contents use UTF-8 encoding. | |
154 // TODO(darin): This should use WebFrame::loadData. | |
155 WebFrame* web_frame = | |
156 test_shell_->webView()->mainFrame(); | |
157 | |
158 ASSERT_TRUE(web_frame != NULL); | |
159 | |
160 web_frame->loadData(data, "text/html", encoding_info, base_url); | |
161 } | |
162 | |
163 test_shell_->WaitTestFinished(); | |
164 } | |
165 | |
166 // Serialize page DOM according to specific page URL. The parameter | |
167 // recursive_serialization indicates whether we will serialize all | |
168 // sub-frames. | |
169 void SerializeDomForURL(const GURL& page_url, | |
170 bool recursive_serialization) { | |
171 // Find corresponding WebFrame according to page_url. | |
172 WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), | |
173 page_url); | |
174 ASSERT_TRUE(web_frame != NULL); | |
175 // Add input file URl to links_. | |
176 links_.assign(&page_url,1); | |
177 // Add dummy file path to local_path_. | |
178 WebString file_path = webkit_base::FilePathStringToWebString( | |
179 FILE_PATH_LITERAL("c:\\dummy.htm")); | |
180 local_paths_.assign(&file_path, 1); | |
181 // Start serializing DOM. | |
182 bool result = WebPageSerializer::serialize(web_frame, | |
183 recursive_serialization, | |
184 static_cast<WebPageSerializerClient*>(this), | |
185 links_, | |
186 local_paths_, | |
187 webkit_base::FilePathToWebString(local_directory_name_)); | |
188 ASSERT_TRUE(result); | |
189 ASSERT_TRUE(serialized_); | |
190 } | |
191 | |
192 private: | |
193 // Map frame_url to corresponding serialized_content. | |
194 typedef base::hash_map<std::string, std::string> SerializedFrameContentMap; | |
195 SerializedFrameContentMap serialized_frame_map_; | |
196 // Map frame_url to corresponding status of serialization finish. | |
197 typedef base::hash_map<std::string, bool> SerializationFinishStatusMap; | |
198 SerializationFinishStatusMap serialization_finish_status_; | |
199 // Flag indicates whether the process of serializing DOM is finished or not. | |
200 bool serialized_; | |
201 // The links_ contain dummy original URLs of all saved links. | |
202 WebVector<WebURL> links_; | |
203 // The local_paths_ contain dummy corresponding local file paths of all saved | |
204 // links, which matched links_ one by one. | |
205 WebVector<WebString> local_paths_; | |
206 // The local_directory_name_ is dummy relative path of directory which | |
207 // contain all saved auxiliary files included all sub frames and resources. | |
208 const base::FilePath local_directory_name_; | |
209 | |
210 protected: | |
211 // testing::Test | |
212 virtual void SetUp() { | |
213 TestShellTest::SetUp(); | |
214 serialized_ = false; | |
215 } | |
216 | |
217 virtual void TearDown() { | |
218 TestShellTest::TearDown(); | |
219 } | |
220 }; | |
221 | |
222 // Helper function that test whether the first node in the doc is a doc type | |
223 // node. | |
224 bool HasDocType(const WebDocument& doc) { | |
225 WebNode node = doc.firstChild(); | |
226 if (node.isNull()) | |
227 return false; | |
228 return node.nodeType() == WebNode::DocumentTypeNode; | |
229 } | |
230 | |
231 // Helper function for checking whether input node is META tag. Return true | |
232 // means it is META element, otherwise return false. The parameter charset_info | |
233 // return actual charset info if the META tag has charset declaration. | |
234 bool IsMetaElement(const WebNode& node, std::string& charset_info) { | |
235 if (!node.isElementNode()) | |
236 return false; | |
237 const WebElement meta = node.toConst<WebElement>(); | |
238 if (!meta.hasTagName("meta")) | |
239 return false; | |
240 charset_info.erase(0, charset_info.length()); | |
241 // Check the META charset declaration. | |
242 WebString httpEquiv = meta.getAttribute("http-equiv"); | |
243 if (LowerCaseEqualsASCII(httpEquiv, "content-type")) { | |
244 std::string content = meta.getAttribute("content").utf8(); | |
245 int pos = content.find("charset", 0); | |
246 if (pos > -1) { | |
247 // Add a dummy charset declaration to charset_info, which indicates this | |
248 // META tag has charset declaration although we do not get correct value | |
249 // yet. | |
250 charset_info.append("has-charset-declaration"); | |
251 int remaining_length = content.length() - pos - 7; | |
252 if (!remaining_length) | |
253 return true; | |
254 int start_pos = pos + 7; | |
255 // Find "=" symbol. | |
256 while (remaining_length--) | |
257 if (content[start_pos++] == L'=') | |
258 break; | |
259 // Skip beginning space. | |
260 while (remaining_length) { | |
261 if (content[start_pos] > 0x0020) | |
262 break; | |
263 ++start_pos; | |
264 --remaining_length; | |
265 } | |
266 if (!remaining_length) | |
267 return true; | |
268 int end_pos = start_pos; | |
269 // Now we find out the start point of charset info. Search the end point. | |
270 while (remaining_length--) { | |
271 if (content[end_pos] <= 0x0020 || content[end_pos] == L';') | |
272 break; | |
273 ++end_pos; | |
274 } | |
275 // Get actual charset info. | |
276 charset_info = content.substr(start_pos, end_pos - start_pos); | |
277 return true; | |
278 } | |
279 } | |
280 return true; | |
281 } | |
282 | |
283 // If original contents have document type, the serialized contents also have | |
284 // document type. | |
285 TEST_F(DomSerializerTests, SerializeHTMLDOMWithDocType) { | |
286 base::FilePath page_file_path = data_dir_; | |
287 page_file_path = page_file_path.AppendASCII("dom_serializer"); | |
288 page_file_path = page_file_path.AppendASCII("youtube_1.htm"); | |
289 GURL file_url = net::FilePathToFileURL(page_file_path); | |
290 ASSERT_TRUE(file_url.SchemeIsFile()); | |
291 // Load the test file. | |
292 LoadPageFromURL(file_url); | |
293 // Make sure original contents have document type. | |
294 WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); | |
295 ASSERT_TRUE(web_frame != NULL); | |
296 WebDocument doc = web_frame->document(); | |
297 ASSERT_TRUE(HasDocType(doc)); | |
298 // Do serialization. | |
299 SerializeDomForURL(file_url, false); | |
300 // Load the serialized contents. | |
301 ASSERT_TRUE(HasSerializedFrame(file_url)); | |
302 const std::string& serialized_contents = | |
303 GetSerializedContentForFrame(file_url); | |
304 LoadContents(serialized_contents, file_url, | |
305 web_frame->document().encoding()); | |
306 // Make sure serialized contents still have document type. | |
307 web_frame = test_shell_->webView()->mainFrame(); | |
308 doc = web_frame->document(); | |
309 ASSERT_TRUE(HasDocType(doc)); | |
310 } | |
311 | |
312 // If original contents do not have document type, the serialized contents | |
313 // also do not have document type. | |
314 TEST_F(DomSerializerTests, SerializeHTMLDOMWithoutDocType) { | |
315 base::FilePath page_file_path = data_dir_; | |
316 page_file_path = page_file_path.AppendASCII("dom_serializer"); | |
317 page_file_path = page_file_path.AppendASCII("youtube_2.htm"); | |
318 GURL file_url = net::FilePathToFileURL(page_file_path); | |
319 ASSERT_TRUE(file_url.SchemeIsFile()); | |
320 // Load the test file. | |
321 LoadPageFromURL(file_url); | |
322 // Make sure original contents do not have document type. | |
323 WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); | |
324 ASSERT_TRUE(web_frame != NULL); | |
325 WebDocument doc = web_frame->document(); | |
326 ASSERT_TRUE(!HasDocType(doc)); | |
327 // Do serialization. | |
328 SerializeDomForURL(file_url, false); | |
329 // Load the serialized contents. | |
330 ASSERT_TRUE(HasSerializedFrame(file_url)); | |
331 const std::string& serialized_contents = | |
332 GetSerializedContentForFrame(file_url); | |
333 LoadContents(serialized_contents, file_url, | |
334 web_frame->document().encoding()); | |
335 // Make sure serialized contents do not have document type. | |
336 web_frame = test_shell_->webView()->mainFrame(); | |
337 doc = web_frame->document(); | |
338 ASSERT_TRUE(!HasDocType(doc)); | |
339 } | |
340 | |
341 // Serialize XML document which has all 5 built-in entities. After | |
342 // finishing serialization, the serialized contents should be same | |
343 // with original XML document. | |
344 TEST_F(DomSerializerTests, SerializeXMLDocWithBuiltInEntities) { | |
345 base::FilePath page_file_path = data_dir_; | |
346 page_file_path = page_file_path.AppendASCII("dom_serializer"); | |
347 page_file_path = page_file_path.AppendASCII("note.html"); | |
348 base::FilePath xml_file_path = data_dir_; | |
349 xml_file_path = xml_file_path.AppendASCII("dom_serializer"); | |
350 xml_file_path = xml_file_path.AppendASCII("note.xml"); | |
351 // Read original contents for later comparison. | |
352 std::string original_contents; | |
353 ASSERT_TRUE(file_util::ReadFileToString(xml_file_path, &original_contents)); | |
354 // Get file URL. | |
355 GURL file_url = net::FilePathToFileURL(page_file_path); | |
356 GURL xml_file_url = net::FilePathToFileURL(xml_file_path); | |
357 ASSERT_TRUE(file_url.SchemeIsFile()); | |
358 // Load the test file. | |
359 LoadPageFromURL(file_url); | |
360 // Do serialization. | |
361 SerializeDomForURL(xml_file_url, false); | |
362 // Compare the serialized contents with original contents. | |
363 ASSERT_TRUE(HasSerializedFrame(xml_file_url)); | |
364 const std::string& serialized_contents = | |
365 GetSerializedContentForFrame(xml_file_url); | |
366 ASSERT_EQ(original_contents, serialized_contents); | |
367 } | |
368 | |
369 // When serializing DOM, we add MOTW declaration before html tag. | |
370 TEST_F(DomSerializerTests, SerializeHTMLDOMWithAddingMOTW) { | |
371 base::FilePath page_file_path = data_dir_; | |
372 page_file_path = page_file_path.AppendASCII("dom_serializer"); | |
373 page_file_path = page_file_path.AppendASCII("youtube_2.htm"); | |
374 // Read original contents for later comparison . | |
375 std::string original_contents; | |
376 ASSERT_TRUE(file_util::ReadFileToString(page_file_path, &original_contents)); | |
377 // Get file URL. | |
378 GURL file_url = net::FilePathToFileURL(page_file_path); | |
379 ASSERT_TRUE(file_url.SchemeIsFile()); | |
380 // Make sure original contents does not have MOTW; | |
381 std::string motw_declaration = | |
382 WebPageSerializer::generateMarkOfTheWebDeclaration(file_url).utf8(); | |
383 ASSERT_FALSE(motw_declaration.empty()); | |
384 // The encoding of original contents is ISO-8859-1, so we convert the MOTW | |
385 // declaration to ASCII and search whether original contents has it or not. | |
386 ASSERT_TRUE(std::string::npos == | |
387 original_contents.find(motw_declaration)); | |
388 // Load the test file. | |
389 LoadPageFromURL(file_url); | |
390 // Do serialization. | |
391 SerializeDomForURL(file_url, false); | |
392 // Make sure the serialized contents have MOTW ; | |
393 ASSERT_TRUE(HasSerializedFrame(file_url)); | |
394 const std::string& serialized_contents = | |
395 GetSerializedContentForFrame(file_url); | |
396 ASSERT_FALSE(std::string::npos == | |
397 serialized_contents.find(motw_declaration)); | |
398 } | |
399 | |
400 // When serializing DOM, we will add the META which have correct charset | |
401 // declaration as first child of HEAD element for resolving WebKit bug: | |
402 // http://bugs.webkit.org/show_bug.cgi?id=16621 even the original document | |
403 // does not have META charset declaration. | |
404 TEST_F(DomSerializerTests, SerializeHTMLDOMWithNoMetaCharsetInOriginalDoc) { | |
405 base::FilePath page_file_path = data_dir_; | |
406 page_file_path = page_file_path.AppendASCII("dom_serializer"); | |
407 page_file_path = page_file_path.AppendASCII("youtube_1.htm"); | |
408 // Get file URL. | |
409 GURL file_url = net::FilePathToFileURL(page_file_path); | |
410 ASSERT_TRUE(file_url.SchemeIsFile()); | |
411 // Load the test file. | |
412 LoadPageFromURL(file_url); | |
413 | |
414 // Make sure there is no META charset declaration in original document. | |
415 WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); | |
416 ASSERT_TRUE(web_frame != NULL); | |
417 WebDocument doc = web_frame->document(); | |
418 ASSERT_TRUE(doc.isHTMLDocument()); | |
419 WebElement head_element = doc.head(); | |
420 ASSERT_TRUE(!head_element.isNull()); | |
421 // Go through all children of HEAD element. | |
422 for (WebNode child = head_element.firstChild(); !child.isNull(); | |
423 child = child.nextSibling()) { | |
424 std::string charset_info; | |
425 if (IsMetaElement(child, charset_info)) | |
426 ASSERT_TRUE(charset_info.empty()); | |
427 } | |
428 // Do serialization. | |
429 SerializeDomForURL(file_url, false); | |
430 | |
431 // Load the serialized contents. | |
432 ASSERT_TRUE(HasSerializedFrame(file_url)); | |
433 const std::string& serialized_contents = | |
434 GetSerializedContentForFrame(file_url); | |
435 LoadContents(serialized_contents, file_url, | |
436 web_frame->document().encoding()); | |
437 // Make sure the first child of HEAD element is META which has charset | |
438 // declaration in serialized contents. | |
439 web_frame = test_shell_->webView()->mainFrame(); | |
440 ASSERT_TRUE(web_frame != NULL); | |
441 doc = web_frame->document(); | |
442 ASSERT_TRUE(doc.isHTMLDocument()); | |
443 head_element = doc.head(); | |
444 ASSERT_TRUE(!head_element.isNull()); | |
445 WebNode meta_node = head_element.firstChild(); | |
446 ASSERT_TRUE(!meta_node.isNull()); | |
447 // Get meta charset info. | |
448 std::string charset_info2; | |
449 ASSERT_TRUE(IsMetaElement(meta_node, charset_info2)); | |
450 ASSERT_TRUE(!charset_info2.empty()); | |
451 ASSERT_EQ(charset_info2, | |
452 std::string(web_frame->document().encoding().utf8())); | |
453 | |
454 // Make sure no more additional META tags which have charset declaration. | |
455 for (WebNode child = meta_node.nextSibling(); !child.isNull(); | |
456 child = child.nextSibling()) { | |
457 std::string charset_info; | |
458 if (IsMetaElement(child, charset_info)) | |
459 ASSERT_TRUE(charset_info.empty()); | |
460 } | |
461 } | |
462 | |
463 // When serializing DOM, if the original document has multiple META charset | |
464 // declaration, we will add the META which have correct charset declaration | |
465 // as first child of HEAD element and remove all original META charset | |
466 // declarations. | |
467 TEST_F(DomSerializerTests, | |
468 SerializeHTMLDOMWithMultipleMetaCharsetInOriginalDoc) { | |
469 base::FilePath page_file_path = data_dir_; | |
470 page_file_path = page_file_path.AppendASCII("dom_serializer"); | |
471 page_file_path = page_file_path.AppendASCII("youtube_2.htm"); | |
472 // Get file URL. | |
473 GURL file_url = net::FilePathToFileURL(page_file_path); | |
474 ASSERT_TRUE(file_url.SchemeIsFile()); | |
475 // Load the test file. | |
476 LoadPageFromURL(file_url); | |
477 | |
478 // Make sure there are multiple META charset declarations in original | |
479 // document. | |
480 WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); | |
481 ASSERT_TRUE(web_frame != NULL); | |
482 WebDocument doc = web_frame->document(); | |
483 ASSERT_TRUE(doc.isHTMLDocument()); | |
484 WebElement head_ele = doc.head(); | |
485 ASSERT_TRUE(!head_ele.isNull()); | |
486 // Go through all children of HEAD element. | |
487 int charset_declaration_count = 0; | |
488 for (WebNode child = head_ele.firstChild(); !child.isNull(); | |
489 child = child.nextSibling()) { | |
490 std::string charset_info; | |
491 if (IsMetaElement(child, charset_info) && !charset_info.empty()) | |
492 charset_declaration_count++; | |
493 } | |
494 // The original doc has more than META tags which have charset declaration. | |
495 ASSERT_TRUE(charset_declaration_count > 1); | |
496 | |
497 // Do serialization. | |
498 SerializeDomForURL(file_url, false); | |
499 | |
500 // Load the serialized contents. | |
501 ASSERT_TRUE(HasSerializedFrame(file_url)); | |
502 const std::string& serialized_contents = | |
503 GetSerializedContentForFrame(file_url); | |
504 LoadContents(serialized_contents, file_url, | |
505 web_frame->document().encoding()); | |
506 // Make sure only first child of HEAD element is META which has charset | |
507 // declaration in serialized contents. | |
508 web_frame = test_shell_->webView()->mainFrame(); | |
509 ASSERT_TRUE(web_frame != NULL); | |
510 doc = web_frame->document(); | |
511 ASSERT_TRUE(doc.isHTMLDocument()); | |
512 head_ele = doc.head(); | |
513 ASSERT_TRUE(!head_ele.isNull()); | |
514 WebNode meta_node = head_ele.firstChild(); | |
515 ASSERT_TRUE(!meta_node.isNull()); | |
516 // Get meta charset info. | |
517 std::string charset_info2; | |
518 ASSERT_TRUE(IsMetaElement(meta_node, charset_info2)); | |
519 ASSERT_TRUE(!charset_info2.empty()); | |
520 ASSERT_EQ(charset_info2, | |
521 std::string(web_frame->document().encoding().utf8())); | |
522 | |
523 // Make sure no more additional META tags which have charset declaration. | |
524 for (WebNode child = meta_node.nextSibling(); !child.isNull(); | |
525 child = child.nextSibling()) { | |
526 std::string charset_info; | |
527 if (IsMetaElement(child, charset_info)) | |
528 ASSERT_TRUE(charset_info.empty()); | |
529 } | |
530 } | |
531 | |
532 // Test situation of html entities in text when serializing HTML DOM. | |
533 TEST_F(DomSerializerTests, SerializeHTMLDOMWithEntitiesInText) { | |
534 base::FilePath page_file_path = data_dir_; | |
535 page_file_path = page_file_path.AppendASCII( | |
536 "dom_serializer/htmlentities_in_text.htm"); | |
537 // Get file URL. The URL is dummy URL to identify the following loading | |
538 // actions. The test content is in constant:original_contents. | |
539 GURL file_url = net::FilePathToFileURL(page_file_path); | |
540 ASSERT_TRUE(file_url.SchemeIsFile()); | |
541 // Test contents. | |
542 static const char* const original_contents = | |
543 "<html><body>&<>\"\'</body></html>"; | |
544 // Load the test contents. | |
545 LoadContents(original_contents, file_url, WebString()); | |
546 | |
547 // Get BODY's text content in DOM. | |
548 WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); | |
549 ASSERT_TRUE(web_frame != NULL); | |
550 WebDocument doc = web_frame->document(); | |
551 ASSERT_TRUE(doc.isHTMLDocument()); | |
552 WebElement body_ele = doc.body(); | |
553 ASSERT_TRUE(!body_ele.isNull()); | |
554 WebNode text_node = body_ele.firstChild(); | |
555 ASSERT_TRUE(text_node.isTextNode()); | |
556 ASSERT_TRUE(std::string(text_node.createMarkup().utf8()) == | |
557 "&<>\"\'"); | |
558 // Do serialization. | |
559 SerializeDomForURL(file_url, false); | |
560 // Compare the serialized contents with original contents. | |
561 ASSERT_TRUE(HasSerializedFrame(file_url)); | |
562 const std::string& serialized_contents = | |
563 GetSerializedContentForFrame(file_url); | |
564 // Compare the serialized contents with original contents to make sure | |
565 // they are same. | |
566 // Because we add MOTW when serializing DOM, so before comparison, we also | |
567 // need to add MOTW to original_contents. | |
568 std::string original_str = | |
569 WebPageSerializer::generateMarkOfTheWebDeclaration(file_url).utf8(); | |
570 original_str += original_contents; | |
571 // Since WebCore now inserts a new HEAD element if there is no HEAD element | |
572 // when creating BODY element. (Please see HTMLParser::bodyCreateErrorCheck.) | |
573 // We need to append the HEAD content and corresponding META content if we | |
574 // find WebCore-generated HEAD element. | |
575 if (!doc.head().isNull()) { | |
576 WebString encoding = web_frame->document().encoding(); | |
577 std::string htmlTag("<html>"); | |
578 std::string::size_type pos = original_str.find(htmlTag); | |
579 ASSERT_NE(std::string::npos, pos); | |
580 pos += htmlTag.length(); | |
581 std::string head_part("<head>"); | |
582 head_part += | |
583 WebPageSerializer::generateMetaCharsetDeclaration(encoding).utf8(); | |
584 head_part += "</head>"; | |
585 original_str.insert(pos, head_part); | |
586 } | |
587 ASSERT_EQ(original_str, serialized_contents); | |
588 } | |
589 | |
590 // Test situation of html entities in attribute value when serializing | |
591 // HTML DOM. | |
592 // This test started to fail at WebKit r65388. See http://crbug.com/52279. | |
593 TEST_F(DomSerializerTests, SerializeHTMLDOMWithEntitiesInAttributeValue) { | |
594 base::FilePath page_file_path = data_dir_; | |
595 page_file_path = page_file_path.AppendASCII( | |
596 "dom_serializer/htmlentities_in_attribute_value.htm"); | |
597 // Get file URL. The URL is dummy URL to identify the following loading | |
598 // actions. The test content is in constant:original_contents. | |
599 GURL file_url = net::FilePathToFileURL(page_file_path); | |
600 ASSERT_TRUE(file_url.SchemeIsFile()); | |
601 // Test contents. | |
602 static const char* const original_contents = | |
603 "<html><body title=\"&<>"'\"></body></html>"; | |
604 // Load the test contents. | |
605 LoadContents(original_contents, file_url, WebString()); | |
606 // Get value of BODY's title attribute in DOM. | |
607 WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); | |
608 ASSERT_TRUE(web_frame != NULL); | |
609 WebDocument doc = web_frame->document(); | |
610 ASSERT_TRUE(doc.isHTMLDocument()); | |
611 WebElement body_ele = doc.body(); | |
612 ASSERT_TRUE(!body_ele.isNull()); | |
613 WebString value = body_ele.getAttribute("title"); | |
614 ASSERT_TRUE(std::string(value.utf8()) == "&<>\"\'"); | |
615 // Do serialization. | |
616 SerializeDomForURL(file_url, false); | |
617 // Compare the serialized contents with original contents. | |
618 ASSERT_TRUE(HasSerializedFrame(file_url)); | |
619 const std::string& serialized_contents = | |
620 GetSerializedContentForFrame(file_url); | |
621 // Compare the serialized contents with original contents to make sure | |
622 // they are same. | |
623 std::string original_str = | |
624 WebPageSerializer::generateMarkOfTheWebDeclaration(file_url).utf8(); | |
625 original_str += original_contents; | |
626 if (!doc.isNull()) { | |
627 WebString encoding = web_frame->document().encoding(); | |
628 std::string htmlTag("<html>"); | |
629 std::string::size_type pos = original_str.find(htmlTag); | |
630 ASSERT_NE(std::string::npos, pos); | |
631 pos += htmlTag.length(); | |
632 std::string head_part("<head>"); | |
633 head_part += | |
634 WebPageSerializer::generateMetaCharsetDeclaration(encoding).utf8(); | |
635 head_part += "</head>"; | |
636 original_str.insert(pos, head_part); | |
637 } | |
638 ASSERT_EQ(original_str, serialized_contents); | |
639 } | |
640 | |
641 // Test situation of non-standard HTML entities when serializing HTML DOM. | |
642 // This test started to fail at WebKit r65351. See http://crbug.com/52279. | |
643 TEST_F(DomSerializerTests, SerializeHTMLDOMWithNonStandardEntities) { | |
644 // Make a test file URL and load it. | |
645 base::FilePath page_file_path = data_dir_; | |
646 page_file_path = page_file_path.AppendASCII("dom_serializer"); | |
647 page_file_path = page_file_path.AppendASCII("nonstandard_htmlentities.htm"); | |
648 GURL file_url = net::FilePathToFileURL(page_file_path); | |
649 LoadPageFromURL(file_url); | |
650 | |
651 // Get value of BODY's title attribute in DOM. | |
652 WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); | |
653 WebDocument doc = web_frame->document(); | |
654 ASSERT_TRUE(doc.isHTMLDocument()); | |
655 WebElement body_element = doc.body(); | |
656 // Unescaped string for "%⊅¹'". | |
657 static const wchar_t parsed_value[] = { | |
658 '%', 0x2285, 0x00b9, '\'', 0 | |
659 }; | |
660 WebString value = body_element.getAttribute("title"); | |
661 ASSERT_TRUE(UTF16ToWide(value) == parsed_value); | |
662 ASSERT_TRUE(UTF16ToWide(body_element.innerText()) == parsed_value); | |
663 | |
664 // Do serialization. | |
665 SerializeDomForURL(file_url, false); | |
666 // Check the serialized string. | |
667 ASSERT_TRUE(HasSerializedFrame(file_url)); | |
668 const std::string& serialized_contents = | |
669 GetSerializedContentForFrame(file_url); | |
670 // Confirm that the serialized string has no non-standard HTML entities. | |
671 ASSERT_EQ(std::string::npos, serialized_contents.find("%")); | |
672 ASSERT_EQ(std::string::npos, serialized_contents.find("⊅")); | |
673 ASSERT_EQ(std::string::npos, serialized_contents.find("¹")); | |
674 ASSERT_EQ(std::string::npos, serialized_contents.find("'")); | |
675 } | |
676 | |
677 // Test situation of BASE tag in original document when serializing HTML DOM. | |
678 // When serializing, we should comment the BASE tag, append a new BASE tag. | |
679 // rewrite all the savable URLs to relative local path, and change other URLs | |
680 // to absolute URLs. | |
681 TEST_F(DomSerializerTests, SerializeHTMLDOMWithBaseTag) { | |
682 // There are total 2 available base tags in this test file. | |
683 const int kTotalBaseTagCountInTestFile = 2; | |
684 | |
685 base::FilePath page_file_path = | |
686 data_dir_.AppendASCII("dom_serializer").AsEndingWithSeparator(); | |
687 | |
688 // Get page dir URL which is base URL of this file. | |
689 GURL path_dir_url = net::FilePathToFileURL(page_file_path); | |
690 // Get file path. | |
691 page_file_path = | |
692 page_file_path.AppendASCII("html_doc_has_base_tag.htm"); | |
693 // Get file URL. | |
694 GURL file_url = net::FilePathToFileURL(page_file_path); | |
695 ASSERT_TRUE(file_url.SchemeIsFile()); | |
696 // Load the test file. | |
697 LoadPageFromURL(file_url); | |
698 // Since for this test, we assume there is no savable sub-resource links for | |
699 // this test file, also all links are relative URLs in this test file, so we | |
700 // need to check those relative URLs and make sure document has BASE tag. | |
701 WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); | |
702 ASSERT_TRUE(web_frame != NULL); | |
703 WebDocument doc = web_frame->document(); | |
704 ASSERT_TRUE(doc.isHTMLDocument()); | |
705 // Go through all descent nodes. | |
706 WebNodeCollection all = doc.all(); | |
707 int original_base_tag_count = 0; | |
708 for (WebNode node = all.firstItem(); !node.isNull(); | |
709 node = all.nextItem()) { | |
710 if (!node.isElementNode()) | |
711 continue; | |
712 WebElement element = node.to<WebElement>(); | |
713 if (element.hasTagName("base")) { | |
714 original_base_tag_count++; | |
715 } else { | |
716 // Get link. | |
717 WebString value = | |
718 webkit_glue::GetSubResourceLinkFromElement(element); | |
719 if (value.isNull() && element.hasTagName("a")) { | |
720 value = element.getAttribute("href"); | |
721 if (value.isEmpty()) | |
722 value = WebString(); | |
723 } | |
724 // Each link is relative link. | |
725 if (!value.isNull()) { | |
726 GURL link(value.utf8()); | |
727 ASSERT_TRUE(link.scheme().empty()); | |
728 } | |
729 } | |
730 } | |
731 ASSERT_EQ(original_base_tag_count, kTotalBaseTagCountInTestFile); | |
732 // Make sure in original document, the base URL is not equal with the | |
733 // |path_dir_url|. | |
734 GURL original_base_url(doc.baseURL()); | |
735 ASSERT_NE(original_base_url, path_dir_url); | |
736 | |
737 // Do serialization. | |
738 SerializeDomForURL(file_url, false); | |
739 | |
740 // Load the serialized contents. | |
741 ASSERT_TRUE(HasSerializedFrame(file_url)); | |
742 const std::string& serialized_contents = | |
743 GetSerializedContentForFrame(file_url); | |
744 LoadContents(serialized_contents, file_url, | |
745 web_frame->document().encoding()); | |
746 | |
747 // Make sure all links are absolute URLs and doc there are some number of | |
748 // BASE tags in serialized HTML data. Each of those BASE tags have same base | |
749 // URL which is as same as URL of current test file. | |
750 web_frame = test_shell_->webView()->mainFrame(); | |
751 ASSERT_TRUE(web_frame != NULL); | |
752 doc = web_frame->document(); | |
753 ASSERT_TRUE(doc.isHTMLDocument()); | |
754 // Go through all descent nodes. | |
755 all = doc.all(); | |
756 int new_base_tag_count = 0; | |
757 for (WebNode node = all.firstItem(); !node.isNull(); | |
758 node = all.nextItem()) { | |
759 if (!node.isElementNode()) | |
760 continue; | |
761 WebElement element = node.to<WebElement>(); | |
762 if (element.hasTagName("base")) { | |
763 new_base_tag_count++; | |
764 } else { | |
765 // Get link. | |
766 WebString value = | |
767 webkit_glue::GetSubResourceLinkFromElement(element); | |
768 if (value.isNull() && element.hasTagName("a")) { | |
769 value = element.getAttribute("href"); | |
770 if (value.isEmpty()) | |
771 value = WebString(); | |
772 } | |
773 // Each link is absolute link. | |
774 if (!value.isNull()) { | |
775 GURL link(std::string(value.utf8())); | |
776 ASSERT_FALSE(link.scheme().empty()); | |
777 } | |
778 } | |
779 } | |
780 // We have one more added BASE tag which is generated by JavaScript. | |
781 ASSERT_EQ(new_base_tag_count, original_base_tag_count + 1); | |
782 // Make sure in new document, the base URL is equal with the |path_dir_url|. | |
783 GURL new_base_url(doc.baseURL()); | |
784 ASSERT_EQ(new_base_url, path_dir_url); | |
785 } | |
786 | |
787 // Serializing page which has an empty HEAD tag. | |
788 TEST_F(DomSerializerTests, SerializeHTMLDOMWithEmptyHead) { | |
789 base::FilePath page_file_path = data_dir_; | |
790 page_file_path = page_file_path.AppendASCII("dom_serializer"); | |
791 page_file_path = page_file_path.AppendASCII("empty_head.htm"); | |
792 GURL file_url = net::FilePathToFileURL(page_file_path); | |
793 ASSERT_TRUE(file_url.SchemeIsFile()); | |
794 | |
795 // Load the test html content. | |
796 static const char* const empty_head_contents = | |
797 "<html><head></head><body>hello world</body></html>"; | |
798 LoadContents(empty_head_contents, file_url, WebString()); | |
799 | |
800 // Make sure the head tag is empty. | |
801 WebFrame* web_frame = test_shell_->webView()->mainFrame(); | |
802 ASSERT_TRUE(web_frame != NULL); | |
803 WebDocument doc = web_frame->document(); | |
804 ASSERT_TRUE(doc.isHTMLDocument()); | |
805 WebElement head_element = doc.head(); | |
806 ASSERT_TRUE(!head_element.isNull()); | |
807 ASSERT_TRUE(!head_element.hasChildNodes()); | |
808 ASSERT_TRUE(head_element.childNodes().length() == 0); | |
809 | |
810 // Do serialization. | |
811 SerializeDomForURL(file_url, false); | |
812 // Make sure the serialized contents have META ; | |
813 ASSERT_TRUE(HasSerializedFrame(file_url)); | |
814 const std::string& serialized_contents = | |
815 GetSerializedContentForFrame(file_url); | |
816 | |
817 // Reload serialized contents and make sure there is only one META tag. | |
818 LoadContents(serialized_contents, file_url, web_frame->document().encoding()); | |
819 web_frame = test_shell_->webView()->mainFrame(); | |
820 ASSERT_TRUE(web_frame != NULL); | |
821 doc = web_frame->document(); | |
822 ASSERT_TRUE(doc.isHTMLDocument()); | |
823 head_element = doc.head(); | |
824 ASSERT_TRUE(!head_element.isNull()); | |
825 ASSERT_TRUE(head_element.hasChildNodes()); | |
826 ASSERT_TRUE(head_element.childNodes().length() == 1); | |
827 WebNode meta_node = head_element.firstChild(); | |
828 ASSERT_TRUE(!meta_node.isNull()); | |
829 // Get meta charset info. | |
830 std::string charset_info; | |
831 ASSERT_TRUE(IsMetaElement(meta_node, charset_info)); | |
832 ASSERT_TRUE(!charset_info.empty()); | |
833 ASSERT_EQ(charset_info, | |
834 std::string(web_frame->document().encoding().utf8())); | |
835 | |
836 // Check the body's first node is text node and its contents are | |
837 // "hello world" | |
838 WebElement body_element = doc.body(); | |
839 ASSERT_TRUE(!body_element.isNull()); | |
840 WebNode text_node = body_element.firstChild(); | |
841 ASSERT_TRUE(text_node.isTextNode()); | |
842 WebString text_node_contents = text_node.nodeValue(); | |
843 ASSERT_TRUE(std::string(text_node_contents.utf8()) == "hello world"); | |
844 } | |
845 | |
846 // Test that we don't crash when the page contains an iframe that | |
847 // was handled as a download (http://crbug.com/42212). | |
848 TEST_F(DomSerializerTests, SerializeDocumentWithDownloadedIFrame) { | |
849 base::FilePath page_file_path = data_dir_; | |
850 page_file_path = page_file_path.AppendASCII("dom_serializer"); | |
851 page_file_path = page_file_path.AppendASCII("iframe-src-is-exe.htm"); | |
852 GURL file_url = net::FilePathToFileURL(page_file_path); | |
853 ASSERT_TRUE(file_url.SchemeIsFile()); | |
854 // Load the test file. | |
855 LoadPageFromURL(file_url); | |
856 // Do a recursive serialization. We pass if we don't crash. | |
857 SerializeDomForURL(file_url, true); | |
858 } | |
859 | |
860 TEST_F(DomSerializerTests, SubResourceForElementsInNonHTMLNamespace) { | |
861 base::FilePath page_file_path = data_dir_; | |
862 page_file_path = page_file_path.AppendASCII("dom_serializer"); | |
863 page_file_path = page_file_path.AppendASCII("non_html_namespace.htm"); | |
864 GURL file_url = net::FilePathToFileURL(page_file_path); | |
865 LoadPageFromURL(file_url); | |
866 WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); | |
867 ASSERT_TRUE(web_frame != NULL); | |
868 WebDocument doc = web_frame->document(); | |
869 WebNode lastNodeInBody = doc.body().lastChild(); | |
870 ASSERT_EQ(WebNode::ElementNode, lastNodeInBody.nodeType()); | |
871 WebString uri = webkit_glue::GetSubResourceLinkFromElement( | |
872 lastNodeInBody.to<WebElement>()); | |
873 EXPECT_TRUE(uri.isNull()); | |
874 } | |
875 | |
876 } // namespace | |
OLD | NEW |