OLD | NEW |
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
| 5 #include "base/bind.h" |
| 6 #include "base/command_line.h" |
5 #include "base/compiler_specific.h" | 7 #include "base/compiler_specific.h" |
6 #include "base/file_util.h" | 8 #include "base/file_util.h" |
7 #include "base/files/file_path.h" | 9 #include "base/files/file_path.h" |
8 #include "base/hash_tables.h" | 10 #include "base/hash_tables.h" |
9 #include "base/string_util.h" | 11 #include "base/string_util.h" |
10 #include "base/utf_string_conversions.h" | 12 #include "base/utf_string_conversions.h" |
| 13 #include "content/public/common/content_switches.h" |
| 14 #include "content/public/renderer/render_view.h" |
| 15 #include "content/public/renderer/render_view_observer.h" |
| 16 #include "content/public/test/test_utils.h" |
| 17 #include "content/shell/shell.h" |
| 18 #include "content/test/content_browser_test.h" |
| 19 #include "content/test/content_browser_test_utils.h" |
11 #include "net/base/net_util.h" | 20 #include "net/base/net_util.h" |
12 #include "net/url_request/url_request_context.h" | 21 #include "net/url_request/url_request_context.h" |
13 #include "third_party/WebKit/Source/Platform/chromium/public/WebCString.h" | 22 #include "third_party/WebKit/Source/Platform/chromium/public/WebCString.h" |
14 #include "third_party/WebKit/Source/Platform/chromium/public/WebData.h" | 23 #include "third_party/WebKit/Source/Platform/chromium/public/WebData.h" |
15 #include "third_party/WebKit/Source/Platform/chromium/public/WebString.h" | 24 #include "third_party/WebKit/Source/Platform/chromium/public/WebString.h" |
16 #include "third_party/WebKit/Source/Platform/chromium/public/WebURL.h" | 25 #include "third_party/WebKit/Source/Platform/chromium/public/WebURL.h" |
17 #include "third_party/WebKit/Source/Platform/chromium/public/WebVector.h" | 26 #include "third_party/WebKit/Source/Platform/chromium/public/WebVector.h" |
18 #include "third_party/WebKit/Source/WebKit/chromium/public/WebDocument.h" | 27 #include "third_party/WebKit/Source/WebKit/chromium/public/WebDocument.h" |
19 #include "third_party/WebKit/Source/WebKit/chromium/public/WebElement.h" | 28 #include "third_party/WebKit/Source/WebKit/chromium/public/WebElement.h" |
20 #include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h" | 29 #include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h" |
21 #include "third_party/WebKit/Source/WebKit/chromium/public/WebNode.h" | 30 #include "third_party/WebKit/Source/WebKit/chromium/public/WebNode.h" |
22 #include "third_party/WebKit/Source/WebKit/chromium/public/WebNodeCollection.h" | 31 #include "third_party/WebKit/Source/WebKit/chromium/public/WebNodeCollection.h" |
23 #include "third_party/WebKit/Source/WebKit/chromium/public/WebNodeList.h" | 32 #include "third_party/WebKit/Source/WebKit/chromium/public/WebNodeList.h" |
24 #include "third_party/WebKit/Source/WebKit/chromium/public/WebPageSerializer.h" | 33 #include "third_party/WebKit/Source/WebKit/chromium/public/WebPageSerializer.h" |
25 #include "third_party/WebKit/Source/WebKit/chromium/public/WebPageSerializerClie
nt.h" | 34 #include "third_party/WebKit/Source/WebKit/chromium/public/WebPageSerializerClie
nt.h" |
26 #include "third_party/WebKit/Source/WebKit/chromium/public/WebView.h" | 35 #include "third_party/WebKit/Source/WebKit/chromium/public/WebView.h" |
27 #include "webkit/base/file_path_string_conversions.h" | 36 #include "webkit/base/file_path_string_conversions.h" |
28 #include "webkit/glue/dom_operations.h" | 37 #include "webkit/glue/dom_operations.h" |
29 #include "webkit/tools/test_shell/simple_resource_loader_bridge.h" | |
30 #include "webkit/tools/test_shell/test_shell_test.h" | |
31 | 38 |
32 using WebKit::WebCString; | 39 using WebKit::WebCString; |
33 using WebKit::WebData; | 40 using WebKit::WebData; |
34 using WebKit::WebDocument; | 41 using WebKit::WebDocument; |
35 using WebKit::WebElement; | 42 using WebKit::WebElement; |
36 using WebKit::WebFrame; | 43 using WebKit::WebFrame; |
37 using WebKit::WebNode; | 44 using WebKit::WebNode; |
38 using WebKit::WebNodeCollection; | 45 using WebKit::WebNodeCollection; |
39 using WebKit::WebNodeList; | 46 using WebKit::WebNodeList; |
40 using WebKit::WebPageSerializer; | 47 using WebKit::WebPageSerializer; |
41 using WebKit::WebPageSerializerClient; | 48 using WebKit::WebPageSerializerClient; |
42 using WebKit::WebNode; | 49 using WebKit::WebNode; |
43 using WebKit::WebString; | 50 using WebKit::WebString; |
44 using WebKit::WebURL; | 51 using WebKit::WebURL; |
45 using WebKit::WebView; | 52 using WebKit::WebView; |
46 using WebKit::WebVector; | 53 using WebKit::WebVector; |
47 | 54 |
48 namespace { | 55 namespace content { |
49 | 56 |
50 // Iterate recursively over sub-frames to find one with with a given url. | 57 // Iterate recursively over sub-frames to find one with with a given url. |
51 WebFrame* FindSubFrameByURL(WebView* web_view, const GURL& url) { | 58 WebFrame* FindSubFrameByURL(WebView* web_view, const GURL& url) { |
52 if (!web_view->mainFrame()) | 59 if (!web_view->mainFrame()) |
53 return NULL; | 60 return NULL; |
54 | 61 |
55 std::vector<WebFrame*> stack; | 62 std::vector<WebFrame*> stack; |
56 stack.push_back(web_view->mainFrame()); | 63 stack.push_back(web_view->mainFrame()); |
57 | 64 |
58 while (!stack.empty()) { | 65 while (!stack.empty()) { |
(...skipping 11 matching lines...) Expand all Loading... |
70 if (!element.hasTagName("frame") && !element.hasTagName("iframe")) | 77 if (!element.hasTagName("frame") && !element.hasTagName("iframe")) |
71 continue; | 78 continue; |
72 WebFrame* sub_frame = WebFrame::fromFrameOwnerElement(element); | 79 WebFrame* sub_frame = WebFrame::fromFrameOwnerElement(element); |
73 if (sub_frame) | 80 if (sub_frame) |
74 stack.push_back(sub_frame); | 81 stack.push_back(sub_frame); |
75 } | 82 } |
76 } | 83 } |
77 return NULL; | 84 return NULL; |
78 } | 85 } |
79 | 86 |
80 class DomSerializerTests : public TestShellTest, | 87 // Helper function that test whether the first node in the doc is a doc type |
| 88 // node. |
| 89 bool HasDocType(const WebDocument& doc) { |
| 90 WebNode node = doc.firstChild(); |
| 91 if (node.isNull()) |
| 92 return false; |
| 93 return node.nodeType() == WebNode::DocumentTypeNode; |
| 94 } |
| 95 |
| 96 // Helper function for checking whether input node is META tag. Return true |
| 97 // means it is META element, otherwise return false. The parameter charset_info |
| 98 // return actual charset info if the META tag has charset declaration. |
| 99 bool IsMetaElement(const WebNode& node, std::string& charset_info) { |
| 100 if (!node.isElementNode()) |
| 101 return false; |
| 102 const WebElement meta = node.toConst<WebElement>(); |
| 103 if (!meta.hasTagName("meta")) |
| 104 return false; |
| 105 charset_info.erase(0, charset_info.length()); |
| 106 // Check the META charset declaration. |
| 107 WebString httpEquiv = meta.getAttribute("http-equiv"); |
| 108 if (LowerCaseEqualsASCII(httpEquiv, "content-type")) { |
| 109 std::string content = meta.getAttribute("content").utf8(); |
| 110 int pos = content.find("charset", 0); |
| 111 if (pos > -1) { |
| 112 // Add a dummy charset declaration to charset_info, which indicates this |
| 113 // META tag has charset declaration although we do not get correct value |
| 114 // yet. |
| 115 charset_info.append("has-charset-declaration"); |
| 116 int remaining_length = content.length() - pos - 7; |
| 117 if (!remaining_length) |
| 118 return true; |
| 119 int start_pos = pos + 7; |
| 120 // Find "=" symbol. |
| 121 while (remaining_length--) |
| 122 if (content[start_pos++] == L'=') |
| 123 break; |
| 124 // Skip beginning space. |
| 125 while (remaining_length) { |
| 126 if (content[start_pos] > 0x0020) |
| 127 break; |
| 128 ++start_pos; |
| 129 --remaining_length; |
| 130 } |
| 131 if (!remaining_length) |
| 132 return true; |
| 133 int end_pos = start_pos; |
| 134 // Now we find out the start point of charset info. Search the end point. |
| 135 while (remaining_length--) { |
| 136 if (content[end_pos] <= 0x0020 || content[end_pos] == L';') |
| 137 break; |
| 138 ++end_pos; |
| 139 } |
| 140 // Get actual charset info. |
| 141 charset_info = content.substr(start_pos, end_pos - start_pos); |
| 142 return true; |
| 143 } |
| 144 } |
| 145 return true; |
| 146 } |
| 147 |
| 148 class LoadObserver : public RenderViewObserver { |
| 149 public: |
| 150 LoadObserver(RenderView* render_view, const base::Closure& quit_closure) |
| 151 : RenderViewObserver(render_view), |
| 152 quit_closure_(quit_closure) {} |
| 153 |
| 154 virtual void DidFinishLoad(WebKit::WebFrame* frame) { |
| 155 if (frame == render_view()->GetWebView()->mainFrame()) |
| 156 quit_closure_.Run(); |
| 157 } |
| 158 |
| 159 private: |
| 160 base::Closure quit_closure_; |
| 161 }; |
| 162 |
| 163 class DomSerializerTests : public ContentBrowserTest, |
81 public WebPageSerializerClient { | 164 public WebPageSerializerClient { |
82 public: | 165 public: |
83 DomSerializerTests() | 166 DomSerializerTests() |
84 : local_directory_name_(FILE_PATH_LITERAL("./dummy_files/")) { } | 167 : serialized_(false), |
| 168 local_directory_name_(FILE_PATH_LITERAL("./dummy_files/")) {} |
| 169 |
| 170 virtual void SetUpCommandLine(CommandLine* command_line) { |
| 171 command_line->AppendSwitch(switches::kSingleProcess); |
| 172 } |
85 | 173 |
86 // DomSerializerDelegate. | 174 // DomSerializerDelegate. |
87 virtual void didSerializeDataForFrame(const WebURL& frame_web_url, | 175 virtual void didSerializeDataForFrame(const WebURL& frame_web_url, |
88 const WebCString& data, | 176 const WebCString& data, |
89 PageSerializationStatus status) { | 177 PageSerializationStatus status) { |
90 | 178 |
91 GURL frame_url(frame_web_url); | 179 GURL frame_url(frame_web_url); |
92 // If the all frames are finished saving, check all finish status | 180 // If the all frames are finished saving, check all finish status |
93 if (status == WebPageSerializerClient::AllFramesAreFinished) { | 181 if (status == WebPageSerializerClient::AllFramesAreFinished) { |
94 SerializationFinishStatusMap::iterator it = | 182 SerializationFinishStatusMap::iterator it = |
(...skipping 27 matching lines...) Expand all Loading... |
122 bool HasSerializedFrame(const GURL& frame_url) { | 210 bool HasSerializedFrame(const GURL& frame_url) { |
123 return serialized_frame_map_.find(frame_url.spec()) != | 211 return serialized_frame_map_.find(frame_url.spec()) != |
124 serialized_frame_map_.end(); | 212 serialized_frame_map_.end(); |
125 } | 213 } |
126 | 214 |
127 const std::string& GetSerializedContentForFrame( | 215 const std::string& GetSerializedContentForFrame( |
128 const GURL& frame_url) { | 216 const GURL& frame_url) { |
129 return serialized_frame_map_[frame_url.spec()]; | 217 return serialized_frame_map_[frame_url.spec()]; |
130 } | 218 } |
131 | 219 |
132 // Load web page according to specific URL. | 220 RenderView* GetRenderView() { |
133 void LoadPageFromURL(const GURL& page_url) { | 221 // We could have the test on the UI thread get the WebContent's routing ID, |
134 // Load the test file. | 222 // but we know this will be the first RV so skip that and just hardcode it. |
135 test_shell_->ResetTestController(); | 223 return RenderView::FromRoutingID(1); |
136 test_shell_->LoadURL(page_url); | 224 } |
137 test_shell_->WaitTestFinished(); | 225 |
| 226 WebView* GetWebView() { |
| 227 return GetRenderView()->GetWebView(); |
| 228 } |
| 229 |
| 230 WebFrame* GetMainFrame() { |
| 231 return GetWebView()->mainFrame(); |
138 } | 232 } |
139 | 233 |
140 // Load web page according to input content and relative URLs within | 234 // Load web page according to input content and relative URLs within |
141 // the document. | 235 // the document. |
142 void LoadContents(const std::string& contents, | 236 void LoadContents(const std::string& contents, |
143 const GURL& base_url, | 237 const GURL& base_url, |
144 const WebString encoding_info) { | 238 const WebString encoding_info) { |
145 test_shell_->ResetTestController(); | 239 scoped_refptr<MessageLoopRunner> runner = new MessageLoopRunner; |
| 240 LoadObserver observer(GetRenderView(), runner->QuitClosure()); |
| 241 |
146 // If input encoding is empty, use UTF-8 as default encoding. | 242 // If input encoding is empty, use UTF-8 as default encoding. |
147 if (encoding_info.isEmpty()) { | 243 if (encoding_info.isEmpty()) { |
148 test_shell_->webView()->mainFrame()->loadHTMLString(contents, base_url); | 244 GetMainFrame()->loadHTMLString(contents, base_url); |
149 } else { | 245 } else { |
150 WebData data(contents.data(), contents.length()); | 246 WebData data(contents.data(), contents.length()); |
151 | 247 |
152 // Do not use WebFrame.LoadHTMLString because it assumes that input | 248 // Do not use WebFrame.LoadHTMLString because it assumes that input |
153 // html contents use UTF-8 encoding. | 249 // html contents use UTF-8 encoding. |
154 // TODO(darin): This should use WebFrame::loadData. | 250 // TODO(darin): This should use WebFrame::loadData. |
155 WebFrame* web_frame = | 251 WebFrame* web_frame = GetMainFrame(); |
156 test_shell_->webView()->mainFrame(); | |
157 | 252 |
158 ASSERT_TRUE(web_frame != NULL); | 253 ASSERT_TRUE(web_frame != NULL); |
159 | 254 |
160 web_frame->loadData(data, "text/html", encoding_info, base_url); | 255 web_frame->loadData(data, "text/html", encoding_info, base_url); |
161 } | 256 } |
162 | 257 |
163 test_shell_->WaitTestFinished(); | 258 runner->Run(); |
164 } | 259 } |
165 | 260 |
166 // Serialize page DOM according to specific page URL. The parameter | 261 // Serialize page DOM according to specific page URL. The parameter |
167 // recursive_serialization indicates whether we will serialize all | 262 // recursive_serialization indicates whether we will serialize all |
168 // sub-frames. | 263 // sub-frames. |
169 void SerializeDomForURL(const GURL& page_url, | 264 void SerializeDomForURL(const GURL& page_url, |
170 bool recursive_serialization) { | 265 bool recursive_serialization) { |
171 // Find corresponding WebFrame according to page_url. | 266 // Find corresponding WebFrame according to page_url. |
172 WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), | 267 WebFrame* web_frame = FindSubFrameByURL(GetWebView(), page_url); |
173 page_url); | |
174 ASSERT_TRUE(web_frame != NULL); | 268 ASSERT_TRUE(web_frame != NULL); |
175 // Add input file URl to links_. | 269 // Add input file URl to links_. |
176 links_.assign(&page_url,1); | 270 links_.assign(&page_url,1); |
177 // Add dummy file path to local_path_. | 271 // Add dummy file path to local_path_. |
178 WebString file_path = webkit_base::FilePathStringToWebString( | 272 WebString file_path = webkit_base::FilePathStringToWebString( |
179 FILE_PATH_LITERAL("c:\\dummy.htm")); | 273 FILE_PATH_LITERAL("c:\\dummy.htm")); |
180 local_paths_.assign(&file_path, 1); | 274 local_paths_.assign(&file_path, 1); |
181 // Start serializing DOM. | 275 // Start serializing DOM. |
182 bool result = WebPageSerializer::serialize(web_frame, | 276 bool result = WebPageSerializer::serialize(web_frame, |
183 recursive_serialization, | 277 recursive_serialization, |
184 static_cast<WebPageSerializerClient*>(this), | 278 static_cast<WebPageSerializerClient*>(this), |
185 links_, | 279 links_, |
186 local_paths_, | 280 local_paths_, |
187 webkit_base::FilePathToWebString(local_directory_name_)); | 281 webkit_base::FilePathToWebString(local_directory_name_)); |
188 ASSERT_TRUE(result); | 282 ASSERT_TRUE(result); |
189 ASSERT_TRUE(serialized_); | 283 ASSERT_TRUE(serialized_); |
190 } | 284 } |
191 | 285 |
| 286 void SerializeHTMLDOMWithDocTypeOnRenderer(const GURL& file_url) { |
| 287 // Make sure original contents have document type. |
| 288 WebFrame* web_frame = FindSubFrameByURL(GetWebView(), file_url); |
| 289 ASSERT_TRUE(web_frame != NULL); |
| 290 WebDocument doc = web_frame->document(); |
| 291 ASSERT_TRUE(HasDocType(doc)); |
| 292 // Do serialization. |
| 293 SerializeDomForURL(file_url, false); |
| 294 // Load the serialized contents. |
| 295 ASSERT_TRUE(HasSerializedFrame(file_url)); |
| 296 const std::string& serialized_contents = |
| 297 GetSerializedContentForFrame(file_url); |
| 298 LoadContents(serialized_contents, file_url, |
| 299 web_frame->document().encoding()); |
| 300 // Make sure serialized contents still have document type. |
| 301 web_frame = GetMainFrame(); |
| 302 doc = web_frame->document(); |
| 303 ASSERT_TRUE(HasDocType(doc)); |
| 304 } |
| 305 |
| 306 void SerializeHTMLDOMWithoutDocTypeOnRenderer(const GURL& file_url) { |
| 307 // Make sure original contents do not have document type. |
| 308 WebFrame* web_frame = FindSubFrameByURL(GetWebView(), file_url); |
| 309 ASSERT_TRUE(web_frame != NULL); |
| 310 WebDocument doc = web_frame->document(); |
| 311 ASSERT_TRUE(!HasDocType(doc)); |
| 312 // Do serialization. |
| 313 SerializeDomForURL(file_url, false); |
| 314 // Load the serialized contents. |
| 315 ASSERT_TRUE(HasSerializedFrame(file_url)); |
| 316 const std::string& serialized_contents = |
| 317 GetSerializedContentForFrame(file_url); |
| 318 LoadContents(serialized_contents, file_url, |
| 319 web_frame->document().encoding()); |
| 320 // Make sure serialized contents do not have document type. |
| 321 web_frame = GetMainFrame(); |
| 322 doc = web_frame->document(); |
| 323 ASSERT_TRUE(!HasDocType(doc)); |
| 324 } |
| 325 |
| 326 void SerializeXMLDocWithBuiltInEntitiesOnRenderer( |
| 327 const GURL& xml_file_url, const std::string& original_contents) { |
| 328 // Do serialization. |
| 329 SerializeDomForURL(xml_file_url, false); |
| 330 // Compare the serialized contents with original contents. |
| 331 ASSERT_TRUE(HasSerializedFrame(xml_file_url)); |
| 332 const std::string& serialized_contents = |
| 333 GetSerializedContentForFrame(xml_file_url); |
| 334 ASSERT_EQ(original_contents, serialized_contents); |
| 335 } |
| 336 |
| 337 void SerializeHTMLDOMWithAddingMOTWOnRenderer( |
| 338 const GURL& file_url, const std::string& motw_declaration) { |
| 339 // Do serialization. |
| 340 SerializeDomForURL(file_url, false); |
| 341 // Make sure the serialized contents have MOTW ; |
| 342 ASSERT_TRUE(HasSerializedFrame(file_url)); |
| 343 const std::string& serialized_contents = |
| 344 GetSerializedContentForFrame(file_url); |
| 345 ASSERT_FALSE(std::string::npos == |
| 346 serialized_contents.find(motw_declaration)); |
| 347 } |
| 348 |
| 349 void SerializeHTMLDOMWithNoMetaCharsetInOriginalDocOnRenderer( |
| 350 const GURL& file_url) { |
| 351 // Make sure there is no META charset declaration in original document. |
| 352 WebFrame* web_frame = FindSubFrameByURL(GetWebView(), file_url); |
| 353 ASSERT_TRUE(web_frame != NULL); |
| 354 WebDocument doc = web_frame->document(); |
| 355 ASSERT_TRUE(doc.isHTMLDocument()); |
| 356 WebElement head_element = doc.head(); |
| 357 ASSERT_TRUE(!head_element.isNull()); |
| 358 // Go through all children of HEAD element. |
| 359 for (WebNode child = head_element.firstChild(); !child.isNull(); |
| 360 child = child.nextSibling()) { |
| 361 std::string charset_info; |
| 362 if (IsMetaElement(child, charset_info)) |
| 363 ASSERT_TRUE(charset_info.empty()); |
| 364 } |
| 365 // Do serialization. |
| 366 SerializeDomForURL(file_url, false); |
| 367 |
| 368 // Load the serialized contents. |
| 369 ASSERT_TRUE(HasSerializedFrame(file_url)); |
| 370 const std::string& serialized_contents = |
| 371 GetSerializedContentForFrame(file_url); |
| 372 LoadContents(serialized_contents, file_url, |
| 373 web_frame->document().encoding()); |
| 374 // Make sure the first child of HEAD element is META which has charset |
| 375 // declaration in serialized contents. |
| 376 web_frame = GetMainFrame(); |
| 377 ASSERT_TRUE(web_frame != NULL); |
| 378 doc = web_frame->document(); |
| 379 ASSERT_TRUE(doc.isHTMLDocument()); |
| 380 head_element = doc.head(); |
| 381 ASSERT_TRUE(!head_element.isNull()); |
| 382 WebNode meta_node = head_element.firstChild(); |
| 383 ASSERT_TRUE(!meta_node.isNull()); |
| 384 // Get meta charset info. |
| 385 std::string charset_info2; |
| 386 ASSERT_TRUE(IsMetaElement(meta_node, charset_info2)); |
| 387 ASSERT_TRUE(!charset_info2.empty()); |
| 388 ASSERT_EQ(charset_info2, |
| 389 std::string(web_frame->document().encoding().utf8())); |
| 390 |
| 391 // Make sure no more additional META tags which have charset declaration. |
| 392 for (WebNode child = meta_node.nextSibling(); !child.isNull(); |
| 393 child = child.nextSibling()) { |
| 394 std::string charset_info; |
| 395 if (IsMetaElement(child, charset_info)) |
| 396 ASSERT_TRUE(charset_info.empty()); |
| 397 } |
| 398 } |
| 399 |
| 400 void SerializeHTMLDOMWithMultipleMetaCharsetInOriginalDocOnRenderer( |
| 401 const GURL& file_url) { |
| 402 // Make sure there are multiple META charset declarations in original |
| 403 // document. |
| 404 WebFrame* web_frame = FindSubFrameByURL(GetWebView(), file_url); |
| 405 ASSERT_TRUE(web_frame != NULL); |
| 406 WebDocument doc = web_frame->document(); |
| 407 ASSERT_TRUE(doc.isHTMLDocument()); |
| 408 WebElement head_ele = doc.head(); |
| 409 ASSERT_TRUE(!head_ele.isNull()); |
| 410 // Go through all children of HEAD element. |
| 411 int charset_declaration_count = 0; |
| 412 for (WebNode child = head_ele.firstChild(); !child.isNull(); |
| 413 child = child.nextSibling()) { |
| 414 std::string charset_info; |
| 415 if (IsMetaElement(child, charset_info) && !charset_info.empty()) |
| 416 charset_declaration_count++; |
| 417 } |
| 418 // The original doc has more than META tags which have charset declaration. |
| 419 ASSERT_TRUE(charset_declaration_count > 1); |
| 420 |
| 421 // Do serialization. |
| 422 SerializeDomForURL(file_url, false); |
| 423 |
| 424 // Load the serialized contents. |
| 425 ASSERT_TRUE(HasSerializedFrame(file_url)); |
| 426 const std::string& serialized_contents = |
| 427 GetSerializedContentForFrame(file_url); |
| 428 LoadContents(serialized_contents, file_url, |
| 429 web_frame->document().encoding()); |
| 430 // Make sure only first child of HEAD element is META which has charset |
| 431 // declaration in serialized contents. |
| 432 web_frame = GetMainFrame(); |
| 433 ASSERT_TRUE(web_frame != NULL); |
| 434 doc = web_frame->document(); |
| 435 ASSERT_TRUE(doc.isHTMLDocument()); |
| 436 head_ele = doc.head(); |
| 437 ASSERT_TRUE(!head_ele.isNull()); |
| 438 WebNode meta_node = head_ele.firstChild(); |
| 439 ASSERT_TRUE(!meta_node.isNull()); |
| 440 // Get meta charset info. |
| 441 std::string charset_info2; |
| 442 ASSERT_TRUE(IsMetaElement(meta_node, charset_info2)); |
| 443 ASSERT_TRUE(!charset_info2.empty()); |
| 444 ASSERT_EQ(charset_info2, |
| 445 std::string(web_frame->document().encoding().utf8())); |
| 446 |
| 447 // Make sure no more additional META tags which have charset declaration. |
| 448 for (WebNode child = meta_node.nextSibling(); !child.isNull(); |
| 449 child = child.nextSibling()) { |
| 450 std::string charset_info; |
| 451 if (IsMetaElement(child, charset_info)) |
| 452 ASSERT_TRUE(charset_info.empty()); |
| 453 } |
| 454 } |
| 455 |
| 456 void SerializeHTMLDOMWithEntitiesInTextOnRenderer() { |
| 457 base::FilePath page_file_path = GetTestFilePath( |
| 458 "dom_serializer", "dom_serializer/htmlentities_in_text.htm"); |
| 459 // Get file URL. The URL is dummy URL to identify the following loading |
| 460 // actions. The test content is in constant:original_contents. |
| 461 GURL file_url = net::FilePathToFileURL(page_file_path); |
| 462 ASSERT_TRUE(file_url.SchemeIsFile()); |
| 463 // Test contents. |
| 464 static const char* const original_contents = |
| 465 "<html><body>&<>\"\'</body></html>"; |
| 466 // Load the test contents. |
| 467 LoadContents(original_contents, file_url, WebString()); |
| 468 |
| 469 // Get BODY's text content in DOM. |
| 470 WebFrame* web_frame = FindSubFrameByURL(GetWebView(), file_url); |
| 471 ASSERT_TRUE(web_frame != NULL); |
| 472 WebDocument doc = web_frame->document(); |
| 473 ASSERT_TRUE(doc.isHTMLDocument()); |
| 474 WebElement body_ele = doc.body(); |
| 475 ASSERT_TRUE(!body_ele.isNull()); |
| 476 WebNode text_node = body_ele.firstChild(); |
| 477 ASSERT_TRUE(text_node.isTextNode()); |
| 478 ASSERT_TRUE(std::string(text_node.createMarkup().utf8()) == |
| 479 "&<>\"\'"); |
| 480 // Do serialization. |
| 481 SerializeDomForURL(file_url, false); |
| 482 // Compare the serialized contents with original contents. |
| 483 ASSERT_TRUE(HasSerializedFrame(file_url)); |
| 484 const std::string& serialized_contents = |
| 485 GetSerializedContentForFrame(file_url); |
| 486 // Compare the serialized contents with original contents to make sure |
| 487 // they are same. |
| 488 // Because we add MOTW when serializing DOM, so before comparison, we also |
| 489 // need to add MOTW to original_contents. |
| 490 std::string original_str = |
| 491 WebPageSerializer::generateMarkOfTheWebDeclaration(file_url).utf8(); |
| 492 original_str += original_contents; |
| 493 // Since WebCore now inserts a new HEAD element if there is no HEAD element |
| 494 // when creating BODY element. (Please see |
| 495 // HTMLParser::bodyCreateErrorCheck.) We need to append the HEAD content and |
| 496 // corresponding META content if we find WebCore-generated HEAD element. |
| 497 if (!doc.head().isNull()) { |
| 498 WebString encoding = web_frame->document().encoding(); |
| 499 std::string htmlTag("<html>"); |
| 500 std::string::size_type pos = original_str.find(htmlTag); |
| 501 ASSERT_NE(std::string::npos, pos); |
| 502 pos += htmlTag.length(); |
| 503 std::string head_part("<head>"); |
| 504 head_part += |
| 505 WebPageSerializer::generateMetaCharsetDeclaration(encoding).utf8(); |
| 506 head_part += "</head>"; |
| 507 original_str.insert(pos, head_part); |
| 508 } |
| 509 ASSERT_EQ(original_str, serialized_contents); |
| 510 } |
| 511 |
| 512 void SerializeHTMLDOMWithEntitiesInAttributeValueOnRenderer() { |
| 513 base::FilePath page_file_path = GetTestFilePath( |
| 514 "dom_serializer", "dom_serializer/htmlentities_in_attribute_value.htm"); |
| 515 // Get file URL. The URL is dummy URL to identify the following loading |
| 516 // actions. The test content is in constant:original_contents. |
| 517 GURL file_url = net::FilePathToFileURL(page_file_path); |
| 518 ASSERT_TRUE(file_url.SchemeIsFile()); |
| 519 // Test contents. |
| 520 static const char* const original_contents = |
| 521 "<html><body title=\"&<>"'\"></body></html>"; |
| 522 // Load the test contents. |
| 523 LoadContents(original_contents, file_url, WebString()); |
| 524 // Get value of BODY's title attribute in DOM. |
| 525 WebFrame* web_frame = FindSubFrameByURL(GetWebView(), file_url); |
| 526 ASSERT_TRUE(web_frame != NULL); |
| 527 WebDocument doc = web_frame->document(); |
| 528 ASSERT_TRUE(doc.isHTMLDocument()); |
| 529 WebElement body_ele = doc.body(); |
| 530 ASSERT_TRUE(!body_ele.isNull()); |
| 531 WebString value = body_ele.getAttribute("title"); |
| 532 ASSERT_TRUE(std::string(value.utf8()) == "&<>\"\'"); |
| 533 // Do serialization. |
| 534 SerializeDomForURL(file_url, false); |
| 535 // Compare the serialized contents with original contents. |
| 536 ASSERT_TRUE(HasSerializedFrame(file_url)); |
| 537 const std::string& serialized_contents = |
| 538 GetSerializedContentForFrame(file_url); |
| 539 // Compare the serialized contents with original contents to make sure |
| 540 // they are same. |
| 541 std::string original_str = |
| 542 WebPageSerializer::generateMarkOfTheWebDeclaration(file_url).utf8(); |
| 543 original_str += original_contents; |
| 544 if (!doc.isNull()) { |
| 545 WebString encoding = web_frame->document().encoding(); |
| 546 std::string htmlTag("<html>"); |
| 547 std::string::size_type pos = original_str.find(htmlTag); |
| 548 ASSERT_NE(std::string::npos, pos); |
| 549 pos += htmlTag.length(); |
| 550 std::string head_part("<head>"); |
| 551 head_part += |
| 552 WebPageSerializer::generateMetaCharsetDeclaration(encoding).utf8(); |
| 553 head_part += "</head>"; |
| 554 original_str.insert(pos, head_part); |
| 555 } |
| 556 ASSERT_EQ(original_str, serialized_contents); |
| 557 } |
| 558 |
| 559 void SerializeHTMLDOMWithNonStandardEntitiesOnRenderer(const GURL& file_url) { |
| 560 // Get value of BODY's title attribute in DOM. |
| 561 WebFrame* web_frame = FindSubFrameByURL(GetWebView(), file_url); |
| 562 WebDocument doc = web_frame->document(); |
| 563 ASSERT_TRUE(doc.isHTMLDocument()); |
| 564 WebElement body_element = doc.body(); |
| 565 // Unescaped string for "%⊅¹'". |
| 566 static const wchar_t parsed_value[] = { |
| 567 '%', 0x2285, 0x00b9, '\'', 0 |
| 568 }; |
| 569 WebString value = body_element.getAttribute("title"); |
| 570 ASSERT_TRUE(UTF16ToWide(value) == parsed_value); |
| 571 ASSERT_TRUE(UTF16ToWide(body_element.innerText()) == parsed_value); |
| 572 |
| 573 // Do serialization. |
| 574 SerializeDomForURL(file_url, false); |
| 575 // Check the serialized string. |
| 576 ASSERT_TRUE(HasSerializedFrame(file_url)); |
| 577 const std::string& serialized_contents = |
| 578 GetSerializedContentForFrame(file_url); |
| 579 // Confirm that the serialized string has no non-standard HTML entities. |
| 580 ASSERT_EQ(std::string::npos, serialized_contents.find("%")); |
| 581 ASSERT_EQ(std::string::npos, serialized_contents.find("⊅")); |
| 582 ASSERT_EQ(std::string::npos, serialized_contents.find("¹")); |
| 583 ASSERT_EQ(std::string::npos, serialized_contents.find("'")); |
| 584 } |
| 585 |
| 586 void SerializeHTMLDOMWithBaseTagOnRenderer(const GURL& file_url, |
| 587 const GURL& path_dir_url) { |
| 588 // There are total 2 available base tags in this test file. |
| 589 const int kTotalBaseTagCountInTestFile = 2; |
| 590 |
| 591 // Since for this test, we assume there is no savable sub-resource links for |
| 592 // this test file, also all links are relative URLs in this test file, so we |
| 593 // need to check those relative URLs and make sure document has BASE tag. |
| 594 WebFrame* web_frame = FindSubFrameByURL(GetWebView(), file_url); |
| 595 ASSERT_TRUE(web_frame != NULL); |
| 596 WebDocument doc = web_frame->document(); |
| 597 ASSERT_TRUE(doc.isHTMLDocument()); |
| 598 // Go through all descent nodes. |
| 599 WebNodeCollection all = doc.all(); |
| 600 int original_base_tag_count = 0; |
| 601 for (WebNode node = all.firstItem(); !node.isNull(); |
| 602 node = all.nextItem()) { |
| 603 if (!node.isElementNode()) |
| 604 continue; |
| 605 WebElement element = node.to<WebElement>(); |
| 606 if (element.hasTagName("base")) { |
| 607 original_base_tag_count++; |
| 608 } else { |
| 609 // Get link. |
| 610 WebString value = |
| 611 webkit_glue::GetSubResourceLinkFromElement(element); |
| 612 if (value.isNull() && element.hasTagName("a")) { |
| 613 value = element.getAttribute("href"); |
| 614 if (value.isEmpty()) |
| 615 value = WebString(); |
| 616 } |
| 617 // Each link is relative link. |
| 618 if (!value.isNull()) { |
| 619 GURL link(value.utf8()); |
| 620 ASSERT_TRUE(link.scheme().empty()); |
| 621 } |
| 622 } |
| 623 } |
| 624 ASSERT_EQ(original_base_tag_count, kTotalBaseTagCountInTestFile); |
| 625 // Make sure in original document, the base URL is not equal with the |
| 626 // |path_dir_url|. |
| 627 GURL original_base_url(doc.baseURL()); |
| 628 ASSERT_NE(original_base_url, path_dir_url); |
| 629 |
| 630 // Do serialization. |
| 631 SerializeDomForURL(file_url, false); |
| 632 |
| 633 // Load the serialized contents. |
| 634 ASSERT_TRUE(HasSerializedFrame(file_url)); |
| 635 const std::string& serialized_contents = |
| 636 GetSerializedContentForFrame(file_url); |
| 637 LoadContents(serialized_contents, file_url, |
| 638 web_frame->document().encoding()); |
| 639 |
| 640 // Make sure all links are absolute URLs and that there are some BASE tags |
| 641 // in the serialized HTML data. Each of those BASE tags has the same base |
| 642 // URL, which is the same as the URL of the current test file. |
| 643 web_frame = GetMainFrame(); |
| 644 ASSERT_TRUE(web_frame != NULL); |
| 645 doc = web_frame->document(); |
| 646 ASSERT_TRUE(doc.isHTMLDocument()); |
| 647 // Go through all descendant nodes. |
| 648 all = doc.all(); |
| 649 int new_base_tag_count = 0; |
| 650 for (WebNode node = all.firstItem(); !node.isNull(); |
| 651 node = all.nextItem()) { |
| 652 if (!node.isElementNode()) |
| 653 continue; |
| 654 WebElement element = node.to<WebElement>(); |
| 655 if (element.hasTagName("base")) { |
| 656 new_base_tag_count++; |
| 657 } else { |
| 658 // Get link. |
| 659 WebString value = |
| 660 webkit_glue::GetSubResourceLinkFromElement(element); |
| 661 if (value.isNull() && element.hasTagName("a")) { |
| 662 value = element.getAttribute("href"); |
| 663 if (value.isEmpty()) |
| 664 value = WebString(); |
| 665 } |
| 666 // Each link is absolute link. |
| 667 if (!value.isNull()) { |
| 668 GURL link(std::string(value.utf8())); |
| 669 ASSERT_FALSE(link.scheme().empty()); |
| 670 } |
| 671 } |
| 672 } |
| 673 // We have one more added BASE tag which is generated by JavaScript. |
| 674 ASSERT_EQ(new_base_tag_count, original_base_tag_count + 1); |
| 675 // Make sure that in the new document, the base URL is equal to |path_dir_url|. |
| 676 GURL new_base_url(doc.baseURL()); |
| 677 ASSERT_EQ(new_base_url, path_dir_url); |
| 678 } |
| 679 |
| 680 void SerializeHTMLDOMWithEmptyHeadOnRenderer() { |
| 681 base::FilePath page_file_path = GetTestFilePath( |
| 682 "dom_serializer", "empty_head.htm"); |
| 683 GURL file_url = net::FilePathToFileURL(page_file_path); |
| 684 ASSERT_TRUE(file_url.SchemeIsFile()); |
| 685 |
| 686 // Load the test html content. |
| 687 static const char* const empty_head_contents = |
| 688 "<html><head></head><body>hello world</body></html>"; |
| 689 LoadContents(empty_head_contents, file_url, WebString()); |
| 690 |
| 691 // Make sure the head tag is empty. |
| 692 WebFrame* web_frame = GetMainFrame(); |
| 693 ASSERT_TRUE(web_frame != NULL); |
| 694 WebDocument doc = web_frame->document(); |
| 695 ASSERT_TRUE(doc.isHTMLDocument()); |
| 696 WebElement head_element = doc.head(); |
| 697 ASSERT_TRUE(!head_element.isNull()); |
| 698 ASSERT_TRUE(!head_element.hasChildNodes()); |
| 699 ASSERT_TRUE(head_element.childNodes().length() == 0); |
| 700 |
| 701 // Do serialization. |
| 702 SerializeDomForURL(file_url, false); |
| 703 // Make sure the serialized contents have a META tag. |
| 704 ASSERT_TRUE(HasSerializedFrame(file_url)); |
| 705 const std::string& serialized_contents = |
| 706 GetSerializedContentForFrame(file_url); |
| 707 |
| 708 // Reload serialized contents and make sure there is only one META tag. |
| 709 LoadContents(serialized_contents, file_url, |
| 710 web_frame->document().encoding()); |
| 711 web_frame = GetMainFrame(); |
| 712 ASSERT_TRUE(web_frame != NULL); |
| 713 doc = web_frame->document(); |
| 714 ASSERT_TRUE(doc.isHTMLDocument()); |
| 715 head_element = doc.head(); |
| 716 ASSERT_TRUE(!head_element.isNull()); |
| 717 ASSERT_TRUE(head_element.hasChildNodes()); |
| 718 ASSERT_TRUE(head_element.childNodes().length() == 1); |
| 719 WebNode meta_node = head_element.firstChild(); |
| 720 ASSERT_TRUE(!meta_node.isNull()); |
| 721 // Get meta charset info. |
| 722 std::string charset_info; |
| 723 ASSERT_TRUE(IsMetaElement(meta_node, charset_info)); |
| 724 ASSERT_TRUE(!charset_info.empty()); |
| 725 ASSERT_EQ(charset_info, |
| 726 std::string(web_frame->document().encoding().utf8())); |
| 727 |
| 728 // Check that the body's first node is a text node and that its contents |
| 729 // are "hello world". |
| 730 WebElement body_element = doc.body(); |
| 731 ASSERT_TRUE(!body_element.isNull()); |
| 732 WebNode text_node = body_element.firstChild(); |
| 733 ASSERT_TRUE(text_node.isTextNode()); |
| 734 WebString text_node_contents = text_node.nodeValue(); |
| 735 ASSERT_TRUE(std::string(text_node_contents.utf8()) == "hello world"); |
| 736 } |
| 737 |
| 738 void SerializeDocumentWithDownloadedIFrameOnRenderer(const GURL& file_url) { |
| 739 // Do a recursive serialization. We pass if we don't crash. |
| 740 SerializeDomForURL(file_url, true); |
| 741 } |
| 742 |
| 743 void SubResourceForElementsInNonHTMLNamespaceOnRenderer( |
| 744 const GURL& file_url) { |
| 745 WebFrame* web_frame = FindSubFrameByURL(GetWebView(), file_url); |
| 746 ASSERT_TRUE(web_frame != NULL); |
| 747 WebDocument doc = web_frame->document(); |
| 748 WebNode lastNodeInBody = doc.body().lastChild(); |
| 749 ASSERT_EQ(WebNode::ElementNode, lastNodeInBody.nodeType()); |
| 750 WebString uri = webkit_glue::GetSubResourceLinkFromElement( |
| 751 lastNodeInBody.to<WebElement>()); |
| 752 EXPECT_TRUE(uri.isNull()); |
| 753 } |
| 754 |
192 private: | 755 private: |
193 // Map frame_url to corresponding serialized_content. | 756 // Map frame_url to corresponding serialized_content. |
194 typedef base::hash_map<std::string, std::string> SerializedFrameContentMap; | 757 typedef base::hash_map<std::string, std::string> SerializedFrameContentMap; |
195 SerializedFrameContentMap serialized_frame_map_; | 758 SerializedFrameContentMap serialized_frame_map_; |
196 // Map frame_url to corresponding status of serialization finish. | 759 // Map frame_url to corresponding status of serialization finish. |
197 typedef base::hash_map<std::string, bool> SerializationFinishStatusMap; | 760 typedef base::hash_map<std::string, bool> SerializationFinishStatusMap; |
198 SerializationFinishStatusMap serialization_finish_status_; | 761 SerializationFinishStatusMap serialization_finish_status_; |
199 // Flag indicates whether the process of serializing DOM is finished or not. | 762 // Flag indicates whether the process of serializing DOM is finished or not. |
200 bool serialized_; | 763 bool serialized_; |
201 // The links_ contain dummy original URLs of all saved links. | 764 // The links_ contain dummy original URLs of all saved links. |
202 WebVector<WebURL> links_; | 765 WebVector<WebURL> links_; |
203 // The local_paths_ contain dummy corresponding local file paths of all saved | 766 // The local_paths_ contain dummy corresponding local file paths of all saved |
204 // links, which matched links_ one by one. | 767 // links, which matched links_ one by one. |
205 WebVector<WebString> local_paths_; | 768 WebVector<WebString> local_paths_; |
206 // The local_directory_name_ is dummy relative path of directory which | 769 // The local_directory_name_ is dummy relative path of directory which |
207 // contain all saved auxiliary files included all sub frames and resources. | 770 // contain all saved auxiliary files included all sub frames and resources. |
208 const base::FilePath local_directory_name_; | 771 const base::FilePath local_directory_name_; |
209 | |
210 protected: | |
211 // testing::Test | |
212 virtual void SetUp() { | |
213 TestShellTest::SetUp(); | |
214 serialized_ = false; | |
215 } | |
216 | |
217 virtual void TearDown() { | |
218 TestShellTest::TearDown(); | |
219 } | |
220 }; | 772 }; |
221 | 773 |
222 // Helper function that test whether the first node in the doc is a doc type | |
223 // node. | |
224 bool HasDocType(const WebDocument& doc) { | |
225 WebNode node = doc.firstChild(); | |
226 if (node.isNull()) | |
227 return false; | |
228 return node.nodeType() == WebNode::DocumentTypeNode; | |
229 } | |
230 | |
231 // Helper function for checking whether input node is META tag. Return true | |
232 // means it is META element, otherwise return false. The parameter charset_info | |
233 // return actual charset info if the META tag has charset declaration. | |
234 bool IsMetaElement(const WebNode& node, std::string& charset_info) { | |
235 if (!node.isElementNode()) | |
236 return false; | |
237 const WebElement meta = node.toConst<WebElement>(); | |
238 if (!meta.hasTagName("meta")) | |
239 return false; | |
240 charset_info.erase(0, charset_info.length()); | |
241 // Check the META charset declaration. | |
242 WebString httpEquiv = meta.getAttribute("http-equiv"); | |
243 if (LowerCaseEqualsASCII(httpEquiv, "content-type")) { | |
244 std::string content = meta.getAttribute("content").utf8(); | |
245 int pos = content.find("charset", 0); | |
246 if (pos > -1) { | |
247 // Add a dummy charset declaration to charset_info, which indicates this | |
248 // META tag has charset declaration although we do not get correct value | |
249 // yet. | |
250 charset_info.append("has-charset-declaration"); | |
251 int remaining_length = content.length() - pos - 7; | |
252 if (!remaining_length) | |
253 return true; | |
254 int start_pos = pos + 7; | |
255 // Find "=" symbol. | |
256 while (remaining_length--) | |
257 if (content[start_pos++] == L'=') | |
258 break; | |
259 // Skip beginning space. | |
260 while (remaining_length) { | |
261 if (content[start_pos] > 0x0020) | |
262 break; | |
263 ++start_pos; | |
264 --remaining_length; | |
265 } | |
266 if (!remaining_length) | |
267 return true; | |
268 int end_pos = start_pos; | |
269 // Now we find out the start point of charset info. Search the end point. | |
270 while (remaining_length--) { | |
271 if (content[end_pos] <= 0x0020 || content[end_pos] == L';') | |
272 break; | |
273 ++end_pos; | |
274 } | |
275 // Get actual charset info. | |
276 charset_info = content.substr(start_pos, end_pos - start_pos); | |
277 return true; | |
278 } | |
279 } | |
280 return true; | |
281 } | |
282 | |
283 // If original contents have document type, the serialized contents also have | 774 // If original contents have document type, the serialized contents also have |
284 // document type. | 775 // document type. |
285 TEST_F(DomSerializerTests, SerializeHTMLDOMWithDocType) { | 776 IN_PROC_BROWSER_TEST_F(DomSerializerTests, SerializeHTMLDOMWithDocType) { |
286 base::FilePath page_file_path = data_dir_; | 777 base::FilePath page_file_path = |
287 page_file_path = page_file_path.AppendASCII("dom_serializer"); | 778 GetTestFilePath("dom_serializer", "youtube_1.htm"); |
288 page_file_path = page_file_path.AppendASCII("youtube_1.htm"); | |
289 GURL file_url = net::FilePathToFileURL(page_file_path); | 779 GURL file_url = net::FilePathToFileURL(page_file_path); |
290 ASSERT_TRUE(file_url.SchemeIsFile()); | 780 ASSERT_TRUE(file_url.SchemeIsFile()); |
291 // Load the test file. | 781 // Load the test file. |
292 LoadPageFromURL(file_url); | 782 NavigateToURL(shell(), file_url); |
293 // Make sure original contents have document type. | 783 |
294 WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); | 784 PostTaskToInProcessRendererAndWait( |
295 ASSERT_TRUE(web_frame != NULL); | 785 base::Bind(&DomSerializerTests::SerializeHTMLDOMWithDocTypeOnRenderer, |
296 WebDocument doc = web_frame->document(); | 786 base::Unretained(this), file_url)); |
297 ASSERT_TRUE(HasDocType(doc)); | |
298 // Do serialization. | |
299 SerializeDomForURL(file_url, false); | |
300 // Load the serialized contents. | |
301 ASSERT_TRUE(HasSerializedFrame(file_url)); | |
302 const std::string& serialized_contents = | |
303 GetSerializedContentForFrame(file_url); | |
304 LoadContents(serialized_contents, file_url, | |
305 web_frame->document().encoding()); | |
306 // Make sure serialized contents still have document type. | |
307 web_frame = test_shell_->webView()->mainFrame(); | |
308 doc = web_frame->document(); | |
309 ASSERT_TRUE(HasDocType(doc)); | |
310 } | 787 } |
311 | 788 |
312 // If original contents do not have document type, the serialized contents | 789 // If original contents do not have document type, the serialized contents |
313 // also do not have document type. | 790 // also do not have document type. |
314 TEST_F(DomSerializerTests, SerializeHTMLDOMWithoutDocType) { | 791 IN_PROC_BROWSER_TEST_F(DomSerializerTests, SerializeHTMLDOMWithoutDocType) { |
315 base::FilePath page_file_path = data_dir_; | 792 base::FilePath page_file_path = |
316 page_file_path = page_file_path.AppendASCII("dom_serializer"); | 793 GetTestFilePath("dom_serializer", "youtube_2.htm"); |
317 page_file_path = page_file_path.AppendASCII("youtube_2.htm"); | |
318 GURL file_url = net::FilePathToFileURL(page_file_path); | 794 GURL file_url = net::FilePathToFileURL(page_file_path); |
319 ASSERT_TRUE(file_url.SchemeIsFile()); | 795 ASSERT_TRUE(file_url.SchemeIsFile()); |
320 // Load the test file. | 796 // Load the test file. |
321 LoadPageFromURL(file_url); | 797 NavigateToURL(shell(), file_url); |
322 // Make sure original contents do not have document type. | 798 |
323 WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); | 799 PostTaskToInProcessRendererAndWait( |
324 ASSERT_TRUE(web_frame != NULL); | 800 base::Bind( |
325 WebDocument doc = web_frame->document(); | 801 &DomSerializerTests::SerializeHTMLDOMWithoutDocTypeOnRenderer, |
326 ASSERT_TRUE(!HasDocType(doc)); | 802 base::Unretained(this), file_url)); |
327 // Do serialization. | |
328 SerializeDomForURL(file_url, false); | |
329 // Load the serialized contents. | |
330 ASSERT_TRUE(HasSerializedFrame(file_url)); | |
331 const std::string& serialized_contents = | |
332 GetSerializedContentForFrame(file_url); | |
333 LoadContents(serialized_contents, file_url, | |
334 web_frame->document().encoding()); | |
335 // Make sure serialized contents do not have document type. | |
336 web_frame = test_shell_->webView()->mainFrame(); | |
337 doc = web_frame->document(); | |
338 ASSERT_TRUE(!HasDocType(doc)); | |
339 } | 803 } |
340 | 804 |
341 // Serialize XML document which has all 5 built-in entities. After | 805 // Serialize XML document which has all 5 built-in entities. After |
342 // finishing serialization, the serialized contents should be same | 806 // finishing serialization, the serialized contents should be same |
343 // with original XML document. | 807 // with original XML document. |
344 TEST_F(DomSerializerTests, SerializeXMLDocWithBuiltInEntities) { | 808 IN_PROC_BROWSER_TEST_F(DomSerializerTests, SerializeXMLDocWithBuiltInEntities) { |
345 base::FilePath page_file_path = data_dir_; | 809 base::FilePath page_file_path = |
346 page_file_path = page_file_path.AppendASCII("dom_serializer"); | 810 GetTestFilePath("dom_serializer", "note.html"); |
347 page_file_path = page_file_path.AppendASCII("note.html"); | 811 base::FilePath xml_file_path = GetTestFilePath("dom_serializer", "note.xml"); |
348 base::FilePath xml_file_path = data_dir_; | |
349 xml_file_path = xml_file_path.AppendASCII("dom_serializer"); | |
350 xml_file_path = xml_file_path.AppendASCII("note.xml"); | |
351 // Read original contents for later comparison. | 812 // Read original contents for later comparison. |
352 std::string original_contents; | 813 std::string original_contents; |
353 ASSERT_TRUE(file_util::ReadFileToString(xml_file_path, &original_contents)); | 814 ASSERT_TRUE(file_util::ReadFileToString(xml_file_path, &original_contents)); |
354 // Get file URL. | 815 // Get file URL. |
355 GURL file_url = net::FilePathToFileURL(page_file_path); | 816 GURL file_url = net::FilePathToFileURL(page_file_path); |
356 GURL xml_file_url = net::FilePathToFileURL(xml_file_path); | 817 GURL xml_file_url = net::FilePathToFileURL(xml_file_path); |
357 ASSERT_TRUE(file_url.SchemeIsFile()); | 818 ASSERT_TRUE(file_url.SchemeIsFile()); |
358 // Load the test file. | 819 // Load the test file. |
359 LoadPageFromURL(file_url); | 820 NavigateToURL(shell(), file_url); |
360 // Do serialization. | 821 |
361 SerializeDomForURL(xml_file_url, false); | 822 PostTaskToInProcessRendererAndWait( |
362 // Compare the serialized contents with original contents. | 823 base::Bind( |
363 ASSERT_TRUE(HasSerializedFrame(xml_file_url)); | 824 &DomSerializerTests::SerializeXMLDocWithBuiltInEntitiesOnRenderer, |
364 const std::string& serialized_contents = | 825 base::Unretained(this), xml_file_url, original_contents)); |
365 GetSerializedContentForFrame(xml_file_url); | |
366 ASSERT_EQ(original_contents, serialized_contents); | |
367 } | 826 } |
368 | 827 |
369 // When serializing DOM, we add MOTW declaration before html tag. | 828 // When serializing DOM, we add MOTW declaration before html tag. |
370 TEST_F(DomSerializerTests, SerializeHTMLDOMWithAddingMOTW) { | 829 IN_PROC_BROWSER_TEST_F(DomSerializerTests, SerializeHTMLDOMWithAddingMOTW) { |
371 base::FilePath page_file_path = data_dir_; | 830 base::FilePath page_file_path = |
372 page_file_path = page_file_path.AppendASCII("dom_serializer"); | 831 GetTestFilePath("dom_serializer", "youtube_2.htm"); |
373 page_file_path = page_file_path.AppendASCII("youtube_2.htm"); | |
374 // Read original contents for later comparison . | 832 // Read original contents for later comparison . |
375 std::string original_contents; | 833 std::string original_contents; |
376 ASSERT_TRUE(file_util::ReadFileToString(page_file_path, &original_contents)); | 834 ASSERT_TRUE(file_util::ReadFileToString(page_file_path, &original_contents)); |
377 // Get file URL. | 835 // Get file URL. |
378 GURL file_url = net::FilePathToFileURL(page_file_path); | 836 GURL file_url = net::FilePathToFileURL(page_file_path); |
379 ASSERT_TRUE(file_url.SchemeIsFile()); | 837 ASSERT_TRUE(file_url.SchemeIsFile()); |
380 // Make sure original contents does not have MOTW; | 838 // Make sure original contents does not have MOTW; |
381 std::string motw_declaration = | 839 std::string motw_declaration = |
382 WebPageSerializer::generateMarkOfTheWebDeclaration(file_url).utf8(); | 840 WebPageSerializer::generateMarkOfTheWebDeclaration(file_url).utf8(); |
383 ASSERT_FALSE(motw_declaration.empty()); | 841 ASSERT_FALSE(motw_declaration.empty()); |
384 // The encoding of original contents is ISO-8859-1, so we convert the MOTW | 842 // The encoding of original contents is ISO-8859-1, so we convert the MOTW |
385 // declaration to ASCII and search whether original contents has it or not. | 843 // declaration to ASCII and search whether original contents has it or not. |
386 ASSERT_TRUE(std::string::npos == | 844 ASSERT_TRUE(std::string::npos == original_contents.find(motw_declaration)); |
387 original_contents.find(motw_declaration)); | |
388 // Load the test file. | 845 // Load the test file. |
389 LoadPageFromURL(file_url); | 846 NavigateToURL(shell(), file_url); |
390 // Do serialization. | 847 |
391 SerializeDomForURL(file_url, false); | 848 PostTaskToInProcessRendererAndWait( |
392 // Make sure the serialized contents have MOTW ; | 849 base::Bind( |
393 ASSERT_TRUE(HasSerializedFrame(file_url)); | 850 &DomSerializerTests::SerializeHTMLDOMWithAddingMOTWOnRenderer, |
394 const std::string& serialized_contents = | 851 base::Unretained(this), file_url, motw_declaration)); |
395 GetSerializedContentForFrame(file_url); | |
396 ASSERT_FALSE(std::string::npos == | |
397 serialized_contents.find(motw_declaration)); | |
398 } | 852 } |
399 | 853 |
400 // When serializing DOM, we will add the META which have correct charset | 854 // When serializing DOM, we will add the META which have correct charset |
401 // declaration as first child of HEAD element for resolving WebKit bug: | 855 // declaration as first child of HEAD element for resolving WebKit bug: |
402 // http://bugs.webkit.org/show_bug.cgi?id=16621 even the original document | 856 // http://bugs.webkit.org/show_bug.cgi?id=16621 even the original document |
403 // does not have META charset declaration. | 857 // does not have META charset declaration. |
404 TEST_F(DomSerializerTests, SerializeHTMLDOMWithNoMetaCharsetInOriginalDoc) { | 858 IN_PROC_BROWSER_TEST_F(DomSerializerTests, |
405 base::FilePath page_file_path = data_dir_; | 859 SerializeHTMLDOMWithNoMetaCharsetInOriginalDoc) { |
406 page_file_path = page_file_path.AppendASCII("dom_serializer"); | 860 base::FilePath page_file_path = |
407 page_file_path = page_file_path.AppendASCII("youtube_1.htm"); | 861 GetTestFilePath("dom_serializer", "youtube_1.htm"); |
408 // Get file URL. | 862 // Get file URL. |
409 GURL file_url = net::FilePathToFileURL(page_file_path); | 863 GURL file_url = net::FilePathToFileURL(page_file_path); |
410 ASSERT_TRUE(file_url.SchemeIsFile()); | 864 ASSERT_TRUE(file_url.SchemeIsFile()); |
411 // Load the test file. | 865 // Load the test file. |
412 LoadPageFromURL(file_url); | 866 NavigateToURL(shell(), file_url); |
413 | 867 |
414 // Make sure there is no META charset declaration in original document. | 868 PostTaskToInProcessRendererAndWait( |
415 WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); | 869 base::Bind( |
416 ASSERT_TRUE(web_frame != NULL); | 870 &DomSerializerTests:: |
417 WebDocument doc = web_frame->document(); | 871 SerializeHTMLDOMWithNoMetaCharsetInOriginalDocOnRenderer, |
418 ASSERT_TRUE(doc.isHTMLDocument()); | 872 base::Unretained(this), file_url)); |
419 WebElement head_element = doc.head(); | |
420 ASSERT_TRUE(!head_element.isNull()); | |
421 // Go through all children of HEAD element. | |
422 for (WebNode child = head_element.firstChild(); !child.isNull(); | |
423 child = child.nextSibling()) { | |
424 std::string charset_info; | |
425 if (IsMetaElement(child, charset_info)) | |
426 ASSERT_TRUE(charset_info.empty()); | |
427 } | |
428 // Do serialization. | |
429 SerializeDomForURL(file_url, false); | |
430 | |
431 // Load the serialized contents. | |
432 ASSERT_TRUE(HasSerializedFrame(file_url)); | |
433 const std::string& serialized_contents = | |
434 GetSerializedContentForFrame(file_url); | |
435 LoadContents(serialized_contents, file_url, | |
436 web_frame->document().encoding()); | |
437 // Make sure the first child of HEAD element is META which has charset | |
438 // declaration in serialized contents. | |
439 web_frame = test_shell_->webView()->mainFrame(); | |
440 ASSERT_TRUE(web_frame != NULL); | |
441 doc = web_frame->document(); | |
442 ASSERT_TRUE(doc.isHTMLDocument()); | |
443 head_element = doc.head(); | |
444 ASSERT_TRUE(!head_element.isNull()); | |
445 WebNode meta_node = head_element.firstChild(); | |
446 ASSERT_TRUE(!meta_node.isNull()); | |
447 // Get meta charset info. | |
448 std::string charset_info2; | |
449 ASSERT_TRUE(IsMetaElement(meta_node, charset_info2)); | |
450 ASSERT_TRUE(!charset_info2.empty()); | |
451 ASSERT_EQ(charset_info2, | |
452 std::string(web_frame->document().encoding().utf8())); | |
453 | |
454 // Make sure no more additional META tags which have charset declaration. | |
455 for (WebNode child = meta_node.nextSibling(); !child.isNull(); | |
456 child = child.nextSibling()) { | |
457 std::string charset_info; | |
458 if (IsMetaElement(child, charset_info)) | |
459 ASSERT_TRUE(charset_info.empty()); | |
460 } | |
461 } | 873 } |
462 | 874 |
463 // When serializing DOM, if the original document has multiple META charset | 875 // When serializing DOM, if the original document has multiple META charset |
464 // declaration, we will add the META which have correct charset declaration | 876 // declaration, we will add the META which have correct charset declaration |
465 // as first child of HEAD element and remove all original META charset | 877 // as first child of HEAD element and remove all original META charset |
466 // declarations. | 878 // declarations. |
467 TEST_F(DomSerializerTests, | 879 IN_PROC_BROWSER_TEST_F(DomSerializerTests, |
468 SerializeHTMLDOMWithMultipleMetaCharsetInOriginalDoc) { | 880 SerializeHTMLDOMWithMultipleMetaCharsetInOriginalDoc) { |
469 base::FilePath page_file_path = data_dir_; | 881 base::FilePath page_file_path = |
470 page_file_path = page_file_path.AppendASCII("dom_serializer"); | 882 GetTestFilePath("dom_serializer", "youtube_2.htm"); |
471 page_file_path = page_file_path.AppendASCII("youtube_2.htm"); | |
472 // Get file URL. | 883 // Get file URL. |
473 GURL file_url = net::FilePathToFileURL(page_file_path); | 884 GURL file_url = net::FilePathToFileURL(page_file_path); |
474 ASSERT_TRUE(file_url.SchemeIsFile()); | 885 ASSERT_TRUE(file_url.SchemeIsFile()); |
475 // Load the test file. | 886 // Load the test file. |
476 LoadPageFromURL(file_url); | 887 NavigateToURL(shell(), file_url); |
477 | 888 |
478 // Make sure there are multiple META charset declarations in original | 889 PostTaskToInProcessRendererAndWait( |
479 // document. | 890 base::Bind( |
480 WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); | 891 &DomSerializerTests:: |
481 ASSERT_TRUE(web_frame != NULL); | 892 SerializeHTMLDOMWithMultipleMetaCharsetInOriginalDocOnRenderer, |
482 WebDocument doc = web_frame->document(); | 893 base::Unretained(this), file_url)); |
483 ASSERT_TRUE(doc.isHTMLDocument()); | |
484 WebElement head_ele = doc.head(); | |
485 ASSERT_TRUE(!head_ele.isNull()); | |
486 // Go through all children of HEAD element. | |
487 int charset_declaration_count = 0; | |
488 for (WebNode child = head_ele.firstChild(); !child.isNull(); | |
489 child = child.nextSibling()) { | |
490 std::string charset_info; | |
491 if (IsMetaElement(child, charset_info) && !charset_info.empty()) | |
492 charset_declaration_count++; | |
493 } | |
494 // The original doc has more than META tags which have charset declaration. | |
495 ASSERT_TRUE(charset_declaration_count > 1); | |
496 | |
497 // Do serialization. | |
498 SerializeDomForURL(file_url, false); | |
499 | |
500 // Load the serialized contents. | |
501 ASSERT_TRUE(HasSerializedFrame(file_url)); | |
502 const std::string& serialized_contents = | |
503 GetSerializedContentForFrame(file_url); | |
504 LoadContents(serialized_contents, file_url, | |
505 web_frame->document().encoding()); | |
506 // Make sure only first child of HEAD element is META which has charset | |
507 // declaration in serialized contents. | |
508 web_frame = test_shell_->webView()->mainFrame(); | |
509 ASSERT_TRUE(web_frame != NULL); | |
510 doc = web_frame->document(); | |
511 ASSERT_TRUE(doc.isHTMLDocument()); | |
512 head_ele = doc.head(); | |
513 ASSERT_TRUE(!head_ele.isNull()); | |
514 WebNode meta_node = head_ele.firstChild(); | |
515 ASSERT_TRUE(!meta_node.isNull()); | |
516 // Get meta charset info. | |
517 std::string charset_info2; | |
518 ASSERT_TRUE(IsMetaElement(meta_node, charset_info2)); | |
519 ASSERT_TRUE(!charset_info2.empty()); | |
520 ASSERT_EQ(charset_info2, | |
521 std::string(web_frame->document().encoding().utf8())); | |
522 | |
523 // Make sure no more additional META tags which have charset declaration. | |
524 for (WebNode child = meta_node.nextSibling(); !child.isNull(); | |
525 child = child.nextSibling()) { | |
526 std::string charset_info; | |
527 if (IsMetaElement(child, charset_info)) | |
528 ASSERT_TRUE(charset_info.empty()); | |
529 } | |
530 } | 894 } |
531 | 895 |
532 // Test situation of html entities in text when serializing HTML DOM. | 896 // Test situation of html entities in text when serializing HTML DOM. |
533 TEST_F(DomSerializerTests, SerializeHTMLDOMWithEntitiesInText) { | 897 IN_PROC_BROWSER_TEST_F(DomSerializerTests, SerializeHTMLDOMWithEntitiesInText) { |
534 base::FilePath page_file_path = data_dir_; | 898 // Need to spin up the renderer and also navigate to a file url so that the |
535 page_file_path = page_file_path.AppendASCII( | 899 // renderer code doesn't attempt a fork when it sees a load to file scheme |
536 "dom_serializer/htmlentities_in_text.htm"); | 900 // from non-file scheme. |
537 // Get file URL. The URL is dummy URL to identify the following loading | 901 NavigateToURL(shell(), GetTestUrl(".", "simple_page.html")); |
538 // actions. The test content is in constant:original_contents. | |
539 GURL file_url = net::FilePathToFileURL(page_file_path); | |
540 ASSERT_TRUE(file_url.SchemeIsFile()); | |
541 // Test contents. | |
542 static const char* const original_contents = | |
543 "<html><body>&<>\"\'</body></html>"; | |
544 // Load the test contents. | |
545 LoadContents(original_contents, file_url, WebString()); | |
546 | 902 |
547 // Get BODY's text content in DOM. | 903 PostTaskToInProcessRendererAndWait( |
548 WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); | 904 base::Bind( |
549 ASSERT_TRUE(web_frame != NULL); | 905 &DomSerializerTests::SerializeHTMLDOMWithEntitiesInTextOnRenderer, |
550 WebDocument doc = web_frame->document(); | 906 base::Unretained(this))); |
551 ASSERT_TRUE(doc.isHTMLDocument()); | |
552 WebElement body_ele = doc.body(); | |
553 ASSERT_TRUE(!body_ele.isNull()); | |
554 WebNode text_node = body_ele.firstChild(); | |
555 ASSERT_TRUE(text_node.isTextNode()); | |
556 ASSERT_TRUE(std::string(text_node.createMarkup().utf8()) == | |
557 "&<>\"\'"); | |
558 // Do serialization. | |
559 SerializeDomForURL(file_url, false); | |
560 // Compare the serialized contents with original contents. | |
561 ASSERT_TRUE(HasSerializedFrame(file_url)); | |
562 const std::string& serialized_contents = | |
563 GetSerializedContentForFrame(file_url); | |
564 // Compare the serialized contents with original contents to make sure | |
565 // they are same. | |
566 // Because we add MOTW when serializing DOM, so before comparison, we also | |
567 // need to add MOTW to original_contents. | |
568 std::string original_str = | |
569 WebPageSerializer::generateMarkOfTheWebDeclaration(file_url).utf8(); | |
570 original_str += original_contents; | |
571 // Since WebCore now inserts a new HEAD element if there is no HEAD element | |
572 // when creating BODY element. (Please see HTMLParser::bodyCreateErrorCheck.) | |
573 // We need to append the HEAD content and corresponding META content if we | |
574 // find WebCore-generated HEAD element. | |
575 if (!doc.head().isNull()) { | |
576 WebString encoding = web_frame->document().encoding(); | |
577 std::string htmlTag("<html>"); | |
578 std::string::size_type pos = original_str.find(htmlTag); | |
579 ASSERT_NE(std::string::npos, pos); | |
580 pos += htmlTag.length(); | |
581 std::string head_part("<head>"); | |
582 head_part += | |
583 WebPageSerializer::generateMetaCharsetDeclaration(encoding).utf8(); | |
584 head_part += "</head>"; | |
585 original_str.insert(pos, head_part); | |
586 } | |
587 ASSERT_EQ(original_str, serialized_contents); | |
588 } | 907 } |
589 | 908 |
590 // Test situation of html entities in attribute value when serializing | 909 // Test situation of html entities in attribute value when serializing |
591 // HTML DOM. | 910 // HTML DOM. |
592 // This test started to fail at WebKit r65388. See http://crbug.com/52279. | 911 // This test started to fail at WebKit r65388. See http://crbug.com/52279. |
593 TEST_F(DomSerializerTests, SerializeHTMLDOMWithEntitiesInAttributeValue) { | 912 IN_PROC_BROWSER_TEST_F(DomSerializerTests, |
594 base::FilePath page_file_path = data_dir_; | 913 SerializeHTMLDOMWithEntitiesInAttributeValue) { |
595 page_file_path = page_file_path.AppendASCII( | 914 // Need to spin up the renderer and also navigate to a file url so that the |
596 "dom_serializer/htmlentities_in_attribute_value.htm"); | 915 // renderer code doesn't attempt a fork when it sees a load to file scheme |
597 // Get file URL. The URL is dummy URL to identify the following loading | 916 // from non-file scheme. |
598 // actions. The test content is in constant:original_contents. | 917 NavigateToURL(shell(), GetTestUrl(".", "simple_page.html")); |
599 GURL file_url = net::FilePathToFileURL(page_file_path); | 918 |
600 ASSERT_TRUE(file_url.SchemeIsFile()); | 919 PostTaskToInProcessRendererAndWait( |
601 // Test contents. | 920 base::Bind( |
602 static const char* const original_contents = | 921 &DomSerializerTests:: |
603 "<html><body title=\"&<>"'\"></body></html>"; | 922 SerializeHTMLDOMWithEntitiesInAttributeValueOnRenderer, |
604 // Load the test contents. | 923 base::Unretained(this))); |
605 LoadContents(original_contents, file_url, WebString()); | |
606 // Get value of BODY's title attribute in DOM. | |
607 WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); | |
608 ASSERT_TRUE(web_frame != NULL); | |
609 WebDocument doc = web_frame->document(); | |
610 ASSERT_TRUE(doc.isHTMLDocument()); | |
611 WebElement body_ele = doc.body(); | |
612 ASSERT_TRUE(!body_ele.isNull()); | |
613 WebString value = body_ele.getAttribute("title"); | |
614 ASSERT_TRUE(std::string(value.utf8()) == "&<>\"\'"); | |
615 // Do serialization. | |
616 SerializeDomForURL(file_url, false); | |
617 // Compare the serialized contents with original contents. | |
618 ASSERT_TRUE(HasSerializedFrame(file_url)); | |
619 const std::string& serialized_contents = | |
620 GetSerializedContentForFrame(file_url); | |
621 // Compare the serialized contents with original contents to make sure | |
622 // they are same. | |
623 std::string original_str = | |
624 WebPageSerializer::generateMarkOfTheWebDeclaration(file_url).utf8(); | |
625 original_str += original_contents; | |
626 if (!doc.isNull()) { | |
627 WebString encoding = web_frame->document().encoding(); | |
628 std::string htmlTag("<html>"); | |
629 std::string::size_type pos = original_str.find(htmlTag); | |
630 ASSERT_NE(std::string::npos, pos); | |
631 pos += htmlTag.length(); | |
632 std::string head_part("<head>"); | |
633 head_part += | |
634 WebPageSerializer::generateMetaCharsetDeclaration(encoding).utf8(); | |
635 head_part += "</head>"; | |
636 original_str.insert(pos, head_part); | |
637 } | |
638 ASSERT_EQ(original_str, serialized_contents); | |
639 } | 924 } |
640 | 925 |
641 // Test situation of non-standard HTML entities when serializing HTML DOM. | 926 // Test situation of non-standard HTML entities when serializing HTML DOM. |
642 // This test started to fail at WebKit r65351. See http://crbug.com/52279. | 927 // This test started to fail at WebKit r65351. See http://crbug.com/52279. |
643 TEST_F(DomSerializerTests, SerializeHTMLDOMWithNonStandardEntities) { | 928 IN_PROC_BROWSER_TEST_F(DomSerializerTests, |
| 929 SerializeHTMLDOMWithNonStandardEntities) { |
644 // Make a test file URL and load it. | 930 // Make a test file URL and load it. |
645 base::FilePath page_file_path = data_dir_; | 931 base::FilePath page_file_path = GetTestFilePath( |
646 page_file_path = page_file_path.AppendASCII("dom_serializer"); | 932 "dom_serializer", "nonstandard_htmlentities.htm"); |
647 page_file_path = page_file_path.AppendASCII("nonstandard_htmlentities.htm"); | |
648 GURL file_url = net::FilePathToFileURL(page_file_path); | 933 GURL file_url = net::FilePathToFileURL(page_file_path); |
649 LoadPageFromURL(file_url); | 934 NavigateToURL(shell(), file_url); |
650 | 935 |
651 // Get value of BODY's title attribute in DOM. | 936 PostTaskToInProcessRendererAndWait( |
652 WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); | 937 base::Bind( |
653 WebDocument doc = web_frame->document(); | 938 &DomSerializerTests:: |
654 ASSERT_TRUE(doc.isHTMLDocument()); | 939 SerializeHTMLDOMWithNonStandardEntitiesOnRenderer, |
655 WebElement body_element = doc.body(); | 940 base::Unretained(this), file_url)); |
656 // Unescaped string for "%⊅¹'". | |
657 static const wchar_t parsed_value[] = { | |
658 '%', 0x2285, 0x00b9, '\'', 0 | |
659 }; | |
660 WebString value = body_element.getAttribute("title"); | |
661 ASSERT_TRUE(UTF16ToWide(value) == parsed_value); | |
662 ASSERT_TRUE(UTF16ToWide(body_element.innerText()) == parsed_value); | |
663 | |
664 // Do serialization. | |
665 SerializeDomForURL(file_url, false); | |
666 // Check the serialized string. | |
667 ASSERT_TRUE(HasSerializedFrame(file_url)); | |
668 const std::string& serialized_contents = | |
669 GetSerializedContentForFrame(file_url); | |
670 // Confirm that the serialized string has no non-standard HTML entities. | |
671 ASSERT_EQ(std::string::npos, serialized_contents.find("%")); | |
672 ASSERT_EQ(std::string::npos, serialized_contents.find("⊅")); | |
673 ASSERT_EQ(std::string::npos, serialized_contents.find("¹")); | |
674 ASSERT_EQ(std::string::npos, serialized_contents.find("'")); | |
675 } | 941 } |
676 | 942 |
677 // Test situation of BASE tag in original document when serializing HTML DOM. | 943 // Test situation of BASE tag in original document when serializing HTML DOM. |
678 // When serializing, we should comment the BASE tag, append a new BASE tag. | 944 // When serializing, we should comment the BASE tag, append a new BASE tag. |
679 // rewrite all the savable URLs to relative local path, and change other URLs | 945 // rewrite all the savable URLs to relative local path, and change other URLs |
680 // to absolute URLs. | 946 // to absolute URLs. |
681 TEST_F(DomSerializerTests, SerializeHTMLDOMWithBaseTag) { | 947 IN_PROC_BROWSER_TEST_F(DomSerializerTests, SerializeHTMLDOMWithBaseTag) { |
682 // There are total 2 available base tags in this test file. | 948 base::FilePath page_file_path = GetTestFilePath( |
683 const int kTotalBaseTagCountInTestFile = 2; | 949 "dom_serializer", "html_doc_has_base_tag.htm"); |
684 | |
685 base::FilePath page_file_path = | |
686 data_dir_.AppendASCII("dom_serializer").AsEndingWithSeparator(); | |
687 | 950 |
688 // Get page dir URL which is base URL of this file. | 951 // Get page dir URL which is base URL of this file. |
689 GURL path_dir_url = net::FilePathToFileURL(page_file_path); | 952 base::FilePath dir_name = page_file_path.DirName(); |
690 // Get file path. | 953 dir_name = dir_name.Append( |
691 page_file_path = | 954 base::FilePath::StringType(base::FilePath::kSeparators[0], 1)); |
692 page_file_path.AppendASCII("html_doc_has_base_tag.htm"); | 955 GURL path_dir_url = net::FilePathToFileURL(dir_name); |
| 956 |
693 // Get file URL. | 957 // Get file URL. |
694 GURL file_url = net::FilePathToFileURL(page_file_path); | 958 GURL file_url = net::FilePathToFileURL(page_file_path); |
695 ASSERT_TRUE(file_url.SchemeIsFile()); | 959 ASSERT_TRUE(file_url.SchemeIsFile()); |
696 // Load the test file. | 960 // Load the test file. |
697 LoadPageFromURL(file_url); | 961 NavigateToURL(shell(), file_url); |
698 // Since for this test, we assume there is no savable sub-resource links for | |
699 // this test file, also all links are relative URLs in this test file, so we | |
700 // need to check those relative URLs and make sure document has BASE tag. | |
701 WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); | |
702 ASSERT_TRUE(web_frame != NULL); | |
703 WebDocument doc = web_frame->document(); | |
704 ASSERT_TRUE(doc.isHTMLDocument()); | |
705 // Go through all descent nodes. | |
706 WebNodeCollection all = doc.all(); | |
707 int original_base_tag_count = 0; | |
708 for (WebNode node = all.firstItem(); !node.isNull(); | |
709 node = all.nextItem()) { | |
710 if (!node.isElementNode()) | |
711 continue; | |
712 WebElement element = node.to<WebElement>(); | |
713 if (element.hasTagName("base")) { | |
714 original_base_tag_count++; | |
715 } else { | |
716 // Get link. | |
717 WebString value = | |
718 webkit_glue::GetSubResourceLinkFromElement(element); | |
719 if (value.isNull() && element.hasTagName("a")) { | |
720 value = element.getAttribute("href"); | |
721 if (value.isEmpty()) | |
722 value = WebString(); | |
723 } | |
724 // Each link is relative link. | |
725 if (!value.isNull()) { | |
726 GURL link(value.utf8()); | |
727 ASSERT_TRUE(link.scheme().empty()); | |
728 } | |
729 } | |
730 } | |
731 ASSERT_EQ(original_base_tag_count, kTotalBaseTagCountInTestFile); | |
732 // Make sure in original document, the base URL is not equal with the | |
733 // |path_dir_url|. | |
734 GURL original_base_url(doc.baseURL()); | |
735 ASSERT_NE(original_base_url, path_dir_url); | |
736 | 962 |
737 // Do serialization. | 963 PostTaskToInProcessRendererAndWait( |
738 SerializeDomForURL(file_url, false); | 964 base::Bind( |
739 | 965 &DomSerializerTests::SerializeHTMLDOMWithBaseTagOnRenderer, |
740 // Load the serialized contents. | 966 base::Unretained(this), file_url, path_dir_url)); |
741 ASSERT_TRUE(HasSerializedFrame(file_url)); | |
742 const std::string& serialized_contents = | |
743 GetSerializedContentForFrame(file_url); | |
744 LoadContents(serialized_contents, file_url, | |
745 web_frame->document().encoding()); | |
746 | |
747 // Make sure all links are absolute URLs and doc there are some number of | |
748 // BASE tags in serialized HTML data. Each of those BASE tags have same base | |
749 // URL which is as same as URL of current test file. | |
750 web_frame = test_shell_->webView()->mainFrame(); | |
751 ASSERT_TRUE(web_frame != NULL); | |
752 doc = web_frame->document(); | |
753 ASSERT_TRUE(doc.isHTMLDocument()); | |
754 // Go through all descent nodes. | |
755 all = doc.all(); | |
756 int new_base_tag_count = 0; | |
757 for (WebNode node = all.firstItem(); !node.isNull(); | |
758 node = all.nextItem()) { | |
759 if (!node.isElementNode()) | |
760 continue; | |
761 WebElement element = node.to<WebElement>(); | |
762 if (element.hasTagName("base")) { | |
763 new_base_tag_count++; | |
764 } else { | |
765 // Get link. | |
766 WebString value = | |
767 webkit_glue::GetSubResourceLinkFromElement(element); | |
768 if (value.isNull() && element.hasTagName("a")) { | |
769 value = element.getAttribute("href"); | |
770 if (value.isEmpty()) | |
771 value = WebString(); | |
772 } | |
773 // Each link is absolute link. | |
774 if (!value.isNull()) { | |
775 GURL link(std::string(value.utf8())); | |
776 ASSERT_FALSE(link.scheme().empty()); | |
777 } | |
778 } | |
779 } | |
780 // We have one more added BASE tag which is generated by JavaScript. | |
781 ASSERT_EQ(new_base_tag_count, original_base_tag_count + 1); | |
782 // Make sure in new document, the base URL is equal with the |path_dir_url|. | |
783 GURL new_base_url(doc.baseURL()); | |
784 ASSERT_EQ(new_base_url, path_dir_url); | |
785 } | 967 } |
786 | 968 |
787 // Serializing page which has an empty HEAD tag. | 969 // Serializing page which has an empty HEAD tag. |
788 TEST_F(DomSerializerTests, SerializeHTMLDOMWithEmptyHead) { | 970 IN_PROC_BROWSER_TEST_F(DomSerializerTests, SerializeHTMLDOMWithEmptyHead) { |
789 base::FilePath page_file_path = data_dir_; | 971 // Need to spin up the renderer and also navigate to a file url so that the |
790 page_file_path = page_file_path.AppendASCII("dom_serializer"); | 972 // renderer code doesn't attempt a fork when it sees a load to file scheme |
791 page_file_path = page_file_path.AppendASCII("empty_head.htm"); | 973 // from non-file scheme. |
792 GURL file_url = net::FilePathToFileURL(page_file_path); | 974 NavigateToURL(shell(), GetTestUrl(".", "simple_page.html")); |
793 ASSERT_TRUE(file_url.SchemeIsFile()); | |
794 | 975 |
795 // Load the test html content. | 976 PostTaskToInProcessRendererAndWait( |
796 static const char* const empty_head_contents = | 977 base::Bind(&DomSerializerTests::SerializeHTMLDOMWithEmptyHeadOnRenderer, |
797 "<html><head></head><body>hello world</body></html>"; | 978 base::Unretained(this))); |
798 LoadContents(empty_head_contents, file_url, WebString()); | |
799 | |
800 // Make sure the head tag is empty. | |
801 WebFrame* web_frame = test_shell_->webView()->mainFrame(); | |
802 ASSERT_TRUE(web_frame != NULL); | |
803 WebDocument doc = web_frame->document(); | |
804 ASSERT_TRUE(doc.isHTMLDocument()); | |
805 WebElement head_element = doc.head(); | |
806 ASSERT_TRUE(!head_element.isNull()); | |
807 ASSERT_TRUE(!head_element.hasChildNodes()); | |
808 ASSERT_TRUE(head_element.childNodes().length() == 0); | |
809 | |
810 // Do serialization. | |
811 SerializeDomForURL(file_url, false); | |
812 // Make sure the serialized contents have META ; | |
813 ASSERT_TRUE(HasSerializedFrame(file_url)); | |
814 const std::string& serialized_contents = | |
815 GetSerializedContentForFrame(file_url); | |
816 | |
817 // Reload serialized contents and make sure there is only one META tag. | |
818 LoadContents(serialized_contents, file_url, web_frame->document().encoding()); | |
819 web_frame = test_shell_->webView()->mainFrame(); | |
820 ASSERT_TRUE(web_frame != NULL); | |
821 doc = web_frame->document(); | |
822 ASSERT_TRUE(doc.isHTMLDocument()); | |
823 head_element = doc.head(); | |
824 ASSERT_TRUE(!head_element.isNull()); | |
825 ASSERT_TRUE(head_element.hasChildNodes()); | |
826 ASSERT_TRUE(head_element.childNodes().length() == 1); | |
827 WebNode meta_node = head_element.firstChild(); | |
828 ASSERT_TRUE(!meta_node.isNull()); | |
829 // Get meta charset info. | |
830 std::string charset_info; | |
831 ASSERT_TRUE(IsMetaElement(meta_node, charset_info)); | |
832 ASSERT_TRUE(!charset_info.empty()); | |
833 ASSERT_EQ(charset_info, | |
834 std::string(web_frame->document().encoding().utf8())); | |
835 | |
836 // Check the body's first node is text node and its contents are | |
837 // "hello world" | |
838 WebElement body_element = doc.body(); | |
839 ASSERT_TRUE(!body_element.isNull()); | |
840 WebNode text_node = body_element.firstChild(); | |
841 ASSERT_TRUE(text_node.isTextNode()); | |
842 WebString text_node_contents = text_node.nodeValue(); | |
843 ASSERT_TRUE(std::string(text_node_contents.utf8()) == "hello world"); | |
844 } | 979 } |
845 | 980 |
846 // Test that we don't crash when the page contains an iframe that | 981 // Test that we don't crash when the page contains an iframe that |
847 // was handled as a download (http://crbug.com/42212). | 982 // was handled as a download (http://crbug.com/42212). |
848 TEST_F(DomSerializerTests, SerializeDocumentWithDownloadedIFrame) { | 983 IN_PROC_BROWSER_TEST_F(DomSerializerTests, |
849 base::FilePath page_file_path = data_dir_; | 984 SerializeDocumentWithDownloadedIFrame) { |
850 page_file_path = page_file_path.AppendASCII("dom_serializer"); | 985 base::FilePath page_file_path = GetTestFilePath( |
851 page_file_path = page_file_path.AppendASCII("iframe-src-is-exe.htm"); | 986 "dom_serializer", "iframe-src-is-exe.htm"); |
852 GURL file_url = net::FilePathToFileURL(page_file_path); | 987 GURL file_url = net::FilePathToFileURL(page_file_path); |
853 ASSERT_TRUE(file_url.SchemeIsFile()); | 988 ASSERT_TRUE(file_url.SchemeIsFile()); |
854 // Load the test file. | 989 // Load the test file. |
855 LoadPageFromURL(file_url); | 990 NavigateToURL(shell(), file_url); |
856 // Do a recursive serialization. We pass if we don't crash. | 991 |
857 SerializeDomForURL(file_url, true); | 992 PostTaskToInProcessRendererAndWait( |
| 993 base::Bind( |
| 994 &DomSerializerTests:: |
| 995 SerializeDocumentWithDownloadedIFrameOnRenderer, |
| 996 base::Unretained(this), file_url)); |
858 } | 997 } |
859 | 998 |
860 TEST_F(DomSerializerTests, SubResourceForElementsInNonHTMLNamespace) { | 999 IN_PROC_BROWSER_TEST_F(DomSerializerTests, |
861 base::FilePath page_file_path = data_dir_; | 1000 SubResourceForElementsInNonHTMLNamespace) { |
862 page_file_path = page_file_path.AppendASCII("dom_serializer"); | 1001 base::FilePath page_file_path = GetTestFilePath( |
863 page_file_path = page_file_path.AppendASCII("non_html_namespace.htm"); | 1002 "dom_serializer", "non_html_namespace.htm"); |
864 GURL file_url = net::FilePathToFileURL(page_file_path); | 1003 GURL file_url = net::FilePathToFileURL(page_file_path); |
865 LoadPageFromURL(file_url); | 1004 NavigateToURL(shell(), file_url); |
866 WebFrame* web_frame = FindSubFrameByURL(test_shell_->webView(), file_url); | 1005 |
867 ASSERT_TRUE(web_frame != NULL); | 1006 PostTaskToInProcessRendererAndWait( |
868 WebDocument doc = web_frame->document(); | 1007 base::Bind( |
869 WebNode lastNodeInBody = doc.body().lastChild(); | 1008 &DomSerializerTests:: |
870 ASSERT_EQ(WebNode::ElementNode, lastNodeInBody.nodeType()); | 1009 SubResourceForElementsInNonHTMLNamespaceOnRenderer, |
871 WebString uri = webkit_glue::GetSubResourceLinkFromElement( | 1010 base::Unretained(this), file_url)); |
872 lastNodeInBody.to<WebElement>()); | |
873 EXPECT_TRUE(uri.isNull()); | |
874 } | 1011 } |
875 | 1012 |
876 } // namespace | 1013 } // namespace content |
OLD | NEW |