Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(303)

Side by Side Diff: third_party/WebKit/Source/web/tests/WebFrameSerializerTest.cpp

Issue 2842653005: WebFrameSerializerSanitizationTest validates generated MHTML. (Closed)
Patch Set: Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 /* 1 /*
2 * Copyright (C) 2011 Google Inc. All rights reserved. 2 * Copyright (C) 2011 Google Inc. All rights reserved.
3 * 3 *
4 * Redistribution and use in source and binary forms, with or without 4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are 5 * modification, are permitted provided that the following conditions are
6 * met: 6 * met:
7 * 7 *
8 * * Redistributions of source code must retain the above copyright 8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer. 9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above 10 * * Redistributions in binary form must reproduce the above
(...skipping 12 matching lines...) Expand all
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */ 29 */
30 30
31 #include "public/web/WebFrameSerializer.h" 31 #include "public/web/WebFrameSerializer.h"
32 32
33 #include "platform/mhtml/MHTMLArchive.h"
34 #include "platform/mhtml/MHTMLParser.h"
33 #include "platform/testing/URLTestHelpers.h" 35 #include "platform/testing/URLTestHelpers.h"
34 #include "platform/testing/UnitTestHelpers.h" 36 #include "platform/testing/UnitTestHelpers.h"
35 #include "platform/weborigin/KURL.h" 37 #include "platform/weborigin/KURL.h"
36 #include "platform/wtf/text/StringBuilder.h" 38 #include "platform/wtf/text/StringBuilder.h"
37 #include "public/platform/Platform.h" 39 #include "public/platform/Platform.h"
38 #include "public/platform/WebCString.h" 40 #include "public/platform/WebCString.h"
39 #include "public/platform/WebCache.h" 41 #include "public/platform/WebCache.h"
40 #include "public/platform/WebString.h" 42 #include "public/platform/WebString.h"
41 #include "public/platform/WebURL.h" 43 #include "public/platform/WebURL.h"
42 #include "public/platform/WebURLLoaderMockFactory.h" 44 #include "public/platform/WebURLLoaderMockFactory.h"
(...skipping 168 matching lines...) Expand 10 before | Expand all | Expand 10 after
211 EXPECT_EQ("<!-- saved from url=(0030)http://www.test.com/?-%2Dx-%2D -->", 213 EXPECT_EQ("<!-- saved from url=(0030)http://www.test.com/?-%2Dx-%2D -->",
212 actual_html.Substring(1, 60)); 214 actual_html.Substring(1, 60));
213 } 215 }
214 216
215 class WebFrameSerializerSanitizationTest : public WebFrameSerializerTest { 217 class WebFrameSerializerSanitizationTest : public WebFrameSerializerTest {
216 protected: 218 protected:
217 WebFrameSerializerSanitizationTest() {} 219 WebFrameSerializerSanitizationTest() {}
218 220
219 ~WebFrameSerializerSanitizationTest() override {} 221 ~WebFrameSerializerSanitizationTest() override {}
220 222
221 String GenerateMHTMLParts(const String& url, 223 String GenerateMHTML(const String& url,
222 const String& file_name, 224 const String& file_name,
223 const String& mime_type = "text/html") { 225 const String& mime_type = "text/html",
226 const bool only_body_parts = false) {
224 KURL parsed_url(kParsedURLString, url); 227 KURL parsed_url(kParsedURLString, url);
225 String file_path("frameserialization/" + file_name); 228 String file_path("frameserialization/" + file_name);
226 RegisterMockedFileURLLoad(parsed_url, file_path, mime_type); 229 RegisterMockedFileURLLoad(parsed_url, file_path, mime_type);
227 FrameTestHelpers::LoadFrame(MainFrameImpl(), url.Utf8().data()); 230 FrameTestHelpers::LoadFrame(MainFrameImpl(), url.Utf8().data());
228 WebThreadSafeData result = WebFrameSerializer::GenerateMHTMLParts( 231 // This boundary is as good as any other. Plus it gets used in almost
dewittj 2017/04/25 22:32:33 I'd rewrite this comment, since it's not obvious w
carlosk 2017/04/26 00:16:49 On 2017/04/25 22:32:33, dewittj wrote: Context: th
229 WebString("boundary"), MainFrameImpl(), &mhtml_delegate_); 232 // all the examples in the MHTML spec - RFC 2557.
230 return String(result.Data(), result.size()); 233 WebString boundary("boundary-example");
234 StringBuilder mhtml;
235 if (!only_body_parts) {
236 WebThreadSafeData header_result = WebFrameSerializer::GenerateMHTMLHeader(
237 boundary, MainFrameImpl(), &mhtml_delegate_);
238 mhtml.Append(header_result.Data(), header_result.size());
239 }
240 WebThreadSafeData body_result = WebFrameSerializer::GenerateMHTMLParts(
241 boundary, MainFrameImpl(), &mhtml_delegate_);
242 mhtml.Append(body_result.Data(), body_result.size());
243 if (!only_body_parts) {
244 RefPtr<RawData> footer_data = RawData::Create();
245 MHTMLArchive::GenerateMHTMLFooterForTesting(boundary,
246 *footer_data->MutableData());
247 mhtml.Append(footer_data->data(), footer_data->length());
248 }
249 String mhtml_string = mhtml.ToString();
250
251 if (!only_body_parts) {
252 // Validate the generated MHTML.
253 MHTMLParser parser(SharedBuffer::Create(mhtml_string.Characters8(),
254 size_t(mhtml_string.length())));
255 if (parser.ParseArchive().IsEmpty()) {
256 ADD_FAILURE() << "Invalid MHTML";
257 mhtml_string = String();
258 }
259 }
260 return mhtml_string;
231 } 261 }
232 262
233 void SetRemovePopupOverlay(bool remove_popup_overlay) { 263 void SetRemovePopupOverlay(bool remove_popup_overlay) {
234 mhtml_delegate_.SetRemovePopupOverlay(remove_popup_overlay); 264 mhtml_delegate_.SetRemovePopupOverlay(remove_popup_overlay);
235 } 265 }
236 266
237 private: 267 private:
238 SimpleMHTMLPartsGenerationDelegate mhtml_delegate_; 268 SimpleMHTMLPartsGenerationDelegate mhtml_delegate_;
239 }; 269 };
240 270
241 TEST_F(WebFrameSerializerSanitizationTest, RemoveInlineScriptInAttributes) { 271 TEST_F(WebFrameSerializerSanitizationTest, RemoveInlineScriptInAttributes) {
242 String mhtml = 272 String mhtml =
243 GenerateMHTMLParts("http://www.test.com", "script_in_attributes.html"); 273 GenerateMHTML("http://www.test.com", "script_in_attributes.html");
274 ASSERT_FALSE(HasFailure());
244 275
245 // These scripting attributes should be removed. 276 // These scripting attributes should be removed.
246 EXPECT_EQ(WTF::kNotFound, mhtml.Find("onload=")); 277 EXPECT_EQ(WTF::kNotFound, mhtml.Find("onload="));
247 EXPECT_EQ(WTF::kNotFound, mhtml.Find("ONLOAD=")); 278 EXPECT_EQ(WTF::kNotFound, mhtml.Find("ONLOAD="));
248 EXPECT_EQ(WTF::kNotFound, mhtml.Find("onclick=")); 279 EXPECT_EQ(WTF::kNotFound, mhtml.Find("onclick="));
249 EXPECT_EQ(WTF::kNotFound, mhtml.Find("href=")); 280 EXPECT_EQ(WTF::kNotFound, mhtml.Find("href="));
250 EXPECT_EQ(WTF::kNotFound, mhtml.Find("from=")); 281 EXPECT_EQ(WTF::kNotFound, mhtml.Find("from="));
251 EXPECT_EQ(WTF::kNotFound, mhtml.Find("to=")); 282 EXPECT_EQ(WTF::kNotFound, mhtml.Find("to="));
252 EXPECT_EQ(WTF::kNotFound, mhtml.Find("javascript:")); 283 EXPECT_EQ(WTF::kNotFound, mhtml.Find("javascript:"));
253 284
254 // These non-scripting attributes should remain intact. 285 // These non-scripting attributes should remain intact.
255 EXPECT_NE(WTF::kNotFound, mhtml.Find("class=")); 286 EXPECT_NE(WTF::kNotFound, mhtml.Find("class="));
256 EXPECT_NE(WTF::kNotFound, mhtml.Find("id=")); 287 EXPECT_NE(WTF::kNotFound, mhtml.Find("id="));
257 288
258 // srcdoc attribute of frame element should be replaced with src attribute. 289 // srcdoc attribute of frame element should be replaced with src attribute.
259 EXPECT_EQ(WTF::kNotFound, mhtml.Find("srcdoc=")); 290 EXPECT_EQ(WTF::kNotFound, mhtml.Find("srcdoc="));
260 EXPECT_NE(WTF::kNotFound, mhtml.Find("src=")); 291 EXPECT_NE(WTF::kNotFound, mhtml.Find("src="));
261 } 292 }
262 293
263 TEST_F(WebFrameSerializerSanitizationTest, DisableFormElements) { 294 TEST_F(WebFrameSerializerSanitizationTest, DisableFormElements) {
264 String mhtml = GenerateMHTMLParts("http://www.test.com", "form.html"); 295 String mhtml = GenerateMHTML("http://www.test.com", "form.html");
296 ASSERT_FALSE(HasFailure());
265 297
266 const char kDisabledAttr[] = "disabled=3D\"\""; 298 const char kDisabledAttr[] = "disabled=3D\"\"";
267 int matches = 299 int matches =
268 MatchSubstring(mhtml, kDisabledAttr, arraysize(kDisabledAttr) - 1); 300 MatchSubstring(mhtml, kDisabledAttr, arraysize(kDisabledAttr) - 1);
269 EXPECT_EQ(21, matches); 301 EXPECT_EQ(21, matches);
270 } 302 }
271 303
272 TEST_F(WebFrameSerializerSanitizationTest, RemoveHiddenElements) { 304 TEST_F(WebFrameSerializerSanitizationTest, RemoveHiddenElements) {
273 String mhtml = 305 String mhtml = GenerateMHTML("http://www.test.com", "hidden_elements.html");
274 GenerateMHTMLParts("http://www.test.com", "hidden_elements.html"); 306 ASSERT_FALSE(HasFailure());
275 307
276 // The element with hidden attribute should be removed. 308 // The element with hidden attribute should be removed.
277 EXPECT_EQ(WTF::kNotFound, mhtml.Find("<p id=3D\"hidden_id\"")); 309 EXPECT_EQ(WTF::kNotFound, mhtml.Find("<p id=3D\"hidden_id\""));
278 310
279 // The hidden form element should be removed. 311 // The hidden form element should be removed.
280 EXPECT_EQ(WTF::kNotFound, mhtml.Find("<input type=3D\"hidden\"")); 312 EXPECT_EQ(WTF::kNotFound, mhtml.Find("<input type=3D\"hidden\""));
281 313
282 // All other hidden elements should not be removed. 314 // All other hidden elements should not be removed.
283 EXPECT_NE(WTF::kNotFound, mhtml.Find("<html")); 315 EXPECT_NE(WTF::kNotFound, mhtml.Find("<html"));
284 EXPECT_NE(WTF::kNotFound, mhtml.Find("<head")); 316 EXPECT_NE(WTF::kNotFound, mhtml.Find("<head"));
(...skipping 13 matching lines...) Expand all
298 // These visible elements should remain intact. 330 // These visible elements should remain intact.
299 EXPECT_NE(WTF::kNotFound, mhtml.Find("<p id=3D\"visible_id\"")); 331 EXPECT_NE(WTF::kNotFound, mhtml.Find("<p id=3D\"visible_id\""));
300 EXPECT_NE(WTF::kNotFound, mhtml.Find("<form")); 332 EXPECT_NE(WTF::kNotFound, mhtml.Find("<form"));
301 EXPECT_NE(WTF::kNotFound, mhtml.Find("<input type=3D\"text\"")); 333 EXPECT_NE(WTF::kNotFound, mhtml.Find("<input type=3D\"text\""));
302 EXPECT_NE(WTF::kNotFound, mhtml.Find("<div")); 334 EXPECT_NE(WTF::kNotFound, mhtml.Find("<div"));
303 } 335 }
304 336
305 // Regression test for crbug.com/678893, where in some cases serializing an 337 // Regression test for crbug.com/678893, where in some cases serializing an
306 // image document could cause code to pick an element from an empty container. 338 // image document could cause code to pick an element from an empty container.
307 TEST_F(WebFrameSerializerSanitizationTest, FromBrokenImageDocument) { 339 TEST_F(WebFrameSerializerSanitizationTest, FromBrokenImageDocument) {
308 String mhtml = GenerateMHTMLParts("http://www.test.com", "broken-image.png", 340 String mhtml = GenerateMHTML("http://www.test.com", "broken-image.png",
309 "image/png"); 341 "image/png", true);
jianli 2017/04/25 21:41:34 Please comment why we only need to generate body p
carlosk 2017/04/26 00:16:49 Done. I left the (currently unnecessary) check bel
342 ASSERT_FALSE(HasFailure());
310 EXPECT_TRUE(mhtml.IsEmpty()); 343 EXPECT_TRUE(mhtml.IsEmpty());
311 } 344 }
312 345
313 TEST_F(WebFrameSerializerSanitizationTest, ImageLoadedFromSrcsetForHiDPI) { 346 TEST_F(WebFrameSerializerSanitizationTest, ImageLoadedFromSrcsetForHiDPI) {
314 RegisterMockedFileURLLoad( 347 RegisterMockedFileURLLoad(
315 KURL(kParsedURLString, "http://www.test.com/1x.png"), 348 KURL(kParsedURLString, "http://www.test.com/1x.png"),
316 "frameserialization/1x.png"); 349 "frameserialization/1x.png");
317 RegisterMockedFileURLLoad( 350 RegisterMockedFileURLLoad(
318 KURL(kParsedURLString, "http://www.test.com/2x.png"), 351 KURL(kParsedURLString, "http://www.test.com/2x.png"),
319 "frameserialization/2x.png"); 352 "frameserialization/2x.png");
320 353
321 // Set high DPR in order to load image from srcset, instead of src. 354 // Set high DPR in order to load image from srcset, instead of src.
322 WebView()->SetDeviceScaleFactor(2.0f); 355 WebView()->SetDeviceScaleFactor(2.0f);
323 356
324 String mhtml = GenerateMHTMLParts("http://www.test.com", "img_srcset.html"); 357 String mhtml = GenerateMHTML("http://www.test.com", "img_srcset.html");
358 ASSERT_FALSE(HasFailure());
325 359
326 // srcset attribute should be skipped. 360 // srcset attribute should be skipped.
327 EXPECT_EQ(WTF::kNotFound, mhtml.Find("srcset=")); 361 EXPECT_EQ(WTF::kNotFound, mhtml.Find("srcset="));
328 362
329 // Width and height attributes should be set when none is present in <img>. 363 // Width and height attributes should be set when none is present in <img>.
330 EXPECT_NE(WTF::kNotFound, 364 EXPECT_NE(WTF::kNotFound,
331 mhtml.Find("id=3D\"i1\" width=3D\"6\" height=3D\"6\">")); 365 mhtml.Find("id=3D\"i1\" width=3D\"6\" height=3D\"6\">"));
332 366
333 // Height attribute should not be set if width attribute is already present in 367 // Height attribute should not be set if width attribute is already present in
334 // <img> 368 // <img>
335 EXPECT_NE(WTF::kNotFound, mhtml.Find("id=3D\"i2\" width=3D\"8\">")); 369 EXPECT_NE(WTF::kNotFound, mhtml.Find("id=3D\"i2\" width=3D\"8\">"));
336 } 370 }
337 371
338 TEST_F(WebFrameSerializerSanitizationTest, ImageLoadedFromSrcForNormalDPI) { 372 TEST_F(WebFrameSerializerSanitizationTest, ImageLoadedFromSrcForNormalDPI) {
339 RegisterMockedFileURLLoad( 373 RegisterMockedFileURLLoad(
340 KURL(kParsedURLString, "http://www.test.com/1x.png"), 374 KURL(kParsedURLString, "http://www.test.com/1x.png"),
341 "frameserialization/1x.png"); 375 "frameserialization/1x.png");
342 RegisterMockedFileURLLoad( 376 RegisterMockedFileURLLoad(
343 KURL(kParsedURLString, "http://www.test.com/2x.png"), 377 KURL(kParsedURLString, "http://www.test.com/2x.png"),
344 "frameserialization/2x.png"); 378 "frameserialization/2x.png");
345 379
346 String mhtml = GenerateMHTMLParts("http://www.test.com", "img_srcset.html"); 380 String mhtml = GenerateMHTML("http://www.test.com", "img_srcset.html");
381 ASSERT_FALSE(HasFailure());
347 382
348 // srcset attribute should be skipped. 383 // srcset attribute should be skipped.
349 EXPECT_EQ(WTF::kNotFound, mhtml.Find("srcset=")); 384 EXPECT_EQ(WTF::kNotFound, mhtml.Find("srcset="));
350 385
351 // New width and height attributes should not be set. 386 // New width and height attributes should not be set.
352 EXPECT_NE(WTF::kNotFound, mhtml.Find("id=3D\"i1\">")); 387 EXPECT_NE(WTF::kNotFound, mhtml.Find("id=3D\"i1\">"));
353 EXPECT_NE(WTF::kNotFound, mhtml.Find("id=3D\"i2\" width=3D\"8\">")); 388 EXPECT_NE(WTF::kNotFound, mhtml.Find("id=3D\"i2\" width=3D\"8\">"));
354 } 389 }
355 390
356 TEST_F(WebFrameSerializerSanitizationTest, RemovePopupOverlayIfRequested) { 391 TEST_F(WebFrameSerializerSanitizationTest, RemovePopupOverlayIfRequested) {
357 WebView()->Resize(WebSize(500, 500)); 392 WebView()->Resize(WebSize(500, 500));
358 SetRemovePopupOverlay(true); 393 SetRemovePopupOverlay(true);
359 String mhtml = GenerateMHTMLParts("http://www.test.com", "popup.html"); 394 String mhtml = GenerateMHTML("http://www.test.com", "popup.html");
395 ASSERT_FALSE(HasFailure());
360 EXPECT_EQ(WTF::kNotFound, mhtml.Find("class=3D\"overlay")); 396 EXPECT_EQ(WTF::kNotFound, mhtml.Find("class=3D\"overlay"));
361 EXPECT_EQ(WTF::kNotFound, mhtml.Find("class=3D\"modal")); 397 EXPECT_EQ(WTF::kNotFound, mhtml.Find("class=3D\"modal"));
362 } 398 }
363 399
364 TEST_F(WebFrameSerializerSanitizationTest, KeepPopupOverlayIfNotRequested) { 400 TEST_F(WebFrameSerializerSanitizationTest, KeepPopupOverlayIfNotRequested) {
365 WebView()->Resize(WebSize(500, 500)); 401 WebView()->Resize(WebSize(500, 500));
366 SetRemovePopupOverlay(false); 402 SetRemovePopupOverlay(false);
367 String mhtml = GenerateMHTMLParts("http://www.test.com", "popup.html"); 403 String mhtml = GenerateMHTML("http://www.test.com", "popup.html");
404 ASSERT_FALSE(HasFailure());
368 EXPECT_NE(WTF::kNotFound, mhtml.Find("class=3D\"overlay")); 405 EXPECT_NE(WTF::kNotFound, mhtml.Find("class=3D\"overlay"));
369 EXPECT_NE(WTF::kNotFound, mhtml.Find("class=3D\"modal")); 406 EXPECT_NE(WTF::kNotFound, mhtml.Find("class=3D\"modal"));
370 } 407 }
371 408
372 TEST_F(WebFrameSerializerSanitizationTest, RemoveElements) { 409 TEST_F(WebFrameSerializerSanitizationTest, RemoveElements) {
373 String mhtml = 410 String mhtml = GenerateMHTML("http://www.test.com", "remove_elements.html");
374 GenerateMHTMLParts("http://www.test.com", "remove_elements.html"); 411 ASSERT_FALSE(HasFailure());
375 LOG(ERROR) << mhtml;
376 412
377 EXPECT_EQ(WTF::kNotFound, mhtml.Find("<script")); 413 EXPECT_EQ(WTF::kNotFound, mhtml.Find("<script"));
378 EXPECT_EQ(WTF::kNotFound, mhtml.Find("<noscript")); 414 EXPECT_EQ(WTF::kNotFound, mhtml.Find("<noscript"));
379 415
380 // Only the meta element containing "Content-Security-Policy" is removed. 416 // Only the meta element containing "Content-Security-Policy" is removed.
381 // Other meta elements should be preserved. 417 // Other meta elements should be preserved.
382 EXPECT_EQ(WTF::kNotFound, 418 EXPECT_EQ(WTF::kNotFound,
383 mhtml.Find("<meta http-equiv=3D\"Content-Security-Policy")); 419 mhtml.Find("<meta http-equiv=3D\"Content-Security-Policy"));
384 EXPECT_NE(WTF::kNotFound, mhtml.Find("<meta name=3D\"description")); 420 EXPECT_NE(WTF::kNotFound, mhtml.Find("<meta name=3D\"description"));
385 EXPECT_NE(WTF::kNotFound, mhtml.Find("<meta http-equiv=3D\"refresh")); 421 EXPECT_NE(WTF::kNotFound, mhtml.Find("<meta http-equiv=3D\"refresh"));
386 422
387 // If an element is removed, its children should also be skipped. 423 // If an element is removed, its children should also be skipped.
388 EXPECT_EQ(WTF::kNotFound, mhtml.Find("<select")); 424 EXPECT_EQ(WTF::kNotFound, mhtml.Find("<select"));
389 EXPECT_EQ(WTF::kNotFound, mhtml.Find("<option")); 425 EXPECT_EQ(WTF::kNotFound, mhtml.Find("<option"));
390 } 426 }
391 427
392 } // namespace blink 428 } // namespace blink
OLD | NEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698