Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(193)

Side by Side Diff: content/browser/download/mhtml_generation_browsertest.cc

Issue 1977303003: Adds a feature to MHTML serialization that omits subframes and subresources marked no-store. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@no-store
Patch Set: Address dcheng's comments. Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <stdint.h> 5 #include <stdint.h>
6 6
7 #include "base/bind.h" 7 #include "base/bind.h"
8 #include "base/callback.h" 8 #include "base/callback.h"
9 #include "base/files/file_path.h" 9 #include "base/files/file_path.h"
10 #include "base/files/file_util.h" 10 #include "base/files/file_util.h"
11 #include "base/files/scoped_temp_dir.h" 11 #include "base/files/scoped_temp_dir.h"
12 #include "base/macros.h" 12 #include "base/macros.h"
13 #include "base/run_loop.h" 13 #include "base/run_loop.h"
14 #include "base/strings/utf_string_conversions.h"
14 #include "content/public/browser/web_contents.h" 15 #include "content/public/browser/web_contents.h"
15 #include "content/public/common/mhtml_generation_params.h" 16 #include "content/public/common/mhtml_generation_params.h"
16 #include "content/public/test/browser_test_utils.h" 17 #include "content/public/test/browser_test_utils.h"
17 #include "content/public/test/content_browser_test.h" 18 #include "content/public/test/content_browser_test.h"
18 #include "content/public/test/content_browser_test_utils.h" 19 #include "content/public/test/content_browser_test_utils.h"
19 #include "content/public/test/test_utils.h" 20 #include "content/public/test/test_utils.h"
20 #include "content/shell/browser/shell.h" 21 #include "content/shell/browser/shell.h"
22 #include "net/base/filename_util.h"
21 #include "net/dns/mock_host_resolver.h" 23 #include "net/dns/mock_host_resolver.h"
22 #include "net/test/embedded_test_server/embedded_test_server.h" 24 #include "net/test/embedded_test_server/embedded_test_server.h"
23 #include "testing/gmock/include/gmock/gmock.h" 25 #include "testing/gmock/include/gmock/gmock.h"
24 #include "testing/gtest/include/gtest/gtest.h" 26 #include "testing/gtest/include/gtest/gtest.h"
27 #include "third_party/WebKit/public/web/WebFindOptions.h"
28 #include "third_party/WebKit/public/web/WebFrameSerializerCacheControlPolicy.h"
25 29
26 using testing::ContainsRegex; 30 using testing::ContainsRegex;
27 using testing::HasSubstr; 31 using testing::HasSubstr;
28 using testing::Not; 32 using testing::Not;
29 33
30 namespace content { 34 namespace content {
31 35
36 namespace {
37
38 // A dummy WebContentsDelegate which tracks the results of a find operation.
39 class FindTrackingDelegate : public WebContentsDelegate {
40 public:
41 FindTrackingDelegate(const std::string& search)
42 : search_(search), matches_(-1) {}
43
44 // Returns number of results.
45 int Wait(WebContents* web_contents) {
46 WebContentsDelegate* old_delegate = web_contents->GetDelegate();
47 web_contents->SetDelegate(this);
48
49 blink::WebFindOptions options;
50 options.matchCase = false;
51
52 web_contents->Find(global_request_id++, base::UTF8ToUTF16(search_),
53 options);
54 run_loop_.Run();
55
56 web_contents->SetDelegate(old_delegate);
57
58 return matches_;
59 }
60
61 void FindReply(WebContents* web_contents,
62 int request_id,
63 int number_of_matches,
64 const gfx::Rect& selection_rect,
65 int active_match_ordinal,
66 bool final_update) override {
67 matches_ = number_of_matches;
68 run_loop_.Quit();
69 }
70
71 static int global_request_id;
72
73 private:
74 std::string search_;
75 int matches_;
76 base::RunLoop run_loop_;
77
78 DISALLOW_COPY_AND_ASSIGN(FindTrackingDelegate);
79 };
80
81 // static
82 int FindTrackingDelegate::global_request_id = 0;
83
84 } // namespace
85
32 class MHTMLGenerationTest : public ContentBrowserTest { 86 class MHTMLGenerationTest : public ContentBrowserTest {
33 public: 87 public:
34 MHTMLGenerationTest() : has_mhtml_callback_run_(false), file_size_(0) {} 88 MHTMLGenerationTest() : has_mhtml_callback_run_(false), file_size_(0) {}
35 89
36 protected: 90 protected:
37 void SetUp() override { 91 void SetUp() override {
38 ASSERT_TRUE(temp_dir_.CreateUniqueTempDir()); 92 ASSERT_TRUE(temp_dir_.CreateUniqueTempDir());
39 ASSERT_TRUE(embedded_test_server()->Start()); 93 ASSERT_TRUE(embedded_test_server()->Start());
40 ContentBrowserTest::SetUp(); 94 ContentBrowserTest::SetUp();
41 } 95 }
(...skipping 16 matching lines...) Expand all
58 112
59 EXPECT_TRUE(has_mhtml_callback_run()); 113 EXPECT_TRUE(has_mhtml_callback_run());
60 } 114 }
61 115
62 int64_t ReadFileSizeFromDisk(base::FilePath path) { 116 int64_t ReadFileSizeFromDisk(base::FilePath path) {
63 int64_t file_size; 117 int64_t file_size;
64 if (!base::GetFileSize(path, &file_size)) return -1; 118 if (!base::GetFileSize(path, &file_size)) return -1;
65 return file_size; 119 return file_size;
66 } 120 }
67 121
122 void TestOriginalVsSavedPage(
123 const GURL& url,
124 const MHTMLGenerationParams params,
125 int expected_number_of_frames,
126 const std::vector<std::string>& expected_substrings,
127 const std::vector<std::string>& forbidden_substrings_in_saved_page,
128 bool skip_verification_of_original_page = false) {
129 // Navigate to the test page and verify if test expectations
130 // are met (this is mostly a sanity check - a failure to meet
131 // expectations would probably mean that there is a test bug
132 // (i.e. that we got called with wrong expected_foo argument)).
133 NavigateToURL(shell(), url);
134 if (!skip_verification_of_original_page) {
135 AssertExpectationsAboutCurrentTab(expected_number_of_frames,
136 expected_substrings,
137 std::vector<std::string>());
138 }
139
140 GenerateMHTML(params, url);
141 ASSERT_FALSE(HasFailure());
142
143 // Stop the test server (to make sure the locally saved page
144 // is self-contained / won't try to open original resources).
145 ASSERT_TRUE(embedded_test_server()->ShutdownAndWaitUntilComplete());
146
147 // Open the saved page and verify if test expectations are
148 // met (i.e. if the same expectations are met for "after"
149 // [saved version of the page] as for the "before"
150 // [the original version of the page]).
151 NavigateToURL(shell(), net::FilePathToFileURL(params.file_path));
152 AssertExpectationsAboutCurrentTab(expected_number_of_frames,
153 expected_substrings,
154 forbidden_substrings_in_saved_page);
155 }
156
157 void AssertExpectationsAboutCurrentTab(
158 int expected_number_of_frames,
159 const std::vector<std::string>& expected_substrings,
160 const std::vector<std::string>& forbidden_substrings) {
161 int actual_number_of_frames =
162 shell()->web_contents()->GetAllFrames().size();
163 EXPECT_EQ(expected_number_of_frames, actual_number_of_frames);
164
165 for (const auto& expected_substring : expected_substrings) {
166 FindTrackingDelegate delegate(expected_substring);
167 int actual_number_of_matches = delegate.Wait(shell()->web_contents());
168 EXPECT_EQ(1, actual_number_of_matches)
169 << "Verifying that \"" << expected_substring << "\" appears "
170 << "exactly once in the text of web contents of "
171 << shell()->web_contents()->GetURL().spec();
172 }
173
174 for (const auto& forbidden_substring : forbidden_substrings) {
175 FindTrackingDelegate delegate(forbidden_substring);
176 int actual_number_of_matches = delegate.Wait(shell()->web_contents());
177 EXPECT_EQ(0, actual_number_of_matches)
178 << "Verifying that \"" << forbidden_substring << "\" doesn't "
179 << "appear in the text of web contents of "
180 << shell()->web_contents()->GetURL().spec();
181 }
182 }
183
68 bool has_mhtml_callback_run() const { return has_mhtml_callback_run_; } 184 bool has_mhtml_callback_run() const { return has_mhtml_callback_run_; }
69 int64_t file_size() const { return file_size_; } 185 int64_t file_size() const { return file_size_; }
70 186
71 base::ScopedTempDir temp_dir_; 187 base::ScopedTempDir temp_dir_;
72 188
73 private: 189 private:
74 void MHTMLGenerated(base::Closure quit_closure, int64_t size) { 190 void MHTMLGenerated(base::Closure quit_closure, int64_t size) {
75 has_mhtml_callback_run_ = true; 191 has_mhtml_callback_run_ = true;
76 file_size_ = size; 192 file_size_ = size;
77 quit_closure.Run(); 193 quit_closure.Run();
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after
154 EXPECT_THAT(mhtml, Not(HasSubstr("Content-Transfer-Encoding: base64"))); 270 EXPECT_THAT(mhtml, Not(HasSubstr("Content-Transfer-Encoding: base64")));
155 EXPECT_THAT(mhtml, ContainsRegex("Content-Location:.*blank.jpg")); 271 EXPECT_THAT(mhtml, ContainsRegex("Content-Location:.*blank.jpg"));
156 } 272 }
157 273
158 IN_PROC_BROWSER_TEST_F(MHTMLGenerationTest, GenerateMHTMLIgnoreNoStore) { 274 IN_PROC_BROWSER_TEST_F(MHTMLGenerationTest, GenerateMHTMLIgnoreNoStore) {
159 base::FilePath path(temp_dir_.path()); 275 base::FilePath path(temp_dir_.path());
160 path = path.Append(FILE_PATH_LITERAL("test.mht")); 276 path = path.Append(FILE_PATH_LITERAL("test.mht"));
161 277
162 GURL url(embedded_test_server()->GetURL("/nostore.html")); 278 GURL url(embedded_test_server()->GetURL("/nostore.html"));
163 279
164 // Generate MHTML without specifying the FAIL_FOR_NO_STORE_MAIN_FRAME policy. 280 // Generate MHTML without specifying the FailForNoStoreMainFrame policy.
165 GenerateMHTML(path, url); 281 GenerateMHTML(path, url);
166 282
167 // We expect that there wasn't an error (file size -1 indicates an error.) 283 // We expect that there wasn't an error (file size -1 indicates an error.)
168 ASSERT_FALSE(HasFailure()); 284 ASSERT_FALSE(HasFailure());
169 285
170 std::string mhtml; 286 std::string mhtml;
171 ASSERT_TRUE(base::ReadFileToString(path, &mhtml)); 287 ASSERT_TRUE(base::ReadFileToString(path, &mhtml));
172 288
173 // Make sure the contents of the body are present. 289 // Make sure the contents of the body are present.
174 EXPECT_THAT(mhtml, HasSubstr("test body")); 290 EXPECT_THAT(mhtml, HasSubstr("test body"));
175 291
176 // Make sure that URL of the content is present. 292 // Make sure that URL of the content is present.
177 EXPECT_THAT(mhtml, ContainsRegex("Content-Location:.*/nostore.html")); 293 EXPECT_THAT(mhtml, ContainsRegex("Content-Location:.*/nostore.html"));
178 } 294 }
179 295
180 IN_PROC_BROWSER_TEST_F(MHTMLGenerationTest, GenerateMHTMLObeyNoStoreMainFrame) { 296 IN_PROC_BROWSER_TEST_F(MHTMLGenerationTest, GenerateMHTMLObeyNoStoreMainFrame) {
181 base::FilePath path(temp_dir_.path()); 297 base::FilePath path(temp_dir_.path());
182 path = path.Append(FILE_PATH_LITERAL("test.mht")); 298 path = path.Append(FILE_PATH_LITERAL("test.mht"));
183 299
184 GURL url(embedded_test_server()->GetURL("/nostore.html")); 300 GURL url(embedded_test_server()->GetURL("/nostore.html"));
185 301
186 // Generate MHTML, specifying the FAIL_FOR_NO_STORE_MAIN_FRAME policy. 302 // Generate MHTML, specifying the FailForNoStoreMainFrame policy.
187 MHTMLGenerationParams params(path); 303 MHTMLGenerationParams params(path);
188 params.cache_control_policy = 304 params.cache_control_policy =
189 content::MHTMLCacheControlPolicy::FAIL_FOR_NO_STORE_MAIN_FRAME; 305 blink::WebFrameSerializerCacheControlPolicy::FailForNoStoreMainFrame;
190 306
191 GenerateMHTML(params, url); 307 GenerateMHTML(params, url);
192 // We expect that there was an error (file size -1 indicates an error.) 308 // We expect that there was an error (file size -1 indicates an error.)
193 EXPECT_EQ(-1, file_size()); 309 EXPECT_EQ(-1, file_size());
194 310
195 std::string mhtml; 311 std::string mhtml;
196 ASSERT_TRUE(base::ReadFileToString(path, &mhtml)); 312 ASSERT_TRUE(base::ReadFileToString(path, &mhtml));
197 313
198 // Make sure the contents are missing. 314 // Make sure the contents are missing.
199 EXPECT_THAT(mhtml, Not(HasSubstr("test body"))); 315 EXPECT_THAT(mhtml, Not(HasSubstr("test body")));
200 } 316 }
201 317
318 IN_PROC_BROWSER_TEST_F(MHTMLGenerationTest,
319 GenerateMHTMLIgnoreNoStoreSubFrame) {
320 base::FilePath path(temp_dir_.path());
321 path = path.Append(FILE_PATH_LITERAL("test.mht"));
322
323 GURL url(embedded_test_server()->GetURL("/page_with_nostore_iframe.html"));
324
325 // Generate MHTML, specifying the FailForNoStoreMainFrame policy.
326 MHTMLGenerationParams params(path);
327 params.cache_control_policy =
328 blink::WebFrameSerializerCacheControlPolicy::FailForNoStoreMainFrame;
329
330 GenerateMHTML(params, url);
331 // We expect that there was no error (file size -1 indicates an error.)
332 EXPECT_LT(0, file_size());
333
334 std::string mhtml;
335 ASSERT_TRUE(base::ReadFileToString(path, &mhtml));
336
337 EXPECT_THAT(mhtml, HasSubstr("Main Frame"));
338 // Make sure that no-store subresources exist in this mode.
339 EXPECT_THAT(mhtml, HasSubstr("no-store test body"));
340 EXPECT_THAT(mhtml, ContainsRegex("Content-Location:.*nostore.jpg"));
341 }
342
343 IN_PROC_BROWSER_TEST_F(MHTMLGenerationTest, GenerateMHTMLObeyNoStoreSubFrame) {
344 base::FilePath path(temp_dir_.path());
345 path = path.Append(FILE_PATH_LITERAL("test.mht"));
346
347 GURL url(embedded_test_server()->GetURL("/page_with_nostore_iframe.html"));
348
349 // Generate MHTML, specifying the SkipAnyFrameOrResourceMarkedNoStore policy.
350 MHTMLGenerationParams params(path);
351 params.cache_control_policy = blink::WebFrameSerializerCacheControlPolicy::
352 SkipAnyFrameOrResourceMarkedNoStore;
353
354 GenerateMHTML(params, url);
355 // We expect that there was no error (file size -1 indicates an error.)
356 EXPECT_LT(0, file_size());
357
358 std::string mhtml;
359 ASSERT_TRUE(base::ReadFileToString(path, &mhtml));
360
361 EXPECT_THAT(mhtml, HasSubstr("Main Frame"));
362 // Make sure the contents are missing.
363 EXPECT_THAT(mhtml, Not(HasSubstr("no-store test body")));
364 // This image comes from a resource marked no-store.
365 EXPECT_THAT(mhtml, Not(ContainsRegex("Content-Location:.*nostore.jpg")));
366 }
367
368 IN_PROC_BROWSER_TEST_F(
369 MHTMLGenerationTest,
370 ViewedMHTMLContainsNoStoreContentIfNoCacheControlPolicy) {
371 // Generate MHTML without specifying any cache control policy.
372 base::FilePath path(temp_dir_.path());
373 path = path.Append(FILE_PATH_LITERAL("test.mht"));
374 MHTMLGenerationParams params(path);
375
376 // No special cache control options so we should see both frames.
377 std::vector<std::string> expectations = {
378 "Main Frame, normal headers.", "Cache-Control: no-store test body",
379 };
380 std::vector<std::string> forbidden;
381 TestOriginalVsSavedPage(
382 embedded_test_server()->GetURL("/page_with_nostore_iframe.html"), params,
383 2 /* expected number of frames */, expectations, forbidden);
384
385 std::string mhtml;
386 ASSERT_TRUE(base::ReadFileToString(params.file_path, &mhtml));
387 }
388
389 IN_PROC_BROWSER_TEST_F(MHTMLGenerationTest,
390 ViewedMHTMLDoesNotContainNoStoreContent) {
391 // Generate MHTML, specifying the SkipAnyFrameOrResourceMarkedNoStore policy.
392 base::FilePath path(temp_dir_.path());
393 path = path.Append(FILE_PATH_LITERAL("test.mht"));
394 MHTMLGenerationParams params(path);
395 params.cache_control_policy = blink::WebFrameSerializerCacheControlPolicy::
396 SkipAnyFrameOrResourceMarkedNoStore;
397
398 // No-store content is skipped, so the saved page should show the main frame's text but not the no-store frame's text.
399 std::vector<std::string> expectations = {
400 "Main Frame, normal headers.",
401 };
402 std::vector<std::string> forbidden = {
403 "Cache-Control: no-store test body",
404 };
405 TestOriginalVsSavedPage(
406 embedded_test_server()->GetURL("/page_with_nostore_iframe.html"), params,
407 2 /* expected number of frames */, expectations, forbidden);
408
409 std::string mhtml;
410 ASSERT_TRUE(base::ReadFileToString(params.file_path, &mhtml));
411 }
412
202 // Test suite that allows testing --site-per-process against cross-site frames. 413 // Test suite that allows testing --site-per-process against cross-site frames.
203 // See http://dev.chromium.org/developers/design-documents/site-isolation. 414 // See http://dev.chromium.org/developers/design-documents/site-isolation.
204 class MHTMLGenerationSitePerProcessTest : public MHTMLGenerationTest { 415 class MHTMLGenerationSitePerProcessTest : public MHTMLGenerationTest {
205 public: 416 public:
206 MHTMLGenerationSitePerProcessTest() {} 417 MHTMLGenerationSitePerProcessTest() {}
207 418
208 protected: 419 protected:
209 void SetUpCommandLine(base::CommandLine* command_line) override { 420 void SetUpCommandLine(base::CommandLine* command_line) override {
210 MHTMLGenerationTest::SetUpCommandLine(command_line); 421 MHTMLGenerationTest::SetUpCommandLine(command_line);
211 422
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
244 455
245 // Make sure that URLs of both frames are present 456 // Make sure that URLs of both frames are present
246 // (note that these are single-line regexes). 457 // (note that these are single-line regexes).
247 EXPECT_THAT( 458 EXPECT_THAT(
248 mhtml, 459 mhtml,
249 ContainsRegex("Content-Location:.*/frame_tree/page_with_one_frame.html")); 460 ContainsRegex("Content-Location:.*/frame_tree/page_with_one_frame.html"));
250 EXPECT_THAT(mhtml, ContainsRegex("Content-Location:.*/title1.html")); 461 EXPECT_THAT(mhtml, ContainsRegex("Content-Location:.*/title1.html"));
251 } 462 }
252 463
253 } // namespace content 464 } // namespace content
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698