Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(456)

Side by Side Diff: telemetry/telemetry/internal/snap_page_util.py

Issue 3017573002: Make sure snap_page combined iframe serialized dom
Patch Set: Rebase & address review comments Created 3 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright (c) 2017 The Chromium Authors. All rights reserved. 1 # Copyright (c) 2017 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import os 5 import os
6 import json
7 import sys
6 8
7 from telemetry.core import util 9 from telemetry.core import util
8 from telemetry.internal.browser import browser_finder 10 from telemetry.internal.browser import browser_finder
9 11
10 12
11 def SnapPage(finder_options, url, interactive, snapshot_file): 13 def SnapPage(finder_options, url, interactive, snapshot_file):
12 """ Save the HTML snapshot of the page whose address is |url| to 14 """ Save the HTML snapshot of the page whose address is |url| to
13 |snapshot_file|. 15 |snapshot_file|.
14 """ 16 """
15 possible_browser = browser_finder.FindBrowser(finder_options) 17 possible_browser = browser_finder.FindBrowser(finder_options)
16 browser = possible_browser.Create(finder_options) 18 browser = possible_browser.Create(finder_options)
17 try: 19 try:
18 tab = browser.tabs[0] 20 tab = browser.tabs[0]
19 tab.Navigate(url) 21 tab.Navigate(url)
20 tab.WaitForDocumentReadyStateToBeComplete()
21 if interactive: 22 if interactive:
22 raw_input( 23 raw_input(
23 'Activating interactive mode. Press enter after you finish ' 24 'Activating interactive mode. Press enter after you finish '
24 "interacting with the page to snapshot the page's DOM content.") 25 "interacting with the page to snapshot the page's DOM content.")
25 with open( 26
26 os.path.join(util.GetTelemetryThirdPartyDir(), 'snap-it', 27 sys.stdout.write(
27 'HTMLSerializer.js')) as f: 28 'Snapshotting content of %s. This could take a while...\n' % url)
29 tab.WaitForDocumentReadyStateToBeComplete()
30 tab.action_runner.WaitForNetworkQuiescence()
31
32 with open(os.path.join(util.GetTelemetryThirdPartyDir(), 'snap-it',
33 'HTMLSerializer.js')) as f:
28 snapit_script = f.read() 34 snapit_script = f.read()
29 tab.ExecuteJavaScript(snapit_script) 35
30 tab.ExecuteJavaScript( 36 with open(os.path.join(util.GetTelemetryThirdPartyDir(), 'snap-it',
31 ''' 37 'popup.js')) as f:
32 var serializedDomArray; 38 dom_combining_script = f.read()
33 var htmlSerializer = new HTMLSerializer(); 39
34 htmlSerializer.processDocument(document); 40 serialized_doms = []
35 htmlSerializer.fillHolesAsync(document, function(s) { 41
36 serializedDomArray = s.html; 42 # Serialize the dom in each frame.
37 }); 43 for context_id in tab.EnableAllContexts():
38 ''') 44 tab.ExecuteJavaScript(snapit_script, context_id=context_id)
39 print 'Snapshotting content of %s. This could take a while...' % url 45 tab.ExecuteJavaScript(
40 tab.WaitForJavaScriptCondition('serializedDomArray !== undefined') 46 '''
41 serialized_dom = ''.join(tab.EvaluateJavaScript('serializedDomArray')) 47 var serializedDom;
42 snapshot_file.write(serialized_dom) 48 var htmlSerializer = new HTMLSerializer();
49 htmlSerializer.processDocument(document);
50 htmlSerializer.fillHolesAsync(document, function(s) {
51 serializedDom = s.asDict();
52 });
53 ''', context_id=context_id)
54 tab.WaitForJavaScriptCondition(
55 'serializedDom !== undefined', context_id=context_id)
56 serialized_doms.append(tab.EvaluateJavaScript(
57 'serializedDom', context_id=context_id))
58
59 # Execute the DOM-combining code in a blank page to minimize the chance of
60 # a V8 OOM.
61 tab.Navigate('about:blank')
62 tab.WaitForDocumentReadyStateToBeComplete()
63
64 # Sending all the serialized doms back to tab execution context.
65 tab.ExecuteJavaScript('var serializedDoms = [];')
66 for i in xrange(len(serialized_doms)):
67 sys.stdout.write('Processing dom of frame #%i / %i\r' %
68 (i, len(serialized_doms)))
69 sys.stdout.flush()
70 sub_dom_string = json.dumps(serialized_doms[i])
71 tab.ExecuteJavaScript('var sub_dom = "";')
72
73 # To avoid crashing the devtools connection (details in crbug.com/763119#c16),
74 # we break the JSON string into chunks, each with a maximum size of
75 # 100000 characters (100000 seems not to break the connection and makes
76 # sending data reasonably fast).
77 k = 0
78 step_size = 100000
79 while k < len(sub_dom_string):
wkorman 2017/09/27 17:15:13 Worth adding a unit test to validate the chunking
80 sub_dom_string_part = sub_dom_string[k: k + step_size]
81 k += step_size
82 tab.ExecuteJavaScript(
83 'sub_dom += {{sub_dom_string_part}};',
84 sub_dom_string_part=sub_dom_string_part)
85 tab.ExecuteJavaScript('serializedDoms.push(JSON.parse(sub_dom));')
86
87 # Combine all the doms to one HTML string.
88 tab.EvaluateJavaScript(dom_combining_script)
89 page_snapshot = tab.EvaluateJavaScript('outputHTMLString(serializedDoms);')
90
91 snapshot_file.write(page_snapshot)
43 finally: 92 finally:
44 browser.Close() 93 browser.Close()
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698