Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(186)

Side by Side Diff: telemetry/telemetry/internal/snap_page_util.py

Issue 3017573002: Make sure snap_page combines the serialized DOMs of all iframes
Patch Set: . Created 3 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright (c) 2017 The Chromium Authors. All rights reserved. 1 # Copyright (c) 2017 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import os 5 import os
6 import json
7 import sys
6 8
7 from telemetry.core import util 9 from telemetry.core import util
8 from telemetry.internal.browser import browser_finder 10 from telemetry.internal.browser import browser_finder
9 11
10 12
11 def SnapPage(finder_options, url, interactive, snapshot_file): 13 def SnapPage(finder_options, url, interactive, snapshot_file):
12 """ Save the HTML snapshot of the page whose address is |url| to 14 """ Save the HTML snapshot of the page whose address is |url| to
13 |snapshot_file|. 15 |snapshot_file|.
14 """ 16 """
15 possible_browser = browser_finder.FindBrowser(finder_options) 17 possible_browser = browser_finder.FindBrowser(finder_options)
16 browser = possible_browser.Create(finder_options) 18 browser = possible_browser.Create(finder_options)
17 try: 19 try:
18 tab = browser.tabs[0] 20 tab = browser.tabs[0]
19 tab.Navigate(url) 21 tab.Navigate(url)
20 tab.WaitForDocumentReadyStateToBeComplete()
21 if interactive: 22 if interactive:
22 raw_input( 23 raw_input(
23 'Activating interactive mode. Press enter after you finish ' 24 'Activating interactive mode. Press enter after you finish '
24 "interacting with the page to snapshot the page's DOM content.") 25 "interacting with the page to snapshot the page's DOM content.")
25 with open( 26
26 os.path.join(util.GetTelemetryThirdPartyDir(), 'snap-it', 27 sys.stdout.write(
wkorman 2017/09/20 20:11:04 Why switch to this from print? Just curious. To al… [comment preview truncated]
nednguyen 2017/09/26 13:01:34 To allow printing the progress in place with \r below.
27 'HTMLSerializer.js')) as f: 28 'Snapshotting content of %s. This could take a while...\n' % url)
29 tab.WaitForDocumentReadyStateToBeComplete()
30 tab.action_runner.WaitForNetworkQuiescence()
31
32 with open(os.path.join(util.GetTelemetryThirdPartyDir(), 'snap-it',
33 'HTMLSerializer.js')) as f:
28 snapit_script = f.read() 34 snapit_script = f.read()
29 tab.ExecuteJavaScript(snapit_script) 35
30 tab.ExecuteJavaScript( 36 with open(os.path.join(util.GetTelemetryThirdPartyDir(), 'snap-it',
31 ''' 37 'popup.js')) as f:
32 var serializedDomArray; 38 dom_combining_script = f.read()
33 var htmlSerializer = new HTMLSerializer(); 39
34 htmlSerializer.processDocument(document); 40 serialized_doms = []
35 htmlSerializer.fillHolesAsync(document, function(s) { 41
36 serializedDomArray = s.html; 42 # Serialize the dom in each frame.
37 }); 43 for context_id in tab.EnableAllContexts():
38 ''') 44 tab.ExecuteJavaScript(snapit_script, context_id=context_id)
39 print 'Snapshotting content of %s. This could take a while...' % url 45 tab.ExecuteJavaScript(
40 tab.WaitForJavaScriptCondition('serializedDomArray !== undefined') 46 '''
41 serialized_dom = ''.join(tab.EvaluateJavaScript('serializedDomArray')) 47 var serializedDom;
42 snapshot_file.write(serialized_dom) 48 var htmlSerializer = new HTMLSerializer();
49 htmlSerializer.processDocument(document);
50 htmlSerializer.fillHolesAsync(document, function(s) {
51 serializedDom = s.asDict();
52 });
53 ''', context_id=context_id)
54 tab.WaitForJavaScriptCondition(
55 'serializedDom !== undefined', context_id=context_id)
56 serialized_doms.append(tab.EvaluateJavaScript(
57 'serializedDom', context_id=context_id))
58
59 # Execute doms combining code in blank page to minimize the chance of V8
60 # OOM.
61 tab.Navigate('about:blank')
62 tab.WaitForDocumentReadyStateToBeComplete()
63
64 # Sending all the serialized doms back to tab execution context.
65 tab.ExecuteJavaScript('var serializedDoms = [];')
66 for i in xrange(len(serialized_doms)):
67 sys.stdout.write('Processing dom of frame #%i / %i\r' %
68 (i, len(serialized_doms)))
69 sys.stdout.flush()
70 sub_dom_string = json.dumps(serialized_doms[i])
71 tab.ExecuteJavaScript('var sub_dom = "";')
72 k = 0
73 step_size = 100000
wkorman 2017/09/20 20:11:04 Add comment noting why we break into step_size chunks… [comment preview truncated]
nednguyen 2017/09/26 13:01:34 Done.
74 while k < len(sub_dom_string):
75 sub_dom_string_part = sub_dom_string[k: k + step_size]
76 k += step_size
77 tab.ExecuteJavaScript(
78 'sub_dom += {{sub_dom_string_part}};',
79 sub_dom_string_part=sub_dom_string_part)
80 tab.ExecuteJavaScript('serializedDoms.push(JSON.parse(sub_dom));')
81
82 # Combine all the doms to one HTML string.
83 tab.EvaluateJavaScript(dom_combining_script)
84 page_snapshot = tab.EvaluateJavaScript('outputHTMLString(serializedDoms);')
85
86 snapshot_file.write(page_snapshot)
43 finally: 87 finally:
44 browser.Close() 88 browser.Close()
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698