OLD | NEW |
---|---|
1 # Copyright (c) 2017 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2017 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 import os | 5 import os |
6 import json | |
7 import sys | |
6 | 8 |
7 from telemetry.core import util | 9 from telemetry.core import util |
8 from telemetry.internal.browser import browser_finder | 10 from telemetry.internal.browser import browser_finder |
9 | 11 |
10 | 12 |
11 def SnapPage(finder_options, url, interactive, snapshot_file): | 13 def SnapPage(finder_options, url, interactive, snapshot_file): |
12 """ Save the HTML snapshot of the page whose address is |url| to | 14 """ Save the HTML snapshot of the page whose address is |url| to |
13 |snapshot_file|. | 15 |snapshot_file|. |
14 """ | 16 """ |
15 possible_browser = browser_finder.FindBrowser(finder_options) | 17 possible_browser = browser_finder.FindBrowser(finder_options) |
16 browser = possible_browser.Create(finder_options) | 18 browser = possible_browser.Create(finder_options) |
17 try: | 19 try: |
18 tab = browser.tabs[0] | 20 tab = browser.tabs[0] |
19 tab.Navigate(url) | 21 tab.Navigate(url) |
20 tab.WaitForDocumentReadyStateToBeComplete() | |
21 if interactive: | 22 if interactive: |
22 raw_input( | 23 raw_input( |
23 'Activating interactive mode. Press enter after you finish ' | 24 'Activating interactive mode. Press enter after you finish ' |
24 "interacting with the page to snapshot the page's DOM content.") | 25 "interacting with the page to snapshot the page's DOM content.") |
25 with open( | 26 |
26 os.path.join(util.GetTelemetryThirdPartyDir(), 'snap-it', | 27 sys.stdout.write( |
wkorman
2017/09/20 20:11:04
Why switch to this from print? Just curious. To al... [comment truncated in export]
nednguyen
2017/09/26 13:01:34
To allow inline print with \r below
| |
27 'HTMLSerializer.js')) as f: | 28 'Snapshotting content of %s. This could take a while...\n' % url) |
29 tab.WaitForDocumentReadyStateToBeComplete() | |
30 tab.action_runner.WaitForNetworkQuiescence() | |
31 | |
32 with open(os.path.join(util.GetTelemetryThirdPartyDir(), 'snap-it', | |
33 'HTMLSerializer.js')) as f: | |
28 snapit_script = f.read() | 34 snapit_script = f.read() |
29 tab.ExecuteJavaScript(snapit_script) | 35 |
30 tab.ExecuteJavaScript( | 36 with open(os.path.join(util.GetTelemetryThirdPartyDir(), 'snap-it', |
31 ''' | 37 'popup.js')) as f: |
32 var serializedDomArray; | 38 dom_combining_script = f.read() |
33 var htmlSerializer = new HTMLSerializer(); | 39 |
34 htmlSerializer.processDocument(document); | 40 serialized_doms = [] |
35 htmlSerializer.fillHolesAsync(document, function(s) { | 41 |
36 serializedDomArray = s.html; | 42 # Serialize the dom in each frame. |
37 }); | 43 for context_id in tab.EnableAllContexts(): |
38 ''') | 44 tab.ExecuteJavaScript(snapit_script, context_id=context_id) |
39 print 'Snapshotting content of %s. This could take a while...' % url | 45 tab.ExecuteJavaScript( |
40 tab.WaitForJavaScriptCondition('serializedDomArray !== undefined') | 46 ''' |
41 serialized_dom = ''.join(tab.EvaluateJavaScript('serializedDomArray')) | 47 var serializedDom; |
42 snapshot_file.write(serialized_dom) | 48 var htmlSerializer = new HTMLSerializer(); |
49 htmlSerializer.processDocument(document); | |
50 htmlSerializer.fillHolesAsync(document, function(s) { | |
51 serializedDom = s.asDict(); | |
52 }); | |
53 ''', context_id=context_id) | |
54 tab.WaitForJavaScriptCondition( | |
55 'serializedDom !== undefined', context_id=context_id) | |
56 serialized_doms.append(tab.EvaluateJavaScript( | |
57 'serializedDom', context_id=context_id)) | |
58 | |
59 # Execute doms combining code in blank page to minimize the chance of V8 | |
60 # OOM. | |
61 tab.Navigate('about:blank') | |
62 tab.WaitForDocumentReadyStateToBeComplete() | |
63 | |
64 # Sending all the serialized doms back to tab execution context. | |
65 tab.ExecuteJavaScript('var serializedDoms = [];') | |
66 for i in xrange(len(serialized_doms)): | |
67 sys.stdout.write('Processing dom of frame #%i / %i\r' % | |
68 (i, len(serialized_doms))) | |
69 sys.stdout.flush() | |
70 sub_dom_string = json.dumps(serialized_doms[i]) | |
71 tab.ExecuteJavaScript('var sub_dom = "";') | |
72 k = 0 | |
73 step_size = 100000 | |
wkorman
2017/09/20 20:11:04
Add comment noting why we break into step_size chunks... [comment truncated in export]
nednguyen
2017/09/26 13:01:34
Done.
| |
74 while k < len(sub_dom_string): | |
75 sub_dom_string_part = sub_dom_string[k: k + step_size] | |
76 k += step_size | |
77 tab.ExecuteJavaScript( | |
78 'sub_dom += {{sub_dom_string_part}};', | |
79 sub_dom_string_part=sub_dom_string_part) | |
80 tab.ExecuteJavaScript('serializedDoms.push(JSON.parse(sub_dom));') | |
81 | |
82 # Combine all the doms to one HTML string. | |
83 tab.EvaluateJavaScript(dom_combining_script) | |
84 page_snapshot = tab.EvaluateJavaScript('outputHTMLString(serializedDoms);') | |
85 | |
86 snapshot_file.write(page_snapshot) | |
43 finally: | 87 finally: |
44 browser.Close() | 88 browser.Close() |
OLD | NEW |