Index: telemetry/telemetry/internal/snap_page_util.py |
diff --git a/telemetry/telemetry/internal/snap_page_util.py b/telemetry/telemetry/internal/snap_page_util.py |
index 57210030776187a0ebe0b2ee0350d5fc817607c3..27a40fbf85b7a0be3218206827eb4f7906ceef6a 100644 |
--- a/telemetry/telemetry/internal/snap_page_util.py |
+++ b/telemetry/telemetry/internal/snap_page_util.py |
@@ -3,6 +3,8 @@ |
# found in the LICENSE file. |
import os |
+import json |
+import sys |
from telemetry.core import util |
from telemetry.internal.browser import browser_finder |
@@ -17,28 +19,70 @@ def SnapPage(finder_options, url, interactive, snapshot_file): |
try: |
tab = browser.tabs[0] |
tab.Navigate(url) |
- tab.WaitForDocumentReadyStateToBeComplete() |
if interactive: |
raw_input( |
'Activating interactive mode. Press enter after you finish ' |
"interacting with the page to snapshot the page's DOM content.") |
- with open( |
- os.path.join(util.GetTelemetryThirdPartyDir(), 'snap-it', |
- 'HTMLSerializer.js')) as f: |
+ |
+ sys.stdout.write( |
wkorman
2017/09/20 20:11:04
Why switch to this from print? Just curious. To al… [comment preview truncated]
nednguyen
2017/09/26 13:01:34
To allow inline print with \r below
|
+ 'Snapshotting content of %s. This could take a while...\n' % url) |
+ tab.WaitForDocumentReadyStateToBeComplete() |
+ tab.action_runner.WaitForNetworkQuiescence() |
+ |
+ with open(os.path.join(util.GetTelemetryThirdPartyDir(), 'snap-it', |
+ 'HTMLSerializer.js')) as f: |
snapit_script = f.read() |
- tab.ExecuteJavaScript(snapit_script) |
- tab.ExecuteJavaScript( |
- ''' |
- var serializedDomArray; |
- var htmlSerializer = new HTMLSerializer(); |
- htmlSerializer.processDocument(document); |
- htmlSerializer.fillHolesAsync(document, function(s) { |
- serializedDomArray = s.html; |
- }); |
- ''') |
- print 'Snapshotting content of %s. This could take a while...' % url |
- tab.WaitForJavaScriptCondition('serializedDomArray !== undefined') |
- serialized_dom = ''.join(tab.EvaluateJavaScript('serializedDomArray')) |
- snapshot_file.write(serialized_dom) |
+ |
+ with open(os.path.join(util.GetTelemetryThirdPartyDir(), 'snap-it', |
+ 'popup.js')) as f: |
+ dom_combining_script = f.read() |
+ |
+ serialized_doms = [] |
+ |
+ # Serialize the dom in each frame. |
+ for context_id in tab.EnableAllContexts(): |
+ tab.ExecuteJavaScript(snapit_script, context_id=context_id) |
+ tab.ExecuteJavaScript( |
+ ''' |
+ var serializedDom; |
+ var htmlSerializer = new HTMLSerializer(); |
+ htmlSerializer.processDocument(document); |
+ htmlSerializer.fillHolesAsync(document, function(s) { |
+ serializedDom = s.asDict(); |
+ }); |
+ ''', context_id=context_id) |
+ tab.WaitForJavaScriptCondition( |
+ 'serializedDom !== undefined', context_id=context_id) |
+ serialized_doms.append(tab.EvaluateJavaScript( |
+ 'serializedDom', context_id=context_id)) |
+ |
+ # Run the DOM-combining code in a blank page to minimize the chance of a V8 |
+ # out-of-memory (OOM) failure. |
+ tab.Navigate('about:blank') |
+ tab.WaitForDocumentReadyStateToBeComplete() |
+ |
+ # Send all the serialized DOMs back to the tab's execution context. |
+ tab.ExecuteJavaScript('var serializedDoms = [];') |
+ for i in xrange(len(serialized_doms)): |
+ sys.stdout.write('Processing dom of frame #%i / %i\r' % |
+ (i, len(serialized_doms))) |
+ sys.stdout.flush() |
+ sub_dom_string = json.dumps(serialized_doms[i]) |
+ tab.ExecuteJavaScript('var sub_dom = "";') |
+ k = 0 |
+ step_size = 100000 |
wkorman
2017/09/20 20:11:04
Add comment noting why we break into step_size chunks… [comment preview truncated]
nednguyen
2017/09/26 13:01:34
Done.
|
+ while k < len(sub_dom_string): |
+ sub_dom_string_part = sub_dom_string[k: k + step_size] |
+ k += step_size |
+ tab.ExecuteJavaScript( |
+ 'sub_dom += {{sub_dom_string_part}};', |
+ sub_dom_string_part=sub_dom_string_part) |
+ tab.ExecuteJavaScript('serializedDoms.push(JSON.parse(sub_dom));') |
+ |
+ # Combine all the doms to one HTML string. |
+ tab.EvaluateJavaScript(dom_combining_script) |
+ page_snapshot = tab.EvaluateJavaScript('outputHTMLString(serializedDoms);') |
+ |
+ snapshot_file.write(page_snapshot) |
finally: |
browser.Close() |