Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 # Copyright (c) 2017 The Chromium Authors. All rights reserved. | 1 # Copyright (c) 2017 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 import os | 5 import os |
| 6 import json | |
| 7 import sys | |
| 6 | 8 |
| 7 from telemetry.core import util | 9 from telemetry.core import util |
| 8 from telemetry.internal.browser import browser_finder | 10 from telemetry.internal.browser import browser_finder |
| 9 | 11 |
| 10 | 12 |
| 11 def SnapPage(finder_options, url, interactive, snapshot_file): | 13 def SnapPage(finder_options, url, interactive, snapshot_file): |
| 12 """ Save the HTML snapshot of the page whose address is |url| to | 14 """ Save the HTML snapshot of the page whose address is |url| to |
| 13 |snapshot_file|. | 15 |snapshot_file|. |
| 14 """ | 16 """ |
| 15 possible_browser = browser_finder.FindBrowser(finder_options) | 17 possible_browser = browser_finder.FindBrowser(finder_options) |
| 16 browser = possible_browser.Create(finder_options) | 18 browser = possible_browser.Create(finder_options) |
| 17 try: | 19 try: |
| 18 tab = browser.tabs[0] | 20 tab = browser.tabs[0] |
| 19 tab.Navigate(url) | 21 tab.Navigate(url) |
| 20 tab.WaitForDocumentReadyStateToBeComplete() | |
| 21 if interactive: | 22 if interactive: |
| 22 raw_input( | 23 raw_input( |
| 23 'Activating interactive mode. Press enter after you finish ' | 24 'Activating interactive mode. Press enter after you finish ' |
| 24 "interacting with the page to snapshot the page's DOM content.") | 25 "interacting with the page to snapshot the page's DOM content.") |
| 25 with open( | 26 |
| 26 os.path.join(util.GetTelemetryThirdPartyDir(), 'snap-it', | 27 sys.stdout.write( |
| 27 'HTMLSerializer.js')) as f: | 28 'Snapshotting content of %s. This could take a while...\n' % url) |
| 29 tab.WaitForDocumentReadyStateToBeComplete() | |
| 30 tab.action_runner.WaitForNetworkQuiescence() | |
| 31 | |
| 32 with open(os.path.join(util.GetTelemetryThirdPartyDir(), 'snap-it', | |
| 33 'HTMLSerializer.js')) as f: | |
| 28 snapit_script = f.read() | 34 snapit_script = f.read() |
| 29 tab.ExecuteJavaScript(snapit_script) | 35 |
| 30 tab.ExecuteJavaScript( | 36 with open(os.path.join(util.GetTelemetryThirdPartyDir(), 'snap-it', |
| 31 ''' | 37 'popup.js')) as f: |
| 32 var serializedDomArray; | 38 dom_combining_script = f.read() |
| 33 var htmlSerializer = new HTMLSerializer(); | 39 |
| 34 htmlSerializer.processDocument(document); | 40 serialized_doms = [] |
| 35 htmlSerializer.fillHolesAsync(document, function(s) { | 41 |
| 36 serializedDomArray = s.html; | 42 # Serialize the dom in each frame. |
| 37 }); | 43 for context_id in tab.EnableAllContexts(): |
| 38 ''') | 44 tab.ExecuteJavaScript(snapit_script, context_id=context_id) |
| 39 print 'Snapshotting content of %s. This could take a while...' % url | 45 tab.ExecuteJavaScript( |
| 40 tab.WaitForJavaScriptCondition('serializedDomArray !== undefined') | 46 ''' |
| 41 serialized_dom = ''.join(tab.EvaluateJavaScript('serializedDomArray')) | 47 var serializedDom; |
| 42 snapshot_file.write(serialized_dom) | 48 var htmlSerializer = new HTMLSerializer(); |
| 49 htmlSerializer.processDocument(document); | |
| 50 htmlSerializer.fillHolesAsync(document, function(s) { | |
| 51 serializedDom = s.asDict(); | |
| 52 }); | |
| 53 ''', context_id=context_id) | |
| 54 tab.WaitForJavaScriptCondition( | |
| 55 'serializedDom !== undefined', context_id=context_id) | |
| 56 serialized_doms.append(tab.EvaluateJavaScript( | |
| 57 'serializedDom', context_id=context_id)) | |
| 58 | |
| 59 # Execute the DOM-combining code in a blank page to minimize the chance of a | |
| 60 # V8 OOM. | |
| 61 tab.Navigate('about:blank') | |
| 62 tab.WaitForDocumentReadyStateToBeComplete() | |
| 63 | |
| 64 # Sending all the serialized doms back to tab execution context. | |
| 65 tab.ExecuteJavaScript('var serializedDoms = [];') | |
| 66 for i in xrange(len(serialized_doms)): | |
| 67 sys.stdout.write('Processing dom of frame #%i / %i\r' % | |
| 68 (i, len(serialized_doms))) | |
| 69 sys.stdout.flush() | |
| 70 sub_dom_string = json.dumps(serialized_doms[i]) | |
| 71 tab.ExecuteJavaScript('var sub_dom = "";') | |
| 72 | |
| 73 # To avoid crashing the devtools connection (details in crbug.com/763119#c16), | |
| 74 # we break the JSON string into chunks of at most 100000 characters each | |
| 75 # (100000 seems not to break the connection and still makes sending data | |
| 76 # reasonably fast). | |
| 77 k = 0 | |
| 78 step_size = 100000 | |
| 79 while k < len(sub_dom_string): | |
|
wkorman
2017/09/27 17:15:13
Worth adding a unit test to validate the chunking
| |
| 80 sub_dom_string_part = sub_dom_string[k: k + step_size] | |
| 81 k += step_size | |
| 82 tab.ExecuteJavaScript( | |
| 83 'sub_dom += {{sub_dom_string_part}};', | |
| 84 sub_dom_string_part=sub_dom_string_part) | |
| 85 tab.ExecuteJavaScript('serializedDoms.push(JSON.parse(sub_dom));') | |
| 86 | |
| 87 # Combine all the doms to one HTML string. | |
| 88 tab.EvaluateJavaScript(dom_combining_script) | |
| 89 page_snapshot = tab.EvaluateJavaScript('outputHTMLString(serializedDoms);') | |
| 90 | |
| 91 snapshot_file.write(page_snapshot) | |
| 43 finally: | 92 finally: |
| 44 browser.Close() | 93 browser.Close() |
| OLD | NEW |