Chromium Code Reviews

| OLD | NEW |
|---|---|
| (Empty) | |
| 1 # Copyright 2015 The Chromium Authors. All rights reserved. | |
| 2 # Use of this source code is governed by a BSD-style license that can be | |
| 3 # found in the LICENSE file. | |
| 4 import time | |
| 5 | |
| 6 from telemetry.core import browser_finder | |
| 7 from telemetry.core import browser_finder_exceptions | |
| 8 from telemetry.core import exceptions | |
| 9 from telemetry.core import util | |
| 10 | |
| 11 | |
| 12 class FastNavigationProfileExtender(object): | |
| 13 """ | |
| 14 This class creates or extends an existing profile by performing a set of tab | |
| 15 navigations in large batches. This is accomplished by opening a large number | |
| 16 of tabs, simultaneously navigating all the tabs, and then waiting for all the | |
| 17 tabs to load. This provides two benefits: | |
| 18 - Takes advantage of the high number of logical cores on modern CPUs. | |
| 19 - The total time spent waiting for navigations to time out scales linearly | |
| 20 with the number of batches, but does not scale with the size of the | |
| 21 batch. | |
|
nednguyen
2015/02/12 02:10:53
Style nits: please make this docstring and others
erikchen
2015/02/12 03:02:58
Done. I've updated all the formatting to be PEP co
| |
| 22 """ | |
| 23 def __init__(self): | |
| 24 super(FastNavigationProfileExtender, self).__init__() | |
| 25 | |
| 26 # A reference to the browser that will be performing all of the tab | |
| 27 # navigations. | |
| 28 self._browser = None | |
| 29 | |
| 30 # A static copy of the urls that this class is going to navigate to. | |
| 31 self._navigation_urls = None | |
| 32 | |
| 33 # The number of tabs to use. | |
| 34 self._NUM_TABS = 15 | |
| 35 | |
| 36 # The number of pages to load in parallel. | |
| 37 self._NUM_PARALLEL_PAGES = 15 | |
| 38 | |
| 39 # It doesn't make sense for the batch size to be larger than the number of | |
| 40 # available tabs. | |
|
nednguyen
2015/02/12 02:10:53
Style nits: rather than comments, you can do:
asse
erikchen
2015/02/12 03:02:58
Good point, done.
| |
| 41 assert(self._NUM_PARALLEL_PAGES <= self._NUM_TABS) | |
| 42 | |
| 43 # The amount of time to wait for pages to finish loading. | |
| 44 self._PAGE_LOAD_TIMEOUT_IN_SECONDS = 10 | |
| 45 | |
| 46 # The amount of time to wait for the retrieval of the URL of a tab. | |
| 47 self._TAB_URL_RETRIEVAL_TIMEOUT_IN_SECONDS = 1 | |
| 48 | |
| 49 # The amount of time to wait for a navigation to be committed. | |
| 50 self._NAVIGATION_COMMIT_WAIT_IN_SECONDS = 0.1 | |
| 51 | |
| 52 def Run(self, finder_options): | |
| 53 """ | |
| 54 |finder_options| contains the directory of the input profile, the directory | |
| 55 to place the output profile, and sufficient information to choose a specific | |
| 56 browser binary. | |
| 57 """ | |
| 58 try: | |
| 59 self._navigation_urls = self.GetUrlsToNavigate() | |
| 60 self._SetUpBrowser(finder_options) | |
| 61 self._PerformNavigations() | |
| 62 finally: | |
| 63 self._TearDownBrowser() | |
| 64 | |
| 65 def GetUrlsToNavigate(self): | |
| 66 """ | |
| 67 Intended for subclass override. Returns a list of urls to be navigated to. | |
| 68 """ | |
| 69 raise NotImplementedError() | |
| 70 | |
| 71 | |
| 72 def _GetPossibleBrowser(self, finder_options): | |
| 73 """Return a possible_browser with the given options.""" | |
| 74 possible_browser = browser_finder.FindBrowser(finder_options) | |
| 75 if not possible_browser: | |
| 76 raise browser_finder_exceptions.BrowserFinderException( | |
| 77 'No browser found.\n\nAvailable browsers:\n%s\n' % | |
| 78 '\n'.join(browser_finder.GetAllAvailableBrowserTypes(finder_options))) | |
| 79 finder_options.browser_options.browser_type = ( | |
| 80 possible_browser.browser_type) | |
| 81 | |
| 82 return possible_browser | |
| 83 | |
| 84 def _RetrieveTabUrl(self, tab): | |
| 85 """Retrieves the URL of the tab.""" | |
| 86 try: | |
| 87 return tab.EvaluateJavaScript('document.URL', | |
| 88 self._TAB_URL_RETRIEVAL_TIMEOUT_IN_SECONDS) | |
| 89 except exceptions.DevtoolsTargetCrashException: | |
| 90 return None | |
| 91 | |
| 92 def _BatchNavigateTabs(self, batch): | |
| 93 """ | |
| 94 Performs a batch of tab navigations with minimal delay. | |
| 95 | |
| 96 |batch| is a list of tuples (tab, url). | |
| 97 | |
| 98 This method returns a list of tuples (tab, initial_url). |initial_url| is | |
| 99 the url of the |tab| prior to a navigation command being sent to it. | |
| 100 """ | |
| 101 timeout_in_seconds = 0 | |
| 102 | |
| 103 queued_tabs = [] | |
| 104 for tab, url in batch: | |
| 105 initial_url = self._RetrieveTabUrl(tab) | |
| 106 | |
| 107 try: | |
| 108 tab.Navigate(url, None, timeout_in_seconds) | |
| 109 except exceptions.DevtoolsTargetCrashException: | |
| 110 # We expect a time out, and don't mind if the webpage crashes. Ignore | |
| 111 # both exceptions. | |
|
nednguyen
2015/02/12 02:10:53
Should we close crashed tabs & reopen new tabs?
erikchen
2015/02/12 02:26:07
Yes. But there's a whole can of worms that's going
nednguyen
2015/02/12 03:17:59
SGTM.
We have a filed bug for that one: https://co
| |
| 112 pass | |
| 113 | |
| 114 queued_tabs.append((tab, initial_url)) | |
| 115 return queued_tabs | |
| 116 | |
| 117 def _WaitForQueuedTabsToLoad(self, queued_tabs): | |
| 118 """ | |
| 119 Waits for all the batch navigated tabs to finish loading. | |
| 120 | |
| 121 |queued_tabs| is a list of tuples (tab, initial_url). Each tab is | |
| 122 guaranteed to have already been sent a navigation command. | |
| 123 """ | |
| 124 end_time = time.time() + self._PAGE_LOAD_TIMEOUT_IN_SECONDS | |
|
nednguyen
2015/02/12 02:10:53
Based on the implementation below, it looks like _
erikchen
2015/02/12 02:26:07
Yes. One of the major points of this class is to b
nednguyen
2015/02/12 03:17:59
Then why not timeout after each page after 1.5 sec
erikchen
2015/02/12 03:24:18
Because most pages requires longer than 1.5 second
nednguyen
2015/02/12 03:29:53
Ok, this is legit. Maybe rename _PAGE_LOAD_TIMEOUT
| |
| 125 for tab, initial_url in queued_tabs: | |
| 126 seconds_to_wait = end_time - time.time() | |
| 127 seconds_to_wait = max(0, seconds_to_wait) | |
| 128 | |
| 129 if seconds_to_wait == 0: | |
| 130 break | |
| 131 | |
| 132 # Since we don't wait any time for the tab url navigation to commit, it's | |
| 133 # possible that the tab hasn't started navigating yet. | |
| 134 current_url = self._RetrieveTabUrl(tab) | |
| 135 | |
| 136 if current_url == initial_url: | |
| 137 # If the navigation hasn't been committed yet, wait a small amount of | |
| 138 # time. Don't bother rechecking the condition, since it's also possible | |
| 139 # that the web page isn't processing javascript. | |
| 140 time.sleep(self._NAVIGATION_COMMIT_WAIT_IN_SECONDS) | |
|
nednguyen
2015/02/12 02:10:53
I think you should use
tab.WaitForNavigate(timeout
erikchen
2015/02/12 02:26:07
That doesn't work. WaitForNavigate assumes that a
nednguyen
2015/02/12 03:17:59
Interesting. The case "navigation has already fini
erikchen
2015/02/12 03:24:18
I spoke too quickly - you're right. Navigation hasn
| |
| 141 | |
| 142 try: | |
| 143 tab.WaitForDocumentReadyStateToBeComplete(seconds_to_wait) | |
| 144 except (util.TimeoutException, exceptions.DevtoolsTargetCrashException): | |
| 145 # Ignore time outs and web page crashes. | |
| 146 pass | |
| 147 | |
| 148 def _SetUpBrowser(self, finder_options): | |
| 149 """ | |
| 150 Finds the browser, starts the browser, and opens the requisite number of | |
| 151 tabs. | |
| 152 """ | |
| 153 possible_browser = self._GetPossibleBrowser(finder_options) | |
| 154 self._browser = possible_browser.Create(finder_options) | |
| 155 | |
| 156 for _ in range(self._NUM_TABS): | |
| 157 self._browser.tabs.New() | |
| 158 | |
| 159 def _PerformNavigations(self): | |
| 160 """ | |
| 161 Performs the navigations specified by |_navigation_urls| in large batches. | |
| 162 """ | |
| 163 # The index of the first url that has not yet been navigated to. | |
| 164 navigation_url_index = 0 | |
| 165 while True: | |
| 166 # Generate the next batch of navigations. | |
| 167 batch = [] | |
| 168 max_index = min(navigation_url_index + self._NUM_PARALLEL_PAGES, | |
| 169 len(self._navigation_urls)) | |
| 170 for i in range(navigation_url_index, max_index): | |
| 171 url = self._navigation_urls[i] | |
| 172 tab = self._browser.tabs[i % self._NUM_TABS] | |
| 173 batch.append((tab, url)) | |
| 174 navigation_url_index = max_index | |
| 175 | |
| 176 queued_tabs = self._BatchNavigateTabs(batch) | |
| 177 self._WaitForQueuedTabsToLoad(queued_tabs) | |
| 178 | |
| 179 if navigation_url_index == len(self._navigation_urls): | |
| 180 break | |
| 181 | |
| 182 def _TearDownBrowser(self): | |
| 183 """ | |
| 184 Teardown that is guaranteed to be executed before the instance is destroyed. | |
| 185 """ | |
| 186 if self._browser: | |
| 187 self._browser.Close() | |
| 188 self._browser = None | |
| OLD | NEW |