| OLD | NEW |
| 1 # Copyright 2015 The Chromium Authors. All rights reserved. | 1 # Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 import time | 4 import time |
| 5 | 5 |
| 6 from telemetry.core import browser_finder | 6 from telemetry.core import browser_finder |
| 7 from telemetry.core import browser_finder_exceptions | 7 from telemetry.core import browser_finder_exceptions |
| 8 from telemetry.core import exceptions | 8 from telemetry.core import exceptions |
| 9 from telemetry.core import platform | 9 from telemetry.core import platform |
| 10 from telemetry.core import util | 10 from telemetry.core import util |
| 11 from telemetry.core.backends.chrome_inspector import devtools_http |
| 11 | 12 |
| 12 | 13 |
| 13 class FastNavigationProfileExtender(object): | 14 class FastNavigationProfileExtender(object): |
| 14 """Extends a Chrome profile. | 15 """Extends a Chrome profile. |
| 15 | 16 |
| 16 This class creates or extends an existing profile by performing a set of tab | 17 This class creates or extends an existing profile by performing a set of tab |
| 17 navigations in large batches. This is accomplished by opening a large number | 18 navigations in large batches. This is accomplished by opening a large number |
| 18 of tabs, simultaneously navigating all the tabs, and then waiting for all the | 19 of tabs, simultaneously navigating all the tabs, and then waiting for all the |
| 19 tabs to load. This provides two benefits: | 20 tabs to load. This provides two benefits: |
| 20 - Takes advantage of the high number of logical cores on modern CPUs. | 21 - Takes advantage of the high number of logical cores on modern CPUs. |
| (...skipping 12 matching lines...) Expand all Loading... |
| 33 | 34 |
| 34 # The path of the profile that the browser will use while it's running. | 35 # The path of the profile that the browser will use while it's running. |
| 35 # This member is initialized during SetUp(). | 36 # This member is initialized during SetUp(). |
| 36 self._profile_path = None | 37 self._profile_path = None |
| 37 | 38 |
| 38 # A reference to the browser that will be performing all of the tab | 39 # A reference to the browser that will be performing all of the tab |
| 39 # navigations. | 40 # navigations. |
| 40 # This member is initialized during SetUp(). | 41 # This member is initialized during SetUp(). |
| 41 self._browser = None | 42 self._browser = None |
| 42 | 43 |
| 44 # The instance keeps a list of Tabs that can be navigated successfully. |
| 45 # This means that the Tab is not crashed, and is processing JavaScript in a |
| 46 # timely fashion. |
| 47 self._navigation_tabs = [] |
| 48 |
| 43 # The number of tabs to use. | 49 # The number of tabs to use. |
| 44 self._NUM_TABS = maximum_batch_size | 50 self._NUM_TABS = maximum_batch_size |
| 45 | 51 |
| 46 # The amount of time to wait for a batch of pages to finish loading. | 52 # The amount of time to wait for a batch of pages to finish loading. |
| 47 self._BATCH_PAGE_LOAD_TIMEOUT_IN_SECONDS = 10 | 53 self._BATCH_PAGE_LOAD_TIMEOUT_IN_SECONDS = 10 |
| 48 | 54 |
| 49 # The default amount of time to wait for the retrieval of the URL of a tab. | 55 # The default amount of time to wait for the retrieval of the URL of a tab. |
| 50 self._TAB_URL_RETRIEVAL_TIMEOUT_IN_SECONDS = 1 | 56 self._TAB_URL_RETRIEVAL_TIMEOUT_IN_SECONDS = 1 |
| 51 | 57 |
| 52 def Run(self, finder_options): | 58 def Run(self, finder_options): |
| (...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 88 implementation. | 94 implementation. |
| 89 """ | 95 """ |
| 90 self._profile_path = finder_options.output_profile_path | 96 self._profile_path = finder_options.output_profile_path |
| 91 possible_browser = self._GetPossibleBrowser(finder_options) | 97 possible_browser = self._GetPossibleBrowser(finder_options) |
| 92 | 98 |
| 93 assert possible_browser.supports_tab_control | 99 assert possible_browser.supports_tab_control |
| 94 assert (platform.GetHostPlatform().GetOSName() in | 100 assert (platform.GetHostPlatform().GetOSName() in |
| 95 ["win", "mac", "linux"]) | 101 ["win", "mac", "linux"]) |
| 96 self._browser = possible_browser.Create(finder_options) | 102 self._browser = possible_browser.Create(finder_options) |
| 97 | 103 |
| 98 while(len(self._browser.tabs) < self._NUM_TABS): | |
| 99 self._browser.tabs.New() | |
| 100 | |
| 101 def TearDown(self): | 104 def TearDown(self): |
| 102 """Teardown that is guaranteed to be executed before the instance is | 105 """Teardown that is guaranteed to be executed before the instance is |
| 103 destroyed. | 106 destroyed. |
| 104 | 107 |
| 105 Can be overridden by subclasses. Subclasses must call the super class | 108 Can be overridden by subclasses. Subclasses must call the super class |
| 106 implementation. | 109 implementation. |
| 107 """ | 110 """ |
| 108 if self._browser: | 111 if self._browser: |
| 109 self._browser.Close() | 112 self._browser.Close() |
| 110 self._browser = None | 113 self._browser = None |
| 111 | 114 |
| 112 def CleanUpAfterBatchNavigation(self): | 115 def CleanUpAfterBatchNavigation(self): |
| 113 """A hook for subclasses to perform cleanup after each batch of | 116 """A hook for subclasses to perform cleanup after each batch of |
| 114 navigations. | 117 navigations. |
| 115 | 118 |
| 116 Can be overridden by subclasses. | 119 Can be overridden by subclasses. |
| 117 """ | 120 """ |
| 118 pass | 121 pass |
| 119 | 122 |
| 120 @property | 123 @property |
| 121 def profile_path(self): | 124 def profile_path(self): |
| 122 return self._profile_path | 125 return self._profile_path |
| 123 | 126 |
| 127 def _RefreshNavigationTabs(self): |
| 128 """Updates the member self._navigation_tabs to contain self._NUM_TABS |
| 129 elements, each of which is not crashed. The crashed tabs are intentionally |
| 130 leaked, since Telemetry doesn't have a good way of killing crashed tabs. |
| 131 |
| 132 It is also possible for a tab to be stalled in an infinite JavaScript loop. |
| 133 These tabs will be in self._browser.tabs, but not in self._navigation_tabs. |
| 134 There is no way to kill these tabs, so they are also leaked. This method is |
| 135 careful to only use tabs in self._navigation_tabs, or newly created tabs. |
| 136 """ |
| 137 live_tabs = [tab for tab in self._navigation_tabs if tab.IsAlive()] |
| 138 self._navigation_tabs = live_tabs |
| 139 |
| 140 while len(self._navigation_tabs) < self._NUM_TABS: |
| 141 self._navigation_tabs.append(self._browser.tabs.New()) |
| 142 |
| 143 def _RemoveNavigationTab(self, tab): |
| 144 """Removes a tab which is no longer in a useable state from |
| 145 self._navigation_tabs. The tab is not removed from self._browser.tabs, |
| 146 since there is no guarantee that the tab can be safely removed.""" |
| 147 self._navigation_tabs.remove(tab) |
| 148 |
| 124 def _GetPossibleBrowser(self, finder_options): | 149 def _GetPossibleBrowser(self, finder_options): |
| 125 """Return a possible_browser with the given options.""" | 150 """Return a possible_browser with the given options.""" |
| 126 possible_browser = browser_finder.FindBrowser(finder_options) | 151 possible_browser = browser_finder.FindBrowser(finder_options) |
| 127 if not possible_browser: | 152 if not possible_browser: |
| 128 raise browser_finder_exceptions.BrowserFinderException( | 153 raise browser_finder_exceptions.BrowserFinderException( |
| 129 'No browser found.\n\nAvailable browsers:\n%s\n' % | 154 'No browser found.\n\nAvailable browsers:\n%s\n' % |
| 130 '\n'.join(browser_finder.GetAllAvailableBrowserTypes(finder_options))) | 155 '\n'.join(browser_finder.GetAllAvailableBrowserTypes(finder_options))) |
| 131 finder_options.browser_options.browser_type = ( | 156 finder_options.browser_options.browser_type = ( |
| 132 possible_browser.browser_type) | 157 possible_browser.browser_type) |
| 133 | 158 |
| 134 return possible_browser | 159 return possible_browser |
| 135 | 160 |
| 136 def _RetrieveTabUrl(self, tab, timeout): | 161 def _RetrieveTabUrl(self, tab, timeout): |
| 137 """Retrieves the URL of the tab.""" | 162 """Retrieves the URL of the tab.""" |
| 138 try: | 163 try: |
| 139 return tab.EvaluateJavaScript('document.URL', timeout) | 164 return tab.EvaluateJavaScript('document.URL', timeout) |
| 140 except exceptions.DevtoolsTargetCrashException: | 165 except (exceptions.DevtoolsTargetCrashException, |
| 166 devtools_http.DevToolsClientConnectionError, |
| 167 devtools_http.DevToolsClientUrlError): |
| 141 return None | 168 return None |
| 142 | 169 |
| 143 def _WaitForUrlToChange(self, tab, initial_url, timeout): | 170 def _WaitForUrlToChange(self, tab, initial_url, timeout): |
| 144 """Waits for the tab to navigate away from its initial url.""" | 171 """Waits for the tab to navigate away from its initial url.""" |
| 145 end_time = time.time() + timeout | 172 end_time = time.time() + timeout |
| 146 while True: | 173 while True: |
| 147 seconds_to_wait = end_time - time.time() | 174 seconds_to_wait = end_time - time.time() |
| 148 seconds_to_wait = max(0, seconds_to_wait) | 175 seconds_to_wait = max(0, seconds_to_wait) |
| 149 | 176 |
| 150 if seconds_to_wait == 0: | 177 if seconds_to_wait == 0: |
| (...skipping 20 matching lines...) Expand all Loading... |
| 171 """ | 198 """ |
| 172 timeout_in_seconds = 0 | 199 timeout_in_seconds = 0 |
| 173 | 200 |
| 174 queued_tabs = [] | 201 queued_tabs = [] |
| 175 for tab, url in batch: | 202 for tab, url in batch: |
| 176 initial_url = self._RetrieveTabUrl(tab, | 203 initial_url = self._RetrieveTabUrl(tab, |
| 177 self._TAB_URL_RETRIEVAL_TIMEOUT_IN_SECONDS) | 204 self._TAB_URL_RETRIEVAL_TIMEOUT_IN_SECONDS) |
| 178 | 205 |
| 179 try: | 206 try: |
| 180 tab.Navigate(url, None, timeout_in_seconds) | 207 tab.Navigate(url, None, timeout_in_seconds) |
| 181 except exceptions.DevtoolsTargetCrashException: | 208 except (exceptions.DevtoolsTargetCrashException, |
| 182 # We expect a time out, and don't mind if the webpage crashes. Ignore | 209 devtools_http.DevToolsClientConnectionError, |
| 183 # both exceptions. | 210 devtools_http.DevToolsClientUrlError): |
| 211 # We expect a time out. It's possible for other problems to arise, but |
| 212 # this method is not responsible for dealing with them. Ignore all |
| 213 # exceptions. |
| 184 pass | 214 pass |
| 185 | 215 |
| 186 queued_tabs.append((tab, initial_url)) | 216 queued_tabs.append((tab, initial_url)) |
| 187 return queued_tabs | 217 return queued_tabs |
| 188 | 218 |
| 189 def _WaitForQueuedTabsToLoad(self, queued_tabs): | 219 def _WaitForQueuedTabsToLoad(self, queued_tabs): |
| 190 """Waits for all the batch navigated tabs to finish loading. | 220 """Waits for all the batch navigated tabs to finish loading. |
| 191 | 221 |
| 192 Args: | 222 Args: |
| 193 queued_tabs: A list of tuples (tab, initial_url). Each tab is guaranteed | 223 queued_tabs: A list of tuples (tab, initial_url). Each tab is guaranteed |
| 194 to have already been sent a navigation command. | 224 to have already been sent a navigation command. |
| 195 """ | 225 """ |
| 196 end_time = time.time() + self._BATCH_PAGE_LOAD_TIMEOUT_IN_SECONDS | 226 end_time = time.time() + self._BATCH_PAGE_LOAD_TIMEOUT_IN_SECONDS |
| 197 for tab, initial_url in queued_tabs: | 227 for tab, initial_url in queued_tabs: |
| 198 seconds_to_wait = end_time - time.time() | 228 seconds_to_wait = end_time - time.time() |
| 199 seconds_to_wait = max(0, seconds_to_wait) | 229 seconds_to_wait = max(0, seconds_to_wait) |
| 200 | 230 |
| 201 if seconds_to_wait == 0: | 231 if seconds_to_wait == 0: |
| 202 break | 232 break |
| 203 | 233 |
| 204 # Since we don't wait any time for the tab url navigation to commit, it's | 234 # Since we don't wait any time for the tab url navigation to commit, it's |
| 205 # possible that the tab hasn't started navigating yet. | 235 # possible that the tab hasn't started navigating yet. |
| 206 self._WaitForUrlToChange(tab, initial_url, seconds_to_wait) | 236 self._WaitForUrlToChange(tab, initial_url, seconds_to_wait) |
| 207 | 237 |
| 208 seconds_to_wait = end_time - time.time() | 238 seconds_to_wait = end_time - time.time() |
| 209 seconds_to_wait = max(0, seconds_to_wait) | 239 seconds_to_wait = max(0, seconds_to_wait) |
| 210 | 240 |
| 211 try: | 241 try: |
| 212 tab.WaitForDocumentReadyStateToBeComplete(seconds_to_wait) | 242 tab.WaitForDocumentReadyStateToBeComplete(seconds_to_wait) |
| 213 except (util.TimeoutException, exceptions.DevtoolsTargetCrashException): | 243 except util.TimeoutException: |
| 214 # Ignore time outs and web page crashes. | 244 # Ignore time outs. |
| 215 pass | 245 pass |
| 246 except (exceptions.DevtoolsTargetCrashException, |
| 247 devtools_http.DevToolsClientConnectionError, |
| 248 devtools_http.DevToolsClientUrlError): |
| 249 # If any error occurs, remove the tab. It's probably in an |
| 250 # unrecoverable state. |
| 251 self._RemoveNavigationTab(tab) |
| 216 | 252 |
| 217 def _GetUrlsToNavigate(self, url_iterator): | 253 def _GetUrlsToNavigate(self, url_iterator): |
| 218 """Returns an array of urls to navigate to, given a url_iterator.""" | 254 """Returns an array of urls to navigate to, given a url_iterator.""" |
| 219 urls = [] | 255 urls = [] |
| 220 for _ in xrange(self._NUM_TABS): | 256 for _ in xrange(self._NUM_TABS): |
| 221 try: | 257 try: |
| 222 urls.append(url_iterator.next()) | 258 urls.append(url_iterator.next()) |
| 223 except StopIteration: | 259 except StopIteration: |
| 224 break | 260 break |
| 225 return urls | 261 return urls |
| 226 | 262 |
| 227 def _PerformNavigations(self): | 263 def _PerformNavigations(self): |
| 228 """Repeatedly fetches a batch of urls, and navigates to those urls. This | 264 """Repeatedly fetches a batch of urls, and navigates to those urls. This |
| 229 will run until an empty batch is returned, or | 265 will run until an empty batch is returned, or |
| 230 ShouldExitAfterBatchNavigation() returns True. | 266 ShouldExitAfterBatchNavigation() returns True. |
| 231 """ | 267 """ |
| 232 url_iterator = self.GetUrlIterator() | 268 url_iterator = self.GetUrlIterator() |
| 233 while True: | 269 while True: |
| 270 self._RefreshNavigationTabs() |
| 234 urls = self._GetUrlsToNavigate(url_iterator) | 271 urls = self._GetUrlsToNavigate(url_iterator) |
| 235 | 272 |
| 236 if len(urls) == 0: | 273 if len(urls) == 0: |
| 237 break | 274 break |
| 238 | 275 |
| 239 batch = [] | 276 batch = [] |
| 240 for i in range(len(urls)): | 277 for i in range(len(urls)): |
| 241 url = urls[i] | 278 url = urls[i] |
| 242 tab = self._browser.tabs[i] | 279 tab = self._navigation_tabs[i] |
| 243 batch.append((tab, url)) | 280 batch.append((tab, url)) |
| 244 | 281 |
| 245 queued_tabs = self._BatchNavigateTabs(batch) | 282 queued_tabs = self._BatchNavigateTabs(batch) |
| 246 self._WaitForQueuedTabsToLoad(queued_tabs) | 283 self._WaitForQueuedTabsToLoad(queued_tabs) |
| 247 | 284 |
| 248 self.CleanUpAfterBatchNavigation() | 285 self.CleanUpAfterBatchNavigation() |
| 249 | 286 |
| 250 if self.ShouldExitAfterBatchNavigation(): | 287 if self.ShouldExitAfterBatchNavigation(): |
| 251 break | 288 break |
| OLD | NEW |